summaryrefslogtreecommitdiff
path: root/pieni-to-mail-archive
blob: 97a0a4cd4065e952fc5f8976ce66c2d06ec9f77d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/python


import cliapp
import email.parser
import glob
import mailbox
import os
import re


# Matches the value of a Received header that starts with "from" and
# captures the day, month abbreviation and year of the timestamp that
# follows the semicolon.
#
# Everything between "from " and the semicolon is skipped with
# (.|\n)* so that folded (multi-line) header values are handled; the
# previous pattern had (|\n)* there, which could not consume any
# ordinary character and therefore never matched a real header.
# Whitespace after the semicolon is matched with \s+ because the
# timestamp may be folded onto a continuation line of its own.
received_date = re.compile(
    r'^from (.|\n)*;\s+(Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s+'
        r'(?P<day>\d+) '
        r'(?P<mon>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) '
        r'(?P<year>\d+)',
    re.MULTILINE)

# Matches the beginning of a Date header value: an optional weekday
# followed by day of month, English month abbreviation and year.  Only
# the named groups (day, mon, year) are meant to be read by callers.
date_date = re.compile(''.join([
    r'^((Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s+)?',
    r'(?P<day>\d+) ',
    r'(?P<mon>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) ',
    r'(?P<year>\d+)',
]))

# Month number (1-12) for each English three-letter month abbreviation,
# as captured by the 'mon' group of the date patterns.
montab = dict(
    (abbreviation, number)
    for number, abbreviation in enumerate(
        ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
        1))


class PieniToMailArchive(cliapp.Application):

    '''Archive the mails found in maildirs into per-year mbox files.

    Every maildir named on the command line is read and each message
    in it is appended to <mbox-dir>/<YYYY>.mbox.  The year is taken
    from the message's first Received header or, failing that, from
    its Date header; messages without a usable date end up in
    0000.mbox.

    '''

    def add_settings(self):
        '''Declare the mbox-dir setting (directory for the mboxes).'''
        self.settings.string(
            ['mbox-dir'],
            'where to put archived mail mboxes',
            default='mboxes')

    def setup(self):
        '''Initialise the state used to keep one mbox open at a time.'''
        # The mbox written to most recently stays open so that runs of
        # messages from the same year do not reopen the file each time.
        self.mbox = None
        self.mbox_filename = None

    def cleanup(self):
        '''Close the mbox that is still open, if there is one.'''
        # A single-argument print call behaves the same on Python 2
        # and Python 3.
        print('closing')
        if self.mbox is not None:
            self.mbox.close()

    def process_args(self, args):
        '''Archive each maildir given in args, in order.'''
        # FIXME: Rsync from pieni here.

        for maildir in args:
            print('processing: %s' % maildir)
            self.archive_mails_in_maildir(maildir)
            self.remove_maildir_if_empty(maildir)

    def archive_mails_in_maildir(self, maildir):
        '''Append every message in maildir to the mbox of its year.

        Messages whose target mbox already has a sibling file named
        <mbox>.* (for example a compressed copy) are skipped with a
        warning instead of being archived.  Messages are not removed
        from the maildir.

        '''
        md = mailbox.Maildir(maildir, factory=None)
        for msg in md.itervalues():
            year = self.date_of(msg)[0]

            filename = os.path.join(
                self.settings['mbox-dir'], '%04d.mbox' % year)
            existing = glob.glob(filename + '.*')
            if existing:
                # Terminate the warning with a newline so consecutive
                # warnings do not run together on one line.
                self.output.write(
                    'WARNING: %s already has compressed(?) versions: %s\n' %
                        (filename, ' '.join(existing)))
                continue

            # Switch mboxes only when the target file changes.
            if filename != self.mbox_filename:
                if self.mbox is not None:
                    self.mbox.close()
                self.mbox = mailbox.mbox(filename, create=True)
                self.mbox_filename = filename
            self.mbox.add(msg)

    def date_of(self, msg):
        '''Return (year, month, day) for msg, or (0, 0, 0) if unknown.

        The first Received header is tried first; the Date header is
        used only when that yields no year.  Two-digit years from 70
        to 99 are read as 1970-1999, and any resulting year before
        1988 is treated as unusable.

        '''
        year = mon = day = 0

        rs = msg.get_all('Received')
        if rs:
            m = received_date.match(str(rs[0]))
            if m:
                year, mon, day = self._ymd_from_match(m)

        # Compare by value: identity comparison against an int literal
        # only works by accident of the interpreter's small-int cache.
        if year == 0 and 'Date' in msg:
            m = date_date.match(str(msg['Date']))
            if m:
                year, mon, day = self._ymd_from_match(m)

        if 70 <= year < 100:
            year += 1900
        if year < 1988:
            year = mon = day = 0
        return year, mon, day

    def _ymd_from_match(self, m):
        '''Convert a date pattern match into (year, month, day) ints.'''
        return (int(m.group('year')),
                montab[m.group('mon')],
                int(m.group('day')))

    def remove_maildir_if_empty(self, maildir):
        '''Placeholder: removal of empty maildirs is not implemented.'''
        pass


# Start the application only when this file is executed as a script,
# so that loading it as a module has no side effects.
if __name__ == '__main__':
    PieniToMailArchive().run()