#!/usr/bin/python
"""Archive mails from maildirs into one mbox file per year.

Optionally rsyncs new maildirs from pieni.net first, then moves every
message found in each maildir into ``<mbox-dir>/<YYYY>.mbox`` and removes
maildirs that end up empty.
"""

import email.parser
import glob
import mailbox
import os
import re
import shutil

import cliapp


# Date that follows the last "; <weekday>," in a Received header value that
# starts with "from" (the leading greedy group may span folded lines).
received_date = re.compile(
    r'^from (.|\n)*; (Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s+'
    r'(?P<day>\d+) '
    r'(?P<mon>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) '
    r'(?P<year>\d+)',
    re.MULTILINE)

# Date at the start of a Date header value; the weekday is optional.
date_date = re.compile(
    r'^((Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s+)?'
    r'(?P<day>\d+) '
    r'(?P<mon>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) '
    r'(?P<year>\d+)')

# Month abbreviation -> month number.
montab = {
    'Jan': 1,
    'Feb': 2,
    'Mar': 3,
    'Apr': 4,
    'May': 5,
    'Jun': 6,
    'Jul': 7,
    'Aug': 8,
    'Sep': 9,
    'Oct': 10,
    'Nov': 11,
    'Dec': 12,
}


def _ymd_from_match(match):
    """Return (year, mon, day) as ints from a match of one of the date regexes."""
    return (int(match.group('year')),
            montab[match.group('mon')],
            int(match.group('day')))


class PieniToMailArchive(cliapp.Application):

    """Command line application that files maildir messages into yearly mboxes."""

    def add_settings(self):
        """Declare the command line / configuration settings."""
        self.settings.boolean(
            ['rsync-from-pieni'],
            'rsync new archived mails from pieni.net?')
        self.settings.string(
            ['maildirs-dir'],
            'where to put maildirs from pieni.net',
            default='maildirs')
        self.settings.string(
            ['mbox-dir'],
            'where to put archived mail mboxes',
            default='mboxes')

    def setup(self):
        """Start with no mbox open."""
        # The currently open output mbox and its filename; reused across
        # messages (and maildirs) for as long as the target year stays the same.
        self.mbox = None
        self.mbox_filename = None

    def cleanup(self):
        """Close the currently open mbox, if any."""
        self.output.write('closing\n')
        if self.mbox is not None:
            self.mbox.close()

    def process_args(self, args):
        """Optionally rsync, then archive every maildir under maildirs-dir.

        ``args`` is not used.
        """
        if self.settings['rsync-from-pieni']:
            self.rsync_from_pieni()

        maildirs = self.settings['maildirs-dir']
        # The trailing slash in the pattern restricts the glob to directories.
        for dirname in sorted(glob.glob(maildirs + '/*/')):
            if self.is_maildir(dirname):
                self.output.write('processing: %s\n' % dirname)
                self.archive_mails_in_maildir(dirname)
                self.remove_maildir_if_empty(dirname)
            else:
                self.output.write('SKIPPING (not a maildir): %s\n' % dirname)

    def is_maildir(self, dirname):
        """Return True if dirname contains 'cur', 'new' and 'tmp' entries."""
        subdirs = ['cur', 'new', 'tmp']
        return all(os.path.exists(os.path.join(dirname, x)) for x in subdirs)

    def rsync_from_pieni(self):
        """Move backed-up mail from pieni.net into maildirs-dir using rsync."""
        self.output.write('rsyncing from pieni.net\n')
        cliapp.runcmd(
            ['rsync', '-av', '--remove-source-files',
             'pieni.net:backups/mail/.',
             self.settings['maildirs-dir'] + '/.'],
            stdout=self.output)

    def archive_mails_in_maildir(self, maildir):
        """Move each message in maildir to the mbox for the message's year.

        Messages whose target mbox has sibling files named ``<mbox>.*`` are
        left in the maildir and a warning is written instead.
        """
        md = mailbox.Maildir(maildir, factory=None)
        for key, msg in md.iteritems():
            y, m, d = self.date_of(msg)
            filename = os.path.join(self.settings['mbox-dir'],
                                    '%04d.mbox' % y)

            existing = glob.glob(filename + '.*')
            if existing:
                self.output.write(
                    'WARNING: %s already has compressed(?) versions: %s\n' %
                    (filename, ' '.join(existing)))
                continue

            # Only switch mboxes when the target file changes, so a run of
            # messages from the same year shares one open mbox.
            if filename != self.mbox_filename:
                if self.mbox is not None:
                    self.mbox.close()
                self.mbox = mailbox.mbox(filename, create=True)
                self.mbox_filename = filename

            self.mbox.add(msg)
            md.remove(key)
        md.close()

    def date_of(self, msg):
        """Return (year, mon, day) for msg, or (0, 0, 0) if no usable date.

        The first Received header is tried first; the Date header is used
        only when that did not yield a year. Years 70..99 are taken to mean
        19xx, and any resulting year before 1988 is treated as unusable.
        """
        year = mon = day = 0

        rs = msg.get_all('Received')
        if rs:
            m = received_date.match(str(rs[0]))
            if m:
                year, mon, day = _ymd_from_match(m)

        if year == 0 and 'Date' in msg:
            m = date_date.match(str(msg['Date']))
            if m:
                year, mon, day = _ymd_from_match(m)

        if 70 <= year < 100:
            year += 1900

        if year < 1988:
            year = mon = day = 0

        return year, mon, day

    def remove_maildir_if_empty(self, maildir):
        """Delete the maildir directory tree if it holds no messages."""
        md = mailbox.Maildir(maildir, factory=None)
        remove = len(md) == 0
        md.close()
        if remove:
            shutil.rmtree(maildir)


if __name__ == '__main__':
    PieniToMailArchive().run()