diff options
author | Lars Wirzenius <liw@liw.fi> | 2013-03-31 18:30:27 +0100 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2013-03-31 18:30:27 +0100 |
commit | 16222f9dadce4b13d43fe61bddba9afb16390c72 (patch) | |
tree | ef72ba719a460d0f2da7fb65726dce1646bbad96 | |
download | mail-archiver-16222f9dadce4b13d43fe61bddba9afb16390c72.tar.gz |
Initial
-rwxr-xr-x | mk-test-data | 50 | ||||
-rwxr-xr-x | pieni-to-mail-archive | 117 |
2 files changed, 167 insertions, 0 deletions
diff --git a/mk-test-data b/mk-test-data
new file mode 100755
index 0000000..f28e64c
--- /dev/null
+++ b/mk-test-data
@@ -0,0 +1,55 @@
+#!/bin/sh
+#
+# Create test data: the maildirs maildirs/foo and maildirs/bar, holding
+# one mail each, and an empty mboxes directory.
+
+set -eu
+
+
+# Create directory $1 with the cur, tmp and new subdirectories.
+mkmaildir()
+{
+    mkdir "$1" "$1/cur" "$1/tmp" "$1/new"
+}
+
+# Save stdin as a mail in $1/new, named after the mail's MD5 checksum.
+mkmail()
+{
+    cat > "$1/tmp/newmail"
+    local sum=$(md5sum "$1/tmp/newmail" | awk '{ print $1 }')
+    mv "$1/tmp/newmail" "$1/new/$sum"
+}
+
+mkdir maildirs mboxes
+mkmaildir maildirs/foo
+mkmaildir maildirs/bar
+
+mkmail maildirs/foo <<EOF
+From: foo
+To: bar
+Subject: yo
+Received: from example.com (lists.example.com [10.0.0.1])
+        (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits))
+        (No client certificate requested)
+        by pieni.net (Postfix) with ESMTPS id E2ACD23344
+        for <liw@liw.fi>; Mon, 7 Jan 2013 09:49:29 +0100 (CET)
+Date: Mon, 7 Jan 2013 09:49:29 +0100 (CET)
+
+Hello, there.
+EOF
+
+
+mkmail maildirs/bar <<EOF
+From: foo
+To: bar
+Subject: plugh
+Received: from example.com (lists.example.com [10.0.0.1])
+        (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits))
+        (No client certificate requested)
+        by pieni.net (Postfix) with ESMTPS id E2ACD23344
+        for <liw@liw.fi>; Mon, 7 Jan 2012 09:49:29 +0100 (CET)
+Date: Mon, 7 Jan 2012 09:49:29 +0100 (CET)
+
+Hi.
+EOF
+
diff --git a/pieni-to-mail-archive b/pieni-to-mail-archive
new file mode 100755
index 0000000..97a0a4c
--- /dev/null
+++ b/pieni-to-mail-archive
@@ -0,0 +1,154 @@
+#!/usr/bin/python
+#
+# Archive the mails in the maildirs named on the command line into mbox
+# files, one per year, in the directory given by the mbox-dir setting.
+
+
+import cliapp
+import email.parser
+import glob
+import mailbox
+import os
+import re
+
+
+# Date at the end of a Received: header. [\s\S] rather than "." lets the
+# text between "from " and the "; " before the date span the line breaks
+# of a folded header.
+received_date = re.compile(
+    r'^from [\s\S]*?; (Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s+'
+    r'(?P<day>\d+) '
+    r'(?P<mon>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) '
+    r'(?P<year>\d+)',
+    re.MULTILINE)
+
+# Date at the start of a Date: header; the day of the week is optional.
+date_date = re.compile(
+    r'^((Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s+)?'
+    r'(?P<day>\d+) '
+    r'(?P<mon>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) '
+    r'(?P<year>\d+)')
+
+# Month abbreviation, as matched by the "mon" group, to month number.
+montab = {
+    'Jan': 1,
+    'Feb': 2,
+    'Mar': 3,
+    'Apr': 4,
+    'May': 5,
+    'Jun': 6,
+    'Jul': 7,
+    'Aug': 8,
+    'Sep': 9,
+    'Oct': 10,
+    'Nov': 11,
+    'Dec': 12,
+}
+
+
+def parse_ymd(pattern, text):
+    '''Return (year, month, day) from text, or (0, 0, 0) if no match.
+
+    pattern is received_date or date_date; both name the groups
+    year, mon and day.
+
+    '''
+    m = pattern.match(text)
+    if m is None:
+        return 0, 0, 0
+    year = int(m.group('year'))
+    mon = montab[m.group('mon')]
+    day = int(m.group('day'))
+    return year, mon, day
+
+
+class PieniToMailArchive(cliapp.Application):
+
+    '''Append mails from maildirs to per-year mbox files.'''
+
+    def add_settings(self):
+        '''Declare the mbox-dir setting (default: mboxes).'''
+        self.settings.string(
+            ['mbox-dir'],
+            'where to put archived mail mboxes',
+            default='mboxes')
+
+    def setup(self):
+        '''Start with no mbox open.'''
+        self.mbox = None
+        self.mbox_filename = None
+
+    def cleanup(self):
+        '''Close the mbox that is still open, if any.'''
+        print('closing')
+        if self.mbox is not None:
+            self.mbox.close()
+
+    def process_args(self, args):
+        '''Archive every maildir named in args.'''
+        # FIXME: Rsync from pieni here.
+
+        for maildir in args:
+            print('processing: %s' % maildir)
+            self.archive_mails_in_maildir(maildir)
+            self.remove_maildir_if_empty(maildir)
+
+    def archive_mails_in_maildir(self, maildir):
+        '''Add each mail in maildir to the mbox for the mail's year.
+
+        The mbox is <mbox-dir>/YYYY.mbox, where the year comes from
+        date_of (0000 when no date was found). A mail is skipped, with
+        a warning, when files named YYYY.mbox.* exist.
+
+        '''
+        md = mailbox.Maildir(maildir, factory=None)
+        for msg in md.itervalues():
+            year = self.date_of(msg)[0]
+
+            filename = os.path.join(
+                self.settings['mbox-dir'], '%04d.mbox' % year)
+            existing = glob.glob(filename + '.*')
+            if existing:
+                # End with a newline so warnings do not run together.
+                self.output.write(
+                    'WARNING: %s already has compressed(?) versions: %s\n' %
+                    (filename, ' '.join(existing)))
+                continue
+
+            # Keep one mbox open and switch only when the year changes.
+            if filename != self.mbox_filename:
+                if self.mbox is not None:
+                    self.mbox.close()
+                self.mbox = mailbox.mbox(filename, create=True)
+                self.mbox_filename = filename
+            self.mbox.add(msg)
+
+    def date_of(self, msg):
+        '''Return (year, month, day) for msg, or (0, 0, 0) if unknown.
+
+        The first Received header is tried first, then the Date header.
+        Years 70-99 are taken to mean 1970-1999; any year before 1988
+        is treated as unknown.
+
+        '''
+        year = mon = day = 0
+
+        rs = msg.get_all('Received')
+        if rs:
+            year, mon, day = parse_ymd(received_date, str(rs[0]))
+
+        if year == 0 and 'Date' in msg:
+            year, mon, day = parse_ymd(date_date, str(msg['Date']))
+
+        if 70 <= year < 100:
+            year += 1900
+        if year < 1988:
+            year = mon = day = 0
+        return year, mon, day
+
+    def remove_maildir_if_empty(self, maildir):
+        '''Not implemented yet; does nothing.'''
+        pass
+
+
+PieniToMailArchive().run()