summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lars Wirzenius <liw@liw.fi> 2013-03-31 18:30:27 +0100
committer: Lars Wirzenius <liw@liw.fi> 2013-03-31 18:30:27 +0100
commit: 16222f9dadce4b13d43fe61bddba9afb16390c72 (patch)
tree: ef72ba719a460d0f2da7fb65726dce1646bbad96
download: mail-archiver-16222f9dadce4b13d43fe61bddba9afb16390c72.tar.gz
Initial
-rwxr-xr-x mk-test-data 50
-rwxr-xr-x pieni-to-mail-archive 117
2 files changed, 167 insertions, 0 deletions
diff --git a/mk-test-data b/mk-test-data
new file mode 100755
index 0000000..f28e64c
--- /dev/null
+++ b/mk-test-data
@@ -0,0 +1,50 @@
+#!/bin/sh
+
+set -eu
+
+
+mkmaildir()
+{
+ mkdir "$1" "$1/cur" "$1/tmp" "$1/new"
+}
+
+mkmail()
+{
+ cat > "$1/tmp/newmail"
+ local sum=$(md5sum "$1/tmp/newmail" | awk '{ print $1 }')
+ mv "$1/tmp/newmail" "$1/new/$sum"
+}
+
+mkdir maildirs mboxes
+mkmaildir maildirs/foo
+mkmaildir maildirs/bar
+
+mkmail maildirs/foo <<EOF
+From: foo
+To: bar
+Subject: yo
+Received: from example.com (lists.example.com [10.0.0.1])
+ (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits))
+ (No client certificate requested)
+ by pieni.net (Postfix) with ESMTPS id E2ACD23344
+ for <liw@liw.fi>; Mon, 7 Jan 2013 09:49:29 +0100 (CET)
+Date: Mon, 7 Jan 2013 09:49:29 +0100 (CET)
+
+Hello, there.
+EOF
+
+
+mkmail maildirs/bar <<EOF
+From: foo
+To: bar
+Subject: plugh
+Received: from example.com (lists.example.com [10.0.0.1])
+ (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits))
+ (No client certificate requested)
+ by pieni.net (Postfix) with ESMTPS id E2ACD23344
+ for <liw@liw.fi>; Mon, 7 Jan 2012 09:49:29 +0100 (CET)
+Date: Mon, 7 Jan 2012 09:49:29 +0100 (CET)
+
+Hi.
+EOF
+
diff --git a/pieni-to-mail-archive b/pieni-to-mail-archive
new file mode 100755
index 0000000..97a0a4c
--- /dev/null
+++ b/pieni-to-mail-archive
@@ -0,0 +1,117 @@
+#!/usr/bin/python
+
+
+import cliapp
+import email.parser
+import glob
+import mailbox
+import os
+import re
+
+
+received_date = re.compile(
+ r'^from (|\n)*; (Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s+'
+ r'(?P<day>\d+) '
+ r'(?P<mon>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) '
+ r'(?P<year>\d+)',
+ re.MULTILINE)
+
+# Matches the start of a Date header value such as
+# "Mon, 7 Jan 2013 09:49:29 +0100 (CET)". The weekday prefix is optional.
+# Named groups: day, mon (English three-letter abbreviation), year.
+# Exactly one space is required between day, month and year.
+date_date = re.compile(
+ r'^((Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s+)?'
+ r'(?P<day>\d+) '
+ r'(?P<mon>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) '
+ r'(?P<year>\d+)')
+
+montab = {
+ 'Jan': 1,
+ 'Feb': 2,
+ 'Mar': 3,
+ 'Apr': 4,
+ 'May': 5,
+ 'Jun': 6,
+ 'Jul': 7,
+ 'Aug': 8,
+ 'Sep': 9,
+ 'Oct': 10,
+ 'Nov': 11,
+ 'Dec': 12,
+}
+
+
+class PieniToMailArchive(cliapp.Application):
+
+ def add_settings(self):
+ self.settings.string(
+ ['mbox-dir'],
+ 'where to put archived mail mboxes',
+ default='mboxes')
+
+ def setup(self):
+ self.mbox = None
+ self.mbox_filename = None
+
+ def cleanup(self):
+ print 'closing'
+ if self.mbox != None:
+ self.mbox.close()
+
+ def process_args(self, args):
+ # FIXME: Rsync from pieni here.
+
+ for maildir in args:
+ print 'processing:', maildir
+ self.archive_mails_in_maildir(maildir)
+ self.remove_maildir_if_empty(maildir)
+
+ def archive_mails_in_maildir(self, maildir):
+ md = mailbox.Maildir(maildir, factory=None)
+ parser = email.parser.Parser()
+ for msg in md.itervalues():
+ y, m, d = self.date_of(msg)
+
+ filename = os.path.join(self.settings['mbox-dir'], '%04d.mbox' % y)
+ existing = glob.glob(filename + '.*')
+ if existing:
+ self.output.write(
+ 'WARNING: %s already has compressed(?) versions: %s' %
+ (filename, ' '.join(existing)))
+ continue
+
+ if filename != self.mbox_filename:
+ if self.mbox is not None:
+ self.mbox.close()
+ self.mbox = mailbox.mbox(filename, create=True)
+ self.mbox_filename = filename
+ self.mbox.add(msg)
+
+ def date_of(self, msg):
+ year = mon = day = 0
+
+ rs = msg.get_all('Received')
+ if rs:
+ r = str(rs[0])
+ m = received_date.match(r)
+ if m:
+ day = int(m.group('day'))
+ mon = montab[m.group('mon')]
+ year = int(m.group('year'))
+
+ if year is 0 and 'Date' in msg:
+ s = str(msg['Date'])
+ m = date_date.match(s)
+ if m:
+ day = int(m.group('day'))
+ mon = montab[m.group('mon')]
+ year = int(m.group('year'))
+
+ if 70 <= year < 100:
+ year += 1900
+ if year < 1988:
+ year = mon = day = 0
+ return year, mon, day
+
+ def remove_maildir_if_empty(self, maildir):
+ pass
+
+
+PieniToMailArchive().run()