#!/usr/bin/python2.5
# splitmboxdaily - split an mbox into new ones, one per day
# Copyright 2008-2010  Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Provenance: added in commit 2c3e5c0ab2eade34bc69588c864eb7d73db8619f
# ("Add splitmboxdaily.", Lars Wirzenius, Tue, 4 May 2010 18:39:54 +1200).

"""Split mbox files into one mbox per day.

Usage: splitmboxdaily MBOX...

Every message of every mbox named on the command line is appended to
YEAR/YEAR-MM-DD.mbox (relative to the current directory), where the date
is taken from the start of the message's Date header.  Messages whose
Date header is missing or does not match the expected layout go to
unknown/unknown-00-00.mbox.
"""

import mailbox
import os
import re
import sys
import time

# Matches the leading "[Weekday, ]DD Mon YY[YY] " part of a Date header.
pat = re.compile(r"((Mon|Tue|Wed|Thu|Fri|Sat|Sun), )?"
                 r" ?(?P<day>\d\d?) "
                 r"(?P<mon>(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)) "
                 r"(?P<year>\d\d(\d\d)?) ")

# Month abbreviation -> zero-padded month number.
montab = {
    "Jan": "01",
    "Feb": "02",
    "Mar": "03",
    "Apr": "04",
    "May": "05",
    "Jun": "06",
    "Jul": "07",
    "Aug": "08",
    "Sep": "09",
    "Oct": "10",
    "Nov": "11",
    "Dec": "12",
}


def daily_mbox_name(date_header):
    """Return the relative output path for a message's Date header.

    date_header is the raw header value; it may be None when the
    message has no Date header.  The result has the form
    "YYYY/YYYY-MM-DD.mbox", or "unknown/unknown-00-00.mbox" when the
    header is missing or does not match ``pat``.
    """
    # Under Python 3 a header holding raw 8-bit data is returned as an
    # email.header.Header object instead of a str; coerce so that
    # pat.match() always receives a string.
    m = pat.match(str(date_header or ""))
    if m:
        day = "%02d" % int(m.group("day"))
        mon = montab[m.group("mon")]
        year = m.group("year")
        if len(year) == 2:
            # Obsolete two-digit years, expanded per RFC 5322 section 4.3:
            # 00-49 mean 2000-2049 and 50-99 mean 1950-1999.
            if int(year) < 50:
                year = "20" + year
            else:
                year = "19" + year
    else:
        year = "unknown"
        mon = "00"
        day = "00"
    return "%s/%s-%s-%s.mbox" % (year, year, mon, day)


def split_mbox(filename):
    """Append every message in the mbox ``filename`` to its daily mbox.

    Year directories are created in the current directory as needed.
    A progress line is rewritten on stdout every ten messages and after
    the last one, followed by a newline when the file is done.

    Raises mailbox.NoSuchMailboxError if ``filename`` does not exist.
    """
    # create=False: a mistyped input name must not leave an empty mbox
    # file behind.
    mbox = mailbox.mbox(filename, create=False)
    outbox = {}
    try:
        keys = sorted(mbox.keys())
        total = len(keys)
        count = 0
        start = time.time()
        for key in keys:
            msg = mbox[key]
            name = daily_mbox_name(msg["Date"])

            year_dir = os.path.dirname(name)
            if not os.path.isdir(year_dir):
                os.mkdir(year_dir)
            if name not in outbox:
                outbox[name] = mailbox.mbox(name)
            outbox[name].add(msg)

            count += 1
            if (count % 10) == 0 or count == total:
                sys.stdout.write("\r%5.1f done" % (100.0 * count / total))
                # Avoid dividing by zero when the clock has not advanced.
                duration = (time.time() - start) or 1.0
                sys.stdout.write(" %6.1f msgs/s" % (count / duration))
                sys.stdout.write(" %s" % filename)
                sys.stdout.flush()
    finally:
        # close() flushes pending messages to disk; do it even when an
        # error interrupts the loop so already-added messages are kept.
        for box in outbox.values():
            box.close()
        mbox.close()

    # Finish the "\r"-rewritten progress line.
    sys.stdout.write("\n")


def main(argv=None):
    """Split every mbox named in argv[1:]; return the exit status.

    argv defaults to sys.argv.  Returns 0 on success, 1 if any input
    file did not exist (the remaining files are still processed).
    """
    if argv is None:
        argv = sys.argv
    status = 0
    for filename in argv[1:]:
        try:
            split_mbox(filename)
        except mailbox.NoSuchMailboxError:
            sys.stderr.write("%s: no such mbox file: %s\n"
                             % (argv[0], filename))
            status = 1
    return status


if __name__ == "__main__":
    sys.exit(main())