#!/usr/bin/python2.5
# splitmboxdaily - split an mbox into new ones, one per day
# Copyright 2008-2010  Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Usage: splitmboxdaily MBOX...
#
# Every message of every mbox named on the command line is appended to
# YEAR/YEAR-MON-DAY.mbox (relative to the current directory), where the
# date is taken from the message's Date header. Messages whose Date
# header cannot be parsed go to unknown/unknown-00-00.mbox.

import mailbox
import os
import re
import sys
import time


# Matches the start of a Date header: an optional weekday, then day of
# month, English month abbreviation and a two- or four-digit year. The
# year must be followed by a space (normally the one before the time).
pat = re.compile(r"((Mon|Tue|Wed|Thu|Fri|Sat|Sun), )?"
                 r" ?(?P<day>\d\d?) "
                 r"(?P<mon>(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)) "
                 r"(?P<year>\d\d(\d\d)?) ")

# Month abbreviation -> zero-padded month number.
montab = {
    "Jan": "01",
    "Feb": "02",
    "Mar": "03",
    "Apr": "04",
    "May": "05",
    "Jun": "06",
    "Jul": "07",
    "Aug": "08",
    "Sep": "09",
    "Oct": "10",
    "Nov": "11",
    "Dec": "12",
}


def date_parts(date_header):
    """Return (year, mon, day) strings parsed from a Date header value.

    date_header may be None or empty. When it does not match `pat` the
    result is ("unknown", "00", "00"). Two-digit years below 10 get a
    "20" prefix and those from 88 to 99 get a "19" prefix; any other
    year is returned exactly as it was written in the header.
    """
    m = pat.match(date_header or "")
    if not m:
        return "unknown", "00", "00"
    day = "%02d" % int(m.group("day"))
    mon = montab[m.group("mon")]
    year = m.group("year")
    if int(year) < 10:
        year = "20" + year
    elif 88 <= int(year) < 100:
        year = "19" + year
    return year, mon, day


def target_name(date_header):
    """Return the relative path of the daily mbox for a Date header."""
    year, mon, day = date_parts(date_header)
    return "%s/%s-%s-%s.mbox" % (year, year, mon, day)


def split_mbox(filename):
    """Copy every message in the mbox `filename` into per-day mboxes.

    Output directories are created in the current directory as needed.
    A progress line (percentage done, messages per second, file name)
    is rewritten on stdout after every tenth message and after the
    last one, and is terminated with a newline once the file is done.
    """
    mbox = mailbox.mbox(filename)
    outbox = {}
    try:
        total = len(mbox)
        count = 0
        start = time.time()
        for key in sorted(mbox.keys()):
            msg = mbox[key]
            name = target_name(msg["Date"])
            dirname = os.path.dirname(name)
            if not os.path.isdir(dirname):
                os.mkdir(dirname)
            if name not in outbox:
                outbox[name] = mailbox.mbox(name)
            outbox[name].add(msg)
            count += 1
            if (count % 10) == 0 or count == total:
                sys.stdout.write("\r%5.1f done" % (100.0 * count / total))
                # Avoid dividing by zero when no measurable time passed.
                duration = (time.time() - start) or 1.0
                sys.stdout.write(" %6.1f msgs/s" % (count / duration))
                sys.stdout.write(" %s" % filename)
                sys.stdout.flush()
        # Terminate the progress line; unlike a bare `print` statement
        # this behaves the same on Python 2 and Python 3.
        sys.stdout.write("\n")
    finally:
        # Closing flushes pending messages to disk, so do it even when
        # processing stopped early because of an error.
        for box in outbox.values():
            box.close()
        mbox.close()


def main(argv=None):
    """Split each mbox named in argv[1:] (default: sys.argv)."""
    if argv is None:
        argv = sys.argv
    for filename in argv[1:]:
        split_mbox(filename)


if __name__ == "__main__":
    main()