#!/usr/bin/python
# splitmboxdaily - split an mbox into new ones, one per day
# Copyright 2008-2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import mailbox, sys, re, time, os
# Match the start of an RFC 2822 style Date: header, for example
# "Mon, 12 Jan 2009 10:11:12 +0000": an optional weekday, then the day of
# the month, the English month abbreviation and a two- or four-digit year.
# The named groups "day", "mon" and "year" are looked up by name when a
# message is filed, so the group names must be spelled out here.
pat = re.compile(r"((Mon|Tue|Wed|Thu|Fri|Sat|Sun), )?"
                 r" ?(?P<day>\d\d?) "
                 r"(?P<mon>(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)) "
                 r"(?P<year>\d\d(\d\d)?) ")
# Map the three-letter English month abbreviations found in Date: headers
# to zero-padded month numbers ("Jan" -> "01", ..., "Dec" -> "12").
montab = dict(
    (abbrev, "%02d" % number)
    for number, abbrev in enumerate(
        "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split(), 1))
# Split every mbox named on the command line into per-day mboxes called
# YEAR/YEAR-MM-DD.mbox, relative to the current directory.  Messages whose
# Date: header is missing or does not match the date pattern are filed in
# unknown/unknown-00-00.mbox.  A progress line is rewritten on stdout while
# each input file is processed.
outbox = {}
for filename in sys.argv[1:]:
    mbox = mailbox.mbox(filename)
    try:
        total = len(mbox)
        count = 0
        start = time.time()
        for key in sorted(mbox.keys()):
            msg = mbox[key]
            # On Python 3 a header holding raw 8-bit bytes is returned as
            # an email.header.Header object instead of a str; coerce it so
            # the regex match cannot fail with a TypeError.
            date = msg["Date"]
            m = pat.match(str(date) if date else "")
            if m:
                day = "%02d" % int(m.group("day"))
                mon = montab[m.group("mon")]
                year = m.group("year")
                if len(year) == 2:
                    # Two-digit years: 00-49 mean 20xx and 50-99 mean 19xx
                    # (the RFC 5322 rule for the obsolete date syntax).
                    if int(year) < 50:
                        year = "20" + year
                    else:
                        year = "19" + year
            else:
                year = "unknown"
                mon = "00"
                day = "00"
            name = "%s/%s-%s-%s.mbox" % (year, year, mon, day)
            if name not in outbox:
                # Only touch the filesystem the first time a day is seen.
                if not os.path.isdir(year):
                    os.mkdir(year)
                outbox[name] = mailbox.mbox(name)
            outbox[name].add(msg)
            count += 1
            if (count % 10) == 0 or count == total:
                sys.stdout.write("\r%5.1f done" %
                                 (100.0 * count / total))
                # Guard against a zero elapsed time on very fast runs.
                duration = (time.time() - start) or 1.0
                sys.stdout.write(" %6.1f msgs/s" % (count / duration))
                sys.stdout.write(" %s" % filename)
                sys.stdout.flush()
        # Finish the progress line; an explicit write works on both
        # Python 2 and Python 3, unlike a bare "print".
        sys.stdout.write("\n")
    finally:
        # Close the per-day mailboxes even if processing failed part way,
        # so whatever was added is flushed to disk, then release the input.
        for box in outbox.values():
            box.close()
        outbox = {}
        mbox.close()