summary | refs | log | tree | commit | diff
path: root/splitmboxdaily
diff options
context:
space:
mode:
author: Lars Wirzenius <liw@liw.fi> 2010-05-04 18:39:54 +1200
committer: Lars Wirzenius <liw@liw.fi> 2010-05-04 18:39:54 +1200
commit: 2c3e5c0ab2eade34bc69588c864eb7d73db8619f (patch)
tree: 1ea319db0ffb1e65a7725beaa2567265bfa31057 /splitmboxdaily
parent: 3aa05422615a1b3d6c76ed3f0d41c63c78896573 (diff)
download: extrautils-2c3e5c0ab2eade34bc69588c864eb7d73db8619f.tar.gz
Add splitmboxdaily.
Diffstat (limited to 'splitmboxdaily')
-rwxr-xr-x splitmboxdaily 81
1 files changed, 81 insertions, 0 deletions
diff --git a/splitmboxdaily b/splitmboxdaily
new file mode 100755
index 0000000..d337cd2
--- /dev/null
+++ b/splitmboxdaily
@@ -0,0 +1,81 @@
+#!/usr/bin/python2.5
+# splitmboxdaily - split an mbox into new ones, one per day
+# Copyright 2008-2010 Lars Wirzenius
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import mailbox, sys, re, time, os
+
+# Matches the leading date of a "Date:" header, e.g. "Tue, 4 May 2010 ".
+pat = re.compile(r"((Mon|Tue|Wed|Thu|Fri|Sat|Sun), )?"
+                 r" ?(?P<day>\d\d?) "
+                 r"(?P<mon>(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)) "
+                 r"(?P<year>\d\d(\d\d)?) ")
+
+# Month abbreviation -> two-digit month number used in output file names.
+montab = {
+    "Jan": "01",
+    "Feb": "02",
+    "Mar": "03",
+    "Apr": "04",
+    "May": "05",
+    "Jun": "06",
+    "Jul": "07",
+    "Aug": "08",
+    "Sep": "09",
+    "Oct": "10",
+    "Nov": "11",
+    "Dec": "12",
+}
+
+# Output file name -> open mailbox.mbox; emptied after each input file.
+outbox = {}
+for filename in sys.argv[1:]:
+    mbox = mailbox.mbox(filename)
+    total = len(mbox)
+    count = 0
+    start = time.time()
+    for key in sorted(mbox.keys()):
+        msg = mbox[key]
+        m = pat.match(msg["Date"] or "")
+        if m:
+            day = "%02d" % int(m.group("day"))
+            mon = montab[m.group("mon")]
+            year = m.group("year")
+            if len(year) == 2:
+                # RFC 2822 section 4.3: 00-49 means 20xx, 50-99 means 19xx.
+                year = ("20" if int(year) < 50 else "19") + year
+        else:
+            # Missing or unparseable Date: file under unknown/unknown-00-00.mbox.
+            year, mon, day = "unknown", "00", "00"
+
+        name = "%s/%s-%s-%s.mbox" % (year, year, mon, day)
+        if not os.path.isdir(year):
+            os.mkdir(year)
+        if name not in outbox:
+            outbox[name] = mailbox.mbox(name)
+        outbox[name].add(msg)
+
+        count += 1
+        if (count % 10) == 0 or count == total:
+            duration = (time.time() - start) or 1.0
+            sys.stdout.write("\r%5.1f done %6.1f msgs/s %s" % (
+                100.0 * count / total, count / duration, filename))
+            sys.stdout.flush()
+
+    print
+    # close() flushes each per-day mbox to disk before the next input file.
+    for box in outbox.values():
+        box.close()
+    outbox = {}