diff options
author | Lars Wirzenius <liw@liw.fi> | 2010-05-04 18:39:54 +1200 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2010-05-04 18:39:54 +1200 |
commit | 2c3e5c0ab2eade34bc69588c864eb7d73db8619f (patch) | |
tree | 1ea319db0ffb1e65a7725beaa2567265bfa31057 | |
parent | 3aa05422615a1b3d6c76ed3f0d41c63c78896573 (diff) | |
download | extrautils-2c3e5c0ab2eade34bc69588c864eb7d73db8619f.tar.gz |
Add splitmboxdaily.
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | debian/changelog | 6 |
| -rwxr-xr-x | splitmboxdaily | 81 |
| -rw-r--r-- | splitmboxdaily.1 | 26 |
3 files changed, 113 insertions, 0 deletions
```diff
diff --git a/debian/changelog b/debian/changelog
index 65d928f..e618e43 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+extrautils (1.4) squeeze; urgency=low
+
+  * Add splitmboxdaily.
+
+ -- Lars Wirzenius <liw@liw.fi>  Tue, 04 May 2010 18:39:34 +1200
+
 extrautils (1.3.1) squeeze; urgency=low
 
   * Build for squeeze.
diff --git a/splitmboxdaily b/splitmboxdaily
new file mode 100755
index 0000000..d337cd2
--- /dev/null
+++ b/splitmboxdaily
@@ -0,0 +1,81 @@
+#!/usr/bin/python2.5
+# splitmboxdaily - split an mbox into new ones, one per day
+# Copyright 2008-2010 Lars Wirzenius
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import mailbox, sys, re, time, os
+
+pat = re.compile(r"((Mon|Tue|Wed|Thu|Fri|Sat|Sun), )?"
+                 r" ?(?P<day>\d\d?) "
+                 r"(?P<mon>(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)) "
+                 r"(?P<year>\d\d(\d\d)?) ")
+
+montab = {
+    "Jan": "01",
+    "Feb": "02",
+    "Mar": "03",
+    "Apr": "04",
+    "May": "05",
+    "Jun": "06",
+    "Jul": "07",
+    "Aug": "08",
+    "Sep": "09",
+    "Oct": "10",
+    "Nov": "11",
+    "Dec": "12",
+}
+
+outbox = {}
+
+for filename in sys.argv[1:]:
+    mbox = mailbox.mbox(filename)
+    count = 0
+    start = time.time()
+    for key in sorted(mbox.keys()):
+        msg = mbox[key]
+        m = pat.match(msg["Date"] or "")
+        if m:
+            day = "%02d" % int(m.group("day"))
+            mon = montab[m.group("mon")]
+            year = m.group("year")
+            if int(year) < 10:
+                year = "20" + year
+            elif 88 <= int(year) < 100:
+                year = "19" + year
+        else:
+            year = "unknown"
+            mon = "00"
+            day = "00"
+
+        name = "%s/%s-%s-%s.mbox" % (year, year, mon, day)
+        if not os.path.isdir("%s" % year):
+            os.mkdir("%s" % year)
+        if name not in outbox:
+            outbox[name] = mailbox.mbox(name)
+        outbox[name].add(msg)
+
+        count += 1
+        if (count % 10) == 0 or count == len(mbox):
+            sys.stdout.write("\r%5.1f done" %
+                             (100.0 * count/len(mbox)))
+            duration = (time.time() - start) or 1.0
+            sys.stdout.write(" %6.1f msgs/s" % (count/duration))
+            sys.stdout.write(" %s" % filename)
+            sys.stdout.flush()
+
+    print
+    for box in outbox.values():
+        box.close()
+    outbox = {}
diff --git a/splitmboxdaily.1 b/splitmboxdaily.1
new file mode 100644
index 0000000..2b115c5
--- /dev/null
+++ b/splitmboxdaily.1
@@ -0,0 +1,26 @@
+.\" Copyright 2010 Lars Wirzenius
+.\"
+.\" This program is free software: you can redistribute it and/or modify
+.\" it under the terms of the GNU General Public License as published by
+.\" the Free Software Foundation, either version 3 of the License, or
+.\" (at your option) any later version.
+.\"
+.\" This program is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public License
+.\" along with this program. If not, see <http://www.gnu.org/licenses/>.
+.TH SPLITMBOXDAILY 8 2010-05-04
+.SH NAME
+splitmboxdaily \- split a mailbox into smaller ones, one per day
+.SH SYNOPSIS
+.B splitmboxdaily
+.IR mboxfile ...
+.SH DESCRIPTION
+.B splitmboxdaily
+reads one or more mailboxes (standard Unix mbox format),
+and writes the mails to a set of new ones.
+It will collect all mails for each day into one output mailbox.
+It blindly relies on the Date: header to decide the output mailbox.
```