summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2011-06-11 20:15:04 +0100
committerLars Wirzenius <liw@liw.fi>2011-06-11 20:15:04 +0100
commit7fd201a6fe71e40a273f5b638b0c592fcea2b0db (patch)
treee70a602097e27c7cd605ed0711d69f95654a8520
parentdbd009da9ed28c3f20d64eaa78031ed3e1126211 (diff)
parente2a190cf766cd9cacd9dbeb94c0361f3445c2845 (diff)
downloadsummain-7fd201a6fe71e40a273f5b638b0c592fcea2b0db.tar.gz
Support multiple checksum types.
-rw-r--r--Makefile7
-rwxr-xr-xsummain16
-rw-r--r--summain.1.in (renamed from summain.1)45
-rw-r--r--summainlib.py59
-rw-r--r--summainlib_tests.py50
5 files changed, 106 insertions, 71 deletions
diff --git a/Makefile b/Makefile
index 4db3341..3d07544 100644
--- a/Makefile
+++ b/Makefile
@@ -14,11 +14,14 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-all:
+all: summain.1
+
+summain.1: summain.1.in
+ python summain --generate-manpage=summain.1.in > summain.1
check:
python -m CoverageTestRunner
rm .coverage
clean:
- rm -f *.pyc .coverage
+ rm -f *.pyc .coverage summain.1
diff --git a/summain b/summain
index 2ff1b21..92ad5f3 100755
--- a/summain
+++ b/summain
@@ -32,6 +32,11 @@ class Summain(cliapp.Application):
self.settings.string_list(['exclude'],
'do not output or compute FIELD',
metavar='FIELD')
+ self.settings.string_list(['checksum', 'c'],
+ 'which checksums to compute: '
+ 'MD5, SHA1, SHA224, SHA256, SHA384, SHA512; '
+ 'use once per checksum type '
+ '(default is SHA1)')
def files(self, root):
if os.path.isdir(root):
@@ -50,9 +55,18 @@ class Summain(cliapp.Application):
pn = summainlib.PathNormalizer()
else:
pn = summainlib.SamePath()
+ checksums = [x.upper()
+ for x in self.settings['checksum'] or ['SHA1']]
+ o = summainlib.FilesystemObject('.', nn, pn, exclude, checksums)
+ for checksum in checksums:
+ try:
+ o[checksum]
+ except KeyError:
+ raise cliapp.AppException('Unknown checksum %s' % checksum)
for root in args:
for filename in self.files(root):
- o = summainlib.FilesystemObject(filename, nn, pn, exclude)
+ o = summainlib.FilesystemObject(filename, nn, pn, exclude,
+ checksums)
self.output.write(o.format(root if relative else None))
self.output.write('\n')
diff --git a/summain.1 b/summain.1.in
index 90ccb01..7f53734 100644
--- a/summain.1
+++ b/summain.1.in
@@ -2,13 +2,6 @@
.SH NAME
summain \- gather file checksums and metadata
.SH SYNOPSIS
-.B summain
-.RB [ \-m ]
-.RB [ \-\-mangle\-paths ]
-.RB [ \-r ]
-.RB [ \-\-relative\-paths ]
-.RB [ \-\-exclude =\fIFIELD\fR]
-.RI [ file ...]
.SH DESCRIPTION
.B summain
gathers metadata about files,
@@ -24,7 +17,7 @@ The manifest looks like this:
.IP
.nf
Name: foo/bar/foobar
-Sha-1: 1234123413241324
+SHA1: 1234123413241324
Mtime: 2010-01-01 02:08:00.127651 +0000
Mode: 1755
.fi
@@ -50,38 +43,4 @@ The numbers are reported so that hard links can be checked.
.PP
Directories named on the command line will be recursed automatically.
.SH OPTIONS
-.TP
-.BR \-r ", " \-\-relative\-paths
-Print pathnames relative to the command line argument they derive from.
-If the command line argument is the directory called
-.I foo
-and there is a file called
-.IR bar ,
-normally
-.I foo/bar
-is printed as the name.
-With this option,
-.I bar
-is printed instead.
-This can be handy for normalizing the paths for comparing two copies of the
-same directory tree in two different locations on disk:
-run
-.B summain
-on both,
-and compare the output with
-.BR diff (1).
-.TP
-.BR \-m ", " \-\-mangle\-paths
-Mangle or obfuscate all paths in the output.
-This is good if you need to share the output with someone else,
-and your filenames may contain sensitive information,
-such a client names.
-.TP
-.BR \-\-exclude =\fIFIELD
-Don't write out the given
-.IR FIELD .
-Can be given multiple times.
-The field value is also not computed,
-so excluding the
-.I SHA-1
-field will result in much speedup.
+
diff --git a/summainlib.py b/summainlib.py
index 6b3a6a8..bb7d668 100644
--- a/summainlib.py
+++ b/summainlib.py
@@ -160,13 +160,21 @@ class FilesystemObject(object):
'''
- def __init__(self, filename, nn, pn, exclude, stat_result=None, sha1=None,
- open_file=None, readlink=None):
+ def __init__(self, filename, nn, pn, exclude, checksums,
+ stat_result=None, sha1=None, sha224=None,
+ sha256=None, sha384=None, sha512=None,
+ md5=None, open_file=None, readlink=None):
self._filename = filename
self._exclude = set(self._normalize_key(k) for k in exclude)
+ self._checksums = checksums
self._pn = pn
self._nn = nn
+ self._md5 = md5 or hashlib.md5()
self._sha1 = sha1 or hashlib.sha1()
+ self._sha224 = sha224 or hashlib.sha224()
+ self._sha256 = sha256 or hashlib.sha256()
+ self._sha384 = sha384 or hashlib.sha384()
+ self._sha512 = sha512 or hashlib.sha512()
self._stat_result = stat_result or os.lstat(filename)
self.open_file = open_file or file
self.readlink = readlink or os.readlink
@@ -206,9 +214,23 @@ class FilesystemObject(object):
def _compute_group(self):
return self.lookup_group(self._stat_result.st_gid)
- def _compute_sha_1(self):
- if stat.S_ISREG(self._stat_result.st_mode):
- return self.compute_sha1(self._filename, self._sha1)
+ def _compute_md5(self):
+ return self.compute_checksum(self._filename, self._md5)
+
+ def _compute_sha1(self):
+ return self.compute_checksum(self._filename, self._sha1)
+
+ def _compute_sha224(self):
+ return self.compute_checksum(self._filename, self._sha224)
+
+ def _compute_sha256(self):
+ return self.compute_checksum(self._filename, self._sha256)
+
+ def _compute_sha384(self):
+ return self.compute_checksum(self._filename, self._sha384)
+
+ def _compute_sha512(self):
+ return self.compute_checksum(self._filename, self._sha512)
def _compute_target(self):
if stat.S_ISLNK(self._stat_result.st_mode):
@@ -227,15 +249,17 @@ class FilesystemObject(object):
def lookup_group(self, gid):
return grp.getgrgid(gid).gr_name
- def compute_sha1(self, filename, sha1):
- f = self.open_file(filename)
- while True:
- data = f.read(64*1024) # 64 KiB seems reasonable.
- if not data:
- break
- sha1.update(data)
- f.close()
- return sha1.hexdigest()
+ def compute_checksum(self, filename, checksummer):
+ if stat.S_ISREG(self._stat_result.st_mode):
+ with self.open_file(filename) as f:
+ while True:
+ data = f.read(64*1024) # 64 KiB seems reasonable.
+ if not data:
+ break
+ checksummer.update(data)
+ return checksummer.hexdigest()
+ else:
+ return ''
def _normalize_key(self, key):
key = key.lower()
@@ -252,6 +276,8 @@ class FilesystemObject(object):
value = getattr(self, method)()
if value is not None:
self.values[key] = value
+ else:
+ raise KeyError(key)
return self.values.get(key, '')
def _isdir(self):
@@ -285,8 +311,9 @@ class FilesystemObject(object):
else:
name = self.relative_path(root)
- keys = ['Mtime', 'Mode', 'Ino', 'Dev', 'Nlink', 'Size',
- 'Uid', 'Username', 'Gid', 'Group', 'Sha-1', 'Target']
+ keys = (['Mtime', 'Mode', 'Ino', 'Dev', 'Nlink', 'Size',
+ 'Uid', 'Username', 'Gid', 'Group', 'Target'] +
+ self._checksums)
values = [('Name', name)]
values += [(k, self[k]) for k in keys if self[k] != '']
return ''.join('%s: %s\n' % (k, v) for k, v in values if v != '')
diff --git a/summainlib_tests.py b/summainlib_tests.py
index 5b1b0f4..085dd2b 100644
--- a/summainlib_tests.py
+++ b/summainlib_tests.py
@@ -27,7 +27,7 @@ class FakeStatResult(object):
setattr(self, name, value)
-class FakeSha1(object):
+class FakeChecksummer(object):
def update(self, data):
pass
@@ -50,6 +50,12 @@ class FakeOpenFile(object):
def close(self):
pass
+ def __enter__(self):
+ return self
+
+ def __exit__(self, a, b, c):
+ pass
+
class FakeReadlink(object):
@@ -79,16 +85,26 @@ class FilesystemObjectTests(unittest.TestCase):
self.nn = summainlib.NumberNormalizer()
self.pn = summainlib.SamePath()
self.exclude = []
+ self.checksums = ['SHA1']
def new(self, name, mode=None):
if mode is not None:
self.st.st_mode = mode
return summainlib.FilesystemObject(name, self.nn, self.pn,
- self.exclude,
+ self.exclude, self.checksums,
stat_result=self.st,
- sha1=FakeSha1(),
+ sha1=FakeChecksummer(),
+ sha224=FakeChecksummer(),
+ sha256=FakeChecksummer(),
+ sha384=FakeChecksummer(),
+ sha512=FakeChecksummer(),
+ md5=FakeChecksummer(),
open_file=FakeOpenFile(),
readlink=FakeReadlink(self))
+
+ def test_raises_keyerror_for_unknown_field(self):
+ self.assertRaises(KeyError, self.new('foo').__getitem__,
+ 'UNKNOWNHASH')
def test_formats_simple_name_identically(self):
self.assertEqual(self.new('foo')['Name'], 'foo')
@@ -129,12 +145,22 @@ class FilesystemObjectTests(unittest.TestCase):
def test_formats_group_correctly(self):
self.assertEqual(self.new('foo')['Group'], 'root')
- def test_formats_sha1_correctly_for_regular_file(self):
- self.assertEqual(self.new('foo')['Sha-1'], 'abc')
+ def test_formats_checksums_correctly_for_regular_file(self):
+ self.assertEqual(self.new('foo')['MD5'], 'abc')
+ self.assertEqual(self.new('foo')['SHA1'], 'abc')
+ self.assertEqual(self.new('foo')['SHA224'], 'abc')
+ self.assertEqual(self.new('foo')['SHA256'], 'abc')
+ self.assertEqual(self.new('foo')['SHA384'], 'abc')
+ self.assertEqual(self.new('foo')['SHA512'], 'abc')
- def test_formats_sha1_correctly_for_special_file(self):
+ def test_formats_checksums_correctly_for_special_file(self):
self.st.st_mode = stat.S_IFDIR | 0755
- self.assertEqual(self.new('foo')['Sha-1'], '')
+ self.assertEqual(self.new('foo')['MD5'], '')
+ self.assertEqual(self.new('foo')['SHA1'], '')
+ self.assertEqual(self.new('foo')['SHA224'], '')
+ self.assertEqual(self.new('foo')['SHA256'], '')
+ self.assertEqual(self.new('foo')['SHA384'], '')
+ self.assertEqual(self.new('foo')['SHA512'], '')
def test_formats_target_correctly_for_symlink(self):
self.st.st_mode = stat.S_IFLNK | 0777
@@ -186,6 +212,7 @@ class FilesystemObjectNormalizedNumbersTests(unittest.TestCase):
self.nn = summainlib.NumberNormalizer()
self.pn = summainlib.SamePath()
self.exclude = []
+ self.checksums = ['SHA1']
def reset(self):
self.dev += 1
@@ -197,9 +224,14 @@ class FilesystemObjectNormalizedNumbersTests(unittest.TestCase):
st_uid=0, st_gid=0)
self.ino += 1
return summainlib.FilesystemObject(name, self.nn, self.pn,
- self.exclude,
+ self.exclude, self.checksums,
stat_result=st,
- sha1=FakeSha1(),
+ sha1=FakeChecksummer(),
+ sha224=FakeChecksummer(),
+ sha256=FakeChecksummer(),
+ sha384=FakeChecksummer(),
+ sha512=FakeChecksummer(),
+ md5=FakeChecksummer(),
open_file=FakeOpenFile(),
readlink=FakeReadlink(self))