diff options
author | Lars Wirzenius <liw@liw.fi> | 2011-06-11 20:15:04 +0100 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2011-06-11 20:15:04 +0100 |
commit | 7fd201a6fe71e40a273f5b638b0c592fcea2b0db (patch) | |
tree | e70a602097e27c7cd605ed0711d69f95654a8520 | |
parent | dbd009da9ed28c3f20d64eaa78031ed3e1126211 (diff) | |
parent | e2a190cf766cd9cacd9dbeb94c0361f3445c2845 (diff) | |
download | summain-7fd201a6fe71e40a273f5b638b0c592fcea2b0db.tar.gz |
Support multiple checksum types.
-rw-r--r-- | Makefile | 7 |
-rwxr-xr-x | summain | 16 |
-rw-r--r-- | summain.1.in (renamed from summain.1) | 45 |
-rw-r--r-- | summainlib.py | 59 |
-rw-r--r-- | summainlib_tests.py | 50 |
5 files changed, 106 insertions, 71 deletions
@@ -14,11 +14,14 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. -all: +all: summain.1 + +summain.1: summain.1.in + python summain --generate-manpage=summain.1.in > summain.1 check: python -m CoverageTestRunner rm .coverage clean: - rm -f *.pyc .coverage + rm -f *.pyc .coverage summain.1 @@ -32,6 +32,11 @@ class Summain(cliapp.Application): self.settings.string_list(['exclude'], 'do not output or compute FIELD', metavar='FIELD') + self.settings.string_list(['checksum', 'c'], + 'which checksums to compute: ' + 'MD5, SHA1, SHA224, SHA256, SHA384, SHA512; ' + 'use once per checksum type ' + '(default is SHA1)') def files(self, root): if os.path.isdir(root): @@ -50,9 +55,18 @@ class Summain(cliapp.Application): pn = summainlib.PathNormalizer() else: pn = summainlib.SamePath() + checksums = [x.upper() + for x in self.settings['checksum'] or ['SHA1']] + o = summainlib.FilesystemObject('.', nn, pn, exclude, checksums) + for checksum in checksums: + try: + o[checksum] + except KeyError: + raise cliapp.AppException('Unknown checksum %s' % checksum) for root in args: for filename in self.files(root): - o = summainlib.FilesystemObject(filename, nn, pn, exclude) + o = summainlib.FilesystemObject(filename, nn, pn, exclude, + checksums) self.output.write(o.format(root if relative else None)) self.output.write('\n') @@ -2,13 +2,6 @@ .SH NAME summain \- gather file checksums and metadata .SH SYNOPSIS -.B summain -.RB [ \-m ] -.RB [ \-\-mangle\-paths ] -.RB [ \-r ] -.RB [ \-\-relative\-paths ] -.RB [ \-\-exclude =\fIFIELD\fR] -.RI [ file ...] .SH DESCRIPTION .B summain gathers metadata about files, @@ -24,7 +17,7 @@ The manifest looks like this: .IP .nf Name: foo/bar/foobar -Sha-1: 1234123413241324 +SHA1: 1234123413241324 Mtime: 2010-01-01 02:08:00.127651 +0000 Mode: 1755 .fi @@ -50,38 +43,4 @@ The numbers are reported so that hard links can be checked. .PP Directories named on the command line will be recursed automatically. 
.SH OPTIONS -.TP -.BR \-r ", " \-\-relative\-paths -Print pathnames relative to the command line argument they derive from. -If the command line argument is the directory called -.I foo -and there is a file called -.IR bar , -normally -.I foo/bar -is printed as the name. -With this option, -.I bar -is printed instead. -This can be handy for normalizing the paths for comparing two copies of the -same directory tree in two different locations on disk: -run -.B summain -on both, -and compare the output with -.BR diff (1). -.TP -.BR \-m ", " \-\-mangle\-paths -Mangle or obfuscate all paths in the output. -This is good if you need to share the output with someone else, -and your filenames may contain sensitive information, -such a client names. -.TP -.BR \-\-exclude =\fIFIELD -Don't write out the given -.IR FIELD . -Can be given multiple times. -The field value is also not computed, -so excluding the -.I SHA-1 -field will result in much speedup. + diff --git a/summainlib.py b/summainlib.py index 6b3a6a8..bb7d668 100644 --- a/summainlib.py +++ b/summainlib.py @@ -160,13 +160,21 @@ class FilesystemObject(object): ''' - def __init__(self, filename, nn, pn, exclude, stat_result=None, sha1=None, - open_file=None, readlink=None): + def __init__(self, filename, nn, pn, exclude, checksums, + stat_result=None, sha1=None, sha224=None, + sha256=None, sha384=None, sha512=None, + md5=None, open_file=None, readlink=None): self._filename = filename self._exclude = set(self._normalize_key(k) for k in exclude) + self._checksums = checksums self._pn = pn self._nn = nn + self._md5 = md5 or hashlib.md5() self._sha1 = sha1 or hashlib.sha1() + self._sha224 = sha224 or hashlib.sha224() + self._sha256 = sha256 or hashlib.sha256() + self._sha384 = sha384 or hashlib.sha384() + self._sha512 = sha512 or hashlib.sha512() self._stat_result = stat_result or os.lstat(filename) self.open_file = open_file or file self.readlink = readlink or os.readlink @@ -206,9 +214,23 @@ class 
FilesystemObject(object): def _compute_group(self): return self.lookup_group(self._stat_result.st_gid) - def _compute_sha_1(self): - if stat.S_ISREG(self._stat_result.st_mode): - return self.compute_sha1(self._filename, self._sha1) + def _compute_md5(self): + return self.compute_checksum(self._filename, self._md5) + + def _compute_sha1(self): + return self.compute_checksum(self._filename, self._sha1) + + def _compute_sha224(self): + return self.compute_checksum(self._filename, self._sha224) + + def _compute_sha256(self): + return self.compute_checksum(self._filename, self._sha256) + + def _compute_sha384(self): + return self.compute_checksum(self._filename, self._sha384) + + def _compute_sha512(self): + return self.compute_checksum(self._filename, self._sha512) def _compute_target(self): if stat.S_ISLNK(self._stat_result.st_mode): @@ -227,15 +249,17 @@ class FilesystemObject(object): def lookup_group(self, gid): return grp.getgrgid(gid).gr_name - def compute_sha1(self, filename, sha1): - f = self.open_file(filename) - while True: - data = f.read(64*1024) # 64 KiB seems reasonable. - if not data: - break - sha1.update(data) - f.close() - return sha1.hexdigest() + def compute_checksum(self, filename, checksummer): + if stat.S_ISREG(self._stat_result.st_mode): + with self.open_file(filename) as f: + while True: + data = f.read(64*1024) # 64 KiB seems reasonable. 
+ if not data: + break + checksummer.update(data) + return checksummer.hexdigest() + else: + return '' def _normalize_key(self, key): key = key.lower() @@ -252,6 +276,8 @@ class FilesystemObject(object): value = getattr(self, method)() if value is not None: self.values[key] = value + else: + raise KeyError(key) return self.values.get(key, '') def _isdir(self): @@ -285,8 +311,9 @@ class FilesystemObject(object): else: name = self.relative_path(root) - keys = ['Mtime', 'Mode', 'Ino', 'Dev', 'Nlink', 'Size', - 'Uid', 'Username', 'Gid', 'Group', 'Sha-1', 'Target'] + keys = (['Mtime', 'Mode', 'Ino', 'Dev', 'Nlink', 'Size', + 'Uid', 'Username', 'Gid', 'Group', 'Target'] + + self._checksums) values = [('Name', name)] values += [(k, self[k]) for k in keys if self[k] != ''] return ''.join('%s: %s\n' % (k, v) for k, v in values if v != '') diff --git a/summainlib_tests.py b/summainlib_tests.py index 5b1b0f4..085dd2b 100644 --- a/summainlib_tests.py +++ b/summainlib_tests.py @@ -27,7 +27,7 @@ class FakeStatResult(object): setattr(self, name, value) -class FakeSha1(object): +class FakeChecksummer(object): def update(self, data): pass @@ -50,6 +50,12 @@ class FakeOpenFile(object): def close(self): pass + def __enter__(self): + return self + + def __exit__(self, a, b, c): + pass + class FakeReadlink(object): @@ -79,16 +85,26 @@ class FilesystemObjectTests(unittest.TestCase): self.nn = summainlib.NumberNormalizer() self.pn = summainlib.SamePath() self.exclude = [] + self.checksums = ['SHA1'] def new(self, name, mode=None): if mode is not None: self.st.st_mode = mode return summainlib.FilesystemObject(name, self.nn, self.pn, - self.exclude, + self.exclude, self.checksums, stat_result=self.st, - sha1=FakeSha1(), + sha1=FakeChecksummer(), + sha224=FakeChecksummer(), + sha256=FakeChecksummer(), + sha384=FakeChecksummer(), + sha512=FakeChecksummer(), + md5=FakeChecksummer(), open_file=FakeOpenFile(), readlink=FakeReadlink(self)) + + def test_raises_keyerror_for_unknown_field(self): + 
self.assertRaises(KeyError, self.new('foo').__getitem__, + 'UNKNOWNHASH') def test_formats_simple_name_identically(self): self.assertEqual(self.new('foo')['Name'], 'foo') @@ -129,12 +145,22 @@ class FilesystemObjectTests(unittest.TestCase): def test_formats_group_correctly(self): self.assertEqual(self.new('foo')['Group'], 'root') - def test_formats_sha1_correctly_for_regular_file(self): - self.assertEqual(self.new('foo')['Sha-1'], 'abc') + def test_formats_checksums_correctly_for_regular_file(self): + self.assertEqual(self.new('foo')['MD5'], 'abc') + self.assertEqual(self.new('foo')['SHA1'], 'abc') + self.assertEqual(self.new('foo')['SHA224'], 'abc') + self.assertEqual(self.new('foo')['SHA256'], 'abc') + self.assertEqual(self.new('foo')['SHA384'], 'abc') + self.assertEqual(self.new('foo')['SHA512'], 'abc') - def test_formats_sha1_correctly_for_special_file(self): + def test_formats_checksums_correctly_for_special_file(self): self.st.st_mode = stat.S_IFDIR | 0755 - self.assertEqual(self.new('foo')['Sha-1'], '') + self.assertEqual(self.new('foo')['MD5'], '') + self.assertEqual(self.new('foo')['SHA1'], '') + self.assertEqual(self.new('foo')['SHA224'], '') + self.assertEqual(self.new('foo')['SHA256'], '') + self.assertEqual(self.new('foo')['SHA384'], '') + self.assertEqual(self.new('foo')['SHA512'], '') def test_formats_target_correctly_for_symlink(self): self.st.st_mode = stat.S_IFLNK | 0777 @@ -186,6 +212,7 @@ class FilesystemObjectNormalizedNumbersTests(unittest.TestCase): self.nn = summainlib.NumberNormalizer() self.pn = summainlib.SamePath() self.exclude = [] + self.checksums = ['SHA1'] def reset(self): self.dev += 1 @@ -197,9 +224,14 @@ class FilesystemObjectNormalizedNumbersTests(unittest.TestCase): st_uid=0, st_gid=0) self.ino += 1 return summainlib.FilesystemObject(name, self.nn, self.pn, - self.exclude, + self.exclude, self.checksums, stat_result=st, - sha1=FakeSha1(), + sha1=FakeChecksummer(), + sha224=FakeChecksummer(), + sha256=FakeChecksummer(), + 
sha384=FakeChecksummer(), + sha512=FakeChecksummer(), + md5=FakeChecksummer(), open_file=FakeOpenFile(), readlink=FakeReadlink(self)) |