diff options
author | Lars Wirzenius <liw@liw.fi> | 2011-06-12 14:45:29 +0100 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2011-06-12 14:45:29 +0100 |
commit | 24303bb1e7575d9c629ce3acc65b12a3b91a151c (patch) | |
tree | 74d1c4d4a1ddc3c9d4174d86cb96bd90e7800b76 | |
parent | dd5350490b68a2119d02568fc5cf813377daddce (diff) | |
parent | cb99a3a420e6abc42a9104895b1b4bd60d9faca1 (diff) | |
download | summain-24303bb1e7575d9c629ce3acc65b12a3b91a151c.tar.gz |
Mangle filenames using hashing.
-rwxr-xr-x | summain | 6 |
-rw-r--r-- | summainlib.py | 60 |
-rw-r--r-- | summainlib_tests.py | 56 |
3 files changed, 15 insertions, 107 deletions
@@ -29,6 +29,8 @@ class Summain(cliapp.Application): 'print paths relative to arguments') self.settings.boolean(['mangle-paths', 'm'], 'mangle (obfuscate) paths') + self.settings.string(['secret'], + 'use SECRET to make mangled paths unguessable') self.settings.string_list(['exclude'], 'do not output or compute FIELD', metavar='FIELD') @@ -42,7 +44,7 @@ class Summain(cliapp.Application): if os.path.isdir(root): for dirname, dirnames, filenames in os.walk(root): yield dirname - dirname.sort() + dirnames.sort() for filename in sorted(filenames): yield os.path.join(dirname, filename) else: @@ -53,7 +55,7 @@ class Summain(cliapp.Application): exclude = self.settings['exclude'] nn = summainlib.NumberNormalizer() if self.settings['mangle-paths']: - pn = summainlib.PathNormalizer() + pn = summainlib.PathNormalizer(self.settings['secret']) else: pn = summainlib.SamePath() checksums = [x.upper() diff --git a/summainlib.py b/summainlib.py index 41ff5ce..3de9517 100644 --- a/summainlib.py +++ b/summainlib.py @@ -16,6 +16,7 @@ import grp import hashlib +import hmac import math import os import pwd @@ -82,65 +83,16 @@ class PathNormalizer(object): '''Normalize a filesystem path. - For every input path, a new output path is given. The same output - path is given every time the same input path is given. The output - path has the same number of elements as the input path, and if - /foo/bar results in /a/b, then /foo/yo results in /a/b, i.e., the - tree structure is the same. + Paths are normalized by using SHA-1 on a secret plus the real path. + The checksum is the normalized path. ''' - def __init__(self): - self._counter = 0 - self._dict = dict() - self._dict[os.sep] = os.sep - self._dict['.'] = '.' - self._dict['..'] = '..' 
- - def split(self, path): - if path == os.sep: - return [os.sep] - parts = path.split(os.sep) - if parts: - if parts[0] == '': - parts[0] = os.sep - if parts[-1] == '': - parts[-1] = os.sep - return parts - - def _base26(self, n): - if n == 0: - digits = [0] - else: - digits = [] - while n > 0: - digits.append(n % 26) - n /= 26 - - letters = 'abcdefghijklmnopqrstuvwxyz' - assert len(letters) == 26 - return ''.join(letters[x] for x in reversed(digits)) - - def normalize_part(self, part): - if part not in self._dict: - self._dict[part] = self._base26(self._counter) - self._counter += 1 - return self._dict[part] + def __init__(self, secret): + self._secret = secret def normalize(self, path): - parts = self.split(path) - normalized = [self.normalize_part(x) for x in parts] - result = '' - for x in normalized: - if not result: - result = x - elif x == os.sep: - result += x - elif result.endswith(os.sep): - result += x - else: - result += os.sep + x - return result + return hmac.new(self._secret, path).hexdigest() class SamePath(object): # pragma: no cover diff --git a/summainlib_tests.py b/summainlib_tests.py index 085dd2b..6015541 100644 --- a/summainlib_tests.py +++ b/summainlib_tests.py @@ -269,59 +269,13 @@ class NumberNormalizerTests(unittest.TestCase): class PathNormalizerTests(unittest.TestCase): def setUp(self): - self.pn = summainlib.PathNormalizer() + self.pn = summainlib.PathNormalizer('secret') def test_returns_different_paths_for_different_inputs(self): - self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b') - self.assertEqual(self.pn.normalize('/ping/pong'), '/c/d') + self.assertNotEqual(self.pn.normalize('/foo/bar'), + self.pn.normalize('/ping/pong')) def test_returns_same_paths_for_same_input(self): - self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b') - self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b') - - def test_returns_same_parent_path_for_siblings(self): - self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b') - 
self.assertEqual(self.pn.normalize('/foo/yo'), '/a/c') - self.assertEqual(self.pn.normalize('/foo'), '/a') - - def test_handles_trailing_slashes(self): - self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b') - self.assertEqual(self.pn.normalize('/foo/bar/'), '/a/b/') - - def test_handles_relative_paths(self): - self.assertEqual(self.pn.normalize('foo/bar'), 'a/b') - self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b') - - def test_handles_dot(self): - self.assertEqual(self.pn.normalize('.'), '.') - - def test_handles_dotdot(self): - self.assertEqual(self.pn.normalize('..'), '..') - - def test_splits_root(self): - self.assertEqual(self.pn.split('/'), ['/']) - - def test_splits_absolute_path(self): - self.assertEqual(self.pn.split('/foo/bar'), ['/', 'foo', 'bar']) - - def test_splits_trailing_slash(self): - self.assertEqual(self.pn.split('/foo/'), ['/', 'foo', '/']) - self.assertEqual(self.pn.split('/foo/bar/'), ['/', 'foo', 'bar', '/']) - - def test_splits_relative_path(self): - self.assertEqual(self.pn.split('foo/bar'), ['foo', 'bar']) - - def test_normalizes_slash_to_itself(self): - self.assertEqual(self.pn.normalize_part('/'), '/') - - def test_normalizes_first_part_to_a(self): - self.assertEqual(self.pn.normalize_part('foo'), 'a') - - def test_normalizes_second_part_to_b(self): - self.pn.normalize_part('foo') - self.assertEqual(self.pn.normalize_part('bar'), 'b') - - def test_normalizes_same_part_twice_to_same_result(self): - self.pn.normalize_part('foo') - self.assertEqual(self.pn.normalize_part('foo'), 'a') + self.assertEqual(self.pn.normalize('/foo/bar'), + self.pn.normalize('/foo/bar')) |