diff options
author | Lars Wirzenius <liw@liw.fi> | 2011-06-12 14:39:29 +0100 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2011-06-12 14:39:29 +0100 |
commit | f09511fcbe4685c8794e24786aef7f438727491f (patch) | |
tree | d19333e6cfcd2467e880ddd356164411406048a4 | |
parent | dd5350490b68a2119d02568fc5cf813377daddce (diff) | |
download | summain-f09511fcbe4685c8794e24786aef7f438727491f.tar.gz |
Use checksumming for path normalization.
-rw-r--r-- | summainlib.py | 59 | ||||
-rw-r--r-- | summainlib_tests.py | 56 |
2 files changed, 10 insertions, 105 deletions
diff --git a/summainlib.py b/summainlib.py
index 41ff5ce..c3c0bc0 100644
--- a/summainlib.py
+++ b/summainlib.py
@@ -82,65 +82,16 @@ class PathNormalizer(object):

     '''Normalize a filesystem path.

-    For every input path, a new output path is given. The same output
-    path is given every time the same input path is given. The output
-    path has the same number of elements as the input path, and if
-    /foo/bar results in /a/b, then /foo/yo results in /a/b, i.e., the
-    tree structure is the same.
+    Paths are normalized by using SHA-1 on a secret plus the real path.
+    The checksum is the normalized path.

     '''

-    def __init__(self):
-        self._counter = 0
-        self._dict = dict()
-        self._dict[os.sep] = os.sep
-        self._dict['.'] = '.'
-        self._dict['..'] = '..'
-
-    def split(self, path):
-        if path == os.sep:
-            return [os.sep]
-        parts = path.split(os.sep)
-        if parts:
-            if parts[0] == '':
-                parts[0] = os.sep
-            if parts[-1] == '':
-                parts[-1] = os.sep
-        return parts
-
-    def _base26(self, n):
-        if n == 0:
-            digits = [0]
-        else:
-            digits = []
-            while n > 0:
-                digits.append(n % 26)
-                n /= 26
-
-        letters = 'abcdefghijklmnopqrstuvwxyz'
-        assert len(letters) == 26
-        return ''.join(letters[x] for x in reversed(digits))
-
-    def normalize_part(self, part):
-        if part not in self._dict:
-            self._dict[part] = self._base26(self._counter)
-            self._counter += 1
-        return self._dict[part]
+    def __init__(self, secret):
+        self._secret = secret

     def normalize(self, path):
-        parts = self.split(path)
-        normalized = [self.normalize_part(x) for x in parts]
-        result = ''
-        for x in normalized:
-            if not result:
-                result = x
-            elif x == os.sep:
-                result += x
-            elif result.endswith(os.sep):
-                result += x
-            else:
-                result += os.sep + x
-        return result
+        return hashlib.sha1(self._secret + path).hexdigest()


 class SamePath(object): # pragma: no cover
diff --git a/summainlib_tests.py b/summainlib_tests.py
index 085dd2b..6015541 100644
--- a/summainlib_tests.py
+++ b/summainlib_tests.py
@@ -269,59 +269,13 @@ class NumberNormalizerTests(unittest.TestCase):
 class PathNormalizerTests(unittest.TestCase):

     def setUp(self):
-        self.pn = summainlib.PathNormalizer()
+        self.pn = summainlib.PathNormalizer('secret')

     def test_returns_different_paths_for_different_inputs(self):
-        self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
-        self.assertEqual(self.pn.normalize('/ping/pong'), '/c/d')
+        self.assertNotEqual(self.pn.normalize('/foo/bar'),
+                            self.pn.normalize('/ping/pong'))

     def test_returns_same_paths_for_same_input(self):
-        self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
-        self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
-
-    def test_returns_same_parent_path_for_siblings(self):
-        self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
-        self.assertEqual(self.pn.normalize('/foo/yo'), '/a/c')
-        self.assertEqual(self.pn.normalize('/foo'), '/a')
-
-    def test_handles_trailing_slashes(self):
-        self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
-        self.assertEqual(self.pn.normalize('/foo/bar/'), '/a/b/')
-
-    def test_handles_relative_paths(self):
-        self.assertEqual(self.pn.normalize('foo/bar'), 'a/b')
-        self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
-
-    def test_handles_dot(self):
-        self.assertEqual(self.pn.normalize('.'), '.')
-
-    def test_handles_dotdot(self):
-        self.assertEqual(self.pn.normalize('..'), '..')
-
-    def test_splits_root(self):
-        self.assertEqual(self.pn.split('/'), ['/'])
-
-    def test_splits_absolute_path(self):
-        self.assertEqual(self.pn.split('/foo/bar'), ['/', 'foo', 'bar'])
-
-    def test_splits_trailing_slash(self):
-        self.assertEqual(self.pn.split('/foo/'), ['/', 'foo', '/'])
-        self.assertEqual(self.pn.split('/foo/bar/'), ['/', 'foo', 'bar', '/'])
-
-    def test_splits_relative_path(self):
-        self.assertEqual(self.pn.split('foo/bar'), ['foo', 'bar'])
-
-    def test_normalizes_slash_to_itself(self):
-        self.assertEqual(self.pn.normalize_part('/'), '/')
-
-    def test_normalizes_first_part_to_a(self):
-        self.assertEqual(self.pn.normalize_part('foo'), 'a')
-
-    def test_normalizes_second_part_to_b(self):
-        self.pn.normalize_part('foo')
-        self.assertEqual(self.pn.normalize_part('bar'), 'b')
-
-    def test_normalizes_same_part_twice_to_same_result(self):
-        self.pn.normalize_part('foo')
-        self.assertEqual(self.pn.normalize_part('foo'), 'a')
+        self.assertEqual(self.pn.normalize('/foo/bar'),
+                         self.pn.normalize('/foo/bar'))