summary refs log tree commit diff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2011-06-12 14:39:29 +0100
committer Lars Wirzenius <liw@liw.fi> 2011-06-12 14:39:29 +0100
commit f09511fcbe4685c8794e24786aef7f438727491f (patch)
tree d19333e6cfcd2467e880ddd356164411406048a4
parent dd5350490b68a2119d02568fc5cf813377daddce (diff)
download summain-f09511fcbe4685c8794e24786aef7f438727491f.tar.gz
Use checksumming for path normalization.
-rw-r--r-- summainlib.py 59
-rw-r--r-- summainlib_tests.py 56
2 files changed, 10 insertions, 105 deletions
diff --git a/summainlib.py b/summainlib.py
index 41ff5ce..c3c0bc0 100644
--- a/summainlib.py
+++ b/summainlib.py
@@ -82,65 +82,16 @@ class PathNormalizer(object):
'''Normalize a filesystem path.
- For every input path, a new output path is given. The same output
- path is given every time the same input path is given. The output
- path has the same number of elements as the input path, and if
- /foo/bar results in /a/b, then /foo/yo results in /a/b, i.e., the
- tree structure is the same.
+ Paths are normalized by using SHA-1 on a secret plus the real path.
+ The checksum is the normalized path.
'''
- def __init__(self):
- self._counter = 0
- self._dict = dict()
- self._dict[os.sep] = os.sep
- self._dict['.'] = '.'
- self._dict['..'] = '..'
-
- def split(self, path):
- if path == os.sep:
- return [os.sep]
- parts = path.split(os.sep)
- if parts:
- if parts[0] == '':
- parts[0] = os.sep
- if parts[-1] == '':
- parts[-1] = os.sep
- return parts
-
- def _base26(self, n):
- if n == 0:
- digits = [0]
- else:
- digits = []
- while n > 0:
- digits.append(n % 26)
- n /= 26
-
- letters = 'abcdefghijklmnopqrstuvwxyz'
- assert len(letters) == 26
- return ''.join(letters[x] for x in reversed(digits))
-
- def normalize_part(self, part):
- if part not in self._dict:
- self._dict[part] = self._base26(self._counter)
- self._counter += 1
- return self._dict[part]
+ def __init__(self, secret):
+ self._secret = secret
def normalize(self, path):
- parts = self.split(path)
- normalized = [self.normalize_part(x) for x in parts]
- result = ''
- for x in normalized:
- if not result:
- result = x
- elif x == os.sep:
- result += x
- elif result.endswith(os.sep):
- result += x
- else:
- result += os.sep + x
- return result
+ return hashlib.sha1(self._secret + path).hexdigest()
class SamePath(object): # pragma: no cover
diff --git a/summainlib_tests.py b/summainlib_tests.py
index 085dd2b..6015541 100644
--- a/summainlib_tests.py
+++ b/summainlib_tests.py
@@ -269,59 +269,13 @@ class NumberNormalizerTests(unittest.TestCase):
class PathNormalizerTests(unittest.TestCase):
def setUp(self):
- self.pn = summainlib.PathNormalizer()
+ self.pn = summainlib.PathNormalizer('secret')
def test_returns_different_paths_for_different_inputs(self):
- self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
- self.assertEqual(self.pn.normalize('/ping/pong'), '/c/d')
+ self.assertNotEqual(self.pn.normalize('/foo/bar'),
+ self.pn.normalize('/ping/pong'))
def test_returns_same_paths_for_same_input(self):
- self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
- self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
-
- def test_returns_same_parent_path_for_siblings(self):
- self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
- self.assertEqual(self.pn.normalize('/foo/yo'), '/a/c')
- self.assertEqual(self.pn.normalize('/foo'), '/a')
-
- def test_handles_trailing_slashes(self):
- self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
- self.assertEqual(self.pn.normalize('/foo/bar/'), '/a/b/')
-
- def test_handles_relative_paths(self):
- self.assertEqual(self.pn.normalize('foo/bar'), 'a/b')
- self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
-
- def test_handles_dot(self):
- self.assertEqual(self.pn.normalize('.'), '.')
-
- def test_handles_dotdot(self):
- self.assertEqual(self.pn.normalize('..'), '..')
-
- def test_splits_root(self):
- self.assertEqual(self.pn.split('/'), ['/'])
-
- def test_splits_absolute_path(self):
- self.assertEqual(self.pn.split('/foo/bar'), ['/', 'foo', 'bar'])
-
- def test_splits_trailing_slash(self):
- self.assertEqual(self.pn.split('/foo/'), ['/', 'foo', '/'])
- self.assertEqual(self.pn.split('/foo/bar/'), ['/', 'foo', 'bar', '/'])
-
- def test_splits_relative_path(self):
- self.assertEqual(self.pn.split('foo/bar'), ['foo', 'bar'])
-
- def test_normalizes_slash_to_itself(self):
- self.assertEqual(self.pn.normalize_part('/'), '/')
-
- def test_normalizes_first_part_to_a(self):
- self.assertEqual(self.pn.normalize_part('foo'), 'a')
-
- def test_normalizes_second_part_to_b(self):
- self.pn.normalize_part('foo')
- self.assertEqual(self.pn.normalize_part('bar'), 'b')
-
- def test_normalizes_same_part_twice_to_same_result(self):
- self.pn.normalize_part('foo')
- self.assertEqual(self.pn.normalize_part('foo'), 'a')
+ self.assertEqual(self.pn.normalize('/foo/bar'),
+ self.pn.normalize('/foo/bar'))