summary refs log tree commit diff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2011-06-12 14:45:29 +0100
committer Lars Wirzenius <liw@liw.fi> 2011-06-12 14:45:29 +0100
commit 24303bb1e7575d9c629ce3acc65b12a3b91a151c (patch)
tree 74d1c4d4a1ddc3c9d4174d86cb96bd90e7800b76
parent dd5350490b68a2119d02568fc5cf813377daddce (diff)
parent cb99a3a420e6abc42a9104895b1b4bd60d9faca1 (diff)
download summain-24303bb1e7575d9c629ce3acc65b12a3b91a151c.tar.gz
Mangle filenames using hashing.
-rwxr-xr-x summain 6
-rw-r--r-- summainlib.py 60
-rw-r--r-- summainlib_tests.py 56
3 files changed, 15 insertions, 107 deletions
diff --git a/summain b/summain
index 6931ae6..7082594 100755
--- a/summain
+++ b/summain
@@ -29,6 +29,8 @@ class Summain(cliapp.Application):
'print paths relative to arguments')
self.settings.boolean(['mangle-paths', 'm'],
'mangle (obfuscate) paths')
+ self.settings.string(['secret'],
+ 'use SECRET to make mangled paths unguessable')
self.settings.string_list(['exclude'],
'do not output or compute FIELD',
metavar='FIELD')
@@ -42,7 +44,7 @@ class Summain(cliapp.Application):
if os.path.isdir(root):
for dirname, dirnames, filenames in os.walk(root):
yield dirname
- dirname.sort()
+ dirnames.sort()
for filename in sorted(filenames):
yield os.path.join(dirname, filename)
else:
@@ -53,7 +55,7 @@ class Summain(cliapp.Application):
exclude = self.settings['exclude']
nn = summainlib.NumberNormalizer()
if self.settings['mangle-paths']:
- pn = summainlib.PathNormalizer()
+ pn = summainlib.PathNormalizer(self.settings['secret'])
else:
pn = summainlib.SamePath()
checksums = [x.upper()
diff --git a/summainlib.py b/summainlib.py
index 41ff5ce..3de9517 100644
--- a/summainlib.py
+++ b/summainlib.py
@@ -16,6 +16,7 @@
import grp
import hashlib
+import hmac
import math
import os
import pwd
@@ -82,65 +83,16 @@ class PathNormalizer(object):
'''Normalize a filesystem path.
- For every input path, a new output path is given. The same output
- path is given every time the same input path is given. The output
- path has the same number of elements as the input path, and if
- /foo/bar results in /a/b, then /foo/yo results in /a/b, i.e., the
- tree structure is the same.
+ Paths are normalized by using SHA-1 on a secret plus the real path.
+ The checksum is the normalized path.
'''
- def __init__(self):
- self._counter = 0
- self._dict = dict()
- self._dict[os.sep] = os.sep
- self._dict['.'] = '.'
- self._dict['..'] = '..'
-
- def split(self, path):
- if path == os.sep:
- return [os.sep]
- parts = path.split(os.sep)
- if parts:
- if parts[0] == '':
- parts[0] = os.sep
- if parts[-1] == '':
- parts[-1] = os.sep
- return parts
-
- def _base26(self, n):
- if n == 0:
- digits = [0]
- else:
- digits = []
- while n > 0:
- digits.append(n % 26)
- n /= 26
-
- letters = 'abcdefghijklmnopqrstuvwxyz'
- assert len(letters) == 26
- return ''.join(letters[x] for x in reversed(digits))
-
- def normalize_part(self, part):
- if part not in self._dict:
- self._dict[part] = self._base26(self._counter)
- self._counter += 1
- return self._dict[part]
+ def __init__(self, secret):
+ self._secret = secret
def normalize(self, path):
- parts = self.split(path)
- normalized = [self.normalize_part(x) for x in parts]
- result = ''
- for x in normalized:
- if not result:
- result = x
- elif x == os.sep:
- result += x
- elif result.endswith(os.sep):
- result += x
- else:
- result += os.sep + x
- return result
+ return hmac.new(self._secret, path).hexdigest()
class SamePath(object): # pragma: no cover
diff --git a/summainlib_tests.py b/summainlib_tests.py
index 085dd2b..6015541 100644
--- a/summainlib_tests.py
+++ b/summainlib_tests.py
@@ -269,59 +269,13 @@ class NumberNormalizerTests(unittest.TestCase):
class PathNormalizerTests(unittest.TestCase):
def setUp(self):
- self.pn = summainlib.PathNormalizer()
+ self.pn = summainlib.PathNormalizer('secret')
def test_returns_different_paths_for_different_inputs(self):
- self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
- self.assertEqual(self.pn.normalize('/ping/pong'), '/c/d')
+ self.assertNotEqual(self.pn.normalize('/foo/bar'),
+ self.pn.normalize('/ping/pong'))
def test_returns_same_paths_for_same_input(self):
- self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
- self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
-
- def test_returns_same_parent_path_for_siblings(self):
- self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
- self.assertEqual(self.pn.normalize('/foo/yo'), '/a/c')
- self.assertEqual(self.pn.normalize('/foo'), '/a')
-
- def test_handles_trailing_slashes(self):
- self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
- self.assertEqual(self.pn.normalize('/foo/bar/'), '/a/b/')
-
- def test_handles_relative_paths(self):
- self.assertEqual(self.pn.normalize('foo/bar'), 'a/b')
- self.assertEqual(self.pn.normalize('/foo/bar'), '/a/b')
-
- def test_handles_dot(self):
- self.assertEqual(self.pn.normalize('.'), '.')
-
- def test_handles_dotdot(self):
- self.assertEqual(self.pn.normalize('..'), '..')
-
- def test_splits_root(self):
- self.assertEqual(self.pn.split('/'), ['/'])
-
- def test_splits_absolute_path(self):
- self.assertEqual(self.pn.split('/foo/bar'), ['/', 'foo', 'bar'])
-
- def test_splits_trailing_slash(self):
- self.assertEqual(self.pn.split('/foo/'), ['/', 'foo', '/'])
- self.assertEqual(self.pn.split('/foo/bar/'), ['/', 'foo', 'bar', '/'])
-
- def test_splits_relative_path(self):
- self.assertEqual(self.pn.split('foo/bar'), ['foo', 'bar'])
-
- def test_normalizes_slash_to_itself(self):
- self.assertEqual(self.pn.normalize_part('/'), '/')
-
- def test_normalizes_first_part_to_a(self):
- self.assertEqual(self.pn.normalize_part('foo'), 'a')
-
- def test_normalizes_second_part_to_b(self):
- self.pn.normalize_part('foo')
- self.assertEqual(self.pn.normalize_part('bar'), 'b')
-
- def test_normalizes_same_part_twice_to_same_result(self):
- self.pn.normalize_part('foo')
- self.assertEqual(self.pn.normalize_part('foo'), 'a')
+ self.assertEqual(self.pn.normalize('/foo/bar'),
+ self.pn.normalize('/foo/bar'))