summary refs log tree commit diff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2015-04-18 19:05:47 +0300
committer Lars Wirzenius <liw@liw.fi> 2015-04-18 19:05:47 +0300
commit ad0b4f6096a37611a536243f13ff671fd6969a09 (patch)
tree 03f9521543d99c95d3a0f786d89fdfcc3469c81a
parent 692a0885b8f36233f88f1dfc5185b2bf9791cbf0 (diff)
download summain-ad0b4f6096a37611a536243f13ff671fd6969a09.tar.gz
Run pep8 in check; fix problems found
-rw-r--r-- Makefile 3
-rwxr-xr-x summain 57
-rw-r--r-- summainlib.py 51
-rw-r--r-- summainlib_tests.py 129
4 files changed, 125 insertions, 115 deletions
diff --git a/Makefile b/Makefile
index b04b82a..a1d3486 100644
--- a/Makefile
+++ b/Makefile
@@ -26,7 +26,8 @@ check:
python -m CoverageTestRunner
rm -f .coverage
cmdtest tests
-
+ pep8 summain summainlib.py summainlib_tests.py
+
clean:
python setup.py clean
rm -rf *.pyc .coverage summain.1 build _summain.so
diff --git a/summain b/summain
index fd0e077..aa8ae79 100755
--- a/summain
+++ b/summain
@@ -37,7 +37,7 @@ class OutputFormat(object):
def write(self):
for name, o in self.objects:
self.write_object(name, o)
-
+
def write_object(self, name, o):
raise NotImplemented()
@@ -74,7 +74,7 @@ class GeneratorList(list):
def __init__(self, gen):
self.gen = gen
-
+
def __len__(self):
return 1
@@ -92,7 +92,7 @@ class Json(OutputFormat):
def dictify(self, name, o):
keys = self.keys + self.checksums
- values = { 'Name': name }
+ values = {'Name': name}
for k in keys:
if o[k] != '':
values[k] = o[k]
@@ -103,23 +103,28 @@ class Json(OutputFormat):
class Summain(cliapp.Application):
def add_settings(self):
- self.settings.boolean(['relative-paths', 'r'],
- 'print paths relative to arguments')
- self.settings.boolean(['mangle-paths', 'm'],
- 'mangle (obfuscate) paths')
- self.settings.string(['secret'],
- 'use SECRET to make mangled paths unguessable')
- self.settings.string_list(['exclude'],
- 'do not output or compute FIELD',
- metavar='FIELD')
- self.settings.string_list(['checksum', 'c'],
- 'which checksums to compute: '
- 'MD5, SHA1, SHA224, SHA256, SHA384, SHA512; '
- 'use once per checksum type '
- '(default is SHA1)')
- self.settings.choice(['output-format', 'f'],
- ['rfc822', 'csv', 'json'],
- 'choose output format (rfc822, csv, json)')
+ self.settings.boolean(
+ ['relative-paths', 'r'],
+ 'print paths relative to arguments')
+ self.settings.boolean(
+ ['mangle-paths', 'm'],
+ 'mangle (obfuscate) paths')
+ self.settings.string(
+ ['secret'],
+ 'use SECRET to make mangled paths unguessable')
+ self.settings.string_list(
+ ['exclude'],
+ 'do not output or compute FIELD',
+ metavar='FIELD')
+ self.settings.string_list(
+ ['checksum', 'c'],
+ 'which checksums to compute: '
+ 'MD5, SHA1, SHA224, SHA256, SHA384, SHA512; '
+ 'use once per checksum type (default is SHA1)')
+ self.settings.choice(
+ ['output-format', 'f'],
+ ['rfc822', 'csv', 'json'],
+ 'choose output format (rfc822, csv, json)')
def files(self, root):
if os.path.isdir(root) and not os.path.islink(root):
@@ -136,7 +141,7 @@ class Summain(cliapp.Application):
yield root
def process_args(self, args):
- checksums = [x.upper()
+ checksums = [x.upper()
for x in self.settings['checksum'] or ['SHA1']]
fmt = self.new_formatter(checksums, self.find_roots(args))
fmt.write()
@@ -159,12 +164,12 @@ class Summain(cliapp.Application):
def relative_path(self, root, o):
'''Return a path that is relative to root, if possible.
-
+
If pathname does not start with root, then return it
unmodified.
-
+
'''
-
+
if root.endswith(os.sep):
root2 = root
else:
@@ -182,8 +187,8 @@ class Summain(cliapp.Application):
'csv': CSV,
'json': Json,
}
- return table[self.settings['output-format']](self.output, checksums,
- objects)
+ formatter = table[self.settings['output-format']]
+ return formatter(self.output, checksums, objects)
Summain(version=summainlib.__version__).run()
diff --git a/summainlib.py b/summainlib.py
index ce5d130..6466383 100644
--- a/summainlib.py
+++ b/summainlib.py
@@ -35,27 +35,27 @@ __version__ = '0.19'
class NumberNormalizer(object):
'''Normalize inode and device numbers.
-
+
When we make two manifests of the same directory tree, but the
tree may have been moved to another disk, the inode and device
numbers may be different. This should not be a cause for concern,
however. What is important is that if two names were hardlinked
to the same file before, they still are, and if they weren't,
they still aren't.
-
+
To achieve this, we normalize the inode and device numbers.
The input files are fed to the normalizer in a deterministic
sequence, and the sequence defines the numbers we use. Thus,
if the input files have inode numbers [42, 13, 105], we produce
[1, 2, 3]. If one of the input numbers is repeated, that number
is re-used.
-
+
This is not a perfect solution. If the second manifest has a
new file, it will throw off the entire remaining sequence, causing
a big diff. But we'll live with that.
-
+
'''
-
+
def __init__(self):
self.reset()
@@ -65,11 +65,11 @@ class NumberNormalizer(object):
else:
numbers[input_number] = next
return numbers[input_number], next + 1
-
+
def get_ino(self, ino):
output, self.next_ino = self.get(ino, self.ino_numbers, self.next_ino)
return output
-
+
def get_dev(self, dev):
output, self.next_dev = self.get(dev, self.dev_numbers, self.next_dev)
return output
@@ -86,20 +86,20 @@ class NumberNormalizer(object):
class PathNormalizer(object):
'''Normalize a filesystem path.
-
+
Paths are normalized by using SHA-1 on a secret plus the real path.
The checksum is the normalized path.
-
+
'''
-
+
def __init__(self, secret):
self._secret = secret
-
+
def normalize(self, path):
return hmac.new(self._secret, path).hexdigest()
-class SamePath(object): # pragma: no cover
+class SamePath(object): # pragma: no cover
def normalize(self, path):
return path
@@ -127,15 +127,15 @@ RESULT_CTIME_NSEC = 16
class FilesystemObject(object):
'''An object in the file system.
-
+
Responsible for gathering information and formatting it for
reporting.
-
+
The optional arguments are intended for unit tests.
-
+
'''
-
- def __init__(self, filename, nn, pn, exclude,
+
+ def __init__(self, filename, nn, pn, exclude,
stat_result=None, sha1=None, sha224=None,
sha256=None, sha384=None, sha512=None,
md5=None, open_file=None, readlink=None,
@@ -152,7 +152,7 @@ class FilesystemObject(object):
self._sha384 = sha384 or hashlib.sha384()
self._sha512 = sha512 or hashlib.sha512()
self._stat_result = stat_result or _summain.lstat(filename)
- self._xattrs = (xattrs if xattrs is not None
+ self._xattrs = (xattrs if xattrs is not None
else self.get_xattrs(filename))
self.open_file = open_file or file
self.readlink = readlink or os.readlink
@@ -162,7 +162,7 @@ class FilesystemObject(object):
if self.relative is None:
name = self.filename
else:
- name = self.relative # pragma: no cover
+ name = self.relative # pragma: no cover
return urllib.quote(self._pn.normalize(name))
def _compute_mtime(self):
@@ -219,7 +219,7 @@ class FilesystemObject(object):
if stat.S_ISLNK(self._stat_result[RESULT_MODE]):
return self.readlink(self.filename)
- def _compute_xattrs(self): # pragma: no cover
+ def _compute_xattrs(self): # pragma: no cover
if len(self._xattrs) == 0:
return ''
@@ -231,7 +231,7 @@ class FilesystemObject(object):
parts = [' %s=%s' % (k, quote(self._xattrs[k])) for k in self._xattrs]
return '\n' + '\n'.join(parts)
-
+
def format_time(self, secs, nsecs):
s = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(secs))
s += '.%09d' % nsecs
@@ -248,7 +248,7 @@ class FilesystemObject(object):
if stat.S_ISREG(self._stat_result[RESULT_MODE]):
with self.open_file(filename) as f:
while True:
- data = f.read(64*1024) # 64 KiB seems reasonable.
+ data = f.read(64*1024) # 64 KiB seems reasonable.
if not data:
break
checksummer.update(data)
@@ -260,7 +260,7 @@ class FilesystemObject(object):
key = key.lower()
key = '_'.join(key.split('-'))
return key
-
+
def __getitem__(self, key):
normalized = self._normalize_key(key)
if normalized in self._exclude:
@@ -275,11 +275,11 @@ class FilesystemObject(object):
raise KeyError(key)
return self.values.get(key, '')
- def isdir(self): # pragma: no cover
+ def isdir(self): # pragma: no cover
'''Is this a directory?'''
return stat.S_ISDIR(int(self['Mode'], 8))
- def get_xattrs(self, filename): # pragma: no cover
+ def get_xattrs(self, filename): # pragma: no cover
ret = _summain.llistxattr(filename)
if type(ret) is int:
# Some file types don't support xattr, e.g. named pipes on FreeBSD:
@@ -293,4 +293,3 @@ class FilesystemObject(object):
for name in names:
xattrs[name] = _summain.lgetxattr(filename, name)
return xattrs
-
diff --git a/summainlib_tests.py b/summainlib_tests.py
index e2a3c0f..d982aae 100644
--- a/summainlib_tests.py
+++ b/summainlib_tests.py
@@ -24,7 +24,7 @@ class FakeChecksummer(object):
def update(self, data):
pass
-
+
def hexdigest(self):
return 'abc'
@@ -39,13 +39,13 @@ class FakeOpenFile(object):
data = self.data[:amount]
self.data = self.data[len(data):]
return data
-
+
def close(self):
pass
def __enter__(self):
return self
-
+
def __exit__(self, a, b, c):
pass
@@ -67,16 +67,16 @@ class FilesystemObjectTests(unittest.TestCase):
def setUp(self):
self.st = {
- summainlib.RESULT_MTIME_SEC: 1262307723,
- summainlib.RESULT_MTIME_NSEC: 123456789,
- summainlib.RESULT_MODE: stat.S_IFREG | 0644,
- summainlib.RESULT_INO: 12765,
- summainlib.RESULT_DEV: 42,
- summainlib.RESULT_NLINK: 2,
- summainlib.RESULT_SIZE: 1,
- summainlib.RESULT_UID: 0,
- summainlib.RESULT_GID: 0
- }
+ summainlib.RESULT_MTIME_SEC: 1262307723,
+ summainlib.RESULT_MTIME_NSEC: 123456789,
+ summainlib.RESULT_MODE: stat.S_IFREG | 0644,
+ summainlib.RESULT_INO: 12765,
+ summainlib.RESULT_DEV: 42,
+ summainlib.RESULT_NLINK: 2,
+ summainlib.RESULT_SIZE: 1,
+ summainlib.RESULT_UID: 0,
+ summainlib.RESULT_GID: 0
+ }
self.nn = summainlib.NumberNormalizer()
self.pn = summainlib.SamePath()
@@ -85,31 +85,34 @@ class FilesystemObjectTests(unittest.TestCase):
def new(self, name, mode=None):
if mode is not None:
self.st[summainlib.RESULT_MODE] = mode
- return summainlib.FilesystemObject(name, self.nn, self.pn,
- self.exclude,
- stat_result=self.st,
- sha1=FakeChecksummer(),
- sha224=FakeChecksummer(),
- sha256=FakeChecksummer(),
- sha384=FakeChecksummer(),
- sha512=FakeChecksummer(),
- md5=FakeChecksummer(),
- open_file=FakeOpenFile(),
- readlink=FakeReadlink(self),
- xattrs={})
+ return summainlib.FilesystemObject(
+ name,
+ self.nn,
+ self.pn,
+ self.exclude,
+ stat_result=self.st,
+ sha1=FakeChecksummer(),
+ sha224=FakeChecksummer(),
+ sha256=FakeChecksummer(),
+ sha384=FakeChecksummer(),
+ sha512=FakeChecksummer(),
+ md5=FakeChecksummer(),
+ open_file=FakeOpenFile(),
+ readlink=FakeReadlink(self),
+ xattrs={})
def test_raises_keyerror_for_unknown_field(self):
self.assertRaises(KeyError, self.new('foo').__getitem__,
'UNKNOWNHASH')
-
+
def test_formats_simple_name_identically(self):
self.assertEqual(self.new('foo')['Name'], 'foo')
-
+
def test_formats_space_correctly(self):
self.assertEqual(self.new('foo bar')['Name'], 'foo%20bar')
-
+
def test_formats_mtime_correctly(self):
- self.assertEqual(self.new('foo')['Mtime'],
+ self.assertEqual(self.new('foo')['Mtime'],
'2010-01-01 01:02:03.123456789 +0000')
def test_formats_mode_for_regular_file_correctly(self):
@@ -180,36 +183,39 @@ class FilesystemObjectNormalizedNumbersTests(unittest.TestCase):
self.pn = summainlib.SamePath()
self.exclude = []
self.checksums = ['SHA1']
-
+
def reset(self):
self.dev += 1
self.nn.reset()
def new(self, name):
st = {
- summainlib.RESULT_INO: self.ino,
- summainlib.RESULT_DEV: self.dev,
- summainlib.RESULT_MTIME_SEC: 0,
- summainlib.RESULT_MTIME_NSEC: 0,
- summainlib.RESULT_MODE: stat.S_IFREG|0,
- summainlib.RESULT_NLINK: 1,
- summainlib.RESULT_SIZE: 0,
- summainlib.RESULT_UID: 0,
- summainlib.RESULT_GID: 0
- }
+ summainlib.RESULT_INO: self.ino,
+ summainlib.RESULT_DEV: self.dev,
+ summainlib.RESULT_MTIME_SEC: 0,
+ summainlib.RESULT_MTIME_NSEC: 0,
+ summainlib.RESULT_MODE: stat.S_IFREG | 0,
+ summainlib.RESULT_NLINK: 1,
+ summainlib.RESULT_SIZE: 0,
+ summainlib.RESULT_UID: 0,
+ summainlib.RESULT_GID: 0
+ }
self.ino += 1
- return summainlib.FilesystemObject(name, self.nn, self.pn,
- self.exclude,
- stat_result=st,
- sha1=FakeChecksummer(),
- sha224=FakeChecksummer(),
- sha256=FakeChecksummer(),
- sha384=FakeChecksummer(),
- sha512=FakeChecksummer(),
- md5=FakeChecksummer(),
- open_file=FakeOpenFile(),
- readlink=FakeReadlink(self),
- xattrs={})
+ return summainlib.FilesystemObject(
+ name,
+ self.nn,
+ self.pn,
+ self.exclude,
+ stat_result=st,
+ sha1=FakeChecksummer(),
+ sha224=FakeChecksummer(),
+ sha256=FakeChecksummer(),
+ sha384=FakeChecksummer(),
+ sha512=FakeChecksummer(),
+ md5=FakeChecksummer(),
+ open_file=FakeOpenFile(),
+ readlink=FakeReadlink(self),
+ xattrs={})
def test_inode_numbers_are_repeatable(self):
a1 = self.new('foo')
@@ -226,32 +232,31 @@ class FilesystemObjectNormalizedNumbersTests(unittest.TestCase):
self.assertEqual(a1['Ino'], b1['Ino'])
self.assertEqual(a2['Dev'], b2['Dev'])
self.assertEqual(a2['Ino'], b2['Ino'])
-
-
+
+
class NumberNormalizerTests(unittest.TestCase):
def setUp(self):
self.nn = summainlib.NumberNormalizer()
-
+
def test_returns_1_2_3_regardless_of_input_numbers(self):
self.assertEqual([self.nn.get_ino(i) for i in [10, 11, 12]],
[1, 2, 3])
-
+
def test_returns_1_1_1_when_input_number_is_repeated(self):
self.assertEqual([self.nn.get_ino(i) for i in [10, 10, 10]],
[1, 1, 1])
-
-
+
+
class PathNormalizerTests(unittest.TestCase):
def setUp(self):
self.pn = summainlib.PathNormalizer('secret')
-
+
def test_returns_different_paths_for_different_inputs(self):
- self.assertNotEqual(self.pn.normalize('/foo/bar'),
+ self.assertNotEqual(self.pn.normalize('/foo/bar'),
self.pn.normalize('/ping/pong'))
-
+
def test_returns_same_paths_for_same_input(self):
self.assertEqual(self.pn.normalize('/foo/bar'),
self.pn.normalize('/foo/bar'))
-