diff options
author | Lars Wirzenius <liw@liw.fi> | 2010-01-01 18:53:55 +0200 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2010-01-01 18:53:55 +0200 |
commit | 29183aa2d6b48a93ee380d37f10773ac51308305 (patch) | |
tree | 915214f575092b0159fe8566c01c7367d8d32ffe | |
parent | c1a0664e2fe5210293ae3fb13151ce1f32855339 (diff) | |
download | summain-29183aa2d6b48a93ee380d37f10773ac51308305.tar.gz |
Normalize inode and device numbers. This is necessary so that
if a directory tree is moved to a new disk, the manifest does not
change just because the inode and device numbers are different.
-rw-r--r-- | summainlib.py | 57 | ||||
-rw-r--r-- | summainlib_tests.py | 57 |
2 files changed, 108 insertions, 6 deletions
diff --git a/summainlib.py b/summainlib.py index 29ae21e..31b42a7 100644 --- a/summainlib.py +++ b/summainlib.py @@ -23,6 +23,57 @@ import time import urllib +class NumberNormalizer(object): + + '''Normalize inode and device numbers. + + When we make two manifests of the same directory tree, but the + tree may have been moved to another disk, the inode and device + numbers may be different. This should not be a cause for concern, + however. What is important is that if two names were hardlinked + to the same file before, they still are, and if they weren't, + they still aren't. + + To achieve this, we normalize the inode and device numbers. + The input files are fed to the normalizer in a deterministic + sequence, and the sequence defines the numbers we use. Thus, + if the input files have inode numbers [42, 13, 105], we produce + [1, 2, 3]. If one of the input numbers is repeated, that number + is re-used. + + This is not a perfect solution. If the second manifest has a + new file, it will throw off the entire remaining sequence, causing + a big diff. But we'll live with that. + + ''' + + def __init__(self): + self.reset() + + def get(self, input_number, numbers, next): + if input_number in numbers: + return numbers[input_number], next + else: + numbers[input_number] = next + return numbers[input_number], next + 1 + + def get_ino(self, ino): + output, self.next_ino = self.get(ino, self.ino_numbers, self.next_ino) + return output + + def get_dev(self, dev): + output, self.next_dev = self.get(dev, self.dev_numbers, self.next_dev) + return output + + def reset(self): + '''This is used by unit tests.''' + self.ino_numbers = dict() + self.next_ino = 1 + + self.dev_numbers = dict() + self.next_dev = 1 + + class FilesystemObject(object): '''An object in the file system. 
@@ -34,7 +85,7 @@ class FilesystemObject(object): ''' - def __init__(self, filename, stat_result=None, sha1=None, + def __init__(self, filename, normalizer, stat_result=None, sha1=None, open_file=None, readlink=None): stat_result = stat_result or os.lstat(filename) sha1 = sha1 or hashlib.sha1() @@ -45,8 +96,8 @@ class FilesystemObject(object): self['Name'] = filename self['Mtime'] = self.format_time(stat_result.st_mtime) self['Mode'] = '%o' % stat_result.st_mode - self['Ino'] = '%d' % stat_result.st_ino - self['Dev'] = '%d' % stat_result.st_dev + self['Ino'] = '%d' % normalizer.get_ino(stat_result.st_ino) + self['Dev'] = '%d' % normalizer.get_dev(stat_result.st_dev) self['Nlink'] = '%d' % stat_result.st_nlink self['Size'] = '%d' % stat_result.st_size self['Uid'] = '%d' % stat_result.st_uid diff --git a/summainlib_tests.py b/summainlib_tests.py index 8d07dca..8068b53 100644 --- a/summainlib_tests.py +++ b/summainlib_tests.py @@ -76,8 +76,11 @@ class FilesystemObjectTests(unittest.TestCase): st_uid=0, st_gid=0) + self.nn = summainlib.NumberNormalizer() + def new(self, name): - return summainlib.FilesystemObject(name, stat_result=self.st, + return summainlib.FilesystemObject(name, self.nn, + stat_result=self.st, sha1=FakeSha1(), open_file=FakeOpenFile(), readlink=FakeReadlink(self)) @@ -96,10 +99,12 @@ class FilesystemObjectTests(unittest.TestCase): self.assertEqual(self.new('foo')['Mode'], '100644') def test_formats_inode_number_correctly(self): - self.assertEqual(self.new('foo')['Ino'], '12765') + # Note: normalization makes the result be 1. + self.assertEqual(self.new('foo')['Ino'], '1') def test_formats_device_number_correctly(self): - self.assertEqual(self.new('foo')['Dev'], '42') + # Note: normalization makes the result be 1. 
+ self.assertEqual(self.new('foo')['Dev'], '1') def test_formats_link_count_correctly(self): self.assertEqual(self.new('foo')['Nlink'], '2') @@ -133,3 +138,49 @@ class FilesystemObjectTests(unittest.TestCase): def test_formats_target_correctly_for_regular_file(self): self.assertEqual(self.new('foo')['Target'], '') + +class FilesystemObjectNormalizedNumbersTests(unittest.TestCase): + + def setUp(self): + self.ino = 0 + self.dev = 0 + self.nn = summainlib.NumberNormalizer() + + def reset(self): + self.dev += 1 + self.nn.reset() + + def new(self, name): + st = FakeStatResult(st_ino=self.ino, st_dev=self.dev, st_mtime=0, + st_mode=stat.S_IFREG|0, st_nlink=1, st_size=0, + st_uid=0, st_gid=0) + self.ino += 1 + return summainlib.FilesystemObject(name, self.nn, stat_result=st, + sha1=FakeSha1(), + open_file=FakeOpenFile(), + readlink=FakeReadlink(self)) + + def test_inode_numbers_are_repeatable(self): + a1 = self.new('foo') + a2 = self.new('bar') + self.reset() + b1 = self.new('foo') + b2 = self.new('bar') + self.assertEqual(a1['Dev'], b1['Dev']) + self.assertEqual(a1['Ino'], b1['Ino']) + self.assertEqual(a2['Dev'], b2['Dev']) + self.assertEqual(a2['Ino'], b2['Ino']) + + +class NumberNormalizerTests(unittest.TestCase): + + def setUp(self): + self.nn = summainlib.NumberNormalizer() + + def test_returns_1_2_3_regardless_of_input_numbers(self): + self.assertEqual([self.nn.get_ino(i) for i in [10, 11, 12]], + [1, 2, 3]) + + def test_returns_1_1_1_when_input_number_is_repeated(self): + self.assertEqual([self.nn.get_ino(i) for i in [10, 10, 10]], + [1, 1, 1]) |