summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2010-01-01 18:53:55 +0200
committerLars Wirzenius <liw@liw.fi>2010-01-01 18:53:55 +0200
commit29183aa2d6b48a93ee380d37f10773ac51308305 (patch)
tree915214f575092b0159fe8566c01c7367d8d32ffe
parentc1a0664e2fe5210293ae3fb13151ce1f32855339 (diff)
downloadsummain-29183aa2d6b48a93ee380d37f10773ac51308305.tar.gz
Normalize inode and device numbers. This is necessary so that
if a directory tree is moved to a new disk, the manifest doesn't change.
-rw-r--r--summainlib.py57
-rw-r--r--summainlib_tests.py57
2 files changed, 108 insertions, 6 deletions
diff --git a/summainlib.py b/summainlib.py
index 29ae21e..31b42a7 100644
--- a/summainlib.py
+++ b/summainlib.py
@@ -23,6 +23,57 @@ import time
import urllib
class NumberNormalizer(object):

    '''Normalize inode and device numbers.

    When we make two manifests of the same directory tree, but the
    tree may have been moved to another disk, the inode and device
    numbers may be different. This should not be a cause for concern,
    however. What is important is that if two names were hardlinked
    to the same file before, they still are, and if they weren't,
    they still aren't.

    To achieve this, we normalize the inode and device numbers.
    The input files are fed to the normalizer in a deterministic
    sequence, and the sequence defines the numbers we use. Thus,
    if the input files have inode numbers [42, 13, 105], we produce
    [1, 2, 3]. If one of the input numbers is repeated, that number
    is re-used.

    Inode numbers and device numbers are tracked in two separate
    tables, each with its own counter starting at 1.

    This is not a perfect solution. If the second manifest has a
    new file, it will throw off the entire remaining sequence, causing
    a big diff. But we'll live with that.

    '''

    def __init__(self):
        self.reset()

    def get(self, input_number, numbers, next):
        '''Return the normalized value for input_number.

        numbers maps real numbers to normalized ones; it is updated
        in place when input_number has not been seen before. next is
        the value to give to the next unseen number. (The parameter
        name shadows the builtin; it is kept so existing callers
        continue to work.)

        Return a tuple of the normalized number and the value the
        caller should pass as next on the following call.

        '''

        if input_number in numbers:
            # Seen before: re-use the old value, counter unchanged.
            return numbers[input_number], next

        numbers[input_number] = next
        return next, next + 1

    def get_ino(self, ino):
        '''Return the normalized inode number for ino.'''
        output, self.next_ino = self.get(ino, self.ino_numbers,
                                         self.next_ino)
        return output

    def get_dev(self, dev):
        '''Return the normalized device number for dev.'''
        output, self.next_dev = self.get(dev, self.dev_numbers,
                                         self.next_dev)
        return output

    def reset(self):
        '''Forget all numbers seen so far and restart counting at 1.

        This is called by the constructor, and is also used by unit
        tests.

        '''

        self.ino_numbers = dict()
        self.next_ino = 1

        self.dev_numbers = dict()
        self.next_dev = 1
+
class FilesystemObject(object):
'''An object in the file system.
@@ -34,7 +85,7 @@ class FilesystemObject(object):
'''
- def __init__(self, filename, stat_result=None, sha1=None,
+ def __init__(self, filename, normalizer, stat_result=None, sha1=None,
open_file=None, readlink=None):
stat_result = stat_result or os.lstat(filename)
sha1 = sha1 or hashlib.sha1()
@@ -45,8 +96,8 @@ class FilesystemObject(object):
self['Name'] = filename
self['Mtime'] = self.format_time(stat_result.st_mtime)
self['Mode'] = '%o' % stat_result.st_mode
- self['Ino'] = '%d' % stat_result.st_ino
- self['Dev'] = '%d' % stat_result.st_dev
+ self['Ino'] = '%d' % normalizer.get_ino(stat_result.st_ino)
+ self['Dev'] = '%d' % normalizer.get_dev(stat_result.st_dev)
self['Nlink'] = '%d' % stat_result.st_nlink
self['Size'] = '%d' % stat_result.st_size
self['Uid'] = '%d' % stat_result.st_uid
diff --git a/summainlib_tests.py b/summainlib_tests.py
index 8d07dca..8068b53 100644
--- a/summainlib_tests.py
+++ b/summainlib_tests.py
@@ -76,8 +76,11 @@ class FilesystemObjectTests(unittest.TestCase):
st_uid=0,
st_gid=0)
+ self.nn = summainlib.NumberNormalizer()
+
def new(self, name):
- return summainlib.FilesystemObject(name, stat_result=self.st,
+ return summainlib.FilesystemObject(name, self.nn,
+ stat_result=self.st,
sha1=FakeSha1(),
open_file=FakeOpenFile(),
readlink=FakeReadlink(self))
@@ -96,10 +99,12 @@ class FilesystemObjectTests(unittest.TestCase):
self.assertEqual(self.new('foo')['Mode'], '100644')
def test_formats_inode_number_correctly(self):
- self.assertEqual(self.new('foo')['Ino'], '12765')
+ # Note: normalization makes the result be 1.
+ self.assertEqual(self.new('foo')['Ino'], '1')
def test_formats_device_number_correctly(self):
- self.assertEqual(self.new('foo')['Dev'], '42')
+ # Note: normalization makes the result be 1.
+ self.assertEqual(self.new('foo')['Dev'], '1')
def test_formats_link_count_correctly(self):
self.assertEqual(self.new('foo')['Nlink'], '2')
@@ -133,3 +138,49 @@ class FilesystemObjectTests(unittest.TestCase):
def test_formats_target_correctly_for_regular_file(self):
self.assertEqual(self.new('foo')['Target'], '')
+
class FilesystemObjectNormalizedNumbersTests(unittest.TestCase):

    '''Check that FilesystemObject reports normalized numbers.

    Each object created by new() gets a fresh real inode number, and
    reset() changes the real device number, so the real numbers differ
    between the two passes made by the test.

    '''

    def setUp(self):
        self.ino = 0
        self.dev = 0
        self.nn = summainlib.NumberNormalizer()

    def reset(self):
        '''Start a second pass: new real device, fresh normalizer.

        self.ino is deliberately not rewound, so the second pass also
        sees different real inode numbers.

        '''
        self.dev += 1
        self.nn.reset()

    def new(self, name):
        '''Return a FilesystemObject for name with a unique real inode.'''
        st = FakeStatResult(st_ino=self.ino, st_dev=self.dev, st_mtime=0,
                            st_mode=stat.S_IFREG|0, st_nlink=1, st_size=0,
                            st_uid=0, st_gid=0)
        self.ino += 1
        return summainlib.FilesystemObject(name, self.nn, stat_result=st,
                                           sha1=FakeSha1(),
                                           open_file=FakeOpenFile(),
                                           readlink=FakeReadlink(self))

    def test_inode_numbers_are_repeatable(self):
        # Despite the name, this checks both the device and the inode
        # fields of each object.
        a1 = self.new('foo')
        a2 = self.new('bar')
        self.reset()
        b1 = self.new('foo')
        b2 = self.new('bar')
        self.assertEqual(a1['Dev'], b1['Dev'])
        self.assertEqual(a1['Ino'], b1['Ino'])
        self.assertEqual(a2['Dev'], b2['Dev'])
        self.assertEqual(a2['Ino'], b2['Ino'])
+
+
class NumberNormalizerTests(unittest.TestCase):

    '''Unit tests for summainlib.NumberNormalizer.'''

    def setUp(self):
        self.nn = summainlib.NumberNormalizer()

    def test_returns_1_2_3_regardless_of_input_numbers(self):
        self.assertEqual([self.nn.get_ino(i) for i in [10, 11, 12]],
                         [1, 2, 3])

    def test_returns_1_1_1_when_input_number_is_repeated(self):
        self.assertEqual([self.nn.get_ino(i) for i in [10, 10, 10]],
                         [1, 1, 1])

    def test_normalizes_device_numbers_in_order_of_first_use(self):
        self.assertEqual([self.nn.get_dev(d) for d in [20, 20, 21]],
                         [1, 1, 2])

    def test_counts_inode_and_device_numbers_separately(self):
        # The same real number is looked up in two separate tables,
        # so using it as a device must not consume an inode number.
        self.assertEqual(self.nn.get_ino(10), 1)
        self.assertEqual(self.nn.get_dev(10), 1)
        self.assertEqual(self.nn.get_ino(11), 2)
        self.assertEqual(self.nn.get_dev(11), 2)

    def test_reset_restarts_numbering_from_1(self):
        self.nn.get_ino(10)
        self.nn.get_ino(11)
        self.nn.get_dev(20)
        self.nn.get_dev(21)
        self.nn.reset()
        self.assertEqual(self.nn.get_ino(11), 1)
        self.assertEqual(self.nn.get_dev(21), 1)