diff options
author | Lars Wirzenius <liw@liw.fi> | 2010-01-01 18:53:55 +0200 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2010-01-01 18:53:55 +0200 |
commit | 29183aa2d6b48a93ee380d37f10773ac51308305 (patch) | |
tree | 915214f575092b0159fe8566c01c7367d8d32ffe /summainlib.py | |
parent | c1a0664e2fe5210293ae3fb13151ce1f32855339 (diff) | |
download | summain-29183aa2d6b48a93ee380d37f10773ac51308305.tar.gz |
Normalize inode and device numbers. This is necessary so that
if a directory tree is moved to a new disk, the manifest doesn't
change.
Diffstat (limited to 'summainlib.py')
-rw-r--r-- | summainlib.py | 57 |
1 files changed, 54 insertions, 3 deletions
diff --git a/summainlib.py b/summainlib.py index 29ae21e..31b42a7 100644 --- a/summainlib.py +++ b/summainlib.py @@ -23,6 +23,57 @@ import time import urllib +class NumberNormalizer(object): + + '''Normalize inode and device numbers. + + When we make two manifests of the same directory tree, but the + tree may have been moved to another disk, the inode and device + numbers may be different. This should not be a cause for concern, + however. What is important is that if two names were hardlinked + to the same file before, they still are, and if they weren't, + they still aren't. + + To achieve this, we normalize the inode and device numbers. + The input files are fed to the normalizer in a deterministic + sequence, and the sequence defines the numbers we use. Thus, + if the input files have inode numbers [42, 13, 105], we produce + [1, 2, 3]. If one of the input numbers is repeated, that number + is re-used. + + This is not a perfect solution. If the second manifest has a + new file, it will throw off the entire remaining sequence, causing + a big diff. But we'll live with that. + + ''' + + def __init__(self): + self.reset() + + def get(self, input_number, numbers, next): + if input_number in numbers: + return numbers[input_number], next + else: + numbers[input_number] = next + return numbers[input_number], next + 1 + + def get_ino(self, ino): + output, self.next_ino = self.get(ino, self.ino_numbers, self.next_ino) + return output + + def get_dev(self, dev): + output, self.next_dev = self.get(dev, self.dev_numbers, self.next_dev) + return output + + def reset(self): + '''This is used by unit tests.''' + self.ino_numbers = dict() + self.next_ino = 1 + + self.dev_numbers = dict() + self.next_dev = 1 + + class FilesystemObject(object): '''An object in the file system. 
@@ -34,7 +85,7 @@ class FilesystemObject(object): ''' - def __init__(self, filename, stat_result=None, sha1=None, + def __init__(self, filename, normalizer, stat_result=None, sha1=None, open_file=None, readlink=None): stat_result = stat_result or os.lstat(filename) sha1 = sha1 or hashlib.sha1() @@ -45,8 +96,8 @@ class FilesystemObject(object): self['Name'] = filename self['Mtime'] = self.format_time(stat_result.st_mtime) self['Mode'] = '%o' % stat_result.st_mode - self['Ino'] = '%d' % stat_result.st_ino - self['Dev'] = '%d' % stat_result.st_dev + self['Ino'] = '%d' % normalizer.get_ino(stat_result.st_ino) + self['Dev'] = '%d' % normalizer.get_dev(stat_result.st_dev) self['Nlink'] = '%d' % stat_result.st_nlink self['Size'] = '%d' % stat_result.st_size self['Uid'] = '%d' % stat_result.st_uid |