summaryrefslogtreecommitdiff
path: root/summainlib.py
diff options
context:
space:
mode:
author: Lars Wirzenius <liw@liw.fi> 2010-01-01 18:53:55 +0200
committer: Lars Wirzenius <liw@liw.fi> 2010-01-01 18:53:55 +0200
commit: 29183aa2d6b48a93ee380d37f10773ac51308305 (patch)
tree: 915214f575092b0159fe8566c01c7367d8d32ffe /summainlib.py
parent: c1a0664e2fe5210293ae3fb13151ce1f32855339 (diff)
download: summain-29183aa2d6b48a93ee380d37f10773ac51308305.tar.gz
Normalize inode and device numbers. This is necessary so that
if a directory tree is moved to a new disk, the manifest doesn't change.
Diffstat (limited to 'summainlib.py')
-rw-r--r-- summainlib.py 57
1 files changed, 54 insertions, 3 deletions
diff --git a/summainlib.py b/summainlib.py
index 29ae21e..31b42a7 100644
--- a/summainlib.py
+++ b/summainlib.py
@@ -23,6 +23,57 @@ import time
import urllib
+class NumberNormalizer(object):
+
+ '''Normalize inode and device numbers.
+
+ When we make two manifests of the same directory tree, but the
+ tree may have been moved to another disk, the inode and device
+ numbers may be different. This should not be a cause for concern,
+ however. What is important is that if two names were hardlinked
+ to the same file before, they still are, and if they weren't,
+ they still aren't.
+
+ To achieve this, we normalize the inode and device numbers.
+ The input files are fed to the normalizer in a deterministic
+ sequence, and the sequence defines the numbers we use. Thus,
+ if the input files have inode numbers [42, 13, 105], we produce
+ [1, 2, 3]. If one of the input numbers is repeated, that number
+ is re-used.
+
+ This is not a perfect solution. If the second manifest has a
+ new file, it will throw off the entire remaining sequence, causing
+ a big diff. But we'll live with that.
+
+ '''
+
+ def __init__(self):
+ self.reset()
+
+ def get(self, input_number, numbers, next):
+ if input_number in numbers:
+ return numbers[input_number], next
+ else:
+ numbers[input_number] = next
+ return numbers[input_number], next + 1
+
+ def get_ino(self, ino):
+ output, self.next_ino = self.get(ino, self.ino_numbers, self.next_ino)
+ return output
+
+ def get_dev(self, dev):
+ output, self.next_dev = self.get(dev, self.dev_numbers, self.next_dev)
+ return output
+
+ def reset(self):
+ '''This is used by unit tests.'''
+ self.ino_numbers = dict()
+ self.next_ino = 1
+
+ self.dev_numbers = dict()
+ self.next_dev = 1
+
+
class FilesystemObject(object):
'''An object in the file system.
@@ -34,7 +85,7 @@ class FilesystemObject(object):
'''
- def __init__(self, filename, stat_result=None, sha1=None,
+ def __init__(self, filename, normalizer, stat_result=None, sha1=None,
open_file=None, readlink=None):
stat_result = stat_result or os.lstat(filename)
sha1 = sha1 or hashlib.sha1()
@@ -45,8 +96,8 @@ class FilesystemObject(object):
self['Name'] = filename
self['Mtime'] = self.format_time(stat_result.st_mtime)
self['Mode'] = '%o' % stat_result.st_mode
- self['Ino'] = '%d' % stat_result.st_ino
- self['Dev'] = '%d' % stat_result.st_dev
+ self['Ino'] = '%d' % normalizer.get_ino(stat_result.st_ino)
+ self['Dev'] = '%d' % normalizer.get_dev(stat_result.st_dev)
self['Nlink'] = '%d' % stat_result.st_nlink
self['Size'] = '%d' % stat_result.st_size
self['Uid'] = '%d' % stat_result.st_uid