# Copyright (C) 2010, 2011  Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import grp
import hashlib
import os
import pwd
import stat
import time
import urllib

# urllib.quote moved to urllib.parse.quote in Python 3; support both.
try:
    from urllib import quote as _quote
except ImportError:
    from urllib.parse import quote as _quote


# Version string of this module.
version = '0.5'


def _open_binary(filename):
    '''Open filename for reading as raw bytes.

    Checksums must be computed over the exact bytes on disk, so the file
    is opened in binary mode.

    '''

    return open(filename, 'rb')


class NumberNormalizer(object):

    '''Normalize inode and device numbers.

    When we make two manifests of the same directory tree, but the
    tree may have been moved to another disk, the inode and device
    numbers may be different. This should not be a cause for concern,
    however. What is important is that if two names were hardlinked
    to the same file before, they still are, and if they weren't,
    they still aren't.

    To achieve this, we normalize the inode and device numbers.
    The input files are fed to the normalizer in a deterministic
    sequence, and the sequence defines the numbers we use. Thus,
    if the input files have inode numbers [42, 13, 105], we produce
    [1, 2, 3]. If one of the input numbers is repeated, that number
    is re-used.

    This is not a perfect solution. If the second manifest has a new
    file, it will throw off the entire remaining sequence, causing a
    big diff. But we'll live with that.

    '''

    def __init__(self):
        self.reset()

    def get(self, input_number, numbers, next):
        '''Map input_number to its normalized value.

        numbers is the mapping of already seen input numbers to their
        normalized values; it is updated in place when input_number is
        new. next is the value to hand out to the next unseen number.

        Returns a tuple (normalized number, updated next value).

        '''

        if input_number not in numbers:
            numbers[input_number] = next
            next += 1
        return numbers[input_number], next

    def get_ino(self, ino):
        '''Return the normalized inode number for ino.'''
        output, self.next_ino = self.get(ino, self.ino_numbers,
                                         self.next_ino)
        return output

    def get_dev(self, dev):
        '''Return the normalized device number for dev.'''
        output, self.next_dev = self.get(dev, self.dev_numbers,
                                         self.next_dev)
        return output

    def reset(self):
        '''Forget all mappings and restart numbering from 1.

        This is used by unit tests.

        '''

        self.ino_numbers = dict()
        self.next_ino = 1
        self.dev_numbers = dict()
        self.next_dev = 1


class PathNormalizer(object):

    '''Normalize a filesystem path.

    For every input path, a new output path is given. The same output
    path is given every time the same input path is given. The output
    path has the same number of elements as the input path, and if
    /foo/bar results in /a/b, then /foo/yo results in /a/c, i.e.,
    the tree structure is the same.

    '''

    def __init__(self):
        self._counter = 0
        # The separator and the special directory names map to themselves.
        self._dict = dict()
        self._dict[os.sep] = os.sep
        self._dict['.'] = '.'
        self._dict['..'] = '..'

    def split(self, path):
        '''Split path into its components.

        An empty first or last component (from a leading or trailing
        separator) is replaced by the separator itself, so that
        normalize() can reproduce it in the output.

        '''

        if path == os.sep:
            return [os.sep]
        parts = path.split(os.sep)
        if parts:
            if parts[0] == '':
                parts[0] = os.sep
            if parts[-1] == '':
                parts[-1] = os.sep
        return parts

    def _base26(self, n):
        '''Return n written in base 26 with the digits a-z (0 is 'a').'''
        letters = 'abcdefghijklmnopqrstuvwxyz'
        if n == 0:
            return letters[0]
        digits = []
        while n > 0:
            # divmod uses floor division, so n stays an integer regardless
            # of whether "/" means true division in the running Python.
            n, remainder = divmod(n, 26)
            digits.append(remainder)
        return ''.join(letters[x] for x in reversed(digits))

    def normalize_part(self, part):
        '''Return the replacement for one path component.

        A component seen for the first time is assigned the next unused
        base-26 name; later lookups return the same name.

        '''

        if part not in self._dict:
            self._dict[part] = self._base26(self._counter)
            self._counter += 1
        return self._dict[part]

    def normalize(self, path):
        '''Return the normalized form of path.'''
        parts = self.split(path)
        normalized = [self.normalize_part(x) for x in parts]

        result = ''
        for x in normalized:
            if not result:
                result = x
            elif x == os.sep:
                result += x
            elif result.endswith(os.sep):
                # Do not double the separator after a leading os.sep.
                result += x
            else:
                result += os.sep + x

        return result


class SamePath(object):  # pragma: no cover

    '''A path "normalizer" that returns every path unchanged.'''

    def normalize(self, path):
        return path


class FilesystemObject(object):

    '''An object in the file system.

    Responsible for gathering information and formatting it for
    reporting.

    filename is the path to examine, nn provides get_ino() and
    get_dev() (see NumberNormalizer), and pn provides normalize()
    (see PathNormalizer or SamePath). exclude is accepted but not
    consulted by the code in this class.

    The optional arguments are intended for unit tests. open_file,
    when given, is called with the filename as its only argument and
    must return an object with read(size) and close() methods.

    '''

    def __init__(self, filename, nn, pn, exclude,
                 stat_result=None, sha1=None, open_file=None,
                 readlink=None):
        if stat_result is None:
            stat_result = os.lstat(filename)
        if sha1 is None:
            sha1 = hashlib.sha1()
        if open_file is None:
            open_file = _open_binary
        if readlink is None:
            readlink = os.readlink
        self.open_file = open_file

        self.values = dict()
        self['Name'] = pn.normalize(filename)
        self['Mtime'] = self.format_time(stat_result.st_mtime)
        self['Mode'] = '%o' % stat_result.st_mode
        self['Ino'] = '%d' % nn.get_ino(stat_result.st_ino)
        self['Dev'] = '%d' % nn.get_dev(stat_result.st_dev)
        self['Nlink'] = '%d' % stat_result.st_nlink
        if not stat.S_ISDIR(stat_result.st_mode):
            self['Size'] = '%d' % stat_result.st_size
        self['Uid'] = '%d' % stat_result.st_uid
        self['Username'] = self.lookup_username(stat_result.st_uid)
        self['Gid'] = '%d' % stat_result.st_gid
        self['Group'] = self.lookup_group(stat_result.st_gid)
        if stat.S_ISREG(stat_result.st_mode):
            self['Sha-1'] = self.compute_sha1(filename, sha1)
        if stat.S_ISLNK(stat_result.st_mode):
            self['Target'] = readlink(filename)

    def format_time(self, timestamp):
        '''Format timestamp (seconds since the epoch) as a UTC string.'''
        return time.strftime('%Y-%m-%d %H:%M:%S +0000',
                             time.gmtime(timestamp))

    def lookup_username(self, uid):
        '''Return the user name for uid.

        Raises KeyError (from pwd.getpwuid) if the uid is unknown.

        '''

        return pwd.getpwuid(uid).pw_name

    def lookup_group(self, gid):
        '''Return the group name for gid.

        Raises KeyError (from grp.getgrgid) if the gid is unknown.

        '''

        return grp.getgrgid(gid).gr_name

    def compute_sha1(self, filename, sha1):
        '''Feed the contents of filename to sha1; return the hex digest.

        The file is opened with self.open_file and is always closed,
        even if reading or hashing fails.

        '''

        f = self.open_file(filename)
        # try/finally rather than "with": injected test doubles are only
        # required to provide read() and close().
        try:
            while True:
                data = f.read(64 * 1024)  # 64 KiB seems reasonable.
                if not data:
                    break
                sha1.update(data)
        finally:
            f.close()
        return sha1.hexdigest()

    def hook_name(self, value):
        '''Hook for the 'Name' field: store the name URL-quoted.'''
        return _quote(value)

    def __setitem__(self, key, value):
        '''Store value under key.

        If a method called hook_<key in lower case> exists, the value
        is passed through it first.

        '''

        hook_name = 'hook_%s' % key.lower()
        if hasattr(self, hook_name):
            value = getattr(self, hook_name)(value)
        self.values[key] = value

    def __getitem__(self, key):
        '''Return the value for key, or '' if it has not been set.'''
        return self.values.get(key, '')

    def _isdir(self):
        '''Is this a directory?'''
        return stat.S_ISDIR(int(self['Mode'], 8))

    def relative_path(self, root):
        '''Return a path that is relative to root, if possible.

        If pathname does not start with root, then return it
        unmodified.

        '''

        if root.endswith(os.sep):
            root2 = root
        else:
            root2 = root + os.sep
        pathname = self['Name']
        if pathname.startswith(root2):
            return pathname[len(root2):]
        elif pathname == root and self._isdir():
            return '.'
        else:
            return pathname

    def format(self, root=None):  # pragma: no cover
        '''Return all fields as "Key: value" lines, with Name first.

        If root is given, the name is reported relative to it (see
        relative_path). The remaining fields follow in sorted order.

        '''

        if root is None:
            name = self['Name']
        else:
            name = self.relative_path(root)

        values = ([('Name', name)] +
                  [(x, self[x])
                   for x in sorted(self.values.keys())
                   if x != 'Name'])
        return ''.join('%s: %s\n' % (k, v) for k, v in values)