From dae83a778f9b35f50f317bca60b3d078a3e5cfa3 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius <liw@liw.fi> Date: Sun, 9 Oct 2016 15:45:07 +0300 Subject: Make it possible to use bumper for releases --- setup.py | 4 +- summainlib.py | 294 ------------------------------------------------- summainlib/__init__.py | 292 ++++++++++++++++++++++++++++++++++++++++++++++++ summainlib/version.py | 2 + 4 files changed, 297 insertions(+), 295 deletions(-) delete mode 100644 summainlib.py create mode 100644 summainlib/__init__.py create mode 100644 summainlib/version.py diff --git a/setup.py b/setup.py index 5355c79..7853233 100644 --- a/setup.py +++ b/setup.py @@ -17,8 +17,10 @@ from distutils.core import setup, Extension +import summainlib + setup(name='summain', - version='0.20', + version=summainlib.__version__, description='create file manifests with checksums', author='Lars Wirzenius', author_email='liw@liw.fi', diff --git a/summainlib.py b/summainlib.py deleted file mode 100644 index d5ea048..0000000 --- a/summainlib.py +++ /dev/null @@ -1,294 +0,0 @@ -# Copyright (C) 2010, 2011 Lars Wirzenius -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - - -import base64 -import grp -import hashlib -import hmac -import os -import pwd -import stat -import time -import urllib -import errno - -import _summain - - -__version__ = '0.20' - - -class NumberNormalizer(object): - - '''Normalize inode and device numbers. 
- - When we make two manifests of the same directory tree, but the - tree may have been moved to another disk, the inode and device - numbers may be different. This should not be a cause for concern, - however. What is important is that if two names were hardlinked - to the same file before, they still are, and if they weren't, - they still aren't. - - To achieve this, we normalize the inode and device numbers. - The input files are fed to the normalizer in a deterministic - sequence, and the sequence defines the numbers we use. Thus, - if the input files have inode numbers [42, 13, 105], we produce - [1, 2, 3]. If one of the input numbers is repeated, that number - is re-used. - - This is not a perfect solution. If the second manifest has a - new file, it will throw off the entire remaining sequence, causing - a big diff. But we'll live with that. - - ''' - - def __init__(self): - self.reset() - - def get(self, input_number, numbers, next_number): - if input_number in numbers: - return numbers[input_number], next_number - else: - numbers[input_number] = next_number - return numbers[input_number], next_number + 1 - - def get_ino(self, ino): - output, self.next_ino = self.get(ino, self.ino_numbers, self.next_ino) - return output - - def get_dev(self, dev): - output, self.next_dev = self.get(dev, self.dev_numbers, self.next_dev) - return output - - def reset(self): - '''This is used by unit tests.''' - self.ino_numbers = dict() - self.next_ino = 1 - - self.dev_numbers = dict() - self.next_dev = 1 - - -class PathNormalizer(object): - - '''Normalize a filesystem path. - - Paths are normalized by using SHA-1 on a secret plus the real path. - The checksum is the normalized path. 
- - ''' - - def __init__(self, secret): - self._secret = secret - - def normalize(self, path): - return hmac.new(self._secret, path).hexdigest() - - -class SamePath(object): # pragma: no cover - - def normalize(self, path): - return path - - -RESULT_RET = 0 -RESULT_DEV = 1 -RESULT_INO = 2 -RESULT_MODE = 3 -RESULT_NLINK = 4 -RESULT_UID = 5 -RESULT_GID = 6 -RESULT_RDEV = 7 -RESULT_SIZE = 8 -RESULT_BLKSIZE = 9 -RESULT_BLOCKS = 10 -RESULT_ATIME_SEC = 11 -RESULT_ATIME_NSEC = 12 -RESULT_MTIME_SEC = 13 -RESULT_MTIME_NSEC = 14 -RESULT_CTIME_SEC = 15 -RESULT_CTIME_NSEC = 16 - - -class FilesystemObject(object): - - '''An object in the file system. - - Responsible for gathering information and formatting it for - reporting. - - The optional arguments are intended for unit tests. - - ''' - - def __init__(self, filename, nn, pn, exclude, - stat_result=None, sha1=None, sha224=None, - sha256=None, sha384=None, sha512=None, - md5=None, open_file=None, readlink=None, - xattrs=None): - self.filename = filename - self.relative = None - self._exclude = set(self._normalize_key(k) for k in exclude) - self._pn = pn - self._nn = nn - self._md5 = md5 or hashlib.md5() - self._sha1 = sha1 or hashlib.sha1() - self._sha224 = sha224 or hashlib.sha224() - self._sha256 = sha256 or hashlib.sha256() - self._sha384 = sha384 or hashlib.sha384() - self._sha512 = sha512 or hashlib.sha512() - self._stat_result = stat_result or _summain.lstat(filename) - self._xattrs = (xattrs if xattrs is not None - else self.get_xattrs(filename)) - self.open_file = open_file or file - self.readlink = readlink or os.readlink - self.values = dict() - - def _compute_name(self): - if self.relative is None: - name = self.filename - else: - name = self.relative # pragma: no cover - return urllib.quote(self._pn.normalize(name)) - - def _compute_mtime(self): - return self.format_time(self._stat_result[RESULT_MTIME_SEC], - self._stat_result[RESULT_MTIME_NSEC]) - - def _compute_mode(self): - return '%o' % 
self._stat_result[RESULT_MODE] - - def _compute_ino(self): - return '%d' % self._nn.get_ino(self._stat_result[RESULT_INO]) - - def _compute_dev(self): - return '%d' % self._nn.get_dev(self._stat_result[RESULT_DEV]) - - def _compute_nlink(self): - return '%d' % self._stat_result[RESULT_NLINK] - - def _compute_size(self): - if not stat.S_ISDIR(self._stat_result[RESULT_MODE]): - return '%d' % self._stat_result[RESULT_SIZE] - - def _compute_uid(self): - return '%d' % self._stat_result[RESULT_UID] - - def _compute_username(self): - return self.lookup_username(self._stat_result[RESULT_UID]) - - def _compute_gid(self): - return '%d' % self._stat_result[RESULT_GID] - - def _compute_group(self): - return self.lookup_group(self._stat_result[RESULT_GID]) - - def _compute_md5(self): - return self.compute_checksum(self.filename, self._md5) - - def _compute_sha1(self): - return self.compute_checksum(self.filename, self._sha1) - - def _compute_sha224(self): - return self.compute_checksum(self.filename, self._sha224) - - def _compute_sha256(self): - return self.compute_checksum(self.filename, self._sha256) - - def _compute_sha384(self): - return self.compute_checksum(self.filename, self._sha384) - - def _compute_sha512(self): - return self.compute_checksum(self.filename, self._sha512) - - def _compute_target(self): - if stat.S_ISLNK(self._stat_result[RESULT_MODE]): - return self.readlink(self.filename) - - def _compute_xattrs(self): # pragma: no cover - if len(self._xattrs) == 0: - return '' - - def quote(s): - if s.isalnum(): - return '"%s"' % s - else: - return '0s' + base64.urlsafe_b64encode(s) - - parts = [' %s=%s' % (k, quote(self._xattrs[k])) for k in self._xattrs] - return '\n' + '\n'.join(parts) - - def format_time(self, secs, nsecs): - s = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(secs)) - s += '.%09d' % nsecs - s += ' +0000' - return s - - def lookup_username(self, uid): - return pwd.getpwuid(uid).pw_name - - def lookup_group(self, gid): - return 
grp.getgrgid(gid).gr_name - - def compute_checksum(self, filename, checksummer): - if stat.S_ISREG(self._stat_result[RESULT_MODE]): - with self.open_file(filename) as f: - while True: - data = f.read(64 * 1024) # 64 KiB seems reasonable. - if not data: - break - checksummer.update(data) - return checksummer.hexdigest() - else: - return '' - - def _normalize_key(self, key): - key = key.lower() - key = '_'.join(key.split('-')) - return key - - def __getitem__(self, key): - normalized = self._normalize_key(key) - if normalized in self._exclude: - return '' - if key not in self.values: - method = '_compute_%s' % self._normalize_key(key) - if hasattr(self, method): - value = getattr(self, method)() - if value is not None: - self.values[key] = value - else: - raise KeyError(key) - return self.values.get(key, '') - - def isdir(self): # pragma: no cover - '''Is this a directory?''' - return stat.S_ISDIR(int(self['Mode'], 8)) - - def get_xattrs(self, filename): # pragma: no cover - ret = _summain.llistxattr(filename) - if type(ret) is int: - # Some file types don't support xattr, e.g. named pipes on FreeBSD: - if ret == errno.EOPNOTSUPP: - return {} - raise OSError((ret, os.strerror(ret), filename)) - - names = [s for s in ret.split('\0') if s] - - xattrs = {} - for name in names: - xattrs[name] = _summain.lgetxattr(filename, name) - return xattrs diff --git a/summainlib/__init__.py b/summainlib/__init__.py new file mode 100644 index 0000000..d45942b --- /dev/null +++ b/summainlib/__init__.py @@ -0,0 +1,292 @@ +# Copyright (C) 2010, 2011 Lars Wirzenius +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +import base64 +import grp +import hashlib +import hmac +import os +import pwd +import stat +import time +import urllib +import errno + +import _summain +from .version import __version__, __version_info__ + + +class NumberNormalizer(object): + + '''Normalize inode and device numbers. + + When we make two manifests of the same directory tree, but the + tree may have been moved to another disk, the inode and device + numbers may be different. This should not be a cause for concern, + however. What is important is that if two names were hardlinked + to the same file before, they still are, and if they weren't, + they still aren't. + + To achieve this, we normalize the inode and device numbers. + The input files are fed to the normalizer in a deterministic + sequence, and the sequence defines the numbers we use. Thus, + if the input files have inode numbers [42, 13, 105], we produce + [1, 2, 3]. If one of the input numbers is repeated, that number + is re-used. + + This is not a perfect solution. If the second manifest has a + new file, it will throw off the entire remaining sequence, causing + a big diff. But we'll live with that. 
+ + ''' + + def __init__(self): + self.reset() + + def get(self, input_number, numbers, next_number): + if input_number in numbers: + return numbers[input_number], next_number + else: + numbers[input_number] = next_number + return numbers[input_number], next_number + 1 + + def get_ino(self, ino): + output, self.next_ino = self.get(ino, self.ino_numbers, self.next_ino) + return output + + def get_dev(self, dev): + output, self.next_dev = self.get(dev, self.dev_numbers, self.next_dev) + return output + + def reset(self): + '''This is used by unit tests.''' + self.ino_numbers = dict() + self.next_ino = 1 + + self.dev_numbers = dict() + self.next_dev = 1 + + +class PathNormalizer(object): + + '''Normalize a filesystem path. + + Paths are normalized by using SHA-1 on a secret plus the real path. + The checksum is the normalized path. + + ''' + + def __init__(self, secret): + self._secret = secret + + def normalize(self, path): + return hmac.new(self._secret, path).hexdigest() + + +class SamePath(object): # pragma: no cover + + def normalize(self, path): + return path + + +RESULT_RET = 0 +RESULT_DEV = 1 +RESULT_INO = 2 +RESULT_MODE = 3 +RESULT_NLINK = 4 +RESULT_UID = 5 +RESULT_GID = 6 +RESULT_RDEV = 7 +RESULT_SIZE = 8 +RESULT_BLKSIZE = 9 +RESULT_BLOCKS = 10 +RESULT_ATIME_SEC = 11 +RESULT_ATIME_NSEC = 12 +RESULT_MTIME_SEC = 13 +RESULT_MTIME_NSEC = 14 +RESULT_CTIME_SEC = 15 +RESULT_CTIME_NSEC = 16 + + +class FilesystemObject(object): + + '''An object in the file system. + + Responsible for gathering information and formatting it for + reporting. + + The optional arguments are intended for unit tests. 
+ + ''' + + def __init__(self, filename, nn, pn, exclude, + stat_result=None, sha1=None, sha224=None, + sha256=None, sha384=None, sha512=None, + md5=None, open_file=None, readlink=None, + xattrs=None): + self.filename = filename + self.relative = None + self._exclude = set(self._normalize_key(k) for k in exclude) + self._pn = pn + self._nn = nn + self._md5 = md5 or hashlib.md5() + self._sha1 = sha1 or hashlib.sha1() + self._sha224 = sha224 or hashlib.sha224() + self._sha256 = sha256 or hashlib.sha256() + self._sha384 = sha384 or hashlib.sha384() + self._sha512 = sha512 or hashlib.sha512() + self._stat_result = stat_result or _summain.lstat(filename) + self._xattrs = (xattrs if xattrs is not None + else self.get_xattrs(filename)) + self.open_file = open_file or file + self.readlink = readlink or os.readlink + self.values = dict() + + def _compute_name(self): + if self.relative is None: + name = self.filename + else: + name = self.relative # pragma: no cover + return urllib.quote(self._pn.normalize(name)) + + def _compute_mtime(self): + return self.format_time(self._stat_result[RESULT_MTIME_SEC], + self._stat_result[RESULT_MTIME_NSEC]) + + def _compute_mode(self): + return '%o' % self._stat_result[RESULT_MODE] + + def _compute_ino(self): + return '%d' % self._nn.get_ino(self._stat_result[RESULT_INO]) + + def _compute_dev(self): + return '%d' % self._nn.get_dev(self._stat_result[RESULT_DEV]) + + def _compute_nlink(self): + return '%d' % self._stat_result[RESULT_NLINK] + + def _compute_size(self): + if not stat.S_ISDIR(self._stat_result[RESULT_MODE]): + return '%d' % self._stat_result[RESULT_SIZE] + + def _compute_uid(self): + return '%d' % self._stat_result[RESULT_UID] + + def _compute_username(self): + return self.lookup_username(self._stat_result[RESULT_UID]) + + def _compute_gid(self): + return '%d' % self._stat_result[RESULT_GID] + + def _compute_group(self): + return self.lookup_group(self._stat_result[RESULT_GID]) + + def _compute_md5(self): + return 
self.compute_checksum(self.filename, self._md5) + + def _compute_sha1(self): + return self.compute_checksum(self.filename, self._sha1) + + def _compute_sha224(self): + return self.compute_checksum(self.filename, self._sha224) + + def _compute_sha256(self): + return self.compute_checksum(self.filename, self._sha256) + + def _compute_sha384(self): + return self.compute_checksum(self.filename, self._sha384) + + def _compute_sha512(self): + return self.compute_checksum(self.filename, self._sha512) + + def _compute_target(self): + if stat.S_ISLNK(self._stat_result[RESULT_MODE]): + return self.readlink(self.filename) + + def _compute_xattrs(self): # pragma: no cover + if len(self._xattrs) == 0: + return '' + + def quote(s): + if s.isalnum(): + return '"%s"' % s + else: + return '0s' + base64.urlsafe_b64encode(s) + + parts = [' %s=%s' % (k, quote(self._xattrs[k])) for k in self._xattrs] + return '\n' + '\n'.join(parts) + + def format_time(self, secs, nsecs): + s = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(secs)) + s += '.%09d' % nsecs + s += ' +0000' + return s + + def lookup_username(self, uid): + return pwd.getpwuid(uid).pw_name + + def lookup_group(self, gid): + return grp.getgrgid(gid).gr_name + + def compute_checksum(self, filename, checksummer): + if stat.S_ISREG(self._stat_result[RESULT_MODE]): + with self.open_file(filename) as f: + while True: + data = f.read(64 * 1024) # 64 KiB seems reasonable. 
+ if not data: + break + checksummer.update(data) + return checksummer.hexdigest() + else: + return '' + + def _normalize_key(self, key): + key = key.lower() + key = '_'.join(key.split('-')) + return key + + def __getitem__(self, key): + normalized = self._normalize_key(key) + if normalized in self._exclude: + return '' + if key not in self.values: + method = '_compute_%s' % self._normalize_key(key) + if hasattr(self, method): + value = getattr(self, method)() + if value is not None: + self.values[key] = value + else: + raise KeyError(key) + return self.values.get(key, '') + + def isdir(self): # pragma: no cover + '''Is this a directory?''' + return stat.S_ISDIR(int(self['Mode'], 8)) + + def get_xattrs(self, filename): # pragma: no cover + ret = _summain.llistxattr(filename) + if type(ret) is int: + # Some file types don't support xattr, e.g. named pipes on FreeBSD: + if ret == errno.EOPNOTSUPP: + return {} + raise OSError((ret, os.strerror(ret), filename)) + + names = [s for s in ret.split('\0') if s] + + xattrs = {} + for name in names: + xattrs[name] = _summain.lgetxattr(filename, name) + return xattrs diff --git a/summainlib/version.py b/summainlib/version.py new file mode 100644 index 0000000..addb965 --- /dev/null +++ b/summainlib/version.py @@ -0,0 +1,2 @@ +__version__ = '0.20+git' +__version_info__ = (0, 20, '+git') -- cgit v1.2.1