From db9fc04e3542ff3475f0caecbfe0409c0aa63f8c Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Thu, 29 Dec 2011 13:06:22 +0000 Subject: add C extension module for system calls Python doesn't provide Snarfed from Obnam. --- _summainmodule.c | 166 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 _summainmodule.c diff --git a/_summainmodule.c b/_summainmodule.c new file mode 100644 index 0000000..4184ef0 --- /dev/null +++ b/_summainmodule.c @@ -0,0 +1,166 @@ +/* + * _summainmodule.c -- Python extensions for Summain + * + * Copyright (C) 2008, 2009 Lars Wirzenius + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + + +#include + + +#ifndef _XOPEN_SOURCE +#define _XOPEN_SOURCE 600 +#endif +#define _POSIX_C_SOURCE 200809L +#include +#include +#include +#include +#include +#include +#include +#include + + +static PyObject * +lstat_wrapper(PyObject *self, PyObject *args) +{ + int ret; + const char *filename; + struct stat st = {0}; + + if (!PyArg_ParseTuple(args, "s", &filename)) + return NULL; + + ret = lstat(filename, &st); + if (ret == -1) + ret = errno; + return Py_BuildValue("iLLLLLLLLLLLLLLLL", + ret, + (long long) st.st_dev, + (long long) st.st_ino, + (long long) st.st_mode, + (long long) st.st_nlink, + (long long) st.st_uid, + (long long) st.st_gid, + (long long) st.st_rdev, + (long long) st.st_size, + (long long) st.st_blksize, + (long long) st.st_blocks, + (long long) st.st_atim.tv_sec, + (long long) st.st_atim.tv_nsec, + (long long) st.st_mtim.tv_sec, + (long long) st.st_mtim.tv_nsec, + (long long) st.st_ctim.tv_sec, + (long long) st.st_ctim.tv_nsec); +} + + +static PyObject * +llistxattr_wrapper(PyObject *self, PyObject *args) +{ + const char *filename; + size_t bufsize; + PyObject *o; + + if (!PyArg_ParseTuple(args, "s", &filename)) + return NULL; + + bufsize = 0; + o = NULL; + do { + bufsize += 1024; + char *buf = malloc(bufsize); + ssize_t n = llistxattr(filename, buf, bufsize); + + if (n >= 0) + o = Py_BuildValue("s#", buf, (int) n); + else if (n == -1 && errno != ERANGE) + o = Py_BuildValue("i", errno); + free(buf); + } while (o == NULL); + + return o; +} + + +static PyObject * +lgetxattr_wrapper(PyObject *self, PyObject *args) +{ + const char *filename; + const char *attrname; + size_t bufsize; + PyObject *o; + + if (!PyArg_ParseTuple(args, "ss", &filename, &attrname)) + return NULL; + + bufsize = 0; + o = NULL; + do { + bufsize += 1024; + char *buf = malloc(bufsize); + ssize_t n = lgetxattr(filename, attrname, buf, bufsize); + + if (n > 0) + o = Py_BuildValue("s#", buf, (int) n); + else if (n == -1 && errno != ERANGE) + o = Py_BuildValue("i", errno); + free(buf); + } while (o == NULL); + + return o; +} + + +static PyObject * +lsetxattr_wrapper(PyObject *self, PyObject *args) +{ + const char *filename; + const char *name; + const char *value; + int size; + int ret; + + if (!PyArg_ParseTuple(args, "sss#", &filename, &name, &value, &size)) + return NULL; + + ret = lsetxattr(filename, name, value, size, 0); + if (ret == -1) + ret = errno; + return Py_BuildValue("i", ret); +} + + +static PyMethodDef methods[] = { + {"lstat", lstat_wrapper, METH_VARARGS, + "lstat(2) wrapper; arg is filename, returns tuple."}, + {"llistxattr", llistxattr_wrapper, METH_VARARGS, + "llistxattr(2) wrapper; arg is filename, returns tuple."}, + {"lgetxattr", lgetxattr_wrapper, METH_VARARGS, + "lgetxattr(2) wrapper; arg is filename, returns tuple."}, + {"lsetxattr", lsetxattr_wrapper, METH_VARARGS, + "lsetxattr(2) wrapper; arg is filename, returns errno."}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + + +PyMODINIT_FUNC +init_summain(void) +{ + (void) Py_InitModule("_summain", methods); +} -- cgit v1.2.1 From 1f68a394826daabe18c5662ba60c583450cdb72d Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Thu, 29 Dec 2011 13:27:40 +0000 Subject: use nanosecond resolution for timestamps --- setup.py | 3 ++- summainlib.py | 57 ++++++++++++++++++++++++++++++++++++----------------- summainlib_tests.py | 48 +++++++++++++++++++++++--------------------- 3 files changed, 67 insertions(+), 41 deletions(-) diff --git a/setup.py b/setup.py index 32711b8..7853233 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ # along with this program. If not, see . -from distutils.core import setup +from distutils.core import setup, Extension import summainlib @@ -27,6 +27,7 @@ setup(name='summain', url='http://liw.fi/summain/', py_modules=['summainlib'], scripts=['summain'], + ext_modules=[Extension('_summain', sources=['_summainmodule.c'])], data_files=[ ('share/man/man1', ['summain.1']), ], diff --git a/summainlib.py b/summainlib.py index 87d61ee..8eac369 100644 --- a/summainlib.py +++ b/summainlib.py @@ -24,6 +24,8 @@ import stat import time import urllib +import _summain + __version__ = '0.10' @@ -101,6 +103,25 @@ class SamePath(object): # pragma: no cover return path +RESULT_RET = 0 +RESULT_DEV = 1 +RESULT_INO = 2 +RESULT_MODE = 3 +RESULT_NLINK = 4 +RESULT_UID = 5 +RESULT_GID = 6 +RESULT_RDEV = 7 +RESULT_SIZE = 8 +RESULT_BLKSIZE = 9 +RESULT_BLOCKS = 10 +RESULT_ATIME_SEC = 11 +RESULT_ATIME_NSEC = 12 +RESULT_MTIME_SEC = 13 +RESULT_MTIME_NSEC = 14 +RESULT_CTIME_SEC = 15 +RESULT_CTIME_NSEC = 16 + + class FilesystemObject(object): '''An object in the file system. @@ -126,7 +147,7 @@ class FilesystemObject(object): self._sha256 = sha256 or hashlib.sha256() self._sha384 = sha384 or hashlib.sha384() self._sha512 = sha512 or hashlib.sha512() - self._stat_result = stat_result or os.lstat(filename) + self._stat_result = stat_result or _summain.lstat(filename) self.open_file = open_file or file self.readlink = readlink or os.readlink self.values = dict() @@ -135,35 +156,36 @@ class FilesystemObject(object): return urllib.quote(self._pn.normalize(self._filename)) def _compute_mtime(self): - return self.format_time(self._stat_result.st_mtime) + return self.format_time(self._stat_result[RESULT_MTIME_SEC], + self._stat_result[RESULT_MTIME_NSEC]) def _compute_mode(self): - return '%o' % self._stat_result.st_mode + return '%o' % self._stat_result[RESULT_MODE] def _compute_ino(self): - return '%d' % self._nn.get_ino(self._stat_result.st_ino) + return '%d' % self._nn.get_ino(self._stat_result[RESULT_INO]) def _compute_dev(self): - return '%d' % self._nn.get_dev(self._stat_result.st_dev) + return '%d' % self._nn.get_dev(self._stat_result[RESULT_DEV]) def _compute_nlink(self): - return '%d' % self._stat_result.st_nlink + return '%d' % self._stat_result[RESULT_NLINK] def _compute_size(self): - if not stat.S_ISDIR(self._stat_result.st_mode): - return '%d' % self._stat_result.st_size + if not stat.S_ISDIR(self._stat_result[RESULT_MODE]): + return '%d' % self._stat_result[RESULT_SIZE] def _compute_uid(self): - return '%d' % self._stat_result.st_uid + return '%d' % self._stat_result[RESULT_UID] def _compute_username(self): - return self.lookup_username(self._stat_result.st_uid) + return self.lookup_username(self._stat_result[RESULT_UID]) def _compute_gid(self): - return '%d' % self._stat_result.st_gid + return '%d' % self._stat_result[RESULT_GID] def _compute_group(self): - return self.lookup_group(self._stat_result.st_gid) + return self.lookup_group(self._stat_result[RESULT_GID]) def _compute_md5(self): return self.compute_checksum(self._filename, self._md5) @@ -184,13 +206,12 @@ class FilesystemObject(object): return self.compute_checksum(self._filename, self._sha512) def _compute_target(self): - if stat.S_ISLNK(self._stat_result.st_mode): + if stat.S_ISLNK(self._stat_result[RESULT_MODE]): return self.readlink(self._filename) - def format_time(self, timestamp): - s = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(timestamp)) - fract, whole = math.modf(timestamp) - s += ('%.6f' % fract)[1:] # Skip leading '.' + def format_time(self, secs, nsecs): + s = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(secs)) + s += ('%.6f' % (nsecs * 1e-9))[1:] # skip leading 0 s += ' +0000' return s @@ -201,7 +222,7 @@ class FilesystemObject(object): return grp.getgrgid(gid).gr_name def compute_checksum(self, filename, checksummer): - if stat.S_ISREG(self._stat_result.st_mode): + if stat.S_ISREG(self._stat_result[RESULT_MODE]): with self.open_file(filename) as f: while True: data = f.read(64*1024) # 64 KiB seems reasonable. diff --git a/summainlib_tests.py b/summainlib_tests.py index 4ec3da5..8480057 100644 --- a/summainlib_tests.py +++ b/summainlib_tests.py @@ -20,13 +20,6 @@ import unittest import summainlib -class FakeStatResult(object): - - def __init__(self, *args, **kwargs): - for name, value in kwargs.iteritems(): - setattr(self, name, value) - - class FakeChecksummer(object): def update(self, data): @@ -63,7 +56,7 @@ class FakeReadlink(object): self.parent = parent def __call__(self, filename): - if stat.S_ISLNK(self.parent.st.st_mode): + if stat.S_ISLNK(self.parent.st[summainlib.RESULT_MODE]): self.target = 'symlink' else: self.target = '' @@ -73,14 +66,17 @@ class FakeReadlink(object): class FilesystemObjectTests(unittest.TestCase): def setUp(self): - self.st = FakeStatResult(st_mtime=1262307723.123456789, - st_mode=stat.S_IFREG | 0644, - st_ino=12765, - st_dev=42, - st_nlink=2, - st_size=1, - st_uid=0, - st_gid=0) + self.st = { + summainlib.RESULT_MTIME_SEC: 1262307723, + summainlib.RESULT_MTIME_NSEC: 123456789, + summainlib.RESULT_MODE: stat.S_IFREG | 0644, + summainlib.RESULT_INO: 12765, + summainlib.RESULT_DEV: 42, + summainlib.RESULT_NLINK: 2, + summainlib.RESULT_SIZE: 1, + summainlib.RESULT_UID: 0, + summainlib.RESULT_GID: 0 + } self.nn = summainlib.NumberNormalizer() self.pn = summainlib.SamePath() @@ -88,7 +84,7 @@ class FilesystemObjectTests(unittest.TestCase): def new(self, name, mode=None): if mode is not None: - self.st.st_mode = mode + self.st[summainlib.RESULT_MODE] = mode return summainlib.FilesystemObject(name, self.nn, self.pn, self.exclude, stat_result=self.st, @@ -153,7 +149,7 @@ class FilesystemObjectTests(unittest.TestCase): self.assertEqual(self.new('foo')['SHA512'], 'abc') def test_formats_checksums_correctly_for_special_file(self): - self.st.st_mode = stat.S_IFDIR | 0755 + self.st[summainlib.RESULT_MODE] = stat.S_IFDIR | 0755 self.assertEqual(self.new('foo')['MD5'], '') self.assertEqual(self.new('foo')['SHA1'], '') self.assertEqual(self.new('foo')['SHA224'], '') @@ -162,7 +158,7 @@ class FilesystemObjectTests(unittest.TestCase): self.assertEqual(self.new('foo')['SHA512'], '') def test_formats_target_correctly_for_symlink(self): - self.st.st_mode = stat.S_IFLNK | 0777 + self.st[summainlib.RESULT_MODE] = stat.S_IFLNK | 0777 self.assertEqual(self.new('foo')['Target'], 'symlink') def test_formats_target_correctly_for_regular_file(self): @@ -189,9 +185,17 @@ class FilesystemObjectNormalizedNumbersTests(unittest.TestCase): self.nn.reset() def new(self, name): - st = FakeStatResult(st_ino=self.ino, st_dev=self.dev, st_mtime=0, - st_mode=stat.S_IFREG|0, st_nlink=1, st_size=0, - st_uid=0, st_gid=0) + st = { + summainlib.RESULT_INO: self.ino, + summainlib.RESULT_DEV: self.dev, + summainlib.RESULT_MTIME_SEC: 0, + summainlib.RESULT_MTIME_NSEC: 0, + summainlib.RESULT_MODE: stat.S_IFREG|0, + summainlib.RESULT_NLINK: 1, + summainlib.RESULT_SIZE: 0, + summainlib.RESULT_UID: 0, + summainlib.RESULT_GID: 0 + } self.ino += 1 return summainlib.FilesystemObject(name, self.nn, self.pn, self.exclude, -- cgit v1.2.1 From 0bb1c8aab4c7ca7ed66ab756d10131691a4f235e Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Thu, 29 Dec 2011 13:32:35 +0000 Subject: produce nanosec timestamps without floating point Go away, stupid rounding error, away! --- summainlib.py | 2 +- summainlib_tests.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/summainlib.py b/summainlib.py index 8eac369..79e64e7 100644 --- a/summainlib.py +++ b/summainlib.py @@ -211,7 +211,7 @@ class FilesystemObject(object): def format_time(self, secs, nsecs): s = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(secs)) - s += ('%.6f' % (nsecs * 1e-9))[1:] # skip leading 0 + s += '.%09d' % nsecs s += ' +0000' return s diff --git a/summainlib_tests.py b/summainlib_tests.py index 8480057..f8bcc27 100644 --- a/summainlib_tests.py +++ b/summainlib_tests.py @@ -109,7 +109,7 @@ class FilesystemObjectTests(unittest.TestCase): def test_formats_mtime_correctly(self): self.assertEqual(self.new('foo')['Mtime'], - '2010-01-01 01:02:03.123457 +0000') + '2010-01-01 01:02:03.123456789 +0000') def test_formats_mode_for_regular_file_correctly(self): self.assertEqual(self.new('foo')['Mode'], '100644') -- cgit v1.2.1 From 0d00f4d37a1c59f70dbabce97a9a3bd44c85dd6e Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Thu, 29 Dec 2011 13:47:47 +0000 Subject: update NEWS about new feature --- NEWS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/NEWS b/NEWS index 3396f6b..fc1be15 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,15 @@ NEWS file for summain ===================== +Version 0.11, released UNRELEASED +--------------------------------- + +* Now properly supports sub-second timestamps. Previously, the timestamps + were limited by the Python standard library's use of floating point + values for timestamps, which didn't give the full nanosecond precision. + Summain now uses a C extension to call the `lstat`(2) system call + directly, to get the full precision. + Version 0.10, released 2011-08-09 --------------------------------- -- cgit v1.2.1