summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2015-10-24 14:32:01 +0300
committerLars Wirzenius <liw@liw.fi>2015-10-24 16:25:17 +0300
commit28fbb8fab29b4488aa03ec95c18a80b97010f278 (patch)
tree6fa73c6a8ec91deb8b34666313d5b7ed857b14f0
parentfafe2b5078641c9f6a637bdaa7521d2ac0df130e (diff)
downloadobnam-28fbb8fab29b4488aa03ec95c18a80b97010f278.tar.gz
Reimplement scan_tree without recursion
Previously, scan_tree would, when encountering a very deep directory tree, crash due to Python's maximal stack depth limit. To avoid that, avoid recursion and use an explicit list of unprocessed items.
-rw-r--r--NEWS4
-rw-r--r--obnamlib/vfs.py74
2 files changed, 52 insertions, 26 deletions
diff --git a/NEWS b/NEWS
index e4886325..8bdf9a95 100644
--- a/NEWS
+++ b/NEWS
@@ -30,6 +30,10 @@ Version 1.18, released UNRELEASED
* Henri Sivonen improved the compression code to not compress if the
result would be larger.
+* Lars Wirzenius changed the `scan_tree` code to not be recursive, to
+ avoid problems with directory trees that are deeper than Python's
+ call stack limit allows.
+
Version 1.17, released 2015-09-12
---------------------------------
diff --git a/obnamlib/vfs.py b/obnamlib/vfs.py
index c9de5ed1..60819eaf 100644
--- a/obnamlib/vfs.py
+++ b/obnamlib/vfs.py
@@ -284,38 +284,58 @@ class VirtualFileSystem(object):
'''
- error_handler = error_handler or (lambda name, e: None)
-
- try:
- pairs = self.listdir2(dirname)
- except OSError, e:
- log('listdir failed: %s: %s' % (e.filename, e.strerror))
- error_handler(dirname, e)
- pairs = []
-
- queue = []
- for name, st in pairs:
- pathname = os.path.join(dirname, name)
- if isinstance(st, BaseException):
- error_handler(pathname, st)
- elif ok is None or ok(pathname, st):
- if stat.S_ISDIR(st.st_mode):
- for t in self.scan_tree(pathname, ok=ok, dirst=st):
- yield t
+ def important_first(items, is_important):
+ important = []
+ unimportant = []
+ for item in items:
+ if is_important(item):
+ important.append(item)
else:
- queue.append((pathname, st))
+ unimportant.append(item)
+ return important + unimportant
- for pathname, st in queue:
- yield pathname, st
+ def is_directory(pair):
+ _, metadata = pair
+ return (not isinstance(metadata, BaseException)
+ and stat.S_ISDIR(metadata.st_mode))
- if dirst is None:
+ def list_files(pathname):
try:
- dirst = self.lstat(dirname)
+ pairs = self.listdir2(pathname)
+ except OSError, e:
+ log('listdir failed: %s: %s' % (e.filename, e.strerror))
+ error_handler(pathname, e)
+ return []
+ else:
+ pairs = [(os.path.join(pathname, basename), st)
+ for basename, st in pairs]
+ return important_first(pairs, is_directory)
+
+ def lstat(pathname):
+ try:
+ return self.lstat(pathname)
except OSError, e:
log('lstat for dir failed: %s: %s' % (e.filename, e.strerror))
- return
+ return e
+
+ def process_dir(dirname, metadata, items):
+ new_items = [
+ (subname, submeta, False)
+ for subname, submeta in list_files(dirname)]
+ return new_items + [(dirname, metadata, True)] + items
+
+ error_handler = error_handler or (lambda name, e: None)
+ ok = ok or (lambda name, st: True)
- yield dirname, dirst
+ items = [(dirname, lstat(dirname), False)]
+ while items:
+ filename, metadata, processed_dir = items.pop(0)
+ if isinstance(metadata, BaseException):
+ error_handler(filename, metadata)
+ elif stat.S_ISDIR(metadata.st_mode) and not processed_dir:
+ items = process_dir(filename, metadata, items)
+ elif ok(filename, metadata):
+ yield filename, metadata
class VfsFactory(object):
@@ -744,7 +764,9 @@ class VfsTests(object): # pragma: no cover
self.set_up_scan_tree()
result = list(self.fs.scan_tree(self.basepath))
pathnames = [pathname for pathname, _ in result]
- self.assertEqual(sorted(pathnames), sorted(self.pathnames))
+ self.assertEqual(sorted(pathnames), sorted(self.pathnames),
+ 'pathnames: %r --- self.pathnames: %r' %
+ (pathnames, self.pathnames))
def test_scan_tree_filters_away_unwanted(self):
def ok(pathname, st):