diff options
author | Lars Wirzenius <liw@liw.fi> | 2012-12-03 22:26:37 +0000 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2012-12-03 22:26:37 +0000 |
commit | 8e93ee489cebaf0816b727dae4cc8d33c7ea3179 (patch) | |
tree | 7549081e42931799aea3d48b99e54563266f6592 | |
parent | a4e91cc2398c67dbafe0ef6bdc9923c46ca646b7 (diff) | |
parent | 086c7d6a0d822e7ca407bbb52a107538124b75b5 (diff) | |
download | larch-8e93ee489cebaf0816b727dae4cc8d33c7ea3179.tar.gz |
Improve fsck and related bug fixes
-rwxr-xr-x | larch/fsck.py | 177 | ||||
-rw-r--r-- | larch/journal.py | 6 | ||||
-rw-r--r-- | larch/nodestore.py | 11 | ||||
-rw-r--r-- | larch/nodestore_disk.py | 6 |
4 files changed, 58 insertions, 142 deletions
diff --git a/larch/fsck.py b/larch/fsck.py index 3554d95..7ca0111 100755 --- a/larch/fsck.py +++ b/larch/fsck.py @@ -56,7 +56,9 @@ class WorkItem(object): try: return self.fsck.forest.node_store.get_node(node_id) except larch.NodeMissing: - self.error('node %s is missing' % node_id) + self.error( + 'forest %s: node %s is missing' % + (self.fsck.forest_name, node_id)) class CheckNode(WorkItem): @@ -64,150 +66,58 @@ class CheckNode(WorkItem): def __init__(self, fsck, node_id): self.fsck = fsck self.node_id = node_id - self.name = 'node %s' % node_id + self.name = 'node %s in %s' % (self.node_id, self.fsck.forest_name) def do(self): - tracing.trace('checking node %s' % self.node_id) node = self.get_node(self.node_id) - if node: - if type(node) not in [larch.IndexNode, larch.LeafNode]: - self.error('node must be an index or leaf node') - return - keys = node.keys() - if self.fsck.forest.node_store.get_refcount(node.id) <= 0: - self.warning('node refcount must be > 0') - if not len(keys): - self.warning('node must have keys') - if sorted(keys) != keys: - self.error('node keys must be sorted') - if sorted(set(keys)) != keys: - self.error('node keys must be unique') - encoded = self.fsck.forest.node_store.codec.encode(node) - if len(encoded) > self.fsck.forest.node_store.node_size: - self.warning('node is too large') - if len(encoded) == 0: - self.warning('node has zero size when encoded') - - if self.fsck.fix and type(node) == larch.IndexNode: - tracing.trace('checking and fixing index node %s' % - self.node_id) - keys = [] - for key in node: - child_id = node[key] - if self.get_node(child_id): - keys.append(key) - else: - tracing.trace('child %s is missing' % child_id) - if keys != node.keys(): - tracing.trace('Replacing index node %s with fixed copy' % - self.node_id) - new_node = larch.IndexNode(node.id, keys, - [node[k] for k in keys]) - self.fsck.forest.node_store.put_node(new_node) - tracing.trace('fixed it: %s' % new_node.keys()) - - -class CheckRoot(WorkItem): - - def __init__(self, fsck, root_id): - self.fsck = fsck - self.root_id = root_id - self.name = 'root node %s' % root_id - - def do(self): - tracing.trace('checking root node %s' % self.root_id) - node = self.get_node(self.root_id) - if node: - if self.fsck.forest.node_store.get_refcount(self.root_id) != 1: - self.warning('root refcount must be 1') - if type(node) != larch.IndexNode: - self.error('root must be an index node') - else: - self.error('missing root node %s' % self.root_id) + if type(node) == larch.IndexNode: + for child_id in node.values(): + seen_already = child_id in self.fsck.refcounts + self.fsck.count(child_id) + if not seen_already: + yield CheckNode(self.fsck, child_id) +class CheckForest(WorkItem): -class CheckRecursively(WorkItem): - - def __init__(self, fsck, root_id, seen): + def __init__(self, fsck): self.fsck = fsck - self.root_id = root_id - self.name = 'tree %s' % root_id - self.seen = seen - + self.name = 'forest %s' % self.fsck.forest_name + def do(self): - tracing.trace('checking recursive from root node %s' % self.root_id) - for node, minkey, maxkey in self.walk(self.root_id): - if node.id not in self.seen: - tracing.trace('checking node %s' % node.id) - self.seen.add(node.id) - keys = node.keys() - if keys: - if keys[0] < minkey: - self.error('node %s: first key is too small' % node.id) - if keys[-1] > maxkey: - self.error('node %s: last key is too large' % node.id) - - def walk(self, root_id): - - def walker(node_id, minkey, maxkey, expected_type): - expected_child = None - node = self.get_node(node_id) - if node: - yield node, minkey, maxkey - if type(node) == larch.IndexNode: - tracing.trace('recursively found index node %s' % node.id) - keys = node.keys() - tracing.trace('keys: %s' % repr(keys)) - for i, key in enumerate(keys): - child_id = node[key] - if i + 1 < len(keys): - next_key = keys[i+1] - else: - next_key = maxkey - if expected_child is None: - child = self.get_node(child_id) - if child: - expected_child = type(child) - for x in walker(child_id, key, next_key, - expected_child): - yield x - else: - if expected_type == larch.IndexNode: - self.error('cannot find index node %s' % node_id) - elif expected_type == larch.LeafNode: - self.error('cannot find leaf node %s' % node_id) - else: - self.error('cannot find node of unknown type %s' % node_id) - - ns = self.fsck.forest.node_store - tree_minkey = chr(0) * ns.codec.key_bytes - tree_maxkey = chr(255) * ns.codec.key_bytes - for x in walker(root_id, tree_minkey, tree_maxkey, larch.IndexNode): - yield x - - -class CheckExtraNodes(WorkItem): + for tree in self.fsck.forest.trees: + self.fsck.count(tree.root.id) + yield CheckNode(self.fsck, tree.root.id) + + +class CheckRefcounts(WorkItem): def __init__(self, fsck): self.fsck = fsck - self.seen = set() - self.name = 'extra nodes' + self.name = 'refcounts in %s' % self.fsck.forest_name def do(self): - tracing.trace('checking for extra nodes') - for node_id in self.fsck.forest.node_store.list_nodes(): - if node_id not in self.seen: - self.warning('node %d is not part of the tree' % node_id) + for node_id in self.fsck.refcounts: + refcount = self.fsck.forest.node_store.get_refcount(node_id) + if refcount != self.fsck.refcounts[node_id]: + self.error( + 'forest %s: node %s: refcount is %s but should be %s' % + (self.fsck.forest_name, + node_id, + refcount, + self.fsck.refcounts[node_id])) + if self.fsck.fix: + self.fsck.forest.node_store.set_refcount( + node_id, self.fsck.refcounts[node_id]) class CommitForest(WorkItem): def __init__(self, fsck): self.fsck = fsck - self.name = 'committing fixes' + self.name = 'committing fixes to %s' % self.fsck.forest_name def do(self): - tracing.trace('committing changes to forest') + tracing.trace('committing changes to %s' % self.fsck.forest_name) self.fsck.forest.commit() @@ -217,20 +127,19 @@ class Fsck(object): def __init__(self, forest, warning, error, fix): self.forest = forest + self.forest_name = getattr( + forest.node_store, 'dirname', 'in-memory forest') self.warning = warning self.error = error self.fix = fix + self.refcounts = {} def find_work(self): - for node_id in self.forest.node_store.list_nodes(): - tracing.trace('found node %s' % node_id) - yield CheckNode(self, node_id) - for tree in self.forest.trees: - yield CheckRoot(self, tree.root.id) - extra = CheckExtraNodes(self) - for tree in self.forest.trees: - yield CheckRecursively(self, tree.root.id, extra.seen) - yield extra + yield CheckForest(self) + yield CheckRefcounts(self) if self.fix: yield CommitForest(self) + def count(self, node_id): + self.refcounts[node_id] = self.refcounts.get(node_id, 0) + 1 + diff --git a/larch/journal.py b/larch/journal.py index 51d866d..36b38a5 100644 --- a/larch/journal.py +++ b/larch/journal.py @@ -146,8 +146,8 @@ class Journal(object): if new in self.new_files: return self.fs.cat(new) elif deleted in self.deleted_files: - raise OSError((errno.ENOENT, os.strerror(errno.ENOENT), - filename)) + raise OSError( + errno.ENOENT, os.strerror(errno.ENOENT), filename) return self.fs.cat(filename) def remove(self, filename): @@ -161,7 +161,7 @@ class Journal(object): self.fs.remove(new) self.new_files.remove(new) elif deleted in self.deleted_files: - raise OSError((errno.ENOENT, os.strerror(errno.ENOENT), filename)) + raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), filename) else: self.fs.overwrite_file(deleted, '') self.deleted_files.add(deleted) diff --git a/larch/nodestore.py b/larch/nodestore.py index 82bcfbc..fb0abe0 100644 --- a/larch/nodestore.py +++ b/larch/nodestore.py @@ -21,9 +21,14 @@ class NodeMissing(larch.Error): '''A node cannot be found from a NodeStore.''' - def __init__(self, node_store, node_id): - self.msg = ('Node %d cannot be found in the node store %s' % - (node_id, node_store)) + def __init__(self, node_store, node_id, error=None): + if error is None: + error_msg = '' + else: + error_msg = (': %s: %s: %s' % + (error.errno, error.strerror, error.filename)) + self.msg = ('Node %d cannot be found in the node store %s%s' % + (node_id, node_store, error_msg)) class NodeTooBig(larch.Error): diff --git a/larch/nodestore_disk.py b/larch/nodestore_disk.py index 11f48b5..197a411 100644 --- a/larch/nodestore_disk.py +++ b/larch/nodestore_disk.py @@ -20,6 +20,7 @@ import os import StringIO import struct import tempfile +import traceback import tracing import larch @@ -232,9 +233,10 @@ class NodeStoreDisk(larch.NodeStore): try: encoded = self.journal.cat(name) except (IOError, OSError), e: - logging.error('Error reading node: %s: %s: %s' % + logging.debug('Error reading node: %s: %s: %s' % (e.errno, e.strerror, e.filename or name)) - raise larch.NodeMissing(self.dirname, node_id) + logging.debug(traceback.format_exc()) + raise larch.NodeMissing(self.dirname, node_id, error=e) else: node = self.codec.decode(encoded) node.frozen = True |