From 63ad4737a6af23beade77b5486ed4ae78a976a6d Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Fri, 23 Nov 2012 16:32:12 +0000 Subject: Report what's being checked with more precision --- larch/fsck.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/larch/fsck.py b/larch/fsck.py index 3554d95..12c1c53 100755 --- a/larch/fsck.py +++ b/larch/fsck.py @@ -64,7 +64,7 @@ class CheckNode(WorkItem): def __init__(self, fsck, node_id): self.fsck = fsck self.node_id = node_id - self.name = 'node %s' % node_id + self.name = 'node %s in %s' % (node_id, self.fsck.forest_name) def do(self): tracing.trace('checking node %s' % self.node_id) @@ -112,7 +112,7 @@ class CheckRoot(WorkItem): def __init__(self, fsck, root_id): self.fsck = fsck self.root_id = root_id - self.name = 'root node %s' % root_id + self.name = 'root node %s in %s' % (root_id, self.fsck.forest_name) def do(self): tracing.trace('checking root node %s' % self.root_id) @@ -131,7 +131,7 @@ class CheckRecursively(WorkItem): def __init__(self, fsck, root_id, seen): self.fsck = fsck self.root_id = root_id - self.name = 'tree %s' % root_id + self.name = 'tree %s in %s' % (root_id, self.fsck.forest_name) self.seen = seen def do(self): @@ -150,6 +150,9 @@ class CheckRecursively(WorkItem): def walk(self, root_id): def walker(node_id, minkey, maxkey, expected_type): + if node_id in self.seen: + return + self.seen.add(node_id) expected_child = None node = self.get_node(node_id) if node: @@ -157,7 +160,6 @@ class CheckRecursively(WorkItem): if type(node) == larch.IndexNode: tracing.trace('recursively found index node %s' % node.id) keys = node.keys() - tracing.trace('keys: %s' % repr(keys)) for i, key in enumerate(keys): child_id = node[key] if i + 1 < len(keys): @@ -191,7 +193,7 @@ class CheckExtraNodes(WorkItem): def __init__(self, fsck): self.fsck = fsck self.seen = set() - self.name = 'extra nodes' + self.name = 'extra nodes in %s' % self.fsck.forest_name def do(self): tracing.trace('checking for extra nodes') @@ -204,7 +206,7 @@ class CommitForest(WorkItem): def __init__(self, fsck): self.fsck = fsck - self.name = 'committing fixes' + self.name = 'committing fixes to %s' % self.fsck.forest_name def do(self): tracing.trace('committing changes to forest') @@ -217,6 +219,8 @@ class Fsck(object): def __init__(self, forest, warning, error, fix): self.forest = forest + self.forest_name = getattr( + forest.node_store, 'dirname', 'in-memory forest') self.warning = warning self.error = error self.fix = fix -- cgit v1.2.1 From ffce6d4b2890a52c925856eb195074c1be510d31 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sat, 24 Nov 2012 09:57:44 +0000 Subject: Simplify fsck so it only checks node reachability --- larch/fsck.py | 165 ++++++---------------------------------------------------- 1 file changed, 17 insertions(+), 148 deletions(-) diff --git a/larch/fsck.py b/larch/fsck.py index 12c1c53..9875959 100755 --- a/larch/fsck.py +++ b/larch/fsck.py @@ -64,153 +64,27 @@ class CheckNode(WorkItem): def __init__(self, fsck, node_id): self.fsck = fsck self.node_id = node_id - self.name = 'node %s in %s' % (node_id, self.fsck.forest_name) + self.name = 'node %s in %s' % (self.node_id, self.fsck.forest_name) def do(self): - tracing.trace('checking node %s' % self.node_id) node = self.get_node(self.node_id) - if node: - if type(node) not in [larch.IndexNode, larch.LeafNode]: - self.error('node must be an index or leaf node') - return - keys = node.keys() - if self.fsck.forest.node_store.get_refcount(node.id) <= 0: - self.warning('node refcount must be > 0') - if not len(keys): - self.warning('node must have keys') - if sorted(keys) != keys: - self.error('node keys must be sorted') - if sorted(set(keys)) != keys: - self.error('node keys must be unique') - encoded = self.fsck.forest.node_store.codec.encode(node) - if len(encoded) > self.fsck.forest.node_store.node_size: - self.warning('node is too large') - if len(encoded) == 0: - self.warning('node has zero size when encoded') - - if self.fsck.fix and type(node) == larch.IndexNode: - tracing.trace('checking and fixing index node %s' % - self.node_id) - keys = [] - for key in node: - child_id = node[key] - if self.get_node(child_id): - keys.append(key) - else: - tracing.trace('child %s is missing' % child_id) - if keys != node.keys(): - tracing.trace('Replacing index node %s with fixed copy' % - self.node_id) - new_node = larch.IndexNode(node.id, keys, - [node[k] for k in keys]) - self.fsck.forest.node_store.put_node(new_node) - tracing.trace('fixed it: %s' % new_node.keys()) - - -class CheckRoot(WorkItem): - - def __init__(self, fsck, root_id): - self.fsck = fsck - self.root_id = root_id - self.name = 'root node %s in %s' % (root_id, self.fsck.forest_name) - - def do(self): - tracing.trace('checking root node %s' % self.root_id) - node = self.get_node(self.root_id) - if node: - if self.fsck.forest.node_store.get_refcount(self.root_id) != 1: - self.warning('root refcount must be 1') - if type(node) != larch.IndexNode: - self.error('root must be an index node') - else: - self.error('missing root node %s' % self.root_id) - - -class CheckRecursively(WorkItem): - - def __init__(self, fsck, root_id, seen): - self.fsck = fsck - self.root_id = root_id - self.name = 'tree %s in %s' % (root_id, self.fsck.forest_name) - self.seen = seen - - def do(self): - tracing.trace('checking recursive from root node %s' % self.root_id) - for node, minkey, maxkey in self.walk(self.root_id): - if node.id not in self.seen: - tracing.trace('checking node %s' % node.id) - self.seen.add(node.id) - keys = node.keys() - if keys: - if keys[0] < minkey: - self.error('node %s: first key is too small' % node.id) - if keys[-1] > maxkey: - self.error('node %s: last key is too large' % node.id) - - def walk(self, root_id): - - def walker(node_id, minkey, maxkey, expected_type): - if node_id in self.seen: - return - self.seen.add(node_id) - expected_child = None - node = self.get_node(node_id) - if node: - yield node, minkey, maxkey - if type(node) == larch.IndexNode: - tracing.trace('recursively found index node %s' % node.id) - keys = node.keys() - for i, key in enumerate(keys): - child_id = node[key] - if i + 1 < len(keys): - next_key = keys[i+1] - else: - next_key = maxkey - if expected_child is None: - child = self.get_node(child_id) - if child: - expected_child = type(child) - for x in walker(child_id, key, next_key, - expected_child): - yield x - else: - if expected_type == larch.IndexNode: - self.error('cannot find index node %s' % node_id) - elif expected_type == larch.LeafNode: - self.error('cannot find leaf node %s' % node_id) - else: - self.error('cannot find node of unknown type %s' % node_id) - - ns = self.fsck.forest.node_store - tree_minkey = chr(0) * ns.codec.key_bytes - tree_maxkey = chr(255) * ns.codec.key_bytes - for x in walker(root_id, tree_minkey, tree_maxkey, larch.IndexNode): - yield x - - -class CheckExtraNodes(WorkItem): + if type(node) == larch.IndexNode: + for child_id in node.values(): + if child_id not in self.fsck.seen_ids: + self.fsck.seen_ids.add(child_id) + yield CheckNode(self.fsck, child_id) - def __init__(self, fsck): - self.fsck = fsck - self.seen = set() - self.name = 'extra nodes in %s' % self.fsck.forest_name - def do(self): - tracing.trace('checking for extra nodes') - for node_id in self.fsck.forest.node_store.list_nodes(): - if node_id not in self.seen: - self.warning('node %d is not part of the tree' % node_id) - - -class CommitForest(WorkItem): +class CheckForest(WorkItem): def __init__(self, fsck): self.fsck = fsck - self.name = 'committing fixes to %s' % self.fsck.forest_name + self.name = 'forest %s' % self.fsck.forest_name def do(self): - tracing.trace('committing changes to forest') - self.fsck.forest.commit() + for tree in self.fsck.forest.trees: + self.fsck.seen_ids.add(tree.root.id) + yield CheckNode(self.fsck, tree.root.id) class Fsck(object): @@ -224,17 +98,12 @@ class Fsck(object): self.warning = warning self.error = error self.fix = fix + self.seen_ids = set() + self.refcounts = {} def find_work(self): - for node_id in self.forest.node_store.list_nodes(): - tracing.trace('found node %s' % node_id) - yield CheckNode(self, node_id) - for tree in self.forest.trees: - yield CheckRoot(self, tree.root.id) - extra = CheckExtraNodes(self) - for tree in self.forest.trees: - yield CheckRecursively(self, tree.root.id, extra.seen) - yield extra - if self.fix: - yield CommitForest(self) + yield CheckForest(self) + + def count(self, node_id): + self.refcounts[node_id] = self.refcounts.get(node_id, 0) + 1 -- cgit v1.2.1 From 7e7ce21dea2ee7c22793370c8851fca126b34960 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sat, 24 Nov 2012 10:09:50 +0000 Subject: Check refcounts --- larch/fsck.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/larch/fsck.py b/larch/fsck.py index 9875959..d3232d4 100755 --- a/larch/fsck.py +++ b/larch/fsck.py @@ -56,7 +56,9 @@ class WorkItem(object): try: return self.fsck.forest.node_store.get_node(node_id) except larch.NodeMissing: - self.error('node %s is missing' % node_id) + self.error( + 'forest %s: node %s is missing' % + (self.fsck.forest_name, node_id)) class CheckNode(WorkItem): @@ -70,11 +72,11 @@ class CheckNode(WorkItem): node = self.get_node(self.node_id) if type(node) == larch.IndexNode: for child_id in node.values(): - if child_id not in self.fsck.seen_ids: - self.fsck.seen_ids.add(child_id) + seen_already = child_id in self.fsck.refcounts + self.fsck.count(child_id) + if not seen_already: yield CheckNode(self.fsck, child_id) - class CheckForest(WorkItem): def __init__(self, fsck): @@ -83,10 +85,27 @@ class CheckForest(WorkItem): def do(self): for tree in self.fsck.forest.trees: - self.fsck.seen_ids.add(tree.root.id) + self.fsck.count(tree.root.id) yield CheckNode(self.fsck, tree.root.id) +class CheckRefcounts(WorkItem): + + def __init__(self, fsck): + self.fsck = fsck + self.name = 'refcounts in %s' % self.fsck.forest_name + + def do(self): + for node_id in self.fsck.refcounts: + refcount = self.fsck.forest.node_store.get_refcount(node_id) + if refcount != self.fsck.refcounts[node_id]: + self.error( + 'forest %s: node %s: refcount is %s but should be %s' % + (self.fsck.forest_name, + node_id, + refcount, + self.fsck.refcounts[node_id])) + class Fsck(object): '''Verify internal consistency of a larch.Forest.''' @@ -98,11 +117,11 @@ class Fsck(object): self.warning = warning self.error = error self.fix = fix - self.seen_ids = set() self.refcounts = {} def find_work(self): yield CheckForest(self) + yield CheckRefcounts(self) def count(self, node_id): self.refcounts[node_id] = self.refcounts.get(node_id, 0) + 1 -- cgit v1.2.1 From 97c7f7d84a5127bcb2cea15b319d52009f4333d2 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sat, 24 Nov 2012 16:10:32 +0000 Subject: Fix refcount errors --- larch/fsck.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/larch/fsck.py b/larch/fsck.py index d3232d4..e9d0f6e 100755 --- a/larch/fsck.py +++ b/larch/fsck.py @@ -105,6 +105,20 @@ class CheckRefcounts(WorkItem): node_id, refcount, self.fsck.refcounts[node_id])) + if self.fsck.fix: + self.fsck.forest.node_store.set_refcount(node_id, refcount) + + +class CommitForest(WorkItem): + + def __init__(self, fsck): + self.fsck = fsck + self.name = 'committing fixes to %s' % self.fsck.forest_name + + def do(self): + tracing.trace('committing changes to %s' % self.fsck.forest_name) + self.fsck.forest.commit() + class Fsck(object): @@ -122,6 +136,8 @@ class Fsck(object): def find_work(self): yield CheckForest(self) yield CheckRefcounts(self) + if self.fix: + yield CommitForest(self) def count(self, node_id): self.refcounts[node_id] = self.refcounts.get(node_id, 0) + 1 -- cgit v1.2.1 From 2bd8483b06f581991836b48864b6ecbdd3c1b99c Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sun, 25 Nov 2012 11:18:29 +0000 Subject: Set refcount correctly --- larch/fsck.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/larch/fsck.py b/larch/fsck.py index e9d0f6e..7ca0111 100755 --- a/larch/fsck.py +++ b/larch/fsck.py @@ -106,7 +106,8 @@ class CheckRefcounts(WorkItem): refcount, self.fsck.refcounts[node_id])) if self.fsck.fix: - self.fsck.forest.node_store.set_refcount(node_id, refcount) + self.fsck.forest.node_store.set_refcount( + node_id, self.fsck.refcounts[node_id]) class CommitForest(WorkItem): -- cgit v1.2.1 From b6b41082c4ad3d1ec39c857e15ce0c7e88c63927 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sun, 2 Dec 2012 10:40:27 +0000 Subject: Fix how OSError is created --- larch/journal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/larch/journal.py b/larch/journal.py index 51d866d..4086ffa 100644 --- a/larch/journal.py +++ b/larch/journal.py @@ -146,8 +146,8 @@ class Journal(object): if new in self.new_files: return self.fs.cat(new) elif deleted in self.deleted_files: - raise OSError((errno.ENOENT, os.strerror(errno.ENOENT), - filename)) + raise OSError( + errno.ENOENT, os.strerror(errno.ENOENT), filename) return self.fs.cat(filename) def remove(self, filename): -- cgit v1.2.1 From 61a8858c5d5ea2729d671f6a3f59870cc9a92832 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sun, 2 Dec 2012 10:41:05 +0000 Subject: Fix an other instance of OSError mis-creation --- larch/journal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/larch/journal.py b/larch/journal.py index 4086ffa..36b38a5 100644 --- a/larch/journal.py +++ b/larch/journal.py @@ -161,7 +161,7 @@ class Journal(object): self.fs.remove(new) self.new_files.remove(new) elif deleted in self.deleted_files: - raise OSError((errno.ENOENT, os.strerror(errno.ENOENT), filename)) + raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), filename) else: self.fs.overwrite_file(deleted, '') self.deleted_files.add(deleted) -- cgit v1.2.1 From b79121abfe1a3c04af0b77855c9f45a03cbc8332 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sun, 2 Dec 2012 10:45:52 +0000 Subject: Log the exception that gets replaced by NodeMissing --- larch/nodestore_disk.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/larch/nodestore_disk.py b/larch/nodestore_disk.py index 11f48b5..65c5f93 100644 --- a/larch/nodestore_disk.py +++ b/larch/nodestore_disk.py @@ -20,6 +20,7 @@ import os import StringIO import struct import tempfile +import traceback import tracing import larch @@ -234,6 +235,7 @@ class NodeStoreDisk(larch.NodeStore): except (IOError, OSError), e: logging.error('Error reading node: %s: %s: %s' % (e.errno, e.strerror, e.filename or name)) + logging.debug(traceback.format_exc()) raise larch.NodeMissing(self.dirname, node_id) else: node = self.codec.decode(encoded) -- cgit v1.2.1 From 086c7d6a0d822e7ca407bbb52a107538124b75b5 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Mon, 3 Dec 2012 12:56:21 +0000 Subject: Optionally add original exception to NodeMissing --- larch/nodestore.py | 11 ++++++++--- larch/nodestore_disk.py | 4 ++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/larch/nodestore.py b/larch/nodestore.py index 82bcfbc..fb0abe0 100644 --- a/larch/nodestore.py +++ b/larch/nodestore.py @@ -21,9 +21,14 @@ class NodeMissing(larch.Error): '''A node cannot be found from a NodeStore.''' - def __init__(self, node_store, node_id): - self.msg = ('Node %d cannot be found in the node store %s' % - (node_id, node_store)) + def __init__(self, node_store, node_id, error=None): + if error is None: + error_msg = '' + else: + error_msg = (': %s: %s: %s' % + (error.errno, error.strerror, error.filename)) + self.msg = ('Node %d cannot be found in the node store %s%s' % + (node_id, node_store, error_msg)) class NodeTooBig(larch.Error): diff --git a/larch/nodestore_disk.py b/larch/nodestore_disk.py index 65c5f93..197a411 100644 --- a/larch/nodestore_disk.py +++ b/larch/nodestore_disk.py @@ -233,10 +233,10 @@ class NodeStoreDisk(larch.NodeStore): try: encoded = self.journal.cat(name) except (IOError, OSError), e: - logging.error('Error reading node: %s: %s: %s' % + logging.debug('Error reading node: %s: %s: %s' % (e.errno, e.strerror, e.filename or name)) logging.debug(traceback.format_exc()) - raise larch.NodeMissing(self.dirname, node_id) + raise larch.NodeMissing(self.dirname, node_id, error=e) else: node = self.codec.decode(encoded) node.frozen = True -- cgit v1.2.1