diff options
author | Lars Wirzenius <liw@liw.fi> | 2012-12-19 13:06:04 +0000 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2012-12-19 13:06:04 +0000 |
commit | 989eb64fc8be4261f5b1ceaec6b91e97395fa336 (patch) | |
tree | 6e30e4e97edc76b1bc1d2e0038601be999cb8686 | |
parent | 31503336c52eadce1ec923a95d909c97c6680a1a (diff) | |
parent | bb218e371982229c4b071076248846a897c0251a (diff) | |
download | larch-989eb64fc8be4261f5b1ceaec6b91e97395fa336.tar.gz |
Fix missing B-tree nodes in fsck
-rw-r--r-- | NEWS | 6 | ||||
-rwxr-xr-x | larch/fsck.py | 64 |
2 files changed, 58 insertions, 12 deletions
@@ -4,6 +4,12 @@ NEWS for larch These are the release notes for larch, a Python implementation of a copy-on-write B-tree, designed by Ohad Rodeh. +Version UNRELEASED +------------------ + +* Fsck now check for missing nodes, and optionally fixes them by + deleting references to them. + Version 1.20121216 ------------------ diff --git a/larch/fsck.py b/larch/fsck.py index 7ca0111..cd6fbc9 100755 --- a/larch/fsck.py +++ b/larch/fsck.py @@ -53,6 +53,7 @@ class WorkItem(object): self.fsck.error('ERROR: %s: %s' % (self.name, msg)) def get_node(self, node_id): + tracing.trace('node_id=%s' % node_id) try: return self.fsck.forest.node_store.get_node(node_id) except larch.NodeMissing: @@ -60,22 +61,59 @@ class WorkItem(object): 'forest %s: node %s is missing' % (self.fsck.forest_name, node_id)) + def start_modification(self, node): + self.fsck.forest.node_store.start_modification(node) -class CheckNode(WorkItem): + def put_node(self, node): + tracing.trace('node.id=%s' % node.id) + return self.fsck.forest.node_store.put_node(node) - def __init__(self, fsck, node_id): + +class CheckIndexNode(WorkItem): + + def __init__(self, fsck, node): self.fsck = fsck - self.node_id = node_id - self.name = 'node %s in %s' % (self.node_id, self.fsck.forest_name) + self.node = node + self.name = 'node %s in %s' % (self.node.id, self.fsck.forest_name) def do(self): - node = self.get_node(self.node_id) - if type(node) == larch.IndexNode: - for child_id in node.values(): - seen_already = child_id in self.fsck.refcounts - self.fsck.count(child_id) - if not seen_already: - yield CheckNode(self.fsck, child_id) + tracing.trace('node.id=%s' % self.node.id) + + if type(self.node) != larch.IndexNode: + self.error( + 'forest %s: node %s: ' + 'Expected to get an index node, got %s instead' % + (self.fsck.forest_name, self.node.id, type(self.node))) + return + + if len(self.node) == 0: + self.error('forest %s: index node %s: No children' % + (self.fsck.forest_name, self.node.id)) + return + + # Increase refcounts for all children, and check that the child + # nodes exist. If the children are index nodes, create work + # items to check those. Leaf nodes get no further checking. + + drop_keys = [] + for key in self.node: + child_id = self.node[key] + seen_already = child_id in self.fsck.refcounts + self.fsck.count(child_id) + if not seen_already: + child = self.get_node(child_id) + if child is None: + drop_keys.append(key) + elif type(child) == larch.IndexNode: + yield CheckIndexNode(self.fsck, child) + + # Fix references to missing children by dropping them. + if self.fsck.fix and drop_keys: + self.start_modification(self.node) + for key in drop_keys: + self.node.remove(key) + self.put_node(self.node) + class CheckForest(WorkItem): @@ -86,7 +124,9 @@ class CheckForest(WorkItem): def do(self): for tree in self.fsck.forest.trees: self.fsck.count(tree.root.id) - yield CheckNode(self.fsck, tree.root.id) + root_node = self.get_node(tree.root.id) + tracing.trace('root_node.id=%s' % root_node.id) + yield CheckIndexNode(self.fsck, root_node) class CheckRefcounts(WorkItem): |