summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2012-12-19 13:06:04 +0000
committerLars Wirzenius <liw@liw.fi>2012-12-19 13:06:04 +0000
commit989eb64fc8be4261f5b1ceaec6b91e97395fa336 (patch)
tree6e30e4e97edc76b1bc1d2e0038601be999cb8686
parent31503336c52eadce1ec923a95d909c97c6680a1a (diff)
parentbb218e371982229c4b071076248846a897c0251a (diff)
downloadlarch-989eb64fc8be4261f5b1ceaec6b91e97395fa336.tar.gz
Fix missing B-tree nodes in fsck
-rw-r--r--NEWS6
-rwxr-xr-xlarch/fsck.py64
2 files changed, 58 insertions, 12 deletions
diff --git a/NEWS b/NEWS
index bc313e7..a760a39 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,12 @@ NEWS for larch
These are the release notes for larch, a Python implementation of a
copy-on-write B-tree, designed by Ohad Rodeh.
+Version UNRELEASED
+------------------
+
+* Fsck now check for missing nodes, and optionally fixes them by
+ deleting references to them.
+
Version 1.20121216
------------------
diff --git a/larch/fsck.py b/larch/fsck.py
index 7ca0111..cd6fbc9 100755
--- a/larch/fsck.py
+++ b/larch/fsck.py
@@ -53,6 +53,7 @@ class WorkItem(object):
self.fsck.error('ERROR: %s: %s' % (self.name, msg))
def get_node(self, node_id):
+ tracing.trace('node_id=%s' % node_id)
try:
return self.fsck.forest.node_store.get_node(node_id)
except larch.NodeMissing:
@@ -60,22 +61,59 @@ class WorkItem(object):
'forest %s: node %s is missing' %
(self.fsck.forest_name, node_id))
+ def start_modification(self, node):
+ self.fsck.forest.node_store.start_modification(node)
-class CheckNode(WorkItem):
+ def put_node(self, node):
+ tracing.trace('node.id=%s' % node.id)
+ return self.fsck.forest.node_store.put_node(node)
- def __init__(self, fsck, node_id):
+
+class CheckIndexNode(WorkItem):
+
+ def __init__(self, fsck, node):
self.fsck = fsck
- self.node_id = node_id
- self.name = 'node %s in %s' % (self.node_id, self.fsck.forest_name)
+ self.node = node
+ self.name = 'node %s in %s' % (self.node.id, self.fsck.forest_name)
def do(self):
- node = self.get_node(self.node_id)
- if type(node) == larch.IndexNode:
- for child_id in node.values():
- seen_already = child_id in self.fsck.refcounts
- self.fsck.count(child_id)
- if not seen_already:
- yield CheckNode(self.fsck, child_id)
+ tracing.trace('node.id=%s' % self.node.id)
+
+ if type(self.node) != larch.IndexNode:
+ self.error(
+ 'forest %s: node %s: '
+ 'Expected to get an index node, got %s instead' %
+ (self.fsck.forest_name, self.node.id, type(self.node)))
+ return
+
+ if len(self.node) == 0:
+ self.error('forest %s: index node %s: No children' %
+ (self.fsck.forest_name, self.node.id))
+ return
+
+ # Increase refcounts for all children, and check that the child
+ # nodes exist. If the children are index nodes, create work
+ # items to check those. Leaf nodes get no further checking.
+
+ drop_keys = []
+ for key in self.node:
+ child_id = self.node[key]
+ seen_already = child_id in self.fsck.refcounts
+ self.fsck.count(child_id)
+ if not seen_already:
+ child = self.get_node(child_id)
+ if child is None:
+ drop_keys.append(key)
+ elif type(child) == larch.IndexNode:
+ yield CheckIndexNode(self.fsck, child)
+
+ # Fix references to missing children by dropping them.
+ if self.fsck.fix and drop_keys:
+ self.start_modification(self.node)
+ for key in drop_keys:
+ self.node.remove(key)
+ self.put_node(self.node)
+
class CheckForest(WorkItem):
@@ -86,7 +124,9 @@ class CheckForest(WorkItem):
def do(self):
for tree in self.fsck.forest.trees:
self.fsck.count(tree.root.id)
- yield CheckNode(self.fsck, tree.root.id)
+ root_node = self.get_node(tree.root.id)
+ tracing.trace('root_node.id=%s' % root_node.id)
+ yield CheckIndexNode(self.fsck, root_node)
class CheckRefcounts(WorkItem):