From 0638950f26fe2a296d18b67aee1679f02c87b06f Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Wed, 19 Dec 2012 11:31:12 +0000 Subject: Add missing empty line --- larch/fsck.py | 1 + 1 file changed, 1 insertion(+) diff --git a/larch/fsck.py b/larch/fsck.py index 7ca0111..bbcd7a3 100755 --- a/larch/fsck.py +++ b/larch/fsck.py @@ -77,6 +77,7 @@ class CheckNode(WorkItem): if not seen_already: yield CheckNode(self.fsck, child_id) + class CheckForest(WorkItem): def __init__(self, fsck): -- cgit v1.2.1 From 1b598f29d0e7eec3a52ba79b2c457593f82f6dcf Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Wed, 19 Dec 2012 12:35:59 +0000 Subject: Refactor logic for checking index nodes This is a preliminary step for dealing with missing children. We're going to need to do a get_node for the children, but we want to avoid doing that many times per node, so this change makes it so this will be easier to do. --- larch/fsck.py | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/larch/fsck.py b/larch/fsck.py index bbcd7a3..e15bec7 100755 --- a/larch/fsck.py +++ b/larch/fsck.py @@ -53,6 +53,7 @@ class WorkItem(object): self.fsck.error('ERROR: %s: %s' % (self.name, msg)) def get_node(self, node_id): + tracing.trace('node_id=%s' % node_id) try: return self.fsck.forest.node_store.get_node(node_id) except larch.NodeMissing: @@ -61,21 +62,38 @@ class WorkItem(object): (self.fsck.forest_name, node_id)) -class CheckNode(WorkItem): +class CheckIndexNode(WorkItem): - def __init__(self, fsck, node_id): + def __init__(self, fsck, node): self.fsck = fsck - self.node_id = node_id - self.name = 'node %s in %s' % (self.node_id, self.fsck.forest_name) + self.node = node + self.name = 'node %s in %s' % (self.node.id, self.fsck.forest_name) def do(self): - node = self.get_node(self.node_id) - if type(node) == larch.IndexNode: - for child_id in node.values(): - seen_already = child_id in self.fsck.refcounts - self.fsck.count(child_id) - if not seen_already: - yield CheckNode(self.fsck, child_id) + tracing.trace('node.id=%s' % self.node.id) + + if type(self.node) != larch.IndexNode: + self.error( + 'Node %s: Expected to get an index node, got %s instead' % + (self.node.id, type(self.node))) + return + + child_ids = self.node.values() + if len(child_ids) == 0: + self.error('Index node %s: No children' % self.node.id) + return + + # Increase refcounts for all children, and check that the child + # nodes exist. If the children are index nodes, create work + # items to check those. Leaf nodes get no further checking. + + for child_id in child_ids: + seen_already = child_id in self.fsck.refcounts + self.fsck.count(child_id) + if not seen_already: + child = self.get_node(child_id) + if type(child) == larch.IndexNode: + yield CheckIndexNode(self.fsck, child) class CheckForest(WorkItem): @@ -87,7 +105,9 @@ class CheckForest(WorkItem): def do(self): for tree in self.fsck.forest.trees: self.fsck.count(tree.root.id) - yield CheckNode(self.fsck, tree.root.id) + root_node = self.get_node(tree.root.id) + tracing.trace('root_node.id=%s' % root_node.id) + yield CheckIndexNode(self.fsck, root_node) class CheckRefcounts(WorkItem): -- cgit v1.2.1 From 7e84bc5887b8e0ebde475a018c92aa39f68c34a6 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Wed, 19 Dec 2012 13:02:38 +0000 Subject: Fix missing node problems by removing them from B-trees --- larch/fsck.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/larch/fsck.py b/larch/fsck.py index e15bec7..cd6fbc9 100755 --- a/larch/fsck.py +++ b/larch/fsck.py @@ -61,6 +61,13 @@ class WorkItem(object): 'forest %s: node %s is missing' % (self.fsck.forest_name, node_id)) + def start_modification(self, node): + self.fsck.forest.node_store.start_modification(node) + + def put_node(self, node): + tracing.trace('node.id=%s' % node.id) + return self.fsck.forest.node_store.put_node(node) + class CheckIndexNode(WorkItem): @@ -74,27 +81,39 @@ class CheckIndexNode(WorkItem): if type(self.node) != larch.IndexNode: self.error( - 'Node %s: Expected to get an index node, got %s instead' % - (self.node.id, type(self.node))) + 'forest %s: node %s: ' + 'Expected to get an index node, got %s instead' % + (self.fsck.forest_name, self.node.id, type(self.node))) return - child_ids = self.node.values() - if len(child_ids) == 0: - self.error('Index node %s: No children' % self.node.id) + if len(self.node) == 0: + self.error('forest %s: index node %s: No children' % + (self.fsck.forest_name, self.node.id)) return # Increase refcounts for all children, and check that the child # nodes exist. If the children are index nodes, create work # items to check those. Leaf nodes get no further checking. - for child_id in child_ids: + drop_keys = [] + for key in self.node: + child_id = self.node[key] seen_already = child_id in self.fsck.refcounts self.fsck.count(child_id) if not seen_already: child = self.get_node(child_id) - if type(child) == larch.IndexNode: + if child is None: + drop_keys.append(key) + elif type(child) == larch.IndexNode: yield CheckIndexNode(self.fsck, child) + # Fix references to missing children by dropping them. + if self.fsck.fix and drop_keys: + self.start_modification(self.node) + for key in drop_keys: + self.node.remove(key) + self.put_node(self.node) + class CheckForest(WorkItem): -- cgit v1.2.1 From bb218e371982229c4b071076248846a897c0251a Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Wed, 19 Dec 2012 13:05:54 +0000 Subject: Update NEWS --- NEWS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS b/NEWS index bc313e7..a760a39 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,12 @@ NEWS for larch These are the release notes for larch, a Python implementation of a copy-on-write B-tree, designed by Ohad Rodeh. +Version UNRELEASED +------------------ + +* Fsck now check for missing nodes, and optionally fixes them by + deleting references to them. + Version 1.20121216 ------------------ -- cgit v1.2.1