summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2012-12-03 22:26:37 +0000
committerLars Wirzenius <liw@liw.fi>2012-12-03 22:26:37 +0000
commit8e93ee489cebaf0816b727dae4cc8d33c7ea3179 (patch)
tree7549081e42931799aea3d48b99e54563266f6592
parenta4e91cc2398c67dbafe0ef6bdc9923c46ca646b7 (diff)
parent086c7d6a0d822e7ca407bbb52a107538124b75b5 (diff)
downloadlarch-8e93ee489cebaf0816b727dae4cc8d33c7ea3179.tar.gz
Improve fsck and related bug fixes
-rwxr-xr-xlarch/fsck.py177
-rw-r--r--larch/journal.py6
-rw-r--r--larch/nodestore.py11
-rw-r--r--larch/nodestore_disk.py6
4 files changed, 58 insertions, 142 deletions
diff --git a/larch/fsck.py b/larch/fsck.py
index 3554d95..7ca0111 100755
--- a/larch/fsck.py
+++ b/larch/fsck.py
@@ -56,7 +56,9 @@ class WorkItem(object):
try:
return self.fsck.forest.node_store.get_node(node_id)
except larch.NodeMissing:
- self.error('node %s is missing' % node_id)
+ self.error(
+ 'forest %s: node %s is missing' %
+ (self.fsck.forest_name, node_id))
class CheckNode(WorkItem):
@@ -64,150 +66,58 @@ class CheckNode(WorkItem):
def __init__(self, fsck, node_id):
self.fsck = fsck
self.node_id = node_id
- self.name = 'node %s' % node_id
+ self.name = 'node %s in %s' % (self.node_id, self.fsck.forest_name)
def do(self):
- tracing.trace('checking node %s' % self.node_id)
node = self.get_node(self.node_id)
- if node:
- if type(node) not in [larch.IndexNode, larch.LeafNode]:
- self.error('node must be an index or leaf node')
- return
- keys = node.keys()
- if self.fsck.forest.node_store.get_refcount(node.id) <= 0:
- self.warning('node refcount must be > 0')
- if not len(keys):
- self.warning('node must have keys')
- if sorted(keys) != keys:
- self.error('node keys must be sorted')
- if sorted(set(keys)) != keys:
- self.error('node keys must be unique')
- encoded = self.fsck.forest.node_store.codec.encode(node)
- if len(encoded) > self.fsck.forest.node_store.node_size:
- self.warning('node is too large')
- if len(encoded) == 0:
- self.warning('node has zero size when encoded')
-
- if self.fsck.fix and type(node) == larch.IndexNode:
- tracing.trace('checking and fixing index node %s' %
- self.node_id)
- keys = []
- for key in node:
- child_id = node[key]
- if self.get_node(child_id):
- keys.append(key)
- else:
- tracing.trace('child %s is missing' % child_id)
- if keys != node.keys():
- tracing.trace('Replacing index node %s with fixed copy' %
- self.node_id)
- new_node = larch.IndexNode(node.id, keys,
- [node[k] for k in keys])
- self.fsck.forest.node_store.put_node(new_node)
- tracing.trace('fixed it: %s' % new_node.keys())
-
-
-class CheckRoot(WorkItem):
-
- def __init__(self, fsck, root_id):
- self.fsck = fsck
- self.root_id = root_id
- self.name = 'root node %s' % root_id
-
- def do(self):
- tracing.trace('checking root node %s' % self.root_id)
- node = self.get_node(self.root_id)
- if node:
- if self.fsck.forest.node_store.get_refcount(self.root_id) != 1:
- self.warning('root refcount must be 1')
- if type(node) != larch.IndexNode:
- self.error('root must be an index node')
- else:
- self.error('missing root node %s' % self.root_id)
+ if type(node) == larch.IndexNode:
+ for child_id in node.values():
+ seen_already = child_id in self.fsck.refcounts
+ self.fsck.count(child_id)
+ if not seen_already:
+ yield CheckNode(self.fsck, child_id)
+class CheckForest(WorkItem):
-class CheckRecursively(WorkItem):
-
- def __init__(self, fsck, root_id, seen):
+ def __init__(self, fsck):
self.fsck = fsck
- self.root_id = root_id
- self.name = 'tree %s' % root_id
- self.seen = seen
-
+ self.name = 'forest %s' % self.fsck.forest_name
+
def do(self):
- tracing.trace('checking recursive from root node %s' % self.root_id)
- for node, minkey, maxkey in self.walk(self.root_id):
- if node.id not in self.seen:
- tracing.trace('checking node %s' % node.id)
- self.seen.add(node.id)
- keys = node.keys()
- if keys:
- if keys[0] < minkey:
- self.error('node %s: first key is too small' % node.id)
- if keys[-1] > maxkey:
- self.error('node %s: last key is too large' % node.id)
-
- def walk(self, root_id):
-
- def walker(node_id, minkey, maxkey, expected_type):
- expected_child = None
- node = self.get_node(node_id)
- if node:
- yield node, minkey, maxkey
- if type(node) == larch.IndexNode:
- tracing.trace('recursively found index node %s' % node.id)
- keys = node.keys()
- tracing.trace('keys: %s' % repr(keys))
- for i, key in enumerate(keys):
- child_id = node[key]
- if i + 1 < len(keys):
- next_key = keys[i+1]
- else:
- next_key = maxkey
- if expected_child is None:
- child = self.get_node(child_id)
- if child:
- expected_child = type(child)
- for x in walker(child_id, key, next_key,
- expected_child):
- yield x
- else:
- if expected_type == larch.IndexNode:
- self.error('cannot find index node %s' % node_id)
- elif expected_type == larch.LeafNode:
- self.error('cannot find leaf node %s' % node_id)
- else:
- self.error('cannot find node of unknown type %s' % node_id)
-
- ns = self.fsck.forest.node_store
- tree_minkey = chr(0) * ns.codec.key_bytes
- tree_maxkey = chr(255) * ns.codec.key_bytes
- for x in walker(root_id, tree_minkey, tree_maxkey, larch.IndexNode):
- yield x
-
-
-class CheckExtraNodes(WorkItem):
+ for tree in self.fsck.forest.trees:
+ self.fsck.count(tree.root.id)
+ yield CheckNode(self.fsck, tree.root.id)
+
+
+class CheckRefcounts(WorkItem):
def __init__(self, fsck):
self.fsck = fsck
- self.seen = set()
- self.name = 'extra nodes'
+ self.name = 'refcounts in %s' % self.fsck.forest_name
def do(self):
- tracing.trace('checking for extra nodes')
- for node_id in self.fsck.forest.node_store.list_nodes():
- if node_id not in self.seen:
- self.warning('node %d is not part of the tree' % node_id)
+ for node_id in self.fsck.refcounts:
+ refcount = self.fsck.forest.node_store.get_refcount(node_id)
+ if refcount != self.fsck.refcounts[node_id]:
+ self.error(
+ 'forest %s: node %s: refcount is %s but should be %s' %
+ (self.fsck.forest_name,
+ node_id,
+ refcount,
+ self.fsck.refcounts[node_id]))
+ if self.fsck.fix:
+ self.fsck.forest.node_store.set_refcount(
+ node_id, self.fsck.refcounts[node_id])
class CommitForest(WorkItem):
def __init__(self, fsck):
self.fsck = fsck
- self.name = 'committing fixes'
+ self.name = 'committing fixes to %s' % self.fsck.forest_name
def do(self):
- tracing.trace('committing changes to forest')
+ tracing.trace('committing changes to %s' % self.fsck.forest_name)
self.fsck.forest.commit()
@@ -217,20 +127,19 @@ class Fsck(object):
def __init__(self, forest, warning, error, fix):
self.forest = forest
+ self.forest_name = getattr(
+ forest.node_store, 'dirname', 'in-memory forest')
self.warning = warning
self.error = error
self.fix = fix
+ self.refcounts = {}
def find_work(self):
- for node_id in self.forest.node_store.list_nodes():
- tracing.trace('found node %s' % node_id)
- yield CheckNode(self, node_id)
- for tree in self.forest.trees:
- yield CheckRoot(self, tree.root.id)
- extra = CheckExtraNodes(self)
- for tree in self.forest.trees:
- yield CheckRecursively(self, tree.root.id, extra.seen)
- yield extra
+ yield CheckForest(self)
+ yield CheckRefcounts(self)
if self.fix:
yield CommitForest(self)
+ def count(self, node_id):
+ self.refcounts[node_id] = self.refcounts.get(node_id, 0) + 1
+
diff --git a/larch/journal.py b/larch/journal.py
index 51d866d..36b38a5 100644
--- a/larch/journal.py
+++ b/larch/journal.py
@@ -146,8 +146,8 @@ class Journal(object):
if new in self.new_files:
return self.fs.cat(new)
elif deleted in self.deleted_files:
- raise OSError((errno.ENOENT, os.strerror(errno.ENOENT),
- filename))
+ raise OSError(
+ errno.ENOENT, os.strerror(errno.ENOENT), filename)
return self.fs.cat(filename)
def remove(self, filename):
@@ -161,7 +161,7 @@ class Journal(object):
self.fs.remove(new)
self.new_files.remove(new)
elif deleted in self.deleted_files:
- raise OSError((errno.ENOENT, os.strerror(errno.ENOENT), filename))
+ raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), filename)
else:
self.fs.overwrite_file(deleted, '')
self.deleted_files.add(deleted)
diff --git a/larch/nodestore.py b/larch/nodestore.py
index 82bcfbc..fb0abe0 100644
--- a/larch/nodestore.py
+++ b/larch/nodestore.py
@@ -21,9 +21,14 @@ class NodeMissing(larch.Error):
'''A node cannot be found from a NodeStore.'''
- def __init__(self, node_store, node_id):
- self.msg = ('Node %d cannot be found in the node store %s' %
- (node_id, node_store))
+ def __init__(self, node_store, node_id, error=None):
+ if error is None:
+ error_msg = ''
+ else:
+ error_msg = (': %s: %s: %s' %
+ (error.errno, error.strerror, error.filename))
+ self.msg = ('Node %d cannot be found in the node store %s%s' %
+ (node_id, node_store, error_msg))
class NodeTooBig(larch.Error):
diff --git a/larch/nodestore_disk.py b/larch/nodestore_disk.py
index 11f48b5..197a411 100644
--- a/larch/nodestore_disk.py
+++ b/larch/nodestore_disk.py
@@ -20,6 +20,7 @@ import os
import StringIO
import struct
import tempfile
+import traceback
import tracing
import larch
@@ -232,9 +233,10 @@ class NodeStoreDisk(larch.NodeStore):
try:
encoded = self.journal.cat(name)
except (IOError, OSError), e:
- logging.error('Error reading node: %s: %s: %s' %
+ logging.debug('Error reading node: %s: %s: %s' %
(e.errno, e.strerror, e.filename or name))
- raise larch.NodeMissing(self.dirname, node_id)
+ logging.debug(traceback.format_exc())
+ raise larch.NodeMissing(self.dirname, node_id, error=e)
else:
node = self.codec.decode(encoded)
node.frozen = True