summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2010-06-28 08:29:13 +1200
committerLars Wirzenius <liw@liw.fi>2010-06-28 08:29:13 +1200
commit4015e7eb19bf45fe97656d433e867f42487b9330 (patch)
tree3a1a641359d390cbbd38898f2e4d8bdb57af34d9
parentba92b68dccdd2ef5192e6ec49b079b0656f9c7e2 (diff)
downloadobnam-4015e7eb19bf45fe97656d433e867f42487b9330.tar.gz
Change how directory contents are stored, for higher speed.
Previously, the contents of a directory was stored using key(dirname, DIR_CONTENTS, counter)=namehash, where counter was increased for each new entry, and namehash was the 64-bit hash of the child's pathname. The problem this has is that when listing the contents of a directory, we have to look up the pathname for each child, and that's pretty slow. Now we store the contents of the directory as key(dirname, DIR_CONTENTS, namehash)=basename, so that listdir can be done with a single range query. This does not handle hash collisions, but that'll be doable, we just need to make sure we find and store the right namehash when inserting the value, and possibly when accessing the child as well.
-rw-r--r--obnamlib/store.py33
1 files changed, 7 insertions, 26 deletions
diff --git a/obnamlib/store.py b/obnamlib/store.py
index 7751409d..a799dc4b 100644
--- a/obnamlib/store.py
+++ b/obnamlib/store.py
@@ -278,10 +278,6 @@ class GenerationStore(StoreTree):
return struct.pack(fmt, mainhash, subtype, subkey)
- def unkey_int(self, key):
- '''Split a key into its components, with subkey being an integer.'''
- return struct.unpack('!8sBQ', key)
-
def key(self, mainkey, subtype, subkey):
'''Compute a full key.
@@ -380,14 +376,9 @@ class GenerationStore(StoreTree):
# Also remove from parent's contents.
parent = os.path.dirname(filename)
if parent != filename: # root dir is its own parent
- h = self.hash_name(filename)
- minkey = self.key(parent, self.DIR_CONTENTS, 0)
- maxkey = self.key(parent, self.DIR_CONTENTS, self.SUBKEY_MAX)
- pairs = self.curgen.lookup_range(minkey, maxkey)
- for key, value in pairs:
- if value == h:
- self.curgen.remove(key)
- break
+ subkey = self.hash_name(filename)
+ key = self.key(parent, self.DIR_CONTENTS, subkey)
+ self.curgen.remove_range(key, key)
def create(self, filename, metadata):
self.set_metadata(filename, metadata)
@@ -398,17 +389,9 @@ class GenerationStore(StoreTree):
basename = os.path.basename(filename)
genid = self.get_generation_id(self.curgen)
if basename not in self.listdir(genid, parent):
- minkey = self.key(parent, self.DIR_CONTENTS, 0)
- maxkey = self.key(parent, self.DIR_CONTENTS, self.SUBKEY_MAX)
- pairs = self.curgen.lookup_range(minkey, maxkey)
- if pairs:
- a, b, maxindex = self.unkey_int(pairs[-1][0])
- index = maxindex + 1
- else:
- index = 0
- key = self.key(parent, self.DIR_CONTENTS, index)
- h = self.hash_name(filename)
- self.curgen.insert(key, h)
+ subkey = self.hash_name(filename)
+ key = self.key(parent, self.DIR_CONTENTS, subkey)
+ self.curgen.insert(key, basename)
def get_metadata(self, genid, filename):
tree = self.find_generation(genid)
@@ -429,9 +412,7 @@ class GenerationStore(StoreTree):
maxkey = self.key(dirname, self.DIR_CONTENTS, self.SUBKEY_MAX)
basenames = []
for key, value in tree.lookup_range(minkey, maxkey):
- namekey = self.hashkey(value, self.FILE, self.FILE_NAME)
- pathname = tree.lookup(namekey)
- basenames.append(os.path.basename(pathname))
+ basenames.append(value)
return basenames
def get_file_chunks(self, genid, filename):