diff options
author | Lars Wirzenius <liw@liw.fi> | 2010-06-28 08:29:13 +1200 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2010-06-28 08:29:13 +1200 |
commit | 4015e7eb19bf45fe97656d433e867f42487b9330 (patch) | |
tree | 3a1a641359d390cbbd38898f2e4d8bdb57af34d9 | |
parent | ba92b68dccdd2ef5192e6ec49b079b0656f9c7e2 (diff) | |
download | obnam-4015e7eb19bf45fe97656d433e867f42487b9330.tar.gz |
Change how directory contents are stored, for higher speed.
Previously, the contents of a directory was stored using
key(dirname, DIR_CONTENTS, counter)=namehash, where counter
was increased for each new entry, and namehash was the 64-bit
hash of the child's pathname. The problem this has is that
when listing the contents of a directory, we have to look
up the pathname for each child, and that's pretty slow.
Now we store the contents of the directory as
key(dirname, DIR_CONTENTS, namehash)=basename, so that listdir
can be done with a single range query.
This does not handle hash collisions, but that'll be doable,
we just need to make sure we find and store the right namehash
when inserting the value, and possibly when accessing the
child as well.
-rw-r--r-- | obnamlib/store.py | 33 |
1 files changed, 7 insertions, 26 deletions
diff --git a/obnamlib/store.py b/obnamlib/store.py index 7751409d..a799dc4b 100644 --- a/obnamlib/store.py +++ b/obnamlib/store.py @@ -278,10 +278,6 @@ class GenerationStore(StoreTree): return struct.pack(fmt, mainhash, subtype, subkey) - def unkey_int(self, key): - '''Split a key into its components, with subkey being an integer.''' - return struct.unpack('!8sBQ', key) - def key(self, mainkey, subtype, subkey): '''Compute a full key. @@ -380,14 +376,9 @@ class GenerationStore(StoreTree): # Also remove from parent's contents. parent = os.path.dirname(filename) if parent != filename: # root dir is its own parent - h = self.hash_name(filename) - minkey = self.key(parent, self.DIR_CONTENTS, 0) - maxkey = self.key(parent, self.DIR_CONTENTS, self.SUBKEY_MAX) - pairs = self.curgen.lookup_range(minkey, maxkey) - for key, value in pairs: - if value == h: - self.curgen.remove(key) - break + subkey = self.hash_name(filename) + key = self.key(parent, self.DIR_CONTENTS, subkey) + self.curgen.remove_range(key, key) def create(self, filename, metadata): self.set_metadata(filename, metadata) @@ -398,17 +389,9 @@ class GenerationStore(StoreTree): basename = os.path.basename(filename) genid = self.get_generation_id(self.curgen) if basename not in self.listdir(genid, parent): - minkey = self.key(parent, self.DIR_CONTENTS, 0) - maxkey = self.key(parent, self.DIR_CONTENTS, self.SUBKEY_MAX) - pairs = self.curgen.lookup_range(minkey, maxkey) - if pairs: - a, b, maxindex = self.unkey_int(pairs[-1][0]) - index = maxindex + 1 - else: - index = 0 - key = self.key(parent, self.DIR_CONTENTS, index) - h = self.hash_name(filename) - self.curgen.insert(key, h) + subkey = self.hash_name(filename) + key = self.key(parent, self.DIR_CONTENTS, subkey) + self.curgen.insert(key, basename) def get_metadata(self, genid, filename): tree = self.find_generation(genid) @@ -429,9 +412,7 @@ class GenerationStore(StoreTree): maxkey = self.key(dirname, self.DIR_CONTENTS, self.SUBKEY_MAX) basenames = [] for key, value in tree.lookup_range(minkey, maxkey): - namekey = self.hashkey(value, self.FILE, self.FILE_NAME) - pathname = tree.lookup(namekey) - basenames.append(os.path.basename(pathname)) + basenames.append(value) return basenames def get_file_chunks(self, genid, filename): |