diff options
author | Lars Wirzenius <liw@xander> | 2010-12-26 15:44:27 +0200 |
---|---|---|
committer | Lars Wirzenius <liw@xander> | 2010-12-26 15:44:27 +0200 |
commit | 11ae2fd0ec6e544715c8507e4902295d3828dbc6 (patch) | |
tree | 7df2cf0dfe20e99e9ded58309df5aa057ea5ab1a | |
parent | 9039da687cc9d9dda3eb4fd46c23ec11b01571c8 (diff) | |
download | obnam-11ae2fd0ec6e544715c8507e4902295d3828dbc6.tar.gz |
Store many chunkd ids per key.
-rw-r--r-- | obnamlib/clientmetadatatree.py | 65 |
1 files changed, 40 insertions, 25 deletions
diff --git a/obnamlib/clientmetadatatree.py b/obnamlib/clientmetadatatree.py index 16a7f780..8fc01782 100644 --- a/obnamlib/clientmetadatatree.py +++ b/obnamlib/clientmetadatatree.py @@ -15,6 +15,7 @@ import hashlib +import logging import os import struct import time @@ -70,6 +71,8 @@ class ClientMetadataTree(obnamlib.StoreTree): node_size, upload_queue_size, lru_size) self.genhash = self.hash_name('generation') self.known_generations = dict() + self.chunkids_per_key = max(1, + int(node_size / 4 / struct.calcsize('Q'))) def hash_name(self, filename): '''Return hash of filename suitable for use as main key.''' @@ -297,40 +300,52 @@ class ClientMetadataTree(obnamlib.StoreTree): minkey = self.fskey(file_id, self.FILE_CHUNKS, 0) maxkey = self.fskey(file_id, self.FILE_CHUNKS, self.SUBKEY_MAX) pairs = tree.lookup_range(minkey, maxkey) - return [struct.unpack('!Q', value)[0] - for key, value in pairs] - + chunkids = [] + for key, value in pairs: + chunkids.extend(self._decode_chunks(value)) + return chunkids + + def _encode_chunks(self, chunkids): + fmt = '!' + ('Q' * len(chunkids)) + return struct.pack(fmt, *chunkids) + + def _decode_chunks(self, encoded): + size = struct.calcsize('Q') + count = len(encoded) / size + fmt = '!' + ('Q' * count) + return struct.unpack(fmt, encoded) + + def _insert_chunks(self, tree, file_id, i, chunkids): + key = self.fskey(file_id, self.FILE_CHUNKS, i) + encoded = self._encode_chunks(chunkids) + tree.insert(key, encoded) + def set_file_chunks(self, filename, chunkids): file_id = self.get_file_id(self.tree, filename) minkey = self.fskey(file_id, self.FILE_CHUNKS, 0) maxkey = self.fskey(file_id, self.FILE_CHUNKS, self.SUBKEY_MAX) - old_chunks = set(struct.unpack('!Q', v)[0] - for k,v in self.tree.lookup_range(minkey, maxkey)) + + for key, value in self.tree.lookup_range(minkey, maxkey): + for chunkid in self._decode_chunks(value): + k = self.chunk_key(chunkid, file_id) + self.tree.remove_range(k, k) + self.tree.remove_range(minkey, maxkey) - for i, chunkid in enumerate(chunkids): - key = self.fskey(file_id, self.FILE_CHUNKS, i) - self.tree.insert(key, struct.pack('!Q', chunkid)) - if chunkid not in old_chunks: - self.tree.insert(self.chunk_key(chunkid, file_id), '') - else: - old_chunks.remove(chunkid) - lenkey = self.fskey(file_id, self.FILE_NUM_CHUNKS, 0) - self._insert_int(self.tree, lenkey, len(chunkids)) - for chunkid in old_chunks: - key = self.chunk_key(chunkid, file_id) - self.tree.remove_range(key, key) + + self.append_file_chunks(filename, chunkids) def append_file_chunks(self, filename, chunkids): file_id = self.get_file_id(self.tree, filename) - minkey = self.fskey(file_id, self.FILE_CHUNKS, 0) - maxkey = self.fskey(file_id, self.FILE_CHUNKS, self.SUBKEY_MAX) lenkey = self.fskey(file_id, self.FILE_NUM_CHUNKS, 0) - n = self._lookup_int(self.tree, lenkey, 0) - for i, chunkid in enumerate(chunkids, n): - key = self.fskey(file_id, self.FILE_CHUNKS, i) - self.tree.insert(key, struct.pack('!Q', chunkid)) - self.tree.insert(self.chunk_key(chunkid, file_id), '') - self._insert_int(self.tree, lenkey, n + len(chunkids)) + i = self._lookup_int(self.tree, lenkey, 0) + while chunkids: + some = chunkids[:self.chunkids_per_key] + self._insert_chunks(self.tree, file_id, i, some) + for chunkid in some: + self.tree.insert(self.chunk_key(chunkid, file_id), '') + i += 1 + chunkids = chunkids[self.chunkids_per_key:] + self._insert_int(self.tree, lenkey, i) def chunk_in_use(self, gen_id, chunk_id): '''Is a chunk used by a generation?''' |