summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@xander>2010-12-26 15:44:27 +0200
committerLars Wirzenius <liw@xander>2010-12-26 15:44:27 +0200
commit11ae2fd0ec6e544715c8507e4902295d3828dbc6 (patch)
tree7df2cf0dfe20e99e9ded58309df5aa057ea5ab1a
parent9039da687cc9d9dda3eb4fd46c23ec11b01571c8 (diff)
downloadobnam-11ae2fd0ec6e544715c8507e4902295d3828dbc6.tar.gz
Store many chunkd ids per key.
-rw-r--r--obnamlib/clientmetadatatree.py65
1 files changed, 40 insertions, 25 deletions
diff --git a/obnamlib/clientmetadatatree.py b/obnamlib/clientmetadatatree.py
index 16a7f780..8fc01782 100644
--- a/obnamlib/clientmetadatatree.py
+++ b/obnamlib/clientmetadatatree.py
@@ -15,6 +15,7 @@
import hashlib
+import logging
import os
import struct
import time
@@ -70,6 +71,8 @@ class ClientMetadataTree(obnamlib.StoreTree):
node_size, upload_queue_size, lru_size)
self.genhash = self.hash_name('generation')
self.known_generations = dict()
+ self.chunkids_per_key = max(1,
+ int(node_size / 4 / struct.calcsize('Q')))
def hash_name(self, filename):
'''Return hash of filename suitable for use as main key.'''
@@ -297,40 +300,52 @@ class ClientMetadataTree(obnamlib.StoreTree):
minkey = self.fskey(file_id, self.FILE_CHUNKS, 0)
maxkey = self.fskey(file_id, self.FILE_CHUNKS, self.SUBKEY_MAX)
pairs = tree.lookup_range(minkey, maxkey)
- return [struct.unpack('!Q', value)[0]
- for key, value in pairs]
-
+ chunkids = []
+ for key, value in pairs:
+ chunkids.extend(self._decode_chunks(value))
+ return chunkids
+
+ def _encode_chunks(self, chunkids):
+ fmt = '!' + ('Q' * len(chunkids))
+ return struct.pack(fmt, *chunkids)
+
+ def _decode_chunks(self, encoded):
+ size = struct.calcsize('Q')
+ count = len(encoded) / size
+ fmt = '!' + ('Q' * count)
+ return struct.unpack(fmt, encoded)
+
+ def _insert_chunks(self, tree, file_id, i, chunkids):
+ key = self.fskey(file_id, self.FILE_CHUNKS, i)
+ encoded = self._encode_chunks(chunkids)
+ tree.insert(key, encoded)
+
def set_file_chunks(self, filename, chunkids):
file_id = self.get_file_id(self.tree, filename)
minkey = self.fskey(file_id, self.FILE_CHUNKS, 0)
maxkey = self.fskey(file_id, self.FILE_CHUNKS, self.SUBKEY_MAX)
- old_chunks = set(struct.unpack('!Q', v)[0]
- for k,v in self.tree.lookup_range(minkey, maxkey))
+
+ for key, value in self.tree.lookup_range(minkey, maxkey):
+ for chunkid in self._decode_chunks(value):
+ k = self.chunk_key(chunkid, file_id)
+ self.tree.remove_range(k, k)
+
self.tree.remove_range(minkey, maxkey)
- for i, chunkid in enumerate(chunkids):
- key = self.fskey(file_id, self.FILE_CHUNKS, i)
- self.tree.insert(key, struct.pack('!Q', chunkid))
- if chunkid not in old_chunks:
- self.tree.insert(self.chunk_key(chunkid, file_id), '')
- else:
- old_chunks.remove(chunkid)
- lenkey = self.fskey(file_id, self.FILE_NUM_CHUNKS, 0)
- self._insert_int(self.tree, lenkey, len(chunkids))
- for chunkid in old_chunks:
- key = self.chunk_key(chunkid, file_id)
- self.tree.remove_range(key, key)
+
+ self.append_file_chunks(filename, chunkids)
def append_file_chunks(self, filename, chunkids):
file_id = self.get_file_id(self.tree, filename)
- minkey = self.fskey(file_id, self.FILE_CHUNKS, 0)
- maxkey = self.fskey(file_id, self.FILE_CHUNKS, self.SUBKEY_MAX)
lenkey = self.fskey(file_id, self.FILE_NUM_CHUNKS, 0)
- n = self._lookup_int(self.tree, lenkey, 0)
- for i, chunkid in enumerate(chunkids, n):
- key = self.fskey(file_id, self.FILE_CHUNKS, i)
- self.tree.insert(key, struct.pack('!Q', chunkid))
- self.tree.insert(self.chunk_key(chunkid, file_id), '')
- self._insert_int(self.tree, lenkey, n + len(chunkids))
+ i = self._lookup_int(self.tree, lenkey, 0)
+ while chunkids:
+ some = chunkids[:self.chunkids_per_key]
+ self._insert_chunks(self.tree, file_id, i, some)
+ for chunkid in some:
+ self.tree.insert(self.chunk_key(chunkid, file_id), '')
+ i += 1
+ chunkids = chunkids[self.chunkids_per_key:]
+ self._insert_int(self.tree, lenkey, i)
def chunk_in_use(self, gen_id, chunk_id):
'''Is a chunk used by a generation?'''