diff options
author | Lars Wirzenius <liw@xander> | 2010-12-26 16:27:11 +0200 |
---|---|---|
committer | Lars Wirzenius <liw@xander> | 2010-12-26 16:27:11 +0200 |
commit | 0d7b4ae7b46f2e4ec5d3b64fe801de6015891665 (patch) | |
tree | 3c0c9e54c72099ab4b35e3bfd8a498e2cacd322c | |
parent | 8ca2e7d0b0947fe5ad1f8029424a4605e1c71b63 (diff) | |
parent | 1bcd6d492709bcd3a8051474da175923aca64c62 (diff) | |
download | obnam-0d7b4ae7b46f2e4ec5d3b64fe801de6015891665.tar.gz |
merge from trunk
-rw-r--r-- | obnamlib/__init__.py | 1 | ||||
-rw-r--r-- | obnamlib/clientmetadatatree.py | 68 | ||||
-rw-r--r-- | obnamlib/clientmetadatatree_tests.py | 26 | ||||
-rw-r--r-- | obnamlib/plugins/backup_plugin.py | 10 | ||||
-rw-r--r-- | obnamlib/store.py | 10 | ||||
-rw-r--r-- | obnamlib/store_tests.py | 11 |
6 files changed, 107 insertions, 19 deletions
diff --git a/obnamlib/__init__.py b/obnamlib/__init__.py index bded5ef4..11b2a805 100644 --- a/obnamlib/__init__.py +++ b/obnamlib/__init__.py @@ -30,6 +30,7 @@ DEFAULT_NODE_SIZE = 64 * 1024 DEFAULT_CHUNK_SIZE = 64 * 1024 DEFAULT_UPLOAD_QUEUE_SIZE = 1024 DEFAULT_LRU_SIZE = 10 * 1000 +DEFAULT_CHUNKIDS_PER_GROUP = 1024 # Maximum identifier for clients, chunks, files, etc. This is the largest # unsigned 64-bit value. In various places we assume 64-bit field sizes diff --git a/obnamlib/clientmetadatatree.py b/obnamlib/clientmetadatatree.py index e415545b..8fc01782 100644 --- a/obnamlib/clientmetadatatree.py +++ b/obnamlib/clientmetadatatree.py @@ -15,6 +15,7 @@ import hashlib +import logging import os import struct import time @@ -39,6 +40,7 @@ class ClientMetadataTree(obnamlib.StoreTree): PREFIX_FS_META = 0 # prefix FILE_NAME = 0 # subkey type for storing pathnames FILE_CHUNKS = 1 # subkey type for list of chunks + FILE_NUM_CHUNKS = 2 # subkey type for length of list of chunks FILE_METADATA = 3 # subkey type for inode fields, etc DIR_CONTENTS = 4 # subkey type for list of directory contents @@ -69,6 +71,8 @@ class ClientMetadataTree(obnamlib.StoreTree): node_size, upload_queue_size, lru_size) self.genhash = self.hash_name('generation') self.known_generations = dict() + self.chunkids_per_key = max(1, + int(node_size / 4 / struct.calcsize('Q'))) def hash_name(self, filename): '''Return hash of filename suitable for use as main key.''' @@ -135,8 +139,14 @@ class ClientMetadataTree(obnamlib.StoreTree): return self.hash_name(pathname) - def _lookup_int(self, tree, key): - return struct.unpack('!Q', tree.lookup(key))[0] + def _lookup_int(self, tree, key, default=None): + if default is None: + return struct.unpack('!Q', tree.lookup(key))[0] + else: + try: + return struct.unpack('!Q', tree.lookup(key))[0] + except KeyError: + return default def _insert_int(self, tree, key, value): return tree.insert(key, struct.pack('!Q', value)) @@ -290,26 +300,52 @@ class ClientMetadataTree(obnamlib.StoreTree): minkey = self.fskey(file_id, self.FILE_CHUNKS, 0) maxkey = self.fskey(file_id, self.FILE_CHUNKS, self.SUBKEY_MAX) pairs = tree.lookup_range(minkey, maxkey) - return [struct.unpack('!Q', value)[0] - for key, value in pairs] - + chunkids = [] + for key, value in pairs: + chunkids.extend(self._decode_chunks(value)) + return chunkids + + def _encode_chunks(self, chunkids): + fmt = '!' + ('Q' * len(chunkids)) + return struct.pack(fmt, *chunkids) + + def _decode_chunks(self, encoded): + size = struct.calcsize('Q') + count = len(encoded) / size + fmt = '!' + ('Q' * count) + return struct.unpack(fmt, encoded) + + def _insert_chunks(self, tree, file_id, i, chunkids): + key = self.fskey(file_id, self.FILE_CHUNKS, i) + encoded = self._encode_chunks(chunkids) + tree.insert(key, encoded) + def set_file_chunks(self, filename, chunkids): file_id = self.get_file_id(self.tree, filename) minkey = self.fskey(file_id, self.FILE_CHUNKS, 0) maxkey = self.fskey(file_id, self.FILE_CHUNKS, self.SUBKEY_MAX) - old_chunks = set(struct.unpack('!Q', v)[0] - for k,v in self.tree.lookup_range(minkey, maxkey)) + + for key, value in self.tree.lookup_range(minkey, maxkey): + for chunkid in self._decode_chunks(value): + k = self.chunk_key(chunkid, file_id) + self.tree.remove_range(k, k) + self.tree.remove_range(minkey, maxkey) - for i, chunkid in enumerate(chunkids): - key = self.fskey(file_id, self.FILE_CHUNKS, i) - self.tree.insert(key, struct.pack('!Q', chunkid)) - if chunkid not in old_chunks: + + self.append_file_chunks(filename, chunkids) + + def append_file_chunks(self, filename, chunkids): + file_id = self.get_file_id(self.tree, filename) + lenkey = self.fskey(file_id, self.FILE_NUM_CHUNKS, 0) + i = self._lookup_int(self.tree, lenkey, 0) + while chunkids: + some = chunkids[:self.chunkids_per_key] + self._insert_chunks(self.tree, file_id, i, some) + for chunkid in some: self.tree.insert(self.chunk_key(chunkid, file_id), '') - else: - old_chunks.remove(chunkid) - for chunkid in old_chunks: - key = self.chunk_key(chunkid, file_id) - self.tree.remove_range(key, key) + i += 1 + chunkids = chunkids[self.chunkids_per_key:] + self._insert_int(self.tree, lenkey, i) def chunk_in_use(self, gen_id, chunk_id): '''Is a chunk used by a generation?''' diff --git a/obnamlib/clientmetadatatree_tests.py b/obnamlib/clientmetadatatree_tests.py index 8f7d990f..7eb6f366 100644 --- a/obnamlib/clientmetadatatree_tests.py +++ b/obnamlib/clientmetadatatree_tests.py @@ -232,6 +232,32 @@ class ClientMetadataTreeFileOpsTests(unittest.TestCase): self.client.set_file_chunks('/foo', [1, 2, 3]) self.assertEqual(self.client.get_file_chunks(self.clientid, '/foo'), [1, 2, 3]) + + def test_appends_file_chunks_to_empty_list(self): + self.client.append_file_chunks('/foo', [1, 2, 3]) + self.assertEqual(self.client.get_file_chunks(self.clientid, '/foo'), + [1, 2, 3]) + + def test_appends_file_chunks_to_nonempty_list(self): + self.client.set_file_chunks('/foo', [1, 2, 3]) + self.client.append_file_chunks('/foo', [4, 5, 6]) + self.assertEqual(self.client.get_file_chunks(self.clientid, '/foo'), + [1, 2, 3, 4, 5, 6]) + + def test_generation_has_no_chunk_refs_initially(self): + minkey = self.client.chunk_key(0, 0) + maxkey = self.client.chunk_key(obnamlib.MAX_ID, obnamlib.MAX_ID) + self.assertEqual(self.client.tree.lookup_range(minkey, maxkey), []) + + def test_generation_has_no_chunk_refs_initially(self): + minkey = self.client.chunk_key(0, 0) + maxkey = self.client.chunk_key(obnamlib.MAX_ID, obnamlib.MAX_ID) + self.assertEqual(self.client.tree.lookup_range(minkey, maxkey), []) + + def test_sets_file_chunks(self): + self.client.set_file_chunks('/foo', [1, 2, 3]) + self.assertEqual(self.client.get_file_chunks(self.clientid, '/foo'), + [1, 2, 3]) def test_generation_has_no_chunk_refs_initially(self): minkey = self.client.chunk_key(0, 0) diff --git a/obnamlib/plugins/backup_plugin.py b/obnamlib/plugins/backup_plugin.py index db3f21b1..cddae53b 100644 --- a/obnamlib/plugins/backup_plugin.py +++ b/obnamlib/plugins/backup_plugin.py @@ -247,19 +247,23 @@ class BackupPlugin(obnamlib.ObnamPlugin): def backup_file_contents(self, filename): '''Back up contents of a regular file.''' logging.debug('backup_file_contents: %s' % filename) - chunkids = [] + self.store.set_file_chunks(filename, []) f = self.fs.open(filename, 'r') chunk_size = int(self.app.config['chunk-size']) + chunkids = [] while True: data = f.read(chunk_size) if not data: break chunkids.append(self.backup_file_chunk(data)) + if len(chunkids) >= obnamlib.DEFAULT_CHUNKIDS_PER_GROUP: + self.store.append_file_chunks(filename, chunkids) + chunkids = [] self.app.hooks.call('progress-data-uploaded', len(data)) f.close() + if chunkids: + self.store.append_file_chunks(filename, chunkids) - self.store.set_file_chunks(filename, chunkids) - def backup_file_chunk(self, data): '''Back up a chunk of data by putting it into the store.''' checksum = self.store.checksum(data) diff --git a/obnamlib/store.py b/obnamlib/store.py index b9eb0a9f..6fd4a1ce 100644 --- a/obnamlib/store.py +++ b/obnamlib/store.py @@ -558,6 +558,16 @@ class Store(object): self.client.set_file_chunks(filename, chunkids) + @require_started_generation + def append_file_chunks(self, filename, chunkids): + '''Append to list of ids of chunks belonging to a file. + + File must be in the started generation. + + ''' + + self.client.append_file_chunks(filename, chunkids) + @require_open_client def genspec(self, spec): '''Interpret a generation specification.''' diff --git a/obnamlib/store_tests.py b/obnamlib/store_tests.py index 4128668d..a2e1fe9a 100644 --- a/obnamlib/store_tests.py +++ b/obnamlib/store_tests.py @@ -577,6 +577,17 @@ class StoreGetSetChunksTests(unittest.TestCase): chunkids = self.store.get_file_chunks(self.gen, '/foo') self.assertEqual(sorted(chunkids), [1, 2]) + def test_appends_chunks_to_empty_list(self): + self.store.append_file_chunks('/foo', [1, 2]) + chunkids = self.store.get_file_chunks(self.gen, '/foo') + self.assertEqual(sorted(chunkids), [1, 2]) + + def test_appends_chunks_to_nonempty_list(self): + self.store.append_file_chunks('/foo', [1, 2]) + self.store.append_file_chunks('/foo', [3, 4]) + chunkids = self.store.get_file_chunks(self.gen, '/foo') + self.assertEqual(sorted(chunkids), [1, 2, 3, 4]) + class StoreGenspecTests(unittest.TestCase): |