summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@xander> 2010-12-26 16:27:11 +0200
committer Lars Wirzenius <liw@xander> 2010-12-26 16:27:11 +0200
commit 0d7b4ae7b46f2e4ec5d3b64fe801de6015891665 (patch)
tree 3c0c9e54c72099ab4b35e3bfd8a498e2cacd322c
parent 8ca2e7d0b0947fe5ad1f8029424a4605e1c71b63 (diff)
parent 1bcd6d492709bcd3a8051474da175923aca64c62 (diff)
download obnam-0d7b4ae7b46f2e4ec5d3b64fe801de6015891665.tar.gz
merge from trunk
-rw-r--r-- obnamlib/__init__.py 1
-rw-r--r-- obnamlib/clientmetadatatree.py 68
-rw-r--r-- obnamlib/clientmetadatatree_tests.py 26
-rw-r--r-- obnamlib/plugins/backup_plugin.py 10
-rw-r--r-- obnamlib/store.py 10
-rw-r--r-- obnamlib/store_tests.py 11
6 files changed, 107 insertions, 19 deletions
diff --git a/obnamlib/__init__.py b/obnamlib/__init__.py
index bded5ef4..11b2a805 100644
--- a/obnamlib/__init__.py
+++ b/obnamlib/__init__.py
@@ -30,6 +30,7 @@ DEFAULT_NODE_SIZE = 64 * 1024
DEFAULT_CHUNK_SIZE = 64 * 1024
DEFAULT_UPLOAD_QUEUE_SIZE = 1024
DEFAULT_LRU_SIZE = 10 * 1000
+DEFAULT_CHUNKIDS_PER_GROUP = 1024
# Maximum identifier for clients, chunks, files, etc. This is the largest
# unsigned 64-bit value. In various places we assume 64-bit field sizes
diff --git a/obnamlib/clientmetadatatree.py b/obnamlib/clientmetadatatree.py
index e415545b..8fc01782 100644
--- a/obnamlib/clientmetadatatree.py
+++ b/obnamlib/clientmetadatatree.py
@@ -15,6 +15,7 @@
import hashlib
+import logging
import os
import struct
import time
@@ -39,6 +40,7 @@ class ClientMetadataTree(obnamlib.StoreTree):
PREFIX_FS_META = 0 # prefix
FILE_NAME = 0 # subkey type for storing pathnames
FILE_CHUNKS = 1 # subkey type for list of chunks
+ FILE_NUM_CHUNKS = 2 # subkey type for length of list of chunks
FILE_METADATA = 3 # subkey type for inode fields, etc
DIR_CONTENTS = 4 # subkey type for list of directory contents
@@ -69,6 +71,8 @@ class ClientMetadataTree(obnamlib.StoreTree):
node_size, upload_queue_size, lru_size)
self.genhash = self.hash_name('generation')
self.known_generations = dict()
+ self.chunkids_per_key = max(1,
+ int(node_size / 4 / struct.calcsize('Q')))
def hash_name(self, filename):
'''Return hash of filename suitable for use as main key.'''
@@ -135,8 +139,14 @@ class ClientMetadataTree(obnamlib.StoreTree):
return self.hash_name(pathname)
- def _lookup_int(self, tree, key):
- return struct.unpack('!Q', tree.lookup(key))[0]
+ def _lookup_int(self, tree, key, default=None):
+ if default is None:
+ return struct.unpack('!Q', tree.lookup(key))[0]
+ else:
+ try:
+ return struct.unpack('!Q', tree.lookup(key))[0]
+ except KeyError:
+ return default
def _insert_int(self, tree, key, value):
return tree.insert(key, struct.pack('!Q', value))
@@ -290,26 +300,52 @@ class ClientMetadataTree(obnamlib.StoreTree):
minkey = self.fskey(file_id, self.FILE_CHUNKS, 0)
maxkey = self.fskey(file_id, self.FILE_CHUNKS, self.SUBKEY_MAX)
pairs = tree.lookup_range(minkey, maxkey)
- return [struct.unpack('!Q', value)[0]
- for key, value in pairs]
-
+ chunkids = []
+ for key, value in pairs:
+ chunkids.extend(self._decode_chunks(value))
+ return chunkids
+
+ def _encode_chunks(self, chunkids):
+ fmt = '!' + ('Q' * len(chunkids))
+ return struct.pack(fmt, *chunkids)
+
+ def _decode_chunks(self, encoded):
+ size = struct.calcsize('Q')
+ count = len(encoded) / size
+ fmt = '!' + ('Q' * count)
+ return struct.unpack(fmt, encoded)
+
+ def _insert_chunks(self, tree, file_id, i, chunkids):
+ key = self.fskey(file_id, self.FILE_CHUNKS, i)
+ encoded = self._encode_chunks(chunkids)
+ tree.insert(key, encoded)
+
def set_file_chunks(self, filename, chunkids):
file_id = self.get_file_id(self.tree, filename)
minkey = self.fskey(file_id, self.FILE_CHUNKS, 0)
maxkey = self.fskey(file_id, self.FILE_CHUNKS, self.SUBKEY_MAX)
- old_chunks = set(struct.unpack('!Q', v)[0]
- for k,v in self.tree.lookup_range(minkey, maxkey))
+
+ for key, value in self.tree.lookup_range(minkey, maxkey):
+ for chunkid in self._decode_chunks(value):
+ k = self.chunk_key(chunkid, file_id)
+ self.tree.remove_range(k, k)
+
self.tree.remove_range(minkey, maxkey)
- for i, chunkid in enumerate(chunkids):
- key = self.fskey(file_id, self.FILE_CHUNKS, i)
- self.tree.insert(key, struct.pack('!Q', chunkid))
- if chunkid not in old_chunks:
+
+ self.append_file_chunks(filename, chunkids)
+
+ def append_file_chunks(self, filename, chunkids):
+ file_id = self.get_file_id(self.tree, filename)
+ lenkey = self.fskey(file_id, self.FILE_NUM_CHUNKS, 0)
+ i = self._lookup_int(self.tree, lenkey, 0)
+ while chunkids:
+ some = chunkids[:self.chunkids_per_key]
+ self._insert_chunks(self.tree, file_id, i, some)
+ for chunkid in some:
self.tree.insert(self.chunk_key(chunkid, file_id), '')
- else:
- old_chunks.remove(chunkid)
- for chunkid in old_chunks:
- key = self.chunk_key(chunkid, file_id)
- self.tree.remove_range(key, key)
+ i += 1
+ chunkids = chunkids[self.chunkids_per_key:]
+ self._insert_int(self.tree, lenkey, i)
def chunk_in_use(self, gen_id, chunk_id):
'''Is a chunk used by a generation?'''
diff --git a/obnamlib/clientmetadatatree_tests.py b/obnamlib/clientmetadatatree_tests.py
index 8f7d990f..7eb6f366 100644
--- a/obnamlib/clientmetadatatree_tests.py
+++ b/obnamlib/clientmetadatatree_tests.py
@@ -232,6 +232,32 @@ class ClientMetadataTreeFileOpsTests(unittest.TestCase):
self.client.set_file_chunks('/foo', [1, 2, 3])
self.assertEqual(self.client.get_file_chunks(self.clientid, '/foo'),
[1, 2, 3])
+
+ def test_appends_file_chunks_to_empty_list(self):
+ self.client.append_file_chunks('/foo', [1, 2, 3])
+ self.assertEqual(self.client.get_file_chunks(self.clientid, '/foo'),
+ [1, 2, 3])
+
+ def test_appends_file_chunks_to_nonempty_list(self):
+ self.client.set_file_chunks('/foo', [1, 2, 3])
+ self.client.append_file_chunks('/foo', [4, 5, 6])
+ self.assertEqual(self.client.get_file_chunks(self.clientid, '/foo'),
+ [1, 2, 3, 4, 5, 6])
+
+ def test_generation_has_no_chunk_refs_initially(self):
+ minkey = self.client.chunk_key(0, 0)
+ maxkey = self.client.chunk_key(obnamlib.MAX_ID, obnamlib.MAX_ID)
+ self.assertEqual(self.client.tree.lookup_range(minkey, maxkey), [])
+
+ def test_generation_has_no_chunk_refs_initially(self):
+ minkey = self.client.chunk_key(0, 0)
+ maxkey = self.client.chunk_key(obnamlib.MAX_ID, obnamlib.MAX_ID)
+ self.assertEqual(self.client.tree.lookup_range(minkey, maxkey), [])
+
+ def test_sets_file_chunks(self):
+ self.client.set_file_chunks('/foo', [1, 2, 3])
+ self.assertEqual(self.client.get_file_chunks(self.clientid, '/foo'),
+ [1, 2, 3])
def test_generation_has_no_chunk_refs_initially(self):
minkey = self.client.chunk_key(0, 0)
diff --git a/obnamlib/plugins/backup_plugin.py b/obnamlib/plugins/backup_plugin.py
index db3f21b1..cddae53b 100644
--- a/obnamlib/plugins/backup_plugin.py
+++ b/obnamlib/plugins/backup_plugin.py
@@ -247,19 +247,23 @@ class BackupPlugin(obnamlib.ObnamPlugin):
def backup_file_contents(self, filename):
'''Back up contents of a regular file.'''
logging.debug('backup_file_contents: %s' % filename)
- chunkids = []
+ self.store.set_file_chunks(filename, [])
f = self.fs.open(filename, 'r')
chunk_size = int(self.app.config['chunk-size'])
+ chunkids = []
while True:
data = f.read(chunk_size)
if not data:
break
chunkids.append(self.backup_file_chunk(data))
+ if len(chunkids) >= obnamlib.DEFAULT_CHUNKIDS_PER_GROUP:
+ self.store.append_file_chunks(filename, chunkids)
+ chunkids = []
self.app.hooks.call('progress-data-uploaded', len(data))
f.close()
+ if chunkids:
+ self.store.append_file_chunks(filename, chunkids)
- self.store.set_file_chunks(filename, chunkids)
-
def backup_file_chunk(self, data):
'''Back up a chunk of data by putting it into the store.'''
checksum = self.store.checksum(data)
diff --git a/obnamlib/store.py b/obnamlib/store.py
index b9eb0a9f..6fd4a1ce 100644
--- a/obnamlib/store.py
+++ b/obnamlib/store.py
@@ -558,6 +558,16 @@ class Store(object):
self.client.set_file_chunks(filename, chunkids)
+ @require_started_generation
+ def append_file_chunks(self, filename, chunkids):
+ '''Append to list of ids of chunks belonging to a file.
+
+ File must be in the started generation.
+
+ '''
+
+ self.client.append_file_chunks(filename, chunkids)
+
@require_open_client
def genspec(self, spec):
'''Interpret a generation specification.'''
diff --git a/obnamlib/store_tests.py b/obnamlib/store_tests.py
index 4128668d..a2e1fe9a 100644
--- a/obnamlib/store_tests.py
+++ b/obnamlib/store_tests.py
@@ -577,6 +577,17 @@ class StoreGetSetChunksTests(unittest.TestCase):
chunkids = self.store.get_file_chunks(self.gen, '/foo')
self.assertEqual(sorted(chunkids), [1, 2])
+ def test_appends_chunks_to_empty_list(self):
+ self.store.append_file_chunks('/foo', [1, 2])
+ chunkids = self.store.get_file_chunks(self.gen, '/foo')
+ self.assertEqual(sorted(chunkids), [1, 2])
+
+ def test_appends_chunks_to_nonempty_list(self):
+ self.store.append_file_chunks('/foo', [1, 2])
+ self.store.append_file_chunks('/foo', [3, 4])
+ chunkids = self.store.get_file_chunks(self.gen, '/foo')
+ self.assertEqual(sorted(chunkids), [1, 2, 3, 4])
+
class StoreGenspecTests(unittest.TestCase):