Rewrite GAChunkStore in terms of BlobStore

This avoids some duplicate code.
author: Lars Wirzenius <liw@liw.fi> 2015-07-19 13:32:54 +0300
committer: Lars Wirzenius <liw@liw.fi> 2015-07-19 13:32:54 +0300
commit: 96f3482abb08e67caebacb65a555fbf0e4d9c2ae (patch)
tree: 639a072cbce5ea935322ca536f827a45a3db9cce
parent: fe42dd953139ffc3bf25ed0a0beea9acbaf5dce6 (diff)
download: obnam-96f3482abb08e67caebacb65a555fbf0e4d9c2ae.tar.gz
1 file changed, 35 insertions, 38 deletions
diff --git a/obnamlib/fmt_ga/chunk_store.py b/obnamlib/fmt_ga/chunk_store.py
index f506f6e7..c0767dc0 100644
--- a/obnamlib/fmt_ga/chunk_store.py
+++ b/obnamlib/fmt_ga/chunk_store.py
@@ -28,65 +28,62 @@ class GAChunkStore(object):
     def __init__(self):
         self._fs = None
         self._dirname = 'chunk-store'
-        self._bag = None
-        self._bag_store = obnamlib.BagStore()
-        self._max_bag_size = None
+        self._max_chunk_size = None
+        self._bag_store = None
+        self._blob_store = None
 
     def set_fs(self, fs):
         self._fs = fs
+
+        self._bag_store = obnamlib.BagStore()
         self._bag_store.set_location(fs, self._dirname)
+        self._blob_store = obnamlib.BlobStore()
+        self._blob_store.set_bag_store(self._bag_store)
+        if self._max_chunk_size is not None:
+            self._blob_store.set_max_bag_size(self._max_chunk_size)
 
     def set_max_chunk_size(self, max_chunk_size):
-        self._max_bag_size = max_chunk_size
+        self._max_chunk_size = max_chunk_size
+        if self._blob_store:
+            self._blob_store.set_max_bag_size(max_chunk_size)
 
     def put_chunk_content(self, content):
         self._fs.create_and_init_toplevel(self._dirname)
-        if self._bag is None:
-            self._bag = self._new_bag()
-        chunk_id = self._bag.append(content)
-        if self._bag_is_big_enough(self._bag):
-            self.flush_chunks()
-        return chunk_id
-
-    def _new_bag(self):
-        bag = obnamlib.Bag()
-        bag.set_id(self._bag_store.reserve_bag_id())
-        return bag
-
-    def _bag_is_big_enough(self, bag):
-        approx_size = sum(len(bag[i]) for i in range(len(bag)))
-        return self._max_bag_size is None or approx_size >= self._max_bag_size
+        return self._blob_store.put_blob(content)
 
     def flush_chunks(self):
-        if self._bag:
-            self._bag_store.put_bag(self._bag)
-            self._bag = None
+        self._blob_store.flush()
 
     def get_chunk_content(self, chunk_id):
-        bag_id, index = obnamlib.parse_object_id(chunk_id)
-        try:
-            bag = self._bag_store.get_bag(bag_id)
-        except (IOError, OSError) as e:
-            if e.errno == errno.ENOENT:
-                raise obnamlib.RepositoryChunkDoesNotExist(
-                    chunk_id=chunk_id,
-                    filename=None)
-            raise
-        return bag[index]
+        content = self._blob_store.get_blob(chunk_id)
+        if content is None:
+            raise obnamlib.RepositoryChunkDoesNotExist(
+                chunk_id=chunk_id,
+                filename=None)
+        return content
 
     def has_chunk(self, chunk_id):
-        bag_id, _ = obnamlib.parse_object_id(chunk_id)
-        return self._bag_store.has_bag(bag_id)
+        # This is ugly, 'cause it requires reading in the whole bag.
+        # We could easily check if the bag exists, but not whether it
+        # contains the actual chunk.
+        try:
+            return self.get_chunk_content(chunk_id)
+        except obnamlib.RepositoryChunkDoesNotExist:
+            return False
+        else:
+            return True
 
     def get_chunk_ids(self):
-        result = []
-        if self._bag:
-            result += self._get_chunk_ids_from_bag(self._bag)
+        # This is slow as hell, as it needs to read in all the bags to
+        # get all the chunk ids. We're going to need to either drop
+        # get_chunk_ids or have a way to get the blob identifiers for
+        # a bag without reading it in and parsing it.
 
+        self.flush_chunks()
+        result = []
         for bag_id in self._bag_store.get_bag_ids():
             bag = self._bag_store.get_bag(bag_id)
             result += self._get_chunk_ids_from_bag(bag)
-
         return result
 
     def _get_chunk_ids_from_bag(self, bag):
author	Lars Wirzenius <liw@liw.fi>	2015-07-19 13:32:54 +0300
committer	Lars Wirzenius <liw@liw.fi>	2015-07-19 13:32:54 +0300
commit	96f3482abb08e67caebacb65a555fbf0e4d9c2ae (patch)
tree	639a072cbce5ea935322ca536f827a45a3db9cce
parent	fe42dd953139ffc3bf25ed0a0beea9acbaf5dce6 (diff)
download	obnam-96f3482abb08e67caebacb65a555fbf0e4d9c2ae.tar.gz