diff options
author | Lars Wirzenius <liw@liw.fi> | 2015-07-11 20:02:51 +0300 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2015-07-11 20:02:51 +0300 |
commit | 5de708a7637eb97838aa4bd6f2a9134ed59760b1 (patch) | |
tree | ea9f224a968d7e7de87aa08a9273d22ec48aa281 | |
parent | 0791557014f06d8658e207f783a9fa3ce8c8aa7c (diff) | |
parent | a84b881f4a20be4ed3b65c13d306ec022d40f8de (diff) | |
download | obnam-5de708a7637eb97838aa4bd6f2a9134ed59760b1.tar.gz |
Change Green Albatross storage to be smaller objects
-rwxr-xr-x | dump-bag | 43 | ||||
-rwxr-xr-x | dump-obj | 39 | ||||
-rw-r--r-- | obnamlib/__init__.py | 2 | ||||
-rw-r--r-- | obnamlib/fmt_ga/client.py | 531 | ||||
-rw-r--r-- | obnamlib/fmt_ga/dirobj.py | 6 | ||||
-rw-r--r-- | obnamlib/fmt_ga/tree.py | 23 | ||||
-rw-r--r-- | obnamlib/plugins/backup_plugin.py | 1 | ||||
-rw-r--r-- | obnamlib/repo_interface.py | 16 |
8 files changed, 573 insertions, 88 deletions
diff --git a/dump-bag b/dump-bag new file mode 100755 index 00000000..3c0fe6e5 --- /dev/null +++ b/dump-bag @@ -0,0 +1,43 @@ +#!/usr/bin/env python +# Copyright 2015 Lars Wirzenius +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# =*= License: GPL-3+ =*= + + +import sys + +import yaml + +import obnamlib + + +def strip_filtering(blob): + # Dummy version, does not actually work if any compression or + # encryption has been used. + tag, remaining = blob.split('\0', 1) + return remaining + + +for filename in sys.argv[1:]: + with open(filename) as f: + blob = f.read() + blob = strip_filtering(blob) + bag = obnamlib.deserialise_bag(blob) + + decoded = [] + for obj_blob in bag: + obj = obnamlib.deserialise_object(obj_blob) + yaml.safe_dump(obj, stream=sys.stdout) diff --git a/dump-obj b/dump-obj new file mode 100755 index 00000000..da3bf25b --- /dev/null +++ b/dump-obj @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# Copyright 2015 Lars Wirzenius +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# =*= License: GPL-3+ =*= + + +import sys + +import yaml + +import obnamlib + + +def strip_filtering(blob): + # Dummy version, does not actually work if any compression or + # encryption has been used. + tag, remaining = blob.split('\0', 1) + return remaining + + +for filename in sys.argv[1:]: + with open(filename) as f: + blob = f.read() + blob = strip_filtering(blob) + obj = obnamlib.deserialise_object(blob) + yaml.safe_dump(obj, stream=sys.stdout) diff --git a/obnamlib/__init__.py b/obnamlib/__init__.py index 91028065..c7d64d86 100644 --- a/obnamlib/__init__.py +++ b/obnamlib/__init__.py @@ -110,7 +110,7 @@ from splitpath import split_pathname from obj_serialiser import serialise_object, deserialise_object from bag import Bag, BagIdNotSetError, make_object_id, parse_object_id -from bag_store import BagStore +from bag_store import BagStore, serialise_bag, deserialise_bag from blob_store import BlobStore from repo_factory import ( diff --git a/obnamlib/fmt_ga/client.py b/obnamlib/fmt_ga/client.py index fbeb68e7..facca578 100644 --- a/obnamlib/fmt_ga/client.py +++ b/obnamlib/fmt_ga/client.py @@ -18,6 +18,7 @@ import copy import os +import stat import obnamlib @@ -44,7 +45,8 @@ class GAClient(object): return self._dirname def clear(self): - self._data = {} + self._client_keys = GAKeys() + self._generations = GAGenerationList() self._data_is_loaded = False def commit(self): @@ -53,90 +55,128 @@ class GAClient(object): self._save_data() def _finish_current_generation_if_any(self): - generations = self._data.get('generations', []) - if generations: - keys = generations[-1]['keys'] + if self._generations: + latest = self._generations.get_latest() key_name = obnamlib.repo_key_name(obnamlib.REPO_GENERATION_ENDED) - if keys[key_name] is None: - keys[key_name] = int(self._current_time()) + if latest.get_key(key_name) is None: + latest.set_key(key_name, int(self._current_time())) def _save_data(self): - blob = obnamlib.serialise_object(self._data) + self._save_file_metadata() + self._save_per_client_data() + + def _save_file_metadata(self): + for gen in self._generations: + metadata = gen.get_file_metadata() + metadata.flush() + gen.set_root_object_id(metadata.get_root_object_id()) + + def _get_blob_store(self): + bag_store = obnamlib.BagStore() + bag_store.set_location(self._fs, self._dirname) + + blob_store = obnamlib.BlobStore() + blob_store.set_bag_store(bag_store) + + return blob_store + + def _save_per_client_data(self): + data = { + 'keys': self._client_keys.as_dict(), + 'generations': [g.as_dict() for g in self._generations], + } + blob = obnamlib.serialise_object(data) filename = self._get_filename() self._fs.overwrite_file(filename, blob) + def _load_data(self): + if not self._data_is_loaded: + self.clear() + self._load_per_client_data() + self._load_file_metadata() + self._data_is_loaded = True + + def _load_per_client_data(self): + filename = self._get_filename() + if self._fs.exists(filename): + blob = self._fs.cat(filename) + data = obnamlib.deserialise_object(blob) + self._client_keys.set_from_dict(data['keys']) + for gen_dict in data['generations']: + gen = GAGeneration() + gen.set_from_dict(gen_dict) + self._generations.append(gen) + def _get_filename(self): return os.path.join(self.get_dirname(), 'data.dat') + def _load_file_metadata(self): + blob_store = self._get_blob_store() + for gen in self._generations: + metadata = gen.get_file_metadata() + metadata.set_blob_store(blob_store) + metadata.set_root_object_id(gen.get_root_object_id()) + def get_client_generation_ids(self): self._load_data() - generations = self._data.get('generations', []) return [ - obnamlib.GenerationId(self._client_name, gen['id']) - for gen in generations] - - def _load_data(self): - if not self._data_is_loaded: - filename = self._get_filename() - if self._fs.exists(filename): - blob = self._fs.cat(filename) - self._data = obnamlib.deserialise_object(blob) - assert self._data is not None - else: - self._data = {} - self._data_is_loaded = True + obnamlib.GenerationId(self._client_name, gen.get_number()) + for gen in self._generations] def create_generation(self): self._load_data() self._require_previous_generation_is_finished() - generations = self._data.get('generations', []) - if generations: - previous = copy.deepcopy(generations[-1]) + new_generation = GAGeneration() + new_metadata = new_generation.get_file_metadata() + new_metadata.set_blob_store(self._get_blob_store()) + + if self._generations: + latest = self._generations.get_latest() + new_dict = copy.deepcopy(latest.as_dict()) + new_generation.set_from_dict(new_dict) + + latest_metadata = latest.get_file_metadata() + new_metadata.set_root_object_id( + latest_metadata.get_root_object_id()) else: - previous = { - 'keys': {}, - 'files': {}, - } + new_metadata.set_root_object_id(None) - new_generation = dict(previous) - new_generation['id'] = self._new_generation_number() - keys = new_generation['keys'] - keys[obnamlib.repo_key_name(obnamlib.REPO_GENERATION_STARTED)] = \ - int(self._current_time()) - keys[obnamlib.repo_key_name(obnamlib.REPO_GENERATION_ENDED)] = None + new_generation.set_number(self._new_generation_number()) + new_generation.set_key( + obnamlib.repo_key_name(obnamlib.REPO_GENERATION_STARTED), + int(self._current_time())) + new_generation.set_key( + obnamlib.repo_key_name(obnamlib.REPO_GENERATION_ENDED), + None) - self._data['generations'] = generations + [new_generation] + self._generations.append(new_generation) - return obnamlib.GenerationId(self._client_name, new_generation['id']) + return obnamlib.GenerationId( + self._client_name, new_generation.get_number()) def _require_previous_generation_is_finished(self): - generations = self._data.get('generations', []) - if generations: - keys = generations[-1]['keys'] + if self._generations: + latest = self._generations.get_latest() key_name = obnamlib.repo_key_name(obnamlib.REPO_GENERATION_ENDED) - if keys[key_name] is None: + if latest.get_key(key_name) is None: raise obnamlib.RepositoryClientGenerationUnfinished( client_name=self._client_name) def _new_generation_number(self): - generations = self._data.get('generations', []) - ids = [int(gen['id']) for gen in generations] - if ids: - newest_id = ids[-1] - next_id = newest_id + 1 + if self._generations: + ids = [gen.get_number() for gen in self._generations] + return str(int(ids[-1]) + 1) else: - next_id = 1 - return str(next_id) + return str(1) def remove_generation(self, gen_number): self._load_data() - generations = self._data.get('generations', []) remaining = [] removed = False - for generation in generations: - if generation['id'] == gen_number: + for generation in self._generations: + if generation.get_number() == gen_number: removed = True else: remaining.append(generation) @@ -146,33 +186,31 @@ class GAClient(object): client_name=self._client_name, gen_id=gen_number) - self._data['generations'] = remaining + self._generations.set_generations(remaining) def get_generation_key(self, gen_number, key): self._load_data() generation = self._lookup_generation_by_gen_number(gen_number) key_name = obnamlib.repo_key_name(key) if key in obnamlib.REPO_GENERATION_INTEGER_KEYS: - value = generation['keys'].get(key_name, None) + value = generation.get_key(key_name) if value is None: value = 0 return int(value) else: - return generation['keys'].get(key_name, '') + return generation.get_key(key_name, default='') def _lookup_generation_by_gen_number(self, gen_number): - if 'generations' in self._data: - generations = self._data['generations'] - for generation in generations: - if generation['id'] == gen_number: - return generation + for generation in self._generations: + if generation.get_number() == gen_number: + return generation raise obnamlib.RepositoryGenerationDoesNotExist( gen_id=gen_number, client_name=self._client_name) def set_generation_key(self, gen_number, key, value): self._load_data() generation = self._lookup_generation_by_gen_number(gen_number) - generation['keys'][obnamlib.repo_key_name(key)] = value + generation.set_key(obnamlib.repo_key_name(key), value) def file_exists(self, gen_number, filename): self._load_data() @@ -180,42 +218,41 @@ class GAClient(object): generation = self._lookup_generation_by_gen_number(gen_number) except obnamlib.RepositoryGenerationDoesNotExist: return False - return filename in generation['files'] + metadata = generation.get_file_metadata() + return metadata.file_exists(filename) def add_file(self, gen_number, filename): self._load_data() generation = self._lookup_generation_by_gen_number(gen_number) - if filename not in generation['files']: - generation['files'][filename] = { - 'keys': {}, - 'chunks': [], - } + metadata = generation.get_file_metadata() + metadata.add_file(filename) def remove_file(self, gen_number, filename): self._load_data() generation = self._lookup_generation_by_gen_number(gen_number) - if filename in generation['files']: - del generation['files'][filename] + metadata = generation.get_file_metadata() + metadata.remove_file(filename) def get_file_key(self, gen_number, filename, key): self._load_data() self._require_file_exists(gen_number, filename) generation = self._lookup_generation_by_gen_number(gen_number) - files = generation['files'] - key_name = obnamlib.repo_key_name(key) + metadata = generation.get_file_metadata() if key in obnamlib.REPO_FILE_INTEGER_KEYS: default = 0 else: default = '' - if key_name not in files[filename]['keys']: + value = metadata.get_file_key(filename, key) + if value is None: return default - return files[filename]['keys'][key_name] or default + return value def _require_file_exists(self, gen_number, filename): generation = self._lookup_generation_by_gen_number(gen_number) - if filename not in generation['files']: + metadata = generation.get_file_metadata() + if not metadata.file_exists(filename): raise obnamlib.RepositoryFileDoesNotExistInGeneration( client_name=self._client_name, genspec=gen_number, @@ -225,34 +262,37 @@ class GAClient(object): self._load_data() self._require_file_exists(gen_number, filename) generation = self._lookup_generation_by_gen_number(gen_number) - files = generation['files'] - key_name = obnamlib.repo_key_name(key) - files[filename]['keys'][key_name] = value + metadata = generation.get_file_metadata() + metadata.set_file_key(filename, key, value) def get_file_chunk_ids(self, gen_number, filename): self._load_data() self._require_file_exists(gen_number, filename) generation = self._lookup_generation_by_gen_number(gen_number) - return generation['files'][filename]['chunks'] + metadata = generation.get_file_metadata() + return metadata.get_file_chunk_ids(filename) def append_file_chunk_id(self, gen_number, filename, chunk_id): self._load_data() self._require_file_exists(gen_number, filename) generation = self._lookup_generation_by_gen_number(gen_number) - generation['files'][filename]['chunks'].append(chunk_id) + metadata = generation.get_file_metadata() + metadata.append_file_chunk_id(filename, chunk_id) def clear_file_chunk_ids(self, gen_number, filename): self._load_data() self._require_file_exists(gen_number, filename) generation = self._lookup_generation_by_gen_number(gen_number) - generation['files'][filename]['chunks'] = [] + metadata = generation.get_file_metadata() + metadata.clear_file_chunk_ids(filename) def get_generation_chunk_ids(self, gen_number): self._load_data() chunk_ids = set() generation = self._lookup_generation_by_gen_number(gen_number) - for filename in generation['files']: - file_chunk_ids = generation['files'][filename]['chunks'] + metadata = generation.get_file_metadata() + for filename in metadata: + file_chunk_ids = metadata.get_file_chunk_ids(filename) chunk_ids = chunk_ids.union(set(file_chunk_ids)) return list(chunk_ids) @@ -260,9 +300,338 @@ class GAClient(object): self._load_data() self._require_file_exists(gen_number, filename) generation = self._lookup_generation_by_gen_number(gen_number) + metadata = generation.get_file_metadata() return [ - x for x in generation['files'] + x for x in metadata if self._is_direct_child_of(x, filename)] def _is_direct_child_of(self, child, parent): return os.path.dirname(child) == parent and child != parent + + +class GAKeys(object): + + def __init__(self): + self._dict = {} + + def as_dict(self): + return self._dict + + def set_from_dict(self, keys_dict): + self._dict = keys_dict + + def get_key(self, key, default=None): + return self._dict.get(key, default) + + def set_key(self, key, value): + self._dict[key] = value + + +class GAGenerationList(object): + + def __init__(self): + self._generations = [] + + def __len__(self): + return len(self._generations) + + def __iter__(self): + for gen in self._generations[:]: + yield gen + + def get_latest(self): + return self._generations[-1] + + def append(self, gen): + self._generations.append(gen) + + def set_generations(self, generations): + self._generations = generations + + +class GAGeneration(object): + + def __init__(self): + self._id = None + self._keys = GAKeys() + self._file_metadata = GAFileMetadata() + self._root_object_id = None + + def as_dict(self): + return { + 'id': self._id, + 'keys': self._keys.as_dict(), + 'root_object_id': self._root_object_id, + } + + def set_from_dict(self, data): + self._id = data['id'] + self._keys = GAKeys() + self._keys.set_from_dict(data['keys']) + self._root_object_id = data['root_object_id'] + + def get_number(self): + return self._id + + def set_number(self, new_id): + self._id = new_id + + def keys(self): + return self._keys.keys() + + def get_key(self, key, default=None): + return self._keys.get_key(key, default=default) + + def set_key(self, key, value): + self._keys.set_key(key, value) + + def get_root_object_id(self): + return self._root_object_id + + def set_root_object_id(self, root_object_id): + self._root_object_id = root_object_id + + def get_file_metadata(self): + return self._file_metadata + + +class GAFileMetadata(object): + + def __init__(self): + self._blob_store = None + self._tree = None + self._added_files = AddedFiles() + + def set_blob_store(self, blob_store): + assert self._blob_store is None + assert self._tree is None + self._blob_store = blob_store + + def set_root_object_id(self, root_object_id): + assert self._blob_store is not None + assert self._tree is None + self._tree = obnamlib.GATree() + self._tree.set_blob_store(self._blob_store) + self._tree.set_root_directory_id(root_object_id) + + def get_root_object_id(self): + return self._tree.get_root_directory_id() + + def flush(self): + assert len(self._added_files) == 0 + self._tree.flush() + + def __iter__(self): + for filename in self._added_files: + yield filename + + stack = ['/'] + while stack: + dir_path = stack.pop() + dir_obj = self._tree.get_directory(dir_path) + if dir_obj is None: + continue + yield dir_path + for basename in dir_obj.get_file_basenames(): + if basename != '.': + pathname = os.path.join(dir_path, basename) + yield pathname + for basename in dir_obj.get_subdir_basenames(): + stack.append(os.path.join(dir_path, basename)) + + def file_exists(self, filename): + if filename in self._added_files: + return True + dir_obj, dir_path, basename = self._get_dir_obj(filename) + return dir_obj and basename in dir_obj.get_file_basenames() + + def _get_dir_obj(self, filename): + '''Return GADirectory and basename for filename. + + If filename refers to an existing directory, the GADirectory + for the directory, the path to the directory, and the basename + "." are returned. + + If filename refers to a file in an existing directory, the + GADirectory, the path to the directory, and the basename of + the file are returned. Note that in this case it is always a + file, never a subdirectory. The file need not exist yet. + + Otherwise, (None, None, None) is returned. + + ''' + + dir_obj = self._tree.get_directory(filename) + if dir_obj: + return dir_obj, filename, '.' + + parent_path = os.path.dirname(filename) + dir_obj = self._tree.get_directory(parent_path) + if dir_obj: + return dir_obj, parent_path, os.path.basename(filename) + + return None, None, None + + def add_file(self, filename): + if not self.file_exists(filename): + self._added_files.add_file(filename) + + def remove_file(self, filename): + if filename in self._added_files: + self._added_files.remove_file(filename) + + if filename == '/': + self._tree.remove_directory('/') + else: + parent_path = os.path.dirname(filename) + parent_obj = self._tree.get_directory(parent_path) + if parent_obj: + basename = os.path.basename(filename) + parent_obj = self._make_mutable(parent_obj) + parent_obj.remove_file(basename) + parent_obj.remove_subdir(basename) + self._tree.set_directory(parent_path, parent_obj) + + def get_file_key(self, filename, key): + if filename in self._added_files: + return self._added_files.get_file_key(filename, key) + dir_obj, dir_path, basename = self._get_dir_obj(filename) + if dir_obj: + return dir_obj.get_file_key(basename, key) + else: + return None + + def set_file_key(self, filename, key, value): + if filename in self._added_files: + self._added_files.set_file_key(filename, key, value) + if key == obnamlib.REPO_FILE_MODE: + self._flush_added_file(filename) + else: + dir_obj, basename = self._get_mutable_dir_obj(filename) + if dir_obj: + dir_obj.set_file_key(basename, key, value) + + def _get_mutable_dir_obj(self, filename): + dir_obj, dir_path, basename = self._get_dir_obj(filename) + if dir_obj: + if dir_obj.is_mutable(): + return dir_obj, basename + else: + new_obj = self._make_mutable(dir_obj) + self._tree.set_directory(dir_path, new_obj) + return new_obj, basename + else: + return dir_obj, basename + + def _make_mutable(self, dir_obj): + if dir_obj.is_mutable(): + return dir_obj + else: + return obnamlib.create_gadirectory_from_dict(dir_obj.as_dict()) + + def _flush_added_file(self, filename): + mode = self._added_files.get_file_key( + filename, obnamlib.REPO_FILE_MODE) + assert mode is not None + file_dict = self._added_files.get_file_dict(filename) + if stat.S_ISDIR(mode): + dir_obj = obnamlib.GADirectory() + dir_obj.add_file('.') + for key, value in file_dict['keys'].items(): + dir_obj.set_file_key('.', key, value) + self._tree.set_directory(filename, dir_obj) + else: + basename = os.path.basename(filename) + parent_path = os.path.dirname(filename) + parent_obj = self._tree.get_directory(parent_path) + if parent_obj is None: + parent_obj = obnamlib.GADirectory() + parent_obj.add_file('.') + else: + parent_obj = self._make_mutable(parent_obj) + + parent_obj.add_file(basename) + for key, value in file_dict['keys'].items(): + parent_obj.set_file_key(basename, key, value) + for chunk_id in file_dict['chunks']: + parent_obj.append_file_chunk_id(basename, chunk_id) + self._tree.set_directory(parent_path, parent_obj) + + self._added_files.remove_file(filename) + + def get_file_chunk_ids(self, filename): + if filename in self._added_files: + chunk_ids = self._added_files.get_file_chunk_ids(filename) + return chunk_ids + + dir_obj, dir_path, basename = self._get_dir_obj(filename) + if dir_obj: + chunk_ids = dir_obj.get_file_chunk_ids(basename) + return chunk_ids + else: + return [] + + def append_file_chunk_id(self, filename, chunk_id): + if filename in self._added_files: + self._added_files.append_file_chunk_id(filename, chunk_id) + return + dir_obj, basename = self._get_mutable_dir_obj(filename) + if dir_obj: + dir_obj.append_file_chunk_id(basename, chunk_id) + + def clear_file_chunk_ids(self, filename): + if filename in self._added_files: + self._added_files.clear_file_chunk_ids(filename) + return + dir_obj, basename = self._get_mutable_dir_obj(filename) + assert basename != '.' + if dir_obj: + dir_obj.clear_file_chunk_ids(basename) + + +class AddedFiles(object): + + def __init__(self): + self.clear() + + def clear(self): + self._files = {} + + def __contains__(self, filename): + return filename in self._files + + def __iter__(self): + for filename in self._files: + yield filename + + def __len__(self): + return len(self._files) + + def get_file_dict(self, filename): + return self._files[filename] + + def add_file(self, filename): + assert filename not in self._files + self._files[filename] = { + 'keys': {}, + 'chunks': [], + } + + def remove_file(self, filename): + assert filename in self._files + del self._files[filename] + + def get_file_key(self, filename, key): + return self._files[filename]['keys'].get(key) + + def set_file_key(self, filename, key, value): + self._files[filename]['keys'][key] = value + + def get_file_chunk_ids(self, filename): + return self._files[filename]['chunks'] + + def append_file_chunk_id(self, filename, chunk_id): + self._files[filename]['chunks'].append(chunk_id) + + def clear_file_chunk_ids(self, filename): + self._files[filename]['chunks'] = [] diff --git a/obnamlib/fmt_ga/dirobj.py b/obnamlib/fmt_ga/dirobj.py index e31f931a..b7f0915f 100644 --- a/obnamlib/fmt_ga/dirobj.py +++ b/obnamlib/fmt_ga/dirobj.py @@ -56,11 +56,13 @@ class GADirectory(object): return self._dict['metadata'].keys() def get_file_key(self, basename, key): - return self._dict['metadata'][basename].get(key) + key_name = obnamlib.repo_key_name(key) + return self._dict['metadata'][basename].get(key_name) def set_file_key(self, basename, key, value): self._require_mutable() - self._dict['metadata'][basename][key] = value + key_name = obnamlib.repo_key_name(key) + self._dict['metadata'][basename][key_name] = value def get_file_chunk_ids(self, basename): return self._dict['metadata'][basename]['chunk-ids'] diff --git a/obnamlib/fmt_ga/tree.py b/obnamlib/fmt_ga/tree.py index 8e5e01f2..cbe36c22 100644 --- a/obnamlib/fmt_ga/tree.py +++ b/obnamlib/fmt_ga/tree.py @@ -18,6 +18,8 @@ import os +import tracing + import obnamlib @@ -51,21 +53,29 @@ class GATree(object): def get_directory(self, pathname): if pathname in self._cache: + tracing.trace('cache hit: pathname=%r', pathname) return self._cache.get(pathname) + tracing.trace('cache miss: pathname=%r', pathname) + if self._root_dir_id is None: return None + dir_obj = None if pathname == '/': - return self._get_dir_obj(self._root_dir_id) + dir_obj = self._get_dir_obj(self._root_dir_id) else: parent_obj = self._get_containing_dir_obj(pathname) if parent_obj is not None: basename = os.path.basename(pathname) obj_id = parent_obj.get_subdir_object_id(basename) if obj_id is not None: - return self._get_dir_obj(obj_id) - return None # pragma: no cover + dir_obj = self._get_dir_obj(obj_id) + + if dir_obj is not None: + self._cache.set(pathname, dir_obj) + + return dir_obj def _get_dir_obj(self, dir_id): blob = self._blob_store.get_blob(dir_id) @@ -93,6 +103,10 @@ class GATree(object): parent_obj.as_dict()) else: parent_obj = obnamlib.GADirectory() + parent_obj.add_file('.') + if not parent_obj.is_mutable(): + parent_obj = obnamlib.create_gadirectory_from_dict( + parent_obj.as_dict()) parent_obj.add_subdir(basename, None) self.set_directory(parent_path, parent_obj) @@ -121,7 +135,8 @@ class GATree(object): self.set_directory(parent_path, parent_obj) def flush(self): - self._root_dir_id = self._fixup_subdir_refs('/') + if '/' in self._cache: + self._root_dir_id = self._fixup_subdir_refs('/') self._blob_store.flush() self._cache.clear() diff --git a/obnamlib/plugins/backup_plugin.py b/obnamlib/plugins/backup_plugin.py index 5f9d694c..8acd8f98 100644 --- a/obnamlib/plugins/backup_plugin.py +++ b/obnamlib/plugins/backup_plugin.py @@ -569,6 +569,7 @@ class BackupPlugin(obnamlib.ObnamPlugin): # thinking. self.progress.backed_up_count += 1 + self.backup_metadata(pathname, metadata) if stat.S_ISREG(metadata.st_mode): assert metadata.md5 is None metadata.md5 = self.backup_file_contents(pathname, metadata) diff --git a/obnamlib/repo_interface.py b/obnamlib/repo_interface.py index 59f92d95..1d6910e2 100644 --- a/obnamlib/repo_interface.py +++ b/obnamlib/repo_interface.py @@ -1526,12 +1526,16 @@ class RepositoryInterfaceTests(unittest.TestCase): # pragma: no cover def test_committing_remembers_file_add(self): gen_id = self.create_generation() self.repo.add_file(gen_id, '/foo/bar') + self.repo.set_file_key( + gen_id, '/foo/bar', obnamlib.REPO_FILE_MODE, stat.S_IFREG) self.repo.commit_client('fooclient') self.assertTrue(self.repo.file_exists(gen_id, '/foo/bar')) def test_creating_generation_clones_previous_one(self): gen_id = self.create_generation() self.repo.add_file(gen_id, '/foo/bar') + self.repo.set_file_key( + gen_id, '/foo/bar', obnamlib.REPO_FILE_MODE, stat.S_IFREG) self.repo.commit_client('fooclient') self.repo.lock_client('fooclient') @@ -1547,6 +1551,8 @@ class RepositoryInterfaceTests(unittest.TestCase): # pragma: no cover def test_unlocking_forgets_file_removal(self): gen_id = self.create_generation() self.repo.add_file(gen_id, '/foo/bar') + self.repo.set_file_key( + gen_id, '/foo/bar', obnamlib.REPO_FILE_MODE, stat.S_IFREG) self.repo.commit_client('fooclient') self.repo.lock_client('fooclient') @@ -1559,6 +1565,8 @@ class RepositoryInterfaceTests(unittest.TestCase): # pragma: no cover def test_committing_remembers_file_removal(self): gen_id = self.create_generation() self.repo.add_file(gen_id, '/foo/bar') + self.repo.set_file_key( + gen_id, '/foo/bar', obnamlib.REPO_FILE_MODE, stat.S_IFREG) self.repo.commit_client('fooclient') self.repo.lock_client('fooclient') @@ -1742,6 +1750,8 @@ class RepositoryInterfaceTests(unittest.TestCase): # pragma: no cover gen_id = self.create_generation() self.repo.add_file(gen_id, '/foo/bar') self.repo.set_file_key( + gen_id, '/foo/bar', obnamlib.REPO_FILE_MODE, stat.S_IFREG) + self.repo.set_file_key( gen_id, '/foo/bar', obnamlib.REPO_FILE_TEST_KEY, 'yoyo') self.repo.commit_client('fooclient') value = self.repo.get_file_key( @@ -1752,6 +1762,8 @@ class RepositoryInterfaceTests(unittest.TestCase): # pragma: no cover gen_id = self.create_generation() self.repo.add_file(gen_id, '/foo/bar') self.repo.set_file_key( + gen_id, '/foo/bar', obnamlib.REPO_FILE_MODE, stat.S_IFREG) + self.repo.set_file_key( gen_id, '/foo/bar', obnamlib.REPO_FILE_TEST_KEY, 'first') self.repo.commit_client('fooclient') @@ -1846,6 +1858,8 @@ class RepositoryInterfaceTests(unittest.TestCase): # pragma: no cover def test_unlocking_client_forgets_modified_file_chunk_ids(self): gen_id = self.create_generation() self.repo.add_file(gen_id, '/foo/bar') + self.repo.set_file_key( + gen_id, '/foo/bar', obnamlib.REPO_FILE_MODE, stat.S_IFREG) self.repo.append_file_chunk_id(gen_id, '/foo/bar', 1) self.repo.commit_client('fooclient') @@ -1864,6 +1878,8 @@ class RepositoryInterfaceTests(unittest.TestCase): # pragma: no cover def test_committing_child_remembers_modified_file_chunk_ids(self): gen_id = self.create_generation() self.repo.add_file(gen_id, '/foo/bar') + self.repo.set_file_key( + gen_id, '/foo/bar', obnamlib.REPO_FILE_MODE, stat.S_IFREG) self.repo.append_file_chunk_id(gen_id, '/foo/bar', 1) self.repo.commit_client('fooclient') |