diff options
author | Lars Wirzenius <liw@liw.fi> | 2015-11-17 18:18:31 +0000 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2015-11-17 18:18:31 +0000 |
commit | ed6950c68c6ae3a8f6a72fba2a528e9732985453 (patch) | |
tree | 38534a0617722e280aa01459eddbf461a99e6e8d | |
parent | 5af7e9778d1565cce5390633e22ffde9207a1584 (diff) | |
download | obnam-ed6950c68c6ae3a8f6a72fba2a528e9732985453.tar.gz |
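Change the data structure the GA chunk indexes use
This should allow faster lookups: the flat list of index records is replaced by dictionaries keyed by chunk id and by checksum, plus a `used_by` mapping from chunk id to client ids.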
-rw-r--r-- | obnamlib/fmt_ga/indexes.py | 87 |
1 file changed, 69 insertions, 18 deletions
diff --git a/obnamlib/fmt_ga/indexes.py b/obnamlib/fmt_ga/indexes.py index 8a2ed4a3..2a57a440 100644 --- a/obnamlib/fmt_ga/indexes.py +++ b/obnamlib/fmt_ga/indexes.py @@ -59,11 +59,21 @@ class GAChunkIndexes(object): def put_chunk_into_indexes(self, chunk_id, token, client_id): self._load_data() - self._data['index'].append({ - 'chunk-id': chunk_id, - 'sha512': token, - 'client-id': client_id, - }) + + by_chunk_id = self._data['by_chunk_id'] + by_chunk_id[chunk_id] = token + + by_checksum = self._data['by_checksum']['sha512'] + chunk_ids = by_checksum.get(token, []) + if chunk_id not in chunk_ids: + chunk_ids.append(chunk_id) + by_checksum[token] = chunk_ids + + used_by = self._data['used_by'] + client_ids = used_by.get(chunk_id, []) + if client_id not in client_ids: + client_ids.append(client_id) + used_by[chunk_id] = client_ids def _load_data(self): if not self._data_is_loaded: @@ -74,35 +84,76 @@ class GAChunkIndexes(object): assert self._data is not None else: self._data = { - 'index': [], + 'by_chunk_id': { + }, + 'by_checksum': { + 'sha512': {}, + }, + 'used_by': { + }, } self._data_is_loaded = True def find_chunk_ids_by_content(self, chunk_content): self._load_data() + token = self.prepare_chunk_for_indexes(chunk_content) - result = [record['chunk-id'] - for record in self._data['index'] - if record['sha512'] == token] + by_checksum = self._data['by_checksum']['sha512'] + result = by_checksum.get(token, []) + if not result: raise obnamlib.RepositoryChunkContentNotInIndexes() return result def remove_chunk_from_indexes(self, chunk_id, client_id): self._load_data() - self._data['index'] = self._filter_out( - self._data['index'], - lambda x: - x['chunk-id'] == chunk_id and x['client-id'] == client_id) - def _filter_out(self, records, pred): - return [record for record in records if not pred(record)] + used_by = self._data['used_by'] + client_ids = used_by.get(chunk_id, []) + if client_id in client_ids: + client_ids.remove(client_id) + if client_ids: + 
used_by[chunk_id] = client_ids + still_used = True + else: + del used_by[chunk_id] + still_used = False + + if not still_used: + by_chunk_id = self._data['by_chunk_id'] + token = by_chunk_id.get(chunk_id, None) + if token is not None: + del by_chunk_id[chunk_id] + + by_checksum = self._data['by_checksum']['sha512'] + chunk_ids = by_checksum.get(token, []) + if chunk_id in chunk_ids: + chunk_ids.remove(chunk_id) + if chunk_ids: + by_checksum[token] = chunk_ids + else: + del by_checksum[token] def remove_chunk_from_indexes_for_all_clients(self, chunk_id): self._load_data() - self._data['index'] = self._filter_out( - self._data['index'], - lambda x: x['chunk-id'] == chunk_id) + + by_chunk_id = self._data['by_chunk_id'] + token = by_chunk_id.get(chunk_id, None) + if token is not None: + del by_chunk_id[chunk_id] + + by_checksum = self._data['by_checksum']['sha512'] + chunk_ids = by_checksum.get(token, []) + if chunk_id in chunk_ids: + chunk_ids.remove(chunk_id) + if chunk_ids: + by_checksum[token] = chunk_ids + else: + del by_checksum[token] + + used_by = self._data['used_by'] + if chunk_id in used_by: + del used_by[chunk_id] def validate_chunk_content(self, chunk_id): return None |