diff options
author | Lars Wirzenius <liw@liw.fi> | 2012-04-24 08:58:23 +0100 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2012-04-24 08:58:23 +0100 |
commit | 4fe28031e7296e1c5270e94b2097acf7d063d094 (patch) | |
tree | 4f3594108965e8465662e6ab6581ef53723caf8c | |
parent | 7b377503897b07806590d751b816534ca9a4d38b (diff) | |
parent | ea79516de482f4867f243afd3c30f552b2f7fae7 (diff) | |
download | larch-4fe28031e7296e1c5270e94b2097acf7d063d094.tar.gz |
Merge read-only mode
-rw-r--r-- | NEWS | 7 | ||||
-rwxr-xr-x | insert-remove-test | 2 | ||||
-rw-r--r-- | larch/__init__.py | 2 | ||||
-rw-r--r-- | larch/forest.py | 10 | ||||
-rw-r--r-- | larch/forest_tests.py | 25 | ||||
-rw-r--r-- | larch/journal.py | 121 | ||||
-rw-r--r-- | larch/journal_tests.py | 104 | ||||
-rw-r--r-- | larch/nodestore_disk.py | 8 | ||||
-rw-r--r-- | larch/nodestore_disk_tests.py | 6 |
9 files changed, 234 insertions, 51 deletions
@@ -4,6 +4,13 @@ NEWS for larch These are the release notes for larch, a Python implementation of a copy-on-write B-tree, designed by Odah Rodeh. +Version 0.30, released UNRELEASED +--------------------------------- + +* `NodeStoreDisk` is now explicitly in read-only or read-write mode. + In read-only mode it does not replay or rollback to the journal, or + care about any changes made there. + Version 0.29, released 2012-04-15 --------------------------------- diff --git a/insert-remove-test b/insert-remove-test index 141051d..dd6cecf 100755 --- a/insert-remove-test +++ b/insert-remove-test @@ -95,7 +95,7 @@ def main(): if os.path.exists(location): raise Exception('%s exists already' % location) os.mkdir(location) - ns = larch.NodeStoreDisk(node_size, codec, dirname=location) + ns = larch.NodeStoreDisk(True, node_size, codec, dirname=location) forest = larch.Forest(ns) tree = forest.new_tree() diff --git a/larch/__init__.py b/larch/__init__.py index cc83b85..52b02d4 100644 --- a/larch/__init__.py +++ b/larch/__init__.py @@ -27,7 +27,7 @@ from refcountstore import RefcountStore from lru import LRUCache from uploadqueue import UploadQueue from idpath import IdPath -from journal import Journal +from journal import Journal, ReadOnlyMode from nodestore_disk import NodeStoreDisk, LocalFS, FormatProblem from nodestore_memory import NodeStoreMemory diff --git a/larch/forest.py b/larch/forest.py index 75705e2..881fb86 100644 --- a/larch/forest.py +++ b/larch/forest.py @@ -149,8 +149,8 @@ class Forest(object): self.node_store.commit() -def open_forest(key_size=None, node_size=None, codec=None, node_store=None, - **kwargs): +def open_forest(allow_writes=None, key_size=None, node_size=None, codec=None, + node_store=None, **kwargs): '''Create or open a forest. ``key_size`` and ``node_size`` are retrieved from the forest, unless @@ -167,6 +167,8 @@ def open_forest(key_size=None, node_size=None, codec=None, node_store=None, ''' tracing.trace('opening forest') + + assert allow_writes is not None codec = codec or larch.NodeCodec node_store = node_store or larch.NodeStoreDisk @@ -176,7 +178,7 @@ def open_forest(key_size=None, node_size=None, codec=None, node_store=None, # For this, we can use any values for node and key sizes, # since we won't be accessing nodes or keys. c_temp = codec(42) - ns_temp = node_store(42, c_temp, **kwargs) + ns_temp = node_store(False, 42, c_temp, **kwargs) assert 'key_size' in ns_temp.get_metadata_keys() assert 'node_size' in ns_temp.get_metadata_keys() @@ -187,7 +189,7 @@ def open_forest(key_size=None, node_size=None, codec=None, node_store=None, node_size = int(ns_temp.get_metadata('node_size')) c = codec(key_size) - ns = node_store(node_size, c, **kwargs) + ns = node_store(allow_writes, node_size, c, **kwargs) def check_size(keyname, wanted, exception): if keyname not in ns.get_metadata_keys(): diff --git a/larch/forest_tests.py b/larch/forest_tests.py index e8d8a17..7603b86 100644 --- a/larch/forest_tests.py +++ b/larch/forest_tests.py @@ -132,50 +132,53 @@ class OpenForestTests(unittest.TestCase): def test_creates_new_forest(self): f = larch.open_forest(key_size=self.key_size, node_size=self.node_size, - dirname=self.tempdir) + dirname=self.tempdir, allow_writes=True) self.assertEqual(f.node_store.codec.key_bytes, self.key_size) self.assertEqual(f.node_store.node_size, self.node_size) def test_fail_if_existing_tree_has_incompatible_key_size(self): f = larch.open_forest(key_size=self.key_size, node_size=self.node_size, - dirname=self.tempdir) + dirname=self.tempdir, allow_writes=True) f.commit() self.assertRaises(larch.BadKeySize, larch.open_forest, key_size=self.key_size + 1, node_size=self.node_size, - dirname=self.tempdir) + dirname=self.tempdir, + allow_writes=True) def test_opens_existing_tree_with_incompatible_node_size(self): - f = larch.open_forest(key_size=self.key_size, node_size=self.node_size, - dirname=self.tempdir) + f = larch.open_forest(allow_writes=True, key_size=self.key_size, + node_size=self.node_size, dirname=self.tempdir) f.commit() new_size = self.node_size + 1 f2 = larch.open_forest(key_size=self.key_size, node_size=new_size, - dirname=self.tempdir) + dirname=self.tempdir, + allow_writes=True) self.assertEqual(int(f2.node_store.get_metadata('node_size')), self.node_size) def test_opens_existing_tree_with_compatible_key_and_node_size(self): f = larch.open_forest(key_size=self.key_size, node_size=self.node_size, - dirname=self.tempdir) + dirname=self.tempdir, allow_writes=True) f.commit() f2 = larch.open_forest(key_size=self.key_size, node_size=self.node_size, - dirname=self.tempdir) + dirname=self.tempdir, + allow_writes=True) self.assert_(True) def test_opens_existing_tree_without_node_and_key_sizes_given(self): - f = larch.open_forest(key_size=self.key_size, node_size=self.node_size, - dirname=self.tempdir) + f = larch.open_forest(allow_writes=True, key_size=self.key_size, + node_size=self.node_size, dirname=self.tempdir) f.commit() - f2 = larch.open_forest(dirname=self.tempdir) + f2 = larch.open_forest(dirname=self.tempdir, allow_writes=True) self.assertEqual(f2.node_store.node_size, self.node_size) self.assertEqual(f2.node_store.codec.key_bytes, self.key_size) diff --git a/larch/journal.py b/larch/journal.py index ff6498d..4ea1e9f 100644 --- a/larch/journal.py +++ b/larch/journal.py @@ -20,6 +20,12 @@ import os import tracing +class ReadOnlyMode(Exception): # pragma: no cover + + def __str__(self): + return 'Larch B-tree is in read-only mode, no changes allowed' + + class Journal(object): '''A journal layer on top of a virtual filesystem. @@ -34,18 +40,18 @@ class Journal(object): * ``x`` is the real filename * ``new/x`` is a new or modified file - * ``delete/x`` is a deleted file, moved there immediately + * ``delete/x`` is a deleted file, created there as a flag file Commit does this: - * for every ``delete/x``, remove it + * for every ``delete/x``, remove ``x`` * for every ``new/x`` except ``new/metadata``, move to ``x`` * move ``new/metadata`` to ``metadata`` Rollback does this: * remove every ``new/x`` - * move every ``delete/x`` to ``x`` + * remove every ``delete/x`` When a journalled node store is opened, if ``new/metadata`` exists, the commit happens. Otherwise a rollback happens. This guarantees @@ -54,12 +60,17 @@ class Journal(object): We only provide enough of a filesystem interface as is needed by NodeStoreDisk. For example, we do not care about directory removal. + The journal can be opened in read-only mode, in which case it ignores + any changes in ``new`` and ``delete``, and does not try to rollback or + commit at start. + ''' flag_basename = 'metadata' - def __init__(self, fs, storedir): + def __init__(self, allow_writes, fs, storedir): logging.debug('Initializing Journal for %s' % storedir) + self.allow_writes = allow_writes self.fs = fs self.storedir = storedir if not self.storedir.endswith(os.sep): @@ -69,12 +80,18 @@ class Journal(object): self.flag_file = os.path.join(self.storedir, self.flag_basename) self.new_flag = os.path.join(self.newdir, self.flag_basename) - if self.fs.exists(self.new_flag): - logging.debug('Automatically committing remaining changes') - self.commit() - else: - logging.debug('Automatically rolling back remaining changes') - self.rollback() + if self.allow_writes: + if self.fs.exists(self.new_flag): + logging.debug('Automatically committing remaining changes') + self.commit() + else: + logging.debug('Automatically rolling back remaining changes') + self.rollback() + + def _require_rw(self): + '''Raise error if modifications are not allowed.''' + if not self.allow_writes: + raise ReadOnlyMode() def _relative(self, filename): '''Return the part of filename that is relative to storedir.''' @@ -88,29 +105,47 @@ class Journal(object): def _deleted(self, filename): '''Return name for temporary name for file to be deleted.''' return os.path.join(self.deletedir, self._relative(filename)) + + def _realname(self, journaldir, filename): + '''Return real name for a file in a journal temporary directory.''' + assert filename.startswith(journaldir) + return os.path.join(self.storedir, filename[len(journaldir):]) def exists(self, filename): - return (self.fs.exists(filename) or - self.fs.exists(self._new(filename))) + if self.allow_writes: + new = self._new(filename) + deleted = self._deleted(filename) + if self.fs.exists(new): + return True + elif self.fs.exists(deleted): + return False + return self.fs.exists(filename) def makedirs(self, dirname): tracing.trace(dirname) + self._require_rw() x = self._new(dirname) self.fs.makedirs(x) def overwrite_file(self, filename, contents): tracing.trace(filename) + self._require_rw() self.fs.overwrite_file(self._new(filename), contents) def cat(self, filename): - new = self._new(filename) - if self.fs.exists(new): - return self.fs.cat(new) - else: - return self.fs.cat(filename) + if self.allow_writes: + new = self._new(filename) + deleted = self._deleted(filename) + if self.fs.exists(new): + return self.fs.cat(new) + elif self.fs.exists(deleted): + raise OSError((errno.ENOENT, os.strerror(errno.ENOENT), + filename)) + return self.fs.cat(filename) def remove(self, filename): tracing.trace(filename) + self._require_rw() new = self._new(filename) deleted = self._deleted(filename) @@ -120,10 +155,34 @@ class Journal(object): elif self.fs.exists(deleted): raise OSError((errno.ENOENT, os.strerror(errno.ENOENT), filename)) else: - dirname = os.path.dirname(deleted) - if not self.fs.exists(dirname): - self.fs.makedirs(dirname) - self.fs.rename(filename, deleted) + self.fs.overwrite_file(deleted, '') + + def list_files(self, dirname): + '''List all files. + + Files only, no directories. + + ''' + + assert not dirname.startswith(self.newdir) + assert not dirname.startswith(self.deletedir) + + if self.allow_writes: + if self.fs.exists(dirname): + for x in self.climb(dirname, files_only=True): + if self.exists(x): + yield x + new = self._new(dirname) + if self.fs.exists(new): + for x in self.climb(new, files_only=True): + yield self._realname(self.newdir, x) + else: + if self.fs.exists(dirname): + for x in self.climb(dirname, files_only=True): + in_new = x.startswith(self.newdir) + in_deleted = x.startswith(self.deletedir) + if not in_new and not in_deleted: + yield x def climb(self, dirname, files_only=False): basenames = self.fs.listdir(dirname) @@ -155,7 +214,7 @@ class Journal(object): all_excludes = [dirname] + exclude for pathname in self.climb(dirname): if pathname not in all_excludes: - r = os.path.join(self.storedir, pathname[len(dirname):]) + r = self._realname(dirname, pathname) parent = os.path.dirname(r) if self.fs.isdir(pathname): if not self.fs.exists(r): @@ -169,20 +228,34 @@ class Journal(object): def rollback(self): tracing.trace('%s start' % self.storedir) + self._require_rw() if self.fs.exists(self.newdir): self._clear_directory(self.newdir) if self.fs.exists(self.deletedir): - self._vivify(self.deletedir, []) + self._clear_directory(self.deletedir) tracing.trace('%s done' % self.storedir) + def _really_delete(self, deletedir): + tracing.trace(deletedir) + for pathname in self.climb(deletedir, files_only=True): + if pathname != deletedir: + realname = self._realname(deletedir, pathname) + try: + self.fs.remove(realname) + except OSError, e: # pragma: no cover + if e.errno not in (errno.ENOENT, errno.EISDIR): + raise + self.fs.remove(pathname) + def commit(self, skip=[]): tracing.trace('%s start' % self.storedir) + self._require_rw() if self.fs.exists(self.deletedir): - self._clear_directory(self.deletedir) + self._really_delete(self.deletedir) if self.fs.exists(self.newdir): skip = [self._new(x) for x in skip] diff --git a/larch/journal_tests.py b/larch/journal_tests.py index 8d55b57..ddb5311 100644 --- a/larch/journal_tests.py +++ b/larch/journal_tests.py @@ -27,7 +27,7 @@ class JournalTests(unittest.TestCase): def setUp(self): self.tempdir = tempfile.mkdtemp() self.fs = larch.LocalFS() - self.j = larch.Journal(self.fs, self.tempdir) + self.j = larch.Journal(True, self.fs, self.tempdir) def tearDown(self): shutil.rmtree(self.tempdir) @@ -92,6 +92,13 @@ class JournalTests(unittest.TestCase): self.j.overwrite_file(filename, 'yo') self.assertEqual(self.j.cat(filename), 'yo') + def test_cat_does_not_find_deleted_file(self): + filename = self.join('foo/bar') + self.j.overwrite_file(filename, 'bar') + self.j.commit() + self.j.remove(filename) + self.assertRaises(OSError, self.j.cat, filename) + def test_rollback_brings_back_old_file(self): filename = self.join('foo/bar') self.j.overwrite_file(filename, 'bar') @@ -162,7 +169,7 @@ class JournalTests(unittest.TestCase): filename = self.join('foo/bar') self.j.overwrite_file(filename, 'bar') - j2 = larch.Journal(self.fs, self.tempdir) + j2 = larch.Journal(True, self.fs, self.tempdir) self.assertFalse(j2.exists(filename)) def test_partial_commit_finished_by_new_instance(self): @@ -172,6 +179,97 @@ class JournalTests(unittest.TestCase): self.j.overwrite_file(metadata, '') self.j.commit(skip=[filename]) - j2 = larch.Journal(self.fs, self.tempdir) + j2 = larch.Journal(True, self.fs, self.tempdir) self.assertTrue(j2.exists(filename)) + +class ReadOnlyJournalTests(unittest.TestCase): + + def setUp(self): + self.tempdir = tempfile.mkdtemp() + self.fs = larch.LocalFS() + self.rw = larch.Journal(True, self.fs, self.tempdir) + self.ro = larch.Journal(False, self.fs, self.tempdir) + + def tearDown(self): + shutil.rmtree(self.tempdir) + + def join(self, *args): + return os.path.join(self.tempdir, *args) + + def test_does_not_know_random_directory_initially(self): + self.assertFalse(self.ro.exists(self.join('foo'))) + + def test_creating_directory_raises_error(self): + self.assertRaises(larch.ReadOnlyMode, self.ro.makedirs, 'foo') + + def test_calling_rollback_raises_error(self): + self.assertRaises(larch.ReadOnlyMode, self.ro.rollback) + + def test_readonly_mode_does_not_check_for_directory_creation(self): + dirname = self.join('foo/bar') + self.rw.makedirs(dirname) + self.assertFalse(self.ro.exists(dirname)) + + def test_write_file_raisees_error(self): + self.assertRaises(larch.ReadOnlyMode, + self.ro.overwrite_file, 'foo', 'bar') + + def test_readonly_mode_does_not_check_for_new_file(self): + filename = self.join('foo') + self.rw.overwrite_file(filename, 'bar') + self.assertFalse(self.ro.exists(filename)) + + def test_readonly_mode_does_not_check_for_modified_file(self): + filename = self.join('foo') + self.rw.overwrite_file(filename, 'first') + self.rw.commit() + self.assertEqual(self.ro.cat(filename), 'first') + self.rw.overwrite_file(filename, 'second') + self.assertEqual(self.ro.cat(filename), 'first') + + def test_readonly_mode_does_not_know_file_is_deleted_in_journal(self): + filename = self.join('foo/bar') + self.rw.overwrite_file(filename, 'bar') + self.rw.commit() + self.rw.remove(filename) + self.assertEqual(self.ro.cat(filename), 'bar') + + def tests_lists_no_files_initially(self): + dirname = self.join('foo') + self.assertEqual(list(self.ro.list_files(dirname)), []) + + def test_lists_files_correctly_when_no_changes(self): + dirname = self.join('foo') + filename = self.join('foo/bar') + self.rw.overwrite_file(filename, 'bar') + self.rw.commit() + self.assertEqual(list(self.ro.list_files(dirname)), [filename]) + + def test_lists_added_file_correctly(self): + dirname = self.join('foo') + filename = self.join('foo/bar') + self.rw.overwrite_file(filename, 'bar') + self.assertEqual(list(self.rw.list_files(dirname)), [filename]) + self.assertEqual(list(self.ro.list_files(dirname)), []) + + def test_lists_added_file_correctly_when_dir_existed_already(self): + dirname = self.join('foo') + filename = self.join('foo/bar') + filename2 = self.join('foo/foobar') + self.rw.overwrite_file(filename, 'bar') + self.rw.commit() + self.rw.overwrite_file(filename2, 'yoyo') + self.assertEqual(sorted(list(self.rw.list_files(dirname))), + sorted([filename, filename2])) + self.assertEqual(list(self.ro.list_files(dirname)), [filename]) + + def test_lists_removed_file_correctly(self): + dirname = self.join('foo') + filename = self.join('foo/bar') + self.rw.overwrite_file(filename, 'bar') + self.rw.commit() + self.rw.remove(filename) + self.assertEqual(list(self.rw.list_files(dirname)), []) + self.assertEqual(list(self.ro.list_files(dirname)), [filename]) + diff --git a/larch/nodestore_disk.py b/larch/nodestore_disk.py index 01cae30..cece8d4 100644 --- a/larch/nodestore_disk.py +++ b/larch/nodestore_disk.py @@ -112,8 +112,8 @@ class NodeStoreDisk(larch.NodeStore): nodedir = 'nodes' - def __init__(self, node_size, codec, dirname=None, upload_max=1024, - lru_size=500, vfs=None, format=None): + def __init__(self, allow_writes, node_size, codec, dirname=None, + upload_max=1024, lru_size=500, vfs=None, format=None): tracing.trace('new NodeStoreDisk: %s', dirname) assert dirname is not None if format is not None: @@ -130,7 +130,7 @@ class NodeStoreDisk(larch.NodeStore): self.upload_queue = larch.UploadQueue(self._really_put_node, self.upload_max) self.vfs = vfs if vfs != None else LocalFS() - self.journal = larch.Journal(self.vfs, dirname) + self.journal = larch.Journal(allow_writes, self.vfs, dirname) self.idpath = larch.IdPath(os.path.join(dirname, self.nodedir), DIR_DEPTH, DIR_BITS, DIR_SKIP) @@ -263,7 +263,7 @@ class NodeStoreDisk(larch.NodeStore): nodedir = os.path.join(self.dirname, self.nodedir) uploaded = [] if self.journal.exists(nodedir): - for filename in self.journal.climb(nodedir, files_only=True): + for filename in self.journal.list_files(nodedir): uploaded.append(int(os.path.basename(filename), 16)) return queued + uploaded diff --git a/larch/nodestore_disk_tests.py b/larch/nodestore_disk_tests.py index ba21270..7f22c31 100644 --- a/larch/nodestore_disk_tests.py +++ b/larch/nodestore_disk_tests.py @@ -35,9 +35,9 @@ class NodeStoreDiskTests(unittest.TestCase, larch.NodeStoreTests): shutil.rmtree(self.tempdir) def new_ns(self, format=None): - return nodestore_disk.NodeStoreDisk(self.node_size, self.codec, - dirname=self.tempdir, - format=format) + return nodestore_disk.NodeStoreDisk(True, self.node_size, self.codec, + dirname=self.tempdir, + format=format) def test_metadata_has_format_version(self): self.assertEqual(self.ns.get_metadata('format'), |