summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lars Wirzenius <liw@liw.fi> 2012-04-24 08:58:23 +0100
committer: Lars Wirzenius <liw@liw.fi> 2012-04-24 08:58:23 +0100
commit: 4fe28031e7296e1c5270e94b2097acf7d063d094 (patch)
tree: 4f3594108965e8465662e6ab6581ef53723caf8c
parent: 7b377503897b07806590d751b816534ca9a4d38b (diff)
parent: ea79516de482f4867f243afd3c30f552b2f7fae7 (diff)
download: larch-4fe28031e7296e1c5270e94b2097acf7d063d094.tar.gz
Merge read-only mode
-rw-r--r-- NEWS 7
-rwxr-xr-x insert-remove-test 2
-rw-r--r-- larch/__init__.py 2
-rw-r--r-- larch/forest.py 10
-rw-r--r-- larch/forest_tests.py 25
-rw-r--r-- larch/journal.py 121
-rw-r--r-- larch/journal_tests.py 104
-rw-r--r-- larch/nodestore_disk.py 8
-rw-r--r-- larch/nodestore_disk_tests.py 6
9 files changed, 234 insertions, 51 deletions
diff --git a/NEWS b/NEWS
index f9a80c0..ae871f6 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,13 @@ NEWS for larch
These are the release notes for larch, a Python implementation of a
copy-on-write B-tree, designed by Ohad Rodeh.
+Version 0.30, released UNRELEASED
+---------------------------------
+
+* `NodeStoreDisk` is now explicitly in read-only or read-write mode.
+ In read-only mode it does not replay or roll back the journal, or
+ care about any changes made there.
+
Version 0.29, released 2012-04-15
---------------------------------
diff --git a/insert-remove-test b/insert-remove-test
index 141051d..dd6cecf 100755
--- a/insert-remove-test
+++ b/insert-remove-test
@@ -95,7 +95,7 @@ def main():
if os.path.exists(location):
raise Exception('%s exists already' % location)
os.mkdir(location)
- ns = larch.NodeStoreDisk(node_size, codec, dirname=location)
+ ns = larch.NodeStoreDisk(True, node_size, codec, dirname=location)
forest = larch.Forest(ns)
tree = forest.new_tree()
diff --git a/larch/__init__.py b/larch/__init__.py
index cc83b85..52b02d4 100644
--- a/larch/__init__.py
+++ b/larch/__init__.py
@@ -27,7 +27,7 @@ from refcountstore import RefcountStore
from lru import LRUCache
from uploadqueue import UploadQueue
from idpath import IdPath
-from journal import Journal
+from journal import Journal, ReadOnlyMode
from nodestore_disk import NodeStoreDisk, LocalFS, FormatProblem
from nodestore_memory import NodeStoreMemory
diff --git a/larch/forest.py b/larch/forest.py
index 75705e2..881fb86 100644
--- a/larch/forest.py
+++ b/larch/forest.py
@@ -149,8 +149,8 @@ class Forest(object):
self.node_store.commit()
-def open_forest(key_size=None, node_size=None, codec=None, node_store=None,
- **kwargs):
+def open_forest(allow_writes=None, key_size=None, node_size=None, codec=None,
+ node_store=None, **kwargs):
'''Create or open a forest.
``key_size`` and ``node_size`` are retrieved from the forest, unless
@@ -167,6 +167,8 @@ def open_forest(key_size=None, node_size=None, codec=None, node_store=None,
'''
tracing.trace('opening forest')
+
+ assert allow_writes is not None
codec = codec or larch.NodeCodec
node_store = node_store or larch.NodeStoreDisk
@@ -176,7 +178,7 @@ def open_forest(key_size=None, node_size=None, codec=None, node_store=None,
# For this, we can use any values for node and key sizes,
# since we won't be accessing nodes or keys.
c_temp = codec(42)
- ns_temp = node_store(42, c_temp, **kwargs)
+ ns_temp = node_store(False, 42, c_temp, **kwargs)
assert 'key_size' in ns_temp.get_metadata_keys()
assert 'node_size' in ns_temp.get_metadata_keys()
@@ -187,7 +189,7 @@ def open_forest(key_size=None, node_size=None, codec=None, node_store=None,
node_size = int(ns_temp.get_metadata('node_size'))
c = codec(key_size)
- ns = node_store(node_size, c, **kwargs)
+ ns = node_store(allow_writes, node_size, c, **kwargs)
def check_size(keyname, wanted, exception):
if keyname not in ns.get_metadata_keys():
diff --git a/larch/forest_tests.py b/larch/forest_tests.py
index e8d8a17..7603b86 100644
--- a/larch/forest_tests.py
+++ b/larch/forest_tests.py
@@ -132,50 +132,53 @@ class OpenForestTests(unittest.TestCase):
def test_creates_new_forest(self):
f = larch.open_forest(key_size=self.key_size, node_size=self.node_size,
- dirname=self.tempdir)
+ dirname=self.tempdir, allow_writes=True)
self.assertEqual(f.node_store.codec.key_bytes, self.key_size)
self.assertEqual(f.node_store.node_size, self.node_size)
def test_fail_if_existing_tree_has_incompatible_key_size(self):
f = larch.open_forest(key_size=self.key_size, node_size=self.node_size,
- dirname=self.tempdir)
+ dirname=self.tempdir, allow_writes=True)
f.commit()
self.assertRaises(larch.BadKeySize,
larch.open_forest,
key_size=self.key_size + 1,
node_size=self.node_size,
- dirname=self.tempdir)
+ dirname=self.tempdir,
+ allow_writes=True)
def test_opens_existing_tree_with_incompatible_node_size(self):
- f = larch.open_forest(key_size=self.key_size, node_size=self.node_size,
- dirname=self.tempdir)
+ f = larch.open_forest(allow_writes=True, key_size=self.key_size,
+ node_size=self.node_size, dirname=self.tempdir)
f.commit()
new_size = self.node_size + 1
f2 = larch.open_forest(key_size=self.key_size,
node_size=new_size,
- dirname=self.tempdir)
+ dirname=self.tempdir,
+ allow_writes=True)
self.assertEqual(int(f2.node_store.get_metadata('node_size')),
self.node_size)
def test_opens_existing_tree_with_compatible_key_and_node_size(self):
f = larch.open_forest(key_size=self.key_size, node_size=self.node_size,
- dirname=self.tempdir)
+ dirname=self.tempdir, allow_writes=True)
f.commit()
f2 = larch.open_forest(key_size=self.key_size,
node_size=self.node_size,
- dirname=self.tempdir)
+ dirname=self.tempdir,
+ allow_writes=True)
self.assert_(True)
def test_opens_existing_tree_without_node_and_key_sizes_given(self):
- f = larch.open_forest(key_size=self.key_size, node_size=self.node_size,
- dirname=self.tempdir)
+ f = larch.open_forest(allow_writes=True, key_size=self.key_size,
+ node_size=self.node_size, dirname=self.tempdir)
f.commit()
- f2 = larch.open_forest(dirname=self.tempdir)
+ f2 = larch.open_forest(dirname=self.tempdir, allow_writes=True)
self.assertEqual(f2.node_store.node_size, self.node_size)
self.assertEqual(f2.node_store.codec.key_bytes, self.key_size)
diff --git a/larch/journal.py b/larch/journal.py
index ff6498d..4ea1e9f 100644
--- a/larch/journal.py
+++ b/larch/journal.py
@@ -20,6 +20,12 @@ import os
import tracing
+class ReadOnlyMode(Exception): # pragma: no cover
+
+ def __str__(self):
+ return 'Larch B-tree is in read-only mode, no changes allowed'
+
+
class Journal(object):
'''A journal layer on top of a virtual filesystem.
@@ -34,18 +40,18 @@ class Journal(object):
* ``x`` is the real filename
* ``new/x`` is a new or modified file
- * ``delete/x`` is a deleted file, moved there immediately
+ * ``delete/x`` is a deleted file, created there as a flag file
Commit does this:
- * for every ``delete/x``, remove it
+ * for every ``delete/x``, remove ``x``
* for every ``new/x`` except ``new/metadata``, move to ``x``
* move ``new/metadata`` to ``metadata``
Rollback does this:
* remove every ``new/x``
- * move every ``delete/x`` to ``x``
+ * remove every ``delete/x``
When a journalled node store is opened, if ``new/metadata`` exists,
the commit happens. Otherwise a rollback happens. This guarantees
@@ -54,12 +60,17 @@ class Journal(object):
We only provide enough of a filesystem interface as is needed by
NodeStoreDisk. For example, we do not care about directory removal.
+ The journal can be opened in read-only mode, in which case it ignores
+ any changes in ``new`` and ``delete``, and does not try to rollback or
+ commit at start.
+
'''
flag_basename = 'metadata'
- def __init__(self, fs, storedir):
+ def __init__(self, allow_writes, fs, storedir):
logging.debug('Initializing Journal for %s' % storedir)
+ self.allow_writes = allow_writes
self.fs = fs
self.storedir = storedir
if not self.storedir.endswith(os.sep):
@@ -69,12 +80,18 @@ class Journal(object):
self.flag_file = os.path.join(self.storedir, self.flag_basename)
self.new_flag = os.path.join(self.newdir, self.flag_basename)
- if self.fs.exists(self.new_flag):
- logging.debug('Automatically committing remaining changes')
- self.commit()
- else:
- logging.debug('Automatically rolling back remaining changes')
- self.rollback()
+ if self.allow_writes:
+ if self.fs.exists(self.new_flag):
+ logging.debug('Automatically committing remaining changes')
+ self.commit()
+ else:
+ logging.debug('Automatically rolling back remaining changes')
+ self.rollback()
+
+ def _require_rw(self):
+ '''Raise error if modifications are not allowed.'''
+ if not self.allow_writes:
+ raise ReadOnlyMode()
def _relative(self, filename):
'''Return the part of filename that is relative to storedir.'''
@@ -88,29 +105,47 @@ class Journal(object):
def _deleted(self, filename):
'''Return name for temporary name for file to be deleted.'''
return os.path.join(self.deletedir, self._relative(filename))
+
+ def _realname(self, journaldir, filename):
+ '''Return real name for a file in a journal temporary directory.'''
+ assert filename.startswith(journaldir)
+ return os.path.join(self.storedir, filename[len(journaldir):])
def exists(self, filename):
- return (self.fs.exists(filename) or
- self.fs.exists(self._new(filename)))
+ if self.allow_writes:
+ new = self._new(filename)
+ deleted = self._deleted(filename)
+ if self.fs.exists(new):
+ return True
+ elif self.fs.exists(deleted):
+ return False
+ return self.fs.exists(filename)
def makedirs(self, dirname):
tracing.trace(dirname)
+ self._require_rw()
x = self._new(dirname)
self.fs.makedirs(x)
def overwrite_file(self, filename, contents):
tracing.trace(filename)
+ self._require_rw()
self.fs.overwrite_file(self._new(filename), contents)
def cat(self, filename):
- new = self._new(filename)
- if self.fs.exists(new):
- return self.fs.cat(new)
- else:
- return self.fs.cat(filename)
+ if self.allow_writes:
+ new = self._new(filename)
+ deleted = self._deleted(filename)
+ if self.fs.exists(new):
+ return self.fs.cat(new)
+ elif self.fs.exists(deleted):
+ raise OSError((errno.ENOENT, os.strerror(errno.ENOENT),
+ filename))
+ return self.fs.cat(filename)
def remove(self, filename):
tracing.trace(filename)
+ self._require_rw()
new = self._new(filename)
deleted = self._deleted(filename)
@@ -120,10 +155,34 @@ class Journal(object):
elif self.fs.exists(deleted):
raise OSError((errno.ENOENT, os.strerror(errno.ENOENT), filename))
else:
- dirname = os.path.dirname(deleted)
- if not self.fs.exists(dirname):
- self.fs.makedirs(dirname)
- self.fs.rename(filename, deleted)
+ self.fs.overwrite_file(deleted, '')
+
+ def list_files(self, dirname):
+ '''List all files.
+
+ Files only, no directories.
+
+ '''
+
+ assert not dirname.startswith(self.newdir)
+ assert not dirname.startswith(self.deletedir)
+
+ if self.allow_writes:
+ if self.fs.exists(dirname):
+ for x in self.climb(dirname, files_only=True):
+ if self.exists(x):
+ yield x
+ new = self._new(dirname)
+ if self.fs.exists(new):
+ for x in self.climb(new, files_only=True):
+ yield self._realname(self.newdir, x)
+ else:
+ if self.fs.exists(dirname):
+ for x in self.climb(dirname, files_only=True):
+ in_new = x.startswith(self.newdir)
+ in_deleted = x.startswith(self.deletedir)
+ if not in_new and not in_deleted:
+ yield x
def climb(self, dirname, files_only=False):
basenames = self.fs.listdir(dirname)
@@ -155,7 +214,7 @@ class Journal(object):
all_excludes = [dirname] + exclude
for pathname in self.climb(dirname):
if pathname not in all_excludes:
- r = os.path.join(self.storedir, pathname[len(dirname):])
+ r = self._realname(dirname, pathname)
parent = os.path.dirname(r)
if self.fs.isdir(pathname):
if not self.fs.exists(r):
@@ -169,20 +228,34 @@ class Journal(object):
def rollback(self):
tracing.trace('%s start' % self.storedir)
+ self._require_rw()
if self.fs.exists(self.newdir):
self._clear_directory(self.newdir)
if self.fs.exists(self.deletedir):
- self._vivify(self.deletedir, [])
+ self._clear_directory(self.deletedir)
tracing.trace('%s done' % self.storedir)
+ def _really_delete(self, deletedir):
+ tracing.trace(deletedir)
+ for pathname in self.climb(deletedir, files_only=True):
+ if pathname != deletedir:
+ realname = self._realname(deletedir, pathname)
+ try:
+ self.fs.remove(realname)
+ except OSError, e: # pragma: no cover
+ if e.errno not in (errno.ENOENT, errno.EISDIR):
+ raise
+ self.fs.remove(pathname)
+
def commit(self, skip=[]):
tracing.trace('%s start' % self.storedir)
+ self._require_rw()
if self.fs.exists(self.deletedir):
- self._clear_directory(self.deletedir)
+ self._really_delete(self.deletedir)
if self.fs.exists(self.newdir):
skip = [self._new(x) for x in skip]
diff --git a/larch/journal_tests.py b/larch/journal_tests.py
index 8d55b57..ddb5311 100644
--- a/larch/journal_tests.py
+++ b/larch/journal_tests.py
@@ -27,7 +27,7 @@ class JournalTests(unittest.TestCase):
def setUp(self):
self.tempdir = tempfile.mkdtemp()
self.fs = larch.LocalFS()
- self.j = larch.Journal(self.fs, self.tempdir)
+ self.j = larch.Journal(True, self.fs, self.tempdir)
def tearDown(self):
shutil.rmtree(self.tempdir)
@@ -92,6 +92,13 @@ class JournalTests(unittest.TestCase):
self.j.overwrite_file(filename, 'yo')
self.assertEqual(self.j.cat(filename), 'yo')
+ def test_cat_does_not_find_deleted_file(self):
+ filename = self.join('foo/bar')
+ self.j.overwrite_file(filename, 'bar')
+ self.j.commit()
+ self.j.remove(filename)
+ self.assertRaises(OSError, self.j.cat, filename)
+
def test_rollback_brings_back_old_file(self):
filename = self.join('foo/bar')
self.j.overwrite_file(filename, 'bar')
@@ -162,7 +169,7 @@ class JournalTests(unittest.TestCase):
filename = self.join('foo/bar')
self.j.overwrite_file(filename, 'bar')
- j2 = larch.Journal(self.fs, self.tempdir)
+ j2 = larch.Journal(True, self.fs, self.tempdir)
self.assertFalse(j2.exists(filename))
def test_partial_commit_finished_by_new_instance(self):
@@ -172,6 +179,97 @@ class JournalTests(unittest.TestCase):
self.j.overwrite_file(metadata, '')
self.j.commit(skip=[filename])
- j2 = larch.Journal(self.fs, self.tempdir)
+ j2 = larch.Journal(True, self.fs, self.tempdir)
self.assertTrue(j2.exists(filename))
+
+class ReadOnlyJournalTests(unittest.TestCase):
+
+ def setUp(self):
+ self.tempdir = tempfile.mkdtemp()
+ self.fs = larch.LocalFS()
+ self.rw = larch.Journal(True, self.fs, self.tempdir)
+ self.ro = larch.Journal(False, self.fs, self.tempdir)
+
+ def tearDown(self):
+ shutil.rmtree(self.tempdir)
+
+ def join(self, *args):
+ return os.path.join(self.tempdir, *args)
+
+ def test_does_not_know_random_directory_initially(self):
+ self.assertFalse(self.ro.exists(self.join('foo')))
+
+ def test_creating_directory_raises_error(self):
+ self.assertRaises(larch.ReadOnlyMode, self.ro.makedirs, 'foo')
+
+ def test_calling_rollback_raises_error(self):
+ self.assertRaises(larch.ReadOnlyMode, self.ro.rollback)
+
+ def test_readonly_mode_does_not_check_for_directory_creation(self):
+ dirname = self.join('foo/bar')
+ self.rw.makedirs(dirname)
+ self.assertFalse(self.ro.exists(dirname))
+
+ def test_write_file_raisees_error(self):
+ self.assertRaises(larch.ReadOnlyMode,
+ self.ro.overwrite_file, 'foo', 'bar')
+
+ def test_readonly_mode_does_not_check_for_new_file(self):
+ filename = self.join('foo')
+ self.rw.overwrite_file(filename, 'bar')
+ self.assertFalse(self.ro.exists(filename))
+
+ def test_readonly_mode_does_not_check_for_modified_file(self):
+ filename = self.join('foo')
+ self.rw.overwrite_file(filename, 'first')
+ self.rw.commit()
+ self.assertEqual(self.ro.cat(filename), 'first')
+ self.rw.overwrite_file(filename, 'second')
+ self.assertEqual(self.ro.cat(filename), 'first')
+
+ def test_readonly_mode_does_not_know_file_is_deleted_in_journal(self):
+ filename = self.join('foo/bar')
+ self.rw.overwrite_file(filename, 'bar')
+ self.rw.commit()
+ self.rw.remove(filename)
+ self.assertEqual(self.ro.cat(filename), 'bar')
+
+ def tests_lists_no_files_initially(self):
+ dirname = self.join('foo')
+ self.assertEqual(list(self.ro.list_files(dirname)), [])
+
+ def test_lists_files_correctly_when_no_changes(self):
+ dirname = self.join('foo')
+ filename = self.join('foo/bar')
+ self.rw.overwrite_file(filename, 'bar')
+ self.rw.commit()
+ self.assertEqual(list(self.ro.list_files(dirname)), [filename])
+
+ def test_lists_added_file_correctly(self):
+ dirname = self.join('foo')
+ filename = self.join('foo/bar')
+ self.rw.overwrite_file(filename, 'bar')
+ self.assertEqual(list(self.rw.list_files(dirname)), [filename])
+ self.assertEqual(list(self.ro.list_files(dirname)), [])
+
+ def test_lists_added_file_correctly_when_dir_existed_already(self):
+ dirname = self.join('foo')
+ filename = self.join('foo/bar')
+ filename2 = self.join('foo/foobar')
+ self.rw.overwrite_file(filename, 'bar')
+ self.rw.commit()
+ self.rw.overwrite_file(filename2, 'yoyo')
+ self.assertEqual(sorted(list(self.rw.list_files(dirname))),
+ sorted([filename, filename2]))
+ self.assertEqual(list(self.ro.list_files(dirname)), [filename])
+
+ def test_lists_removed_file_correctly(self):
+ dirname = self.join('foo')
+ filename = self.join('foo/bar')
+ self.rw.overwrite_file(filename, 'bar')
+ self.rw.commit()
+ self.rw.remove(filename)
+ self.assertEqual(list(self.rw.list_files(dirname)), [])
+ self.assertEqual(list(self.ro.list_files(dirname)), [filename])
+
diff --git a/larch/nodestore_disk.py b/larch/nodestore_disk.py
index 01cae30..cece8d4 100644
--- a/larch/nodestore_disk.py
+++ b/larch/nodestore_disk.py
@@ -112,8 +112,8 @@ class NodeStoreDisk(larch.NodeStore):
nodedir = 'nodes'
- def __init__(self, node_size, codec, dirname=None, upload_max=1024,
- lru_size=500, vfs=None, format=None):
+ def __init__(self, allow_writes, node_size, codec, dirname=None,
+ upload_max=1024, lru_size=500, vfs=None, format=None):
tracing.trace('new NodeStoreDisk: %s', dirname)
assert dirname is not None
if format is not None:
@@ -130,7 +130,7 @@ class NodeStoreDisk(larch.NodeStore):
self.upload_queue = larch.UploadQueue(self._really_put_node,
self.upload_max)
self.vfs = vfs if vfs != None else LocalFS()
- self.journal = larch.Journal(self.vfs, dirname)
+ self.journal = larch.Journal(allow_writes, self.vfs, dirname)
self.idpath = larch.IdPath(os.path.join(dirname, self.nodedir),
DIR_DEPTH, DIR_BITS, DIR_SKIP)
@@ -263,7 +263,7 @@ class NodeStoreDisk(larch.NodeStore):
nodedir = os.path.join(self.dirname, self.nodedir)
uploaded = []
if self.journal.exists(nodedir):
- for filename in self.journal.climb(nodedir, files_only=True):
+ for filename in self.journal.list_files(nodedir):
uploaded.append(int(os.path.basename(filename), 16))
return queued + uploaded
diff --git a/larch/nodestore_disk_tests.py b/larch/nodestore_disk_tests.py
index ba21270..7f22c31 100644
--- a/larch/nodestore_disk_tests.py
+++ b/larch/nodestore_disk_tests.py
@@ -35,9 +35,9 @@ class NodeStoreDiskTests(unittest.TestCase, larch.NodeStoreTests):
shutil.rmtree(self.tempdir)
def new_ns(self, format=None):
- return nodestore_disk.NodeStoreDisk(self.node_size, self.codec,
- dirname=self.tempdir,
- format=format)
+ return nodestore_disk.NodeStoreDisk(True, self.node_size, self.codec,
+ dirname=self.tempdir,
+ format=format)
def test_metadata_has_format_version(self):
self.assertEqual(self.ns.get_metadata('format'),