summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Wirzenius <liw@liw.fi>2011-06-10 09:25:06 +0100
committerLars Wirzenius <liw@liw.fi>2011-06-10 09:25:06 +0100
commit13d1c7a27c1766fd83b68d6d51e2331043433f6a (patch)
treeda8a8023c07347e09e286b739279646084011877
parente034337e5871c4dc868b5adfcde418fe009cd7ac (diff)
downloadlarch-13d1c7a27c1766fd83b68d6d51e2331043433f6a.tar.gz
Move separate LRU cache implementation into larch.
PyPI has a bunch of other implementations, so there's no point in my adding more. On the other hand, I don't want to rely on those, since I'd have to adapt larch code to use them. I can do that later, to reduce the amount of code I have in larch.
-rw-r--r--doc/index.rst1
-rw-r--r--doc/lru.rst7
-rw-r--r--larch/__init__.py1
-rw-r--r--larch/lru.py138
-rw-r--r--larch/lru_tests.py111
-rw-r--r--larch/nodestore_disk.py3
-rw-r--r--larch/nodestore_disk_tests.py3
-rw-r--r--larch/refcountstore.py1
-rw-r--r--larch/refcountstore_tests.py1
-rw-r--r--larch/uploadqueue.py3
-rw-r--r--larch/uploadqueue_tests.py1
11 files changed, 261 insertions, 9 deletions
diff --git a/doc/index.rst b/doc/index.rst
index 8f7974d..6b257b6 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -37,6 +37,7 @@ available as ``larch.bar``.
nodestore
refcountstore
uploadqueue
+ lru
Quick start
diff --git a/doc/lru.rst b/doc/lru.rst
new file mode 100644
index 0000000..82a23fd
--- /dev/null
+++ b/doc/lru.rst
@@ -0,0 +1,7 @@
+Least recently used object cache
+================================
+
+.. automodule:: larch.lru
+ :members:
+ :undoc-members:
+
diff --git a/larch/__init__.py b/larch/__init__.py
index 0c561d0..c34188e 100644
--- a/larch/__init__.py
+++ b/larch/__init__.py
@@ -24,6 +24,7 @@ from forest import Forest, open_forest, BadKeySize, BadNodeSize
from nodestore import (NodeStore, NodeStoreTests, NodeMissing, NodeTooBig,
NodeExists, NodeCannotBeModified)
from refcountstore import RefcountStore
+from lru import LRUCache
from uploadqueue import UploadQueue
from nodestore_disk import NodeStoreDisk, LocalFS
from nodestore_memory import NodeStoreMemory
diff --git a/larch/lru.py b/larch/lru.py
new file mode 100644
index 0000000..5531615
--- /dev/null
+++ b/larch/lru.py
@@ -0,0 +1,138 @@
+# Copyright 2010 Lars Wirzenius, Richard Braakman
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+import heapq
+import logging
+
+
+class LRUCache(object):
+
+ '''A least-recently-used cache.
+
+ This class caches objects, based on keys. The cache has a fixed size,
+ in number of objects. When a new object is added to the cache, the
+ least recently used old object is dropped. Each object is associated
+ with a key, and use is defined as retrieval of the object using the key.
+
+ Two hooks are provided for: for removing an object by user request,
+ and when it is automatically removed due to cache overflow. Either
+ hook is called with the key and object as arguments.
+
+ '''
+
+ def __init__(self, max_size, remove_hook=None, forget_hook=None):
+ self.max_size = max_size
+ # Together, obj_before and obj_after form a random access
+ # double-linked sequence. None used as the sentinel on both ends.
+ self.obj_before = dict()
+ self.obj_after = dict()
+ self.obj_before[None] = None
+ self.obj_after[None] = None
+ self.ids = dict() # maps key to object
+ self.objs = dict() # maps object to key
+ self.remove_hook = remove_hook
+ self.forget_hook = forget_hook
+ self.hits = 0
+ self.misses = 0
+
+ def __del__(self):
+ logging.info('LRUCache %s: hits=%s misses=%s' %
+ (self, self.hits, self.misses))
+
+ def __len__(self):
+ return len(self.ids)
+
+ def keys(self):
+ '''List keys for objects in cache.'''
+ return self.ids.keys()
+
+ def add(self, key, obj):
+ '''Add new item to cache.'''
+ if key in self.ids:
+ self.remove(key)
+ before = self.obj_before[None]
+ self.obj_before[None] = obj
+ self.obj_before[obj] = before
+ self.obj_after[before] = obj
+ self.obj_after[obj] = None
+ self.ids[key] = obj
+ self.objs[obj] = key
+ while len(self.ids) > self.max_size:
+ self._forget_oldest()
+
+ def _forget_oldest(self):
+ obj = self.obj_after[None]
+ key = self.objs[obj]
+ self._remove(key)
+ if self.forget_hook:
+ self.forget_hook(key, obj)
+
+ def _remove(self, key):
+ obj = self.ids[key]
+ before = self.obj_before[obj]
+ after = self.obj_after[obj]
+ self.obj_before[after] = before
+ self.obj_after[before] = after
+ del self.obj_before[obj]
+ del self.obj_after[obj]
+ del self.ids[key]
+ del self.objs[obj]
+
+ def get(self, key):
+ '''Retrieve item from cache.
+
+ Return object associated with key, or None.
+
+ '''
+
+ if key in self.ids:
+ self.hits += 1
+ obj = self.ids[key]
+ self.remove(key)
+ self.add(key, obj)
+ return obj
+ else:
+ self.misses += 1
+ return None
+
+ def remove(self, key):
+ '''Remove an item from the cache.
+
+ Return True if item was in cache, False otherwise.
+
+ '''
+
+ if key in self.ids:
+ obj = self.ids[key]
+ self._remove(key)
+ if self.remove_hook:
+ self.remove_hook(key, obj)
+ return True
+ else:
+ return False
+
+ def remove_oldest(self):
+ '''Remove oldest object.
+
+ Return key and object.
+
+ '''
+
+ obj = self.obj_after[None]
+ key = self.objs[obj]
+ self.remove(key)
+ return key, obj
+
diff --git a/larch/lru_tests.py b/larch/lru_tests.py
new file mode 100644
index 0000000..c749ff9
--- /dev/null
+++ b/larch/lru_tests.py
@@ -0,0 +1,111 @@
+# Copyright 2010 Lars Wirzenius
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+import unittest
+
+import larch
+
+
+class LRUCacheTests(unittest.TestCase):
+
+ def setUp(self):
+ self.cache = larch.LRUCache(4)
+ self.cache.remove_hook = self.remove_hook
+ self.cache.forget_hook = self.forget_hook
+ self.removed = []
+ self.forgotten = []
+
+ def remove_hook(self, key, obj):
+ self.removed.append((key, obj))
+
+ def forget_hook(self, key, obj):
+ self.forgotten.append((key, obj))
+
+ def test_does_not_have_remove_hook_initially(self):
+ cache = larch.LRUCache(4)
+ self.assertEqual(cache.remove_hook, None)
+
+ def test_sets_remove_hook_via_init(self):
+ cache = larch.LRUCache(4, remove_hook=self.remove_hook)
+ self.assertEqual(cache.remove_hook, self.remove_hook)
+
+ def test_does_not_have_forget_hook_initially(self):
+ cache = larch.LRUCache(4)
+ self.assertEqual(cache.forget_hook, None)
+
+ def test_sets_forget_hook_via_init(self):
+ cache = larch.LRUCache(4, forget_hook=self.forget_hook)
+ self.assertEqual(cache.forget_hook, self.forget_hook)
+
+ def test_does_not_contain_object_initially(self):
+ self.assertEqual(self.cache.get('foo'), None)
+
+ def test_does_contain_object_after_it_is_added(self):
+ self.cache.add('foo', 'bar')
+ self.assertEqual(self.cache.get('foo'), 'bar')
+
+ def test_oldest_object_dropped_first(self):
+ for i in range(self.cache.max_size + 1):
+ self.cache.add(i, i)
+ self.assertEqual(self.cache.get(0), None)
+ self.assertEqual(self.forgotten, [(0, 0)])
+ for i in range(1, self.cache.max_size + 1):
+ self.assertEqual(self.cache.get(i), i)
+
+ def test_getting_object_prevents_it_from_being_dropped(self):
+ for i in range(self.cache.max_size + 1):
+ self.cache.add(i, i)
+ self.cache.get(0)
+ self.assertEqual(self.cache.get(1), None)
+ self.assertEqual(self.forgotten, [(1, 1)])
+ for i in [0] + range(2, self.cache.max_size + 1):
+ self.assertEqual(self.cache.get(i), i)
+
+ def test_adding_key_twice_changes_object(self):
+ self.cache.add('foo', 'foo')
+ self.cache.add('foo', 'bar')
+ self.assertEqual(self.cache.get('foo'), 'bar')
+
+ def test_removes_object(self):
+ self.cache.add('foo', 'bar')
+ gotit = self.cache.remove('foo')
+ self.assertEqual(gotit, True)
+ self.assertEqual(self.cache.get('foo'), None)
+ self.assertEqual(self.removed, [('foo', 'bar')])
+
+ def test_remove_returns_False_for_unknown_object(self):
+ self.assertEqual(self.cache.remove('foo'), False)
+
+ def test_removes_oldest_object(self):
+ self.cache.add(0, 0)
+ self.cache.add(1, 1)
+ self.assertEqual(self.cache.remove_oldest(), (0, 0))
+ self.assertEqual(self.cache.get(0), None)
+
+ def test_length_is_initially_zero(self):
+ self.assertEqual(len(self.cache), 0)
+
+ def test_length_is_correct_after_adds(self):
+ self.cache.add(0, 0)
+ self.assertEqual(len(self.cache), 1)
+
+ def test_has_initially_no_keys(self):
+ self.assertEqual(self.cache.keys(), [])
+
+ def test_has_keys_after_add(self):
+ self.cache.add(0, 1)
+ self.assertEqual(self.cache.keys(), [0])
+
diff --git a/larch/nodestore_disk.py b/larch/nodestore_disk.py
index 3dd7e3d..ca04262 100644
--- a/larch/nodestore_disk.py
+++ b/larch/nodestore_disk.py
@@ -16,7 +16,6 @@
import ConfigParser
import logging
-import lru
import os
import StringIO
import struct
@@ -88,7 +87,7 @@ class NodeStoreDisk(larch.NodeStore):
self.metadata_name = os.path.join(dirname, 'metadata')
self.metadata = None
self.rs = larch.RefcountStore(self)
- self.cache = lru.LRUCache(lru_size)
+ self.cache = larch.LRUCache(lru_size)
self.upload_max = upload_max
self.upload_queue = larch.UploadQueue(self._really_put_node,
self.upload_max)
diff --git a/larch/nodestore_disk_tests.py b/larch/nodestore_disk_tests.py
index 8da2063..ac24996 100644
--- a/larch/nodestore_disk_tests.py
+++ b/larch/nodestore_disk_tests.py
@@ -14,7 +14,6 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-import lru
import os
import shutil
import tempfile
@@ -68,7 +67,7 @@ class NodeStoreDiskTests(unittest.TestCase, larch.NodeStoreTests):
def test_puts_and_gets_same_with_cache_emptied(self):
node = larch.LeafNode(0, [], [])
self.ns.put_node(node)
- self.ns.cache = lru.LRUCache(100)
+ self.ns.cache = larch.LRUCache(100)
self.assertEqualNodes(self.ns.get_node(0), node)
def test_put_uploads_queue_overflow(self):
diff --git a/larch/refcountstore.py b/larch/refcountstore.py
index 73ba654..aaf01f0 100644
--- a/larch/refcountstore.py
+++ b/larch/refcountstore.py
@@ -16,7 +16,6 @@
import ConfigParser
import logging
-import lru
import os
import StringIO
import struct
diff --git a/larch/refcountstore_tests.py b/larch/refcountstore_tests.py
index f631856..9f5c869 100644
--- a/larch/refcountstore_tests.py
+++ b/larch/refcountstore_tests.py
@@ -14,7 +14,6 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-import lru
import os
import shutil
import tempfile
diff --git a/larch/uploadqueue.py b/larch/uploadqueue.py
index 64abb4a..cacf43a 100644
--- a/larch/uploadqueue.py
+++ b/larch/uploadqueue.py
@@ -16,7 +16,6 @@
import ConfigParser
import logging
-import lru
import os
import StringIO
import struct
@@ -44,7 +43,7 @@ class UploadQueue(object):
def __init__(self, really_put, max_length):
self.really_put = really_put
- self.lru = lru.LRUCache(max_length, forget_hook=self._push_oldest)
+ self.lru = larch.LRUCache(max_length, forget_hook=self._push_oldest)
def put(self, node):
'''Put a node into the queue.'''
diff --git a/larch/uploadqueue_tests.py b/larch/uploadqueue_tests.py
index 8faecb5..70bee5c 100644
--- a/larch/uploadqueue_tests.py
+++ b/larch/uploadqueue_tests.py
@@ -14,7 +14,6 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-import lru
import os
import shutil
import tempfile