diff options
author | Lars Wirzenius <liw@liw.fi> | 2011-06-10 09:25:06 +0100 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2011-06-10 09:25:06 +0100 |
commit | 13d1c7a27c1766fd83b68d6d51e2331043433f6a (patch) | |
tree | da8a8023c07347e09e286b739279646084011877 | |
parent | e034337e5871c4dc868b5adfcde418fe009cd7ac (diff) | |
download | larch-13d1c7a27c1766fd83b68d6d51e2331043433f6a.tar.gz |
Move separate LRU cache implementation into larch.
PyPI has a bunch of other implementations, so there's no
point in my adding more. On the other hand, I don't want
to rely on those, since I'd have to adapt larch code to
use them. I can do that later, to reduce the amount of
code I have in larch.
-rw-r--r-- | doc/index.rst | 1 | ||||
-rw-r--r-- | doc/lru.rst | 7 | ||||
-rw-r--r-- | larch/__init__.py | 1 | ||||
-rw-r--r-- | larch/lru.py | 138 | ||||
-rw-r--r-- | larch/lru_tests.py | 111 | ||||
-rw-r--r-- | larch/nodestore_disk.py | 3 | ||||
-rw-r--r-- | larch/nodestore_disk_tests.py | 3 | ||||
-rw-r--r-- | larch/refcountstore.py | 1 | ||||
-rw-r--r-- | larch/refcountstore_tests.py | 1 | ||||
-rw-r--r-- | larch/uploadqueue.py | 3 | ||||
-rw-r--r-- | larch/uploadqueue_tests.py | 1 |
11 files changed, 261 insertions, 9 deletions
diff --git a/doc/index.rst b/doc/index.rst index 8f7974d..6b257b6 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -37,6 +37,7 @@ available as ``larch.bar``. nodestore refcountstore uploadqueue + lru Quick start diff --git a/doc/lru.rst b/doc/lru.rst new file mode 100644 index 0000000..82a23fd --- /dev/null +++ b/doc/lru.rst @@ -0,0 +1,7 @@ +Least recently used object cache +================================ + +.. automodule:: larch.lru + :members: + :undoc-members: + diff --git a/larch/__init__.py b/larch/__init__.py index 0c561d0..c34188e 100644 --- a/larch/__init__.py +++ b/larch/__init__.py @@ -24,6 +24,7 @@ from forest import Forest, open_forest, BadKeySize, BadNodeSize from nodestore import (NodeStore, NodeStoreTests, NodeMissing, NodeTooBig, NodeExists, NodeCannotBeModified) from refcountstore import RefcountStore +from lru import LRUCache from uploadqueue import UploadQueue from nodestore_disk import NodeStoreDisk, LocalFS from nodestore_memory import NodeStoreMemory diff --git a/larch/lru.py b/larch/lru.py new file mode 100644 index 0000000..5531615 --- /dev/null +++ b/larch/lru.py @@ -0,0 +1,138 @@ +# Copyright 2010 Lars Wirzenius, Richard Braakman +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +import heapq +import logging + + +class LRUCache(object): + + '''A least-recently-used cache. + + This class caches objects, based on keys. The cache has a fixed size, + in number of objects. When a new object is added to the cache, the + least recently used old object is dropped. Each object is associated + with a key, and use is defined as retrieval of the object using the key. + + Two hooks are provided for: for removing an object by user request, + and when it is automatically removed due to cache overflow. Either + hook is called with the key and object as arguments. + + ''' + + def __init__(self, max_size, remove_hook=None, forget_hook=None): + self.max_size = max_size + # Together, obj_before and obj_after form a random access + # double-linked sequence. None used as the sentinel on both ends. + self.obj_before = dict() + self.obj_after = dict() + self.obj_before[None] = None + self.obj_after[None] = None + self.ids = dict() # maps key to object + self.objs = dict() # maps object to key + self.remove_hook = remove_hook + self.forget_hook = forget_hook + self.hits = 0 + self.misses = 0 + + def __del__(self): + logging.info('LRUCache %s: hits=%s misses=%s' % + (self, self.hits, self.misses)) + + def __len__(self): + return len(self.ids) + + def keys(self): + '''List keys for objects in cache.''' + return self.ids.keys() + + def add(self, key, obj): + '''Add new item to cache.''' + if key in self.ids: + self.remove(key) + before = self.obj_before[None] + self.obj_before[None] = obj + self.obj_before[obj] = before + self.obj_after[before] = obj + self.obj_after[obj] = None + self.ids[key] = obj + self.objs[obj] = key + while len(self.ids) > self.max_size: + self._forget_oldest() + + def _forget_oldest(self): + obj = self.obj_after[None] + key = self.objs[obj] + self._remove(key) + if self.forget_hook: + self.forget_hook(key, obj) + + def _remove(self, key): + obj = self.ids[key] + before = self.obj_before[obj] + after = self.obj_after[obj] + self.obj_before[after] = before + self.obj_after[before] = after + del self.obj_before[obj] + del self.obj_after[obj] + del self.ids[key] + del self.objs[obj] + + def get(self, key): + '''Retrieve item from cache. + + Return object associated with key, or None. + + ''' + + if key in self.ids: + self.hits += 1 + obj = self.ids[key] + self.remove(key) + self.add(key, obj) + return obj + else: + self.misses += 1 + return None + + def remove(self, key): + '''Remove an item from the cache. + + Return True if item was in cache, False otherwise. + + ''' + + if key in self.ids: + obj = self.ids[key] + self._remove(key) + if self.remove_hook: + self.remove_hook(key, obj) + return True + else: + return False + + def remove_oldest(self): + '''Remove oldest object. + + Return key and object. + + ''' + + obj = self.obj_after[None] + key = self.objs[obj] + self.remove(key) + return key, obj + diff --git a/larch/lru_tests.py b/larch/lru_tests.py new file mode 100644 index 0000000..c749ff9 --- /dev/null +++ b/larch/lru_tests.py @@ -0,0 +1,111 @@ +# Copyright 2010 Lars Wirzenius +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +import unittest + +import larch + + +class LRUCacheTests(unittest.TestCase): + + def setUp(self): + self.cache = larch.LRUCache(4) + self.cache.remove_hook = self.remove_hook + self.cache.forget_hook = self.forget_hook + self.removed = [] + self.forgotten = [] + + def remove_hook(self, key, obj): + self.removed.append((key, obj)) + + def forget_hook(self, key, obj): + self.forgotten.append((key, obj)) + + def test_does_not_have_remove_hook_initially(self): + cache = larch.LRUCache(4) + self.assertEqual(cache.remove_hook, None) + + def test_sets_remove_hook_via_init(self): + cache = larch.LRUCache(4, remove_hook=self.remove_hook) + self.assertEqual(cache.remove_hook, self.remove_hook) + + def test_does_not_have_forget_hook_initially(self): + cache = larch.LRUCache(4) + self.assertEqual(cache.forget_hook, None) + + def test_sets_forget_hook_via_init(self): + cache = larch.LRUCache(4, forget_hook=self.forget_hook) + self.assertEqual(cache.forget_hook, self.forget_hook) + + def test_does_not_contain_object_initially(self): + self.assertEqual(self.cache.get('foo'), None) + + def test_does_contain_object_after_it_is_added(self): + self.cache.add('foo', 'bar') + self.assertEqual(self.cache.get('foo'), 'bar') + + def test_oldest_object_dropped_first(self): + for i in range(self.cache.max_size + 1): + self.cache.add(i, i) + self.assertEqual(self.cache.get(0), None) + self.assertEqual(self.forgotten, [(0, 0)]) + for i in range(1, self.cache.max_size + 1): + self.assertEqual(self.cache.get(i), i) + + def test_getting_object_prevents_it_from_being_dropped(self): + for i in range(self.cache.max_size + 1): + self.cache.add(i, i) + self.cache.get(0) + self.assertEqual(self.cache.get(1), None) + self.assertEqual(self.forgotten, [(1, 1)]) + for i in [0] + range(2, self.cache.max_size + 1): + self.assertEqual(self.cache.get(i), i) + + def test_adding_key_twice_changes_object(self): + self.cache.add('foo', 'foo') + self.cache.add('foo', 'bar') + self.assertEqual(self.cache.get('foo'), 'bar') + + def test_removes_object(self): + self.cache.add('foo', 'bar') + gotit = self.cache.remove('foo') + self.assertEqual(gotit, True) + self.assertEqual(self.cache.get('foo'), None) + self.assertEqual(self.removed, [('foo', 'bar')]) + + def test_remove_returns_False_for_unknown_object(self): + self.assertEqual(self.cache.remove('foo'), False) + + def test_removes_oldest_object(self): + self.cache.add(0, 0) + self.cache.add(1, 1) + self.assertEqual(self.cache.remove_oldest(), (0, 0)) + self.assertEqual(self.cache.get(0), None) + + def test_length_is_initially_zero(self): + self.assertEqual(len(self.cache), 0) + + def test_length_is_correct_after_adds(self): + self.cache.add(0, 0) + self.assertEqual(len(self.cache), 1) + + def test_has_initially_no_keys(self): + self.assertEqual(self.cache.keys(), []) + + def test_has_keys_after_add(self): + self.cache.add(0, 1) + self.assertEqual(self.cache.keys(), [0]) + diff --git a/larch/nodestore_disk.py b/larch/nodestore_disk.py index 3dd7e3d..ca04262 100644 --- a/larch/nodestore_disk.py +++ b/larch/nodestore_disk.py @@ -16,7 +16,6 @@ import ConfigParser import logging -import lru import os import StringIO import struct @@ -88,7 +87,7 @@ class NodeStoreDisk(larch.NodeStore): self.metadata_name = os.path.join(dirname, 'metadata') self.metadata = None self.rs = larch.RefcountStore(self) - self.cache = lru.LRUCache(lru_size) + self.cache = larch.LRUCache(lru_size) self.upload_max = upload_max self.upload_queue = larch.UploadQueue(self._really_put_node, self.upload_max) diff --git a/larch/nodestore_disk_tests.py b/larch/nodestore_disk_tests.py index 8da2063..ac24996 100644 --- a/larch/nodestore_disk_tests.py +++ b/larch/nodestore_disk_tests.py @@ -14,7 +14,6 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. -import lru import os import shutil import tempfile @@ -68,7 +67,7 @@ class NodeStoreDiskTests(unittest.TestCase, larch.NodeStoreTests): def test_puts_and_gets_same_with_cache_emptied(self): node = larch.LeafNode(0, [], []) self.ns.put_node(node) - self.ns.cache = lru.LRUCache(100) + self.ns.cache = larch.LRUCache(100) self.assertEqualNodes(self.ns.get_node(0), node) def test_put_uploads_queue_overflow(self): diff --git a/larch/refcountstore.py b/larch/refcountstore.py index 73ba654..aaf01f0 100644 --- a/larch/refcountstore.py +++ b/larch/refcountstore.py @@ -16,7 +16,6 @@ import ConfigParser import logging -import lru import os import StringIO import struct diff --git a/larch/refcountstore_tests.py b/larch/refcountstore_tests.py index f631856..9f5c869 100644 --- a/larch/refcountstore_tests.py +++ b/larch/refcountstore_tests.py @@ -14,7 +14,6 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. -import lru import os import shutil import tempfile diff --git a/larch/uploadqueue.py b/larch/uploadqueue.py index 64abb4a..cacf43a 100644 --- a/larch/uploadqueue.py +++ b/larch/uploadqueue.py @@ -16,7 +16,6 @@ import ConfigParser import logging -import lru import os import StringIO import struct @@ -44,7 +43,7 @@ class UploadQueue(object): def __init__(self, really_put, max_length): self.really_put = really_put - self.lru = lru.LRUCache(max_length, forget_hook=self._push_oldest) + self.lru = larch.LRUCache(max_length, forget_hook=self._push_oldest) def put(self, node): '''Put a node into the queue.''' diff --git a/larch/uploadqueue_tests.py b/larch/uploadqueue_tests.py index 8faecb5..70bee5c 100644 --- a/larch/uploadqueue_tests.py +++ b/larch/uploadqueue_tests.py @@ -14,7 +14,6 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. -import lru import os import shutil import tempfile |