summaryrefslogtreecommitdiff
path: root/larch/nodes.py
blob: e480fff81f2706e681d27385a850f41394ae79db (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# Copyright 2010  Lars Wirzenius
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import bisect

import larch


class FrozenNode(larch.Error):

    '''User tried to modify node that is frozen.'''

    def __init__(self, node):
        self.msg = 'Node %s is frozen against modifications' % node.id


class Node(object):

    '''Abstract base class for index and leaf nodes.
    
    A node may be initialized with a list of (key, value) pairs. For
    leaf nodes, the values are the actual values. For index nodes, they
    are references to other nodes.
    
    A node can be indexed using keys, and give the corresponding value.
    Setting key/value pairs cannot be done using indexing. However,
    ``key in node`` does work, as does iteration over a key's values.
    ``len(node)`` returns the number if keys.
    
    Two nodes compare equal if they have the same key/value pairs.
    The node ids do not need to match.
    
    Nodes can be modified, bt only if the ``frozen`` property is false.
    If it is set to true, any attempt at modifying the node causes
    the ``FrozenNode`` exception to be raised.
    
    '''

    def __init__(self, node_id, keys, values):
        self._keys = list(keys)
        self._values = list(values)
        self._dict = dict()
        for i in range(len(keys)):
            self._dict[keys[i]] = values[i]
        self.id = node_id
        self.size = None
        self.frozen = False

    def __getitem__(self, key):
        return self._dict[key]

    def __contains__(self, key):
        return key in self._dict

    def __eq__(self, other):
        return self._keys == other._keys and self._values == other._values

    def __iter__(self):
        for key in self._keys:
            yield key

    def __len__(self):
        return len(self._keys)

    def __nonzero__(self):
        return True

    def keys(self):
        '''Return keys in the node, sorted.'''
        return self._keys

    def values(self):
        '''Return value in the node, in same order as keys.'''
        return self._values

    def first_key(self):
        '''Return smallest key in the node.'''
        return self._keys[0]

    def find_potential_range(self, minkey, maxkey):
        '''Find pairs whose key is in desired range.

        ``minkey`` and ``maxkey`` are inclusive.

        We take into account that for index nodes, a child's key
        really represents a range of keys, from the key up to (but
        not including) the next child's key. The last child's key
        represents a range up to infinity.

        Thus we return the first child, if its key lies between
        ``minkey`` and ``maxkey``, and the last child, if its key is at most
        ``maxkey``.

        '''
        
        def helper(key, default):
            x = bisect.bisect_left(self._keys, key)
            if x < len(self._keys):
                if self._keys[x] > key:
                    if x == 0:
                        x = default
                    else:
                        x -= 1
            else:
                if x == 0:
                    x = None
                else:
                    x -= 1
            return x

        i = helper(minkey, 0)
        j = helper(maxkey, None)
        if j is None:
            i = None

        return i, j

    def _error_if_frozen(self):
        if self.frozen:
            raise FrozenNode(self)

    def add(self, key, value):
        '''Insert a key/value pair into the right place in a node.'''

        self._error_if_frozen()

        i = bisect.bisect_left(self._keys, key)
        if i < len(self._keys) and self._keys[i] == key:
            self._keys[i] = key
            self._values[i] = value
        else:
            self._keys.insert(i, key)
            self._values.insert(i, value)

        self._dict[key] = value
        self.size = None

    def remove(self, key):
        '''Remove a key from the node.
        
        Raise KeyError if key does not exist in node.
        
        '''
        
        self._error_if_frozen()

        i = bisect.bisect_left(self._keys, key)
        if i >= len(self._keys) or self._keys[i] != key:
            raise KeyError(key)
        del self._keys[i]
        del self._values[i]
        del self._dict[key]
        self.size = None
        
    def remove_index_range(self, lo, hi):
        '''Remove keys given a range of indexes into pairs.
        
        lo and hi are inclusive.
        
        '''
        
        self._error_if_frozen()

        del self._keys[lo:hi+1]
        del self._values[lo:hi+1]
        self.size = None


class LeafNode(Node):

    '''Leaf node in the tree.
    
    A leaf node contains key/value pairs (both strings), and has no children.
    
    '''

    def find_keys_in_range(self, minkey, maxkey):
        '''Find pairs whose key is in desired range.
        
        ``minkey`` and ``maxkey`` are inclusive.
        
        '''
        
        i = bisect.bisect_left(self._keys, minkey)
        j = bisect.bisect_left(self._keys, maxkey)
        if j < len(self._keys) and self._keys[j] == maxkey:
            j += 1
        return self._keys[i:j]


class IndexNode(Node):

    '''Index node in the tree.
    
    An index node contains pairs of keys and references to other nodes
    (node ids, which are integers).
    The other nodes may be either index nodes or leaf nodes.
    
    '''

    def find_key_for_child_containing(self, key):
        '''Return key for the child that contains ``key``.'''

        i = bisect.bisect_left(self._keys, key)
        if i < len(self._keys):
            if self._keys[i] == key:
                return key
            elif i:
                return self._keys[i-1]
        elif i:
            return self._keys[i-1]

    def find_children_in_range(self, minkey, maxkey):
        '''Find all children whose key is in the range.
        
        ``minkey`` and ``maxkey`` are inclusive. Note that a child might
        be returned even if not all of its keys are in the range,
        just some of them. Also, we consider potential keys here,
        not actual keys. We have no way to retrieve the children
        to check which keys they actually have, so instead we
        return which keys might have the desired keys, and the
        caller can go look at those.
        
        '''
        
        i, j = self.find_potential_range(minkey, maxkey)
        if i is not None and j is not None:
            return self._values[i:j+1]
        else:
            return []