diff options
author | Lars Wirzenius <liw@liw.fi> | 2015-11-01 17:12:50 +0200 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2015-11-01 17:12:50 +0200 |
commit | 1cacf69c7e969688e5edfc8a418dd8c7e9f6bcbd (patch) | |
tree | d0f6c6cf668145611a7d68e1d7268fa8a54d197e | |
parent | 310ce5cd9a8e49826f306a21856d4f33a393ac1f (diff) | |
download | obnam-1cacf69c7e969688e5edfc8a418dd8c7e9f6bcbd.tar.gz |
Encode a list of strings specially
Dict keys are strings; handling them specially makes things go
a bit faster. ./serialise-speed went from 71/s to 97/s.
-rw-r--r-- | obnamlib/obj_serialiser.py | 44 |
1 files changed, 38 insertions, 6 deletions
diff --git a/obnamlib/obj_serialiser.py b/obnamlib/obj_serialiser.py
index 3bcdb470..0eb70b5a 100644
--- a/obnamlib/obj_serialiser.py
+++ b/obnamlib/obj_serialiser.py
@@ -146,22 +146,54 @@ def _next_object(pos, length):
 # Dicts.
 
 def _serialise_dict(obj):
-    pairs = ''.join(
-        _serialise_str(key) + serialise_object(value)
-        for key, value in obj.iteritems())
-    return _DICT + _serialise_length(len(pairs)) + pairs
+    keys = obj.keys()
+    str_keys = [key for key in keys if type(obj[key]) is str]
+    other_keys = [key for key in keys if key not in str_keys]
+    parts = []
+    parts.append(_serialise_str_list(str_keys))
+    parts.append(_serialise_str_list([obj[key] for key in str_keys]))
+    parts.append(_serialise_str_list(other_keys))
+    for key in other_keys:
+        parts.append(serialise_object(obj[key]))
+
+    encoded = ''.join(parts)
+    return _DICT + _serialise_length(len(encoded)) + encoded
 
 
 def _deserialise_dict(serialised):
     result = {}
     pos = 0
-    while pos < len(serialised):
-        key, pos = _deserialise_prefix(serialised, pos)
+
+    str_keys, pos = _deserialise_str_list(serialised, pos)
+    str_values, pos = _deserialise_str_list(serialised, pos)
+    result.update(zip(str_keys, str_values))
+
+    other_keys, pos = _deserialise_str_list(serialised, pos)
+    for key in other_keys:
         value, pos = _deserialise_prefix(serialised, pos)
         result[key] = value
+
     return result
 
 
+def _serialise_str_list(strings):
+    n = len(strings)
+    encoded_lengths = struct.pack('!' + 'Q' * n, *[len(s) for s in strings])
+    return _serialise_integer(n) + encoded_lengths + ''.join(strings)
+
+
+def _deserialise_str_list(serialised, pos):
+    n, pos = _deserialise_prefix(serialised, pos)
+    int_size = struct.calcsize('!Q')
+    lengths = struct.unpack('!' + 'Q' * n, serialised[pos:pos + n*int_size])
+    pos += n * int_size
+    strings = []
+    for i in range(n):
+        strings.append(serialised[pos:pos+lengths[i]])
+        pos += lengths[i]
+    return strings, pos
+
+
 def _deserialise_prefix(serialised, pos):
     length = _extract_length(serialised, pos)
     end = _next_object(pos, length)