diff options
Diffstat (limited to 'genbackupdatalib')
-rw-r--r-- | genbackupdatalib/__init__.py | 20 | ||||
-rw-r--r-- | genbackupdatalib/generator.py | 62 | ||||
-rw-r--r-- | genbackupdatalib/generator_tests.py | 42 | ||||
-rw-r--r-- | genbackupdatalib/names.py | 75 | ||||
-rw-r--r-- | genbackupdatalib/names_tests.py | 84 |
5 files changed, 283 insertions, 0 deletions
class DataGenerator(object):

    '''Generate a reproducible stream of pseudo-random binary data.

    Two generators created with the same seed return the same byte
    sequence. The data is returned as a byte string (``str`` on
    Python 2, ``bytes`` on Python 3).

    '''

    # We generate data by using a blob of suitable size. The output
    # sequence repeats the blob, where each repetition is preceded by
    # a 64-bit counter.
    #
    # We need to be relatively prime with obnam's chunk size, which
    # defaults to 64 KiB (65536 bytes). This is so that obnam does not
    # notice a lot of duplicated data, resulting in unrealistically
    # high amounts of compression in the backup store.
    #
    # Ideally, we would not generate any repeating data, but the random
    # number generator is not fast enough for that. We need to generate
    # data about as fast as the disk can write it, and the random number
    # generator is orders of magnitude slower than that.
    #
    # An earlier value of 65521 was assigned here and immediately
    # overridden; only the value that actually took effect is kept.

    _blob_size = 1021

    def __init__(self, seed):
        '''Set up a generator whose output depends only on seed.'''
        self._random = random.Random(seed)
        self._blob = self._generate_blob()
        self._counter = 0
        self._buffer = b''

    def _generate_blob(self):
        '''Return _blob_size random bytes drawn from the seeded RNG.'''
        # bytes(bytearray(...)) yields a byte string on both Python 2
        # and Python 3, so it can be concatenated with struct.pack()
        # output on either version.
        return bytes(bytearray(self._random.randint(0, 255)
                               for i in range(self._blob_size)))

    def generate(self, size):
        '''Return the next size bytes of the output stream.

        Data that was generated but not yet returned is kept in an
        internal buffer and handed out first on the next call.

        '''

        # Collect the pieces and join once, rather than growing the
        # buffer with repeated concatenation.
        pieces = [self._buffer]
        available = len(self._buffer)
        while size > available:
            piece = self._generate_more_data()
            pieces.append(piece)
            available += len(piece)
        buffered = b''.join(pieces)
        self._buffer = buffered[size:]
        return buffered[:size]

    def _generate_more_data(self):
        '''Return one repetition: a big-endian 64-bit counter + blob.'''
        self._counter += 1
        return struct.pack('!Q', self._counter) + self._blob
class NameGenerator(object):

    '''Generate names for new output files.

    If the target directory is empty, the sequence of output files is
    always the same for the same parameters.

    A directory structure is also generated. The shape of the tree is
    defined by two parameters: 'max' and 'depth'. 'depth' is the number
    of levels of subdirectories to create, and 'max' is the maximum
    number of files/dirs to allow per output directory. Thus, if max is
    3 and depth is 2, the output files are: 0/0/0, 0/0/1, 0/0/2,
    0/1/0, 0/1/1, etc.

    The number of the top-level directory is not limited by 'max': it
    keeps growing once the levels below it are full.

    If depth is zero, all output files go directly to the target
    directory, and max is ignored.

    '''

    def __init__(self, dirname, depth, max):
        self.dirname = dirname
        self.depth = depth
        self.max = max
        self.counter = 0

    def _path_tuple(self, n):
        '''Return tuple for dir/file numbers for nth output file.

        The last item in the tuple gives the file number, the preceding
        items the directory numbers. Thus, a tuple (1, 2, 3) would
        mean path '1/2/3', but it is given as a tuple for easier
        manipulation.

        '''

        if self.depth == 0:
            return (n,)

        items = []
        for i in range(self.depth):
            # divmod() always does integer (floor) division; a plain
            # '/' would produce floats on Python 3 and corrupt the
            # path components.
            n, remainder = divmod(n, self.max)
            items.append(remainder)
        # Whatever is left over numbers the top-level directory.
        items.append(n)
        items.reverse()
        return tuple(items)

    def _next_candidate_name(self):
        '''Return the path for the current counter, then advance it.'''
        items = self._path_tuple(self.counter)
        self.counter += 1
        return os.path.join(self.dirname, *[str(i) for i in items])

    def new(self):
        '''Return the next generated name that does not exist on disk.'''
        while True:
            name = self._next_candidate_name()
            if not os.path.exists(name):
                return name
class NameGeneratorTests(unittest.TestCase):

    '''Unit tests for genbackupdatalib.NameGenerator.

    Each test gets a fresh temporary target directory and a generator
    configured with depth 2 and max 3.

    '''

    def setUp(self):
        self.tempdir = tempfile.mkdtemp()
        self.depth = 2
        self.max = 3
        self.names = self.new()

    def tearDown(self):
        shutil.rmtree(self.tempdir)

    def new(self):
        '''Return a new generator using the fixture's parameters.'''
        return genbackupdatalib.NameGenerator(self.tempdir, self.depth,
                                              self.max)

    def test_generates_name_that_is_inside_target_directory(self):
        name = self.names.new()
        self.assertTrue(name.startswith(self.tempdir + os.sep))

    def test_generates_different_names_every_time(self):
        names = set(self.names.new() for i in range(10))
        self.assertEqual(len(names), 10)

    def test_generates_names_that_do_not_exist(self):
        for i in range(10):
            name = self.names.new()
            self.assertFalse(os.path.exists(name))

    def test_generates_the_same_sequence_with_every_instance(self):
        n = 10
        first = [self.names.new() for i in range(n)]
        names2 = self.new()
        second = [names2.new() for i in range(n)]
        self.assertEqual(first, second)

    def test_does_not_generate_names_of_existing_files(self):
        name = self.names.new()
        os.makedirs(os.path.dirname(name))
        # Create the file so that a second generator has to skip it.
        with open(name, 'w'):
            pass
        names2 = self.new()
        name2 = names2.new()
        self.assertNotEqual(name, name2)
        self.assertFalse(os.path.exists(name2))

    def test_converts_file_sequence_number_into_right_path_tuple(self):
        self.assertEqual(self.names._path_tuple(0), (0, 0, 0))
        self.assertEqual(self.names._path_tuple(1), (0, 0, 1))
        self.assertEqual(self.names._path_tuple(2), (0, 0, 2))
        self.assertEqual(self.names._path_tuple(3), (0, 1, 0))
        self.assertEqual(self.names._path_tuple(4), (0, 1, 1))
        self.assertEqual(self.names._path_tuple(5), (0, 1, 2))
        self.assertEqual(self.names._path_tuple(6), (0, 2, 0))
        self.assertEqual(self.names._path_tuple(9), (1, 0, 0))
        self.assertEqual(self.names._path_tuple(18), (2, 0, 0))
        # The top-level directory number is allowed to exceed max - 1.
        self.assertEqual(self.names._path_tuple(27), (3, 0, 0))

    def test_returns_1tuple_for_depth_zero(self):
        names = genbackupdatalib.NameGenerator(self.tempdir, 0, 1)
        self.assertEqual(names._path_tuple(42), (42,))