summary refs log tree commit diff
path: root/genbackupdatalib
diff options
context:
space:
mode:
Diffstat (limited to 'genbackupdatalib')
-rw-r--r-- genbackupdatalib/__init__.py 20
-rw-r--r-- genbackupdatalib/generator.py 62
-rw-r--r-- genbackupdatalib/generator_tests.py 42
-rw-r--r-- genbackupdatalib/names.py 75
-rw-r--r-- genbackupdatalib/names_tests.py 84
5 files changed, 283 insertions, 0 deletions
diff --git a/genbackupdatalib/__init__.py b/genbackupdatalib/__init__.py
new file mode 100644
index 0000000..b7771db
--- /dev/null
+++ b/genbackupdatalib/__init__.py
@@ -0,0 +1,20 @@
+# Copyright 2010 Lars Wirzenius
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+version = '1.3'
+
+from generator import DataGenerator
+from names import NameGenerator
diff --git a/genbackupdatalib/generator.py b/genbackupdatalib/generator.py
new file mode 100644
index 0000000..8cf349c
--- /dev/null
+++ b/genbackupdatalib/generator.py
@@ -0,0 +1,62 @@
+# Copyright 2010 Lars Wirzenius
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+import random
+import struct
+
+
+class DataGenerator(object):
+
+ '''Generate random binary data.'''
+
+ # We generate data by using a blob of suitable size. The output
+ # sequence repeats the blob, where each repetition is preceded by
+ # a 64-bit counter.
+ #
+ # We need to be relatively prime with obnam's chunk size, which
+ # defaults to 64 KiB (65536 bytes). This is so that obnam does not
+ # notice a lot of duplicated data, resulting in unrealistically
+ # high amounts of compression in the backup store.
+ #
+ # Ideally, we would not generate any repeating data, but the random
+ # number generator is not fast enough for that. We need to generate
+ # data about as fast as the disk can write it, and the random number
+ # generator is orders of magnitude slower than that.
+
+ _blob_size = 65521
+ _blob_size = 1021
+
+ def __init__(self, seed):
+ self._random = random.Random(seed)
+ self._blob = self._generate_blob()
+ self._counter = 0
+ self._buffer = ''
+
+ def _generate_blob(self):
+ return ''.join(chr(self._random.randint(0, 255))
+ for i in range(self._blob_size))
+
+ def generate(self, size):
+ while size > len(self._buffer):
+ self._buffer += self._generate_more_data()
+ data = self._buffer[:size]
+ self._buffer = self._buffer[size:]
+ return data
+
+ def _generate_more_data(self):
+ self._counter += 1
+ return struct.pack('!Q', self._counter) + self._blob
+
diff --git a/genbackupdatalib/generator_tests.py b/genbackupdatalib/generator_tests.py
new file mode 100644
index 0000000..80d12b4
--- /dev/null
+++ b/genbackupdatalib/generator_tests.py
@@ -0,0 +1,42 @@
+# Copyright 2010 Lars Wirzenius
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+import unittest
+
+import genbackupdatalib
+
+
+class DataGeneratorTests(unittest.TestCase):
+
+ def setUp(self):
+ self.g1 = genbackupdatalib.DataGenerator(0)
+ self.g2 = genbackupdatalib.DataGenerator(0)
+
+ def test_every_generator_returns_same_sequence(self):
+ amount = 1024
+ self.assertEqual(self.g1.generate(amount), self.g2.generate(amount))
+
+ def test_returns_different_sequence_for_different_seed(self):
+ amount = 1024
+ g3 = genbackupdatalib.DataGenerator(1)
+ self.assertNotEqual(self.g1.generate(amount), g3.generate(amount))
+
+ def test_returns_distinct_64k_chunks(self):
+ size = 64 * 1024
+ chunk1 = self.g1.generate(size)
+ num_chunks = 100
+ for i in range(num_chunks):
+ self.assertNotEqual(self.g1.generate(size), chunk1)
diff --git a/genbackupdatalib/names.py b/genbackupdatalib/names.py
new file mode 100644
index 0000000..287112d
--- /dev/null
+++ b/genbackupdatalib/names.py
@@ -0,0 +1,75 @@
+# Copyright 2011 Lars Wirzenius
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+import os
+
+
+class NameGenerator(object):
+
+ '''Generate names for new output files.
+
+ If the target directory is empty, the sequence of output files is
+ always the same for the same parameters.
+
+ A directory structure is also generated. The shape of the tree is
+ defined by two parameters: 'max' and 'depth'. 'depth' is the number
+ of levels of subdirectories to create, and 'max' is the maximum
+ number of files/dirs to allow per output directory. Thus, if max is
+ 3 and depth is 2, the output files are: 0/0/0, 0/0/1, 0/0/2,
+ 0/1/0, 0/1/1, etc.
+
+ If depth is zero, all output files go directly to the target
+ directory, and max is ignored.
+
+ '''
+
+ def __init__(self, dirname, depth, max):
+ self.dirname = dirname
+ self.depth = depth
+ self.max = max
+ self.counter = 0
+
+ def _path_tuple(self, n):
+ '''Return tuple for dir/file numbers for nth output file.
+
+ The last item in the tuple gives the file number, the preceding
+ items the directory numbers. Thus, a tuple (1, 2, 3) would
+ mean path '1/2/3', but it is given as a tuple for easier
+ manipulation.
+
+ '''
+
+ if self.depth == 0:
+ return (n,)
+ else:
+ items = []
+ for i in range(self.depth):
+ items.append(n % self.max)
+ n /= self.max
+ items.append(n)
+ items.reverse()
+ return tuple(items)
+
+ def _next_candidate_name(self):
+ items = self._path_tuple(self.counter)
+ self.counter += 1
+ return os.path.join(self.dirname, *[str(i) for i in items])
+
+ def new(self):
+ while True:
+ name = self._next_candidate_name()
+ if not os.path.exists(name):
+ return name
diff --git a/genbackupdatalib/names_tests.py b/genbackupdatalib/names_tests.py
new file mode 100644
index 0000000..60b4d79
--- /dev/null
+++ b/genbackupdatalib/names_tests.py
@@ -0,0 +1,84 @@
+# Copyright 2011 Lars Wirzenius
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+import os
+import shutil
+import tempfile
+import unittest
+
+import genbackupdatalib
+
+
+class NameGeneratorTests(unittest.TestCase):
+
+ def setUp(self):
+ self.tempdir = tempfile.mkdtemp()
+ self.depth = 2
+ self.max = 3
+ self.names = self.new()
+
+ def tearDown(self):
+ shutil.rmtree(self.tempdir)
+
+ def new(self):
+ return genbackupdatalib.NameGenerator(self.tempdir, self.depth,
+ self.max)
+
+ def test_generates_name_that_is_inside_target_directory(self):
+ name = self.names.new()
+ self.assert_(name.startswith(self.tempdir + os.sep))
+
+ def test_generates_different_names_every_time(self):
+ names = set(self.names.new() for i in range(10))
+ self.assertEqual(len(names), 10)
+
+ def test_generates_names_that_do_not_exist(self):
+ for i in range(10):
+ name = self.names.new()
+ self.assertFalse(os.path.exists(name))
+
+ def test_generates_the_same_sequence_with_every_instance(self):
+ n = 10
+ first = [self.names.new() for i in range(n)]
+ names2 = self.new()
+ second = [names2.new() for i in range(n)]
+ self.assertEqual(first, second)
+
+ def test_does_not_generate_names_of_existing_files(self):
+ name = self.names.new()
+ os.makedirs(os.path.dirname(name))
+ file(name, 'w').close()
+ names2 = self.new()
+ name2 = names2.new()
+ self.assertNotEqual(name, name2)
+ self.assertFalse(os.path.exists(name2))
+
+ def test_converts_file_sequence_number_into_right_path_tuple(self):
+ self.assertEqual(self.names._path_tuple(0), (0, 0, 0))
+ self.assertEqual(self.names._path_tuple(1), (0, 0, 1))
+ self.assertEqual(self.names._path_tuple(2), (0, 0, 2))
+ self.assertEqual(self.names._path_tuple(3), (0, 1, 0))
+ self.assertEqual(self.names._path_tuple(4), (0, 1, 1))
+ self.assertEqual(self.names._path_tuple(5), (0, 1, 2))
+ self.assertEqual(self.names._path_tuple(6), (0, 2, 0))
+ self.assertEqual(self.names._path_tuple(9), (1, 0, 0))
+ self.assertEqual(self.names._path_tuple(18), (2, 0, 0))
+ self.assertEqual(self.names._path_tuple(27), (3, 0, 0))
+
+ def test_returns_1tuple_for_depth_zero(self):
+ names = genbackupdatalib.NameGenerator(self.tempdir, 0, 1)
+ self.assertEqual(names._path_tuple(42), (42,))
+