diff options
Diffstat (limited to 'obnam/app.py')
-rw-r--r-- | obnam/app.py | 511 |
1 files changed, 0 insertions, 511 deletions
diff --git a/obnam/app.py b/obnam/app.py deleted file mode 100644 index c085a70b..00000000 --- a/obnam/app.py +++ /dev/null @@ -1,511 +0,0 @@ -# Copyright (C) 2008 Lars Wirzenius <liw@iki.fi> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - -"""Main program for Obnam.""" - - -import logging -import os -import re -import stat -import time - -import obnam - - - -# Maximum number of files per file group we create. -MAX_PER_FILEGROUP = 16 - - -class Application: - - """Main program logic for Obnam, a backup application.""" - - def __init__(self, context): - self._context = context - self._exclusion_strings = [] - self._exclusion_regexps = [] - self._filelist = None - self._prev_gen = None - self._store = obnam.Store(self._context) - self._total = 0 - - # When we traverse the file system tree while making a backup, - # we process children before the parent. This is necessary for - # functional updates of trees. For every directory, we need - # to keep track of its children. This dict is used for that. - # It is indexed by the absolute path to the directory, and - # contains a list of the subdirectories in that directory. - # When we're done with a directory (i.e., we generate its - # DirObject), we remove the directory from this dict. This - # means that we need only data for one path from the root of - # the directory tree to the current directory, not for the - # entire directory tree. - self._subdirs = {} - - def get_context(self): - """Get the context for the backup application.""" - return self._context - - def get_store(self): - """Get the Store for the backup application.""" - return self._store - - def load_host(self): - """Load the host block into memory.""" - self.get_store().fetch_host_block() - return self.get_store().get_host_block() - - def get_exclusion_regexps(self): - """Return list of regexp to exclude things from backup.""" - - config = self.get_context().config - strings = config.getvalues("backup", "exclude") - strings = [s.strip() for s in strings if s.strip()] - if self._exclusion_strings != strings: - self._exclusion_strings = strings - self._exclusion_regexps = [] - for string in strings: - logging.debug("Compiling exclusion pattern '%s'" % string) - self._exclusion_regexps.append(re.compile(string)) - - return self._exclusion_regexps - - def prune(self, dirname, dirnames, filenames): - """Remove excluded items from dirnames and filenames. - - Because this is called by obnam.walk.depth_first, the lists - are modified in place. - - """ - - self._prune_one_list(dirname, dirnames) - self._prune_one_list(dirname, filenames) - - def _prune_one_list(self, dirname, basenames): - """Prune one list of basenames based on exlusion list. - - Because this is called from self.prune, the list is modified - in place. - - """ - - dirname = obnam.io.unsolve(self._context, dirname) - - i = 0 - while i < len(basenames): - path = os.path.join(dirname, basenames[i]) - for regexp in self.get_exclusion_regexps(): - if regexp.search(path): - logging.debug("Excluding %s" % path) - logging.debug(" based on %s" % regexp.pattern) - del basenames[i] - break - else: - i += 1 - - def file_is_unchanged(self, stat1, stat2): - """Is a file unchanged from the previous generation? - - Given the stat results from the previous generation and the - current file, return True if the file is identical from the - previous generation (i.e., no new data to back up). - - """ - - fields = ("mode", "dev", "nlink", "uid", "gid", "size", "mtime") - for field in fields: - field = "st_" + field - if getattr(stat1, field) != getattr(stat2, field): - return False - return True - - def filegroup_is_unchanged(self, dirname, fg, filenames, stat=os.lstat): - """Is a filegroup unchanged from the previous generation? - - Given a filegroup and a list of files in the given directory, - return True if all files in the filegroup are unchanged from - the previous generation. - - The optional stat argument can be used by unit tests to - override the use of os.lstat. - - """ - - for old_name in fg.get_names(): - if old_name not in filenames: - return False # file has been deleted - - old_stat = fg.get_stat(old_name) - new_stat = stat(os.path.join(dirname, old_name)) - if not self.file_is_unchanged(old_stat, new_stat): - return False # file has changed - - return True # everything seems to be as before - - def dir_is_unchanged(self, old, new): - """Has a directory changed since the previous generation? - - Return True if a directory, or its files or subdirectories, - has changed since the previous generation. - - """ - - return (old.get_name() == new.get_name() and - self.file_is_unchanged(old.get_stat(), new.get_stat()) and - sorted(old.get_dirrefs()) == sorted(new.get_dirrefs()) and - sorted(old.get_filegrouprefs()) == - sorted(new.get_filegrouprefs())) - - def set_prevgen_filelist(self, filelist): - """Set the Filelist object from the previous generation. - - This is used when looking up files in previous generations. We - only look at one generation's Filelist, since they're big. Note - that Filelist objects are the _old_ way of storing file meta - data, and we will no use better ways that let us look further - back in history. - - """ - - logging.debug("Setting previous generation FILELIST.") - self._filelist = filelist - - def get_previous_generation(self): - """Get the previous generation for a backup run.""" - return self._prev_gen - - def set_previous_generation(self, gen): - """Set the previous generation for a backup run.""" - self._prev_gen = gen - - def find_file_by_name(self, filename): - """Find a backed up file given its filename. - - Return FILE component, or None if no file with the given name - could be found. - - """ - - if self._filelist: - fc = self._filelist.find(filename) - if fc != None: - return fc - - return None - - def compute_signature(self, filename): - """Compute rsync signature for a filename. - - Return the identifier. Put the signature object in the queue to - be uploaded. - - """ - - logging.debug("Computing rsync signature for %s" % filename) - sigdata = obnam.rsync.compute_signature(self._context, filename) - id = obnam.obj.object_id_new() - sig = obnam.obj.SignatureObject(id=id, sigdata=sigdata) - self.get_store().queue_object(sig) - return sig - - def find_unchanged_filegroups(self, dirname, filegroups, filenames, - stat=os.lstat): - """Return list of filegroups that are unchanged. - - The filenames and stat arguments have the same meaning as - for the filegroup_is_unchanged method. - - """ - - unchanged = [] - - for filegroup in filegroups: - if self.filegroup_is_unchanged(dirname, filegroup, filenames, - stat=stat): - unchanged.append(filegroup) - - logging.debug("There are %d unchanged filegroups in %s" % - (len(unchanged), dirname)) - return unchanged - - def get_file_in_previous_generation(self, pathname): - """Return non-directory file in previous generation, or None.""" - if self._filelist: - logging.debug("Have FILELIST, searching it for %s" % pathname) - file = self.find_file_by_name(pathname) - if file: - logging.debug("Found in prevgen FILELIST: %s" % pathname) - return file - else: - logging.debug("Not found in FILELIST.") - else: - logging.debug("No FILELIST for previous generation.") - gen = self.get_previous_generation() - if gen: - logging.debug("Looking up file in previous gen: %s" % pathname) - return self.get_store().lookup_file(gen, pathname) - else: - logging.debug("No previous gen in which to find %s" % pathname) - return None - - def _reuse_existing(self, old_file): - logging.debug("Re-using existing file contents: %s" % - old_file.first_string_by_kind(obnam.cmp.FILENAME)) - return (old_file.first_string_by_kind(obnam.cmp.CONTREF), - old_file.first_string_by_kind(obnam.cmp.SIGREF), - old_file.first_string_by_kind(obnam.cmp.DELTAREF)) - - def _get_old_sig(self, old_file): - old_sigref = old_file.first_string_by_kind(obnam.cmp.SIGREF) - if not old_sigref: - return None - old_sig = self.get_store().get_object(old_sigref) - if not old_sig: - return None - return old_sig.first_string_by_kind(obnam.cmp.SIGDATA) - - def _compute_delta(self, old_file, filename): - old_sig_data = self._get_old_sig(old_file) - if old_sig_data: - logging.debug("Computing delta for %s" % filename) - old_contref = old_file.first_string_by_kind(obnam.cmp.CONTREF) - old_deltaref = old_file.first_string_by_kind(obnam.cmp.DELTAREF) - deltapart_ids = obnam.rsync.compute_delta(self.get_context(), - old_sig_data, filename) - delta_id = obnam.obj.object_id_new() - delta = obnam.obj.DeltaObject(id=delta_id, - deltapart_refs=deltapart_ids, - cont_ref=old_contref, - delta_ref=old_deltaref) - self.get_store().queue_object(delta) - - sig = self.compute_signature(filename) - - return None, sig.get_id(), delta.get_id() - else: - logging.debug("Signature for previous version not found for %s" % - filename) - return self._backup_new(filename) - - def _backup_new(self, filename): - logging.debug("Storing new file %s" % filename) - contref = obnam.io.create_file_contents_object(self._context, - filename) - sig = self.compute_signature(filename) - sigref = sig.get_id() - deltaref = None - return contref, sigref, deltaref - - def add_to_filegroup(self, fg, filename): - """Add a file to a filegroup.""" - logging.debug("Backing up %s" % filename) - self._context.progress.update_current_action(filename) - st = os.lstat(filename) - if stat.S_ISREG(st.st_mode): - unsolved = obnam.io.unsolve(self.get_context(), filename) - old_file = self.get_file_in_previous_generation(unsolved) - if old_file: - old_st = old_file.first_by_kind(obnam.cmp.STAT) - old_st = obnam.cmp.parse_stat_component(old_st) - if self.file_is_unchanged(old_st, st): - contref, sigref, deltaref = self._reuse_existing(old_file) - else: - contref, sigref, deltaref = self._compute_delta(old_file, - filename) - else: - contref, sigref, deltaref = self._backup_new(filename) - else: - contref = None - sigref = None - deltaref = None - fg.add_file(os.path.basename(filename), st, contref, sigref, deltaref) - - def make_filegroups(self, filenames): - """Make list of new FILEGROUP objects. - - Return list of object identifiers to the FILEGROUP objects. - - """ - - list = [] - for filename in filenames: - if (not list or - len(list[-1].get_files()) >= MAX_PER_FILEGROUP): - id = obnam.obj.object_id_new() - list.append(obnam.obj.FileGroupObject(id=id)) - self.add_to_filegroup(list[-1], filename) - - self.get_store().queue_objects(list) - return list - - def _make_absolute(self, basename, relatives): - return [os.path.join(basename, name) for name in relatives] - - def get_dir_in_previous_generation(self, dirname): - """Return directory in previous generation, or None.""" - gen = self.get_previous_generation() - if gen: - logging.debug("Looking up in previous generation: %s" % dirname) - return self.get_store().lookup_dir(gen, dirname) - else: - logging.debug("No previous generation to search for %s" % dirname) - return None - - def select_files_to_back_up(self, dirname, filenames, stat=os.lstat): - """Select files to backup in a directory, compared to previous gen. - - Look up the directory in the previous generation, and see which - files need backing up compared to that generation. - - Return list of unchanged filegroups, plus list of filenames - that need backing up. - - """ - - unsolved = obnam.io.unsolve(self.get_context(), dirname) - logging.debug("Selecting files to backup in %s (unsolved)" % unsolved) - logging.debug("There are %d filenames currently" % len(filenames)) - - filenames = filenames[:] - old_dir = self.get_dir_in_previous_generation(unsolved) - if old_dir: - logging.debug("Found directory in previous generation") - old_groups = [self.get_store().get_object(id) - for id in old_dir.get_filegrouprefs()] - filegroups = self.find_unchanged_filegroups(dirname, old_groups, - filenames, - stat=stat) - for fg in filegroups: - for name in fg.get_names(): - filenames.remove(name) - - return filegroups, filenames - else: - logging.debug("Did not find directory in previous generation") - return [], filenames - - def backup_one_dir(self, dirname, subdirs, filenames, is_root=False): - """Back up non-recursively one directory. - - Return obnam.obj.DirObject that refers to the directory. - - subdirs is the list of subdirectories (as DirObject) for this - directory. - - """ - - logging.debug("Backing up non-recursively: %s" % dirname) - filegroups, filenames = self.select_files_to_back_up(dirname, - filenames) - logging.debug("Selected %d existing file groups, %d filenames" % - (len(filegroups), len(filenames))) - filenames = self._make_absolute(dirname, filenames) - - filegroups += self.make_filegroups(filenames) - filegrouprefs = [fg.get_id() for fg in filegroups] - - dirrefs = [subdir.get_id() for subdir in subdirs] - - basename = os.path.basename(dirname) - if not basename and dirname.endswith(os.sep): - basename = os.path.basename(dirname[:-len(os.sep)]) - assert basename - logging.debug("Creating DirObject, basename: %s" % basename) - if is_root: - name = obnam.io.unsolve(self.get_context(), dirname) - else: - name = basename - dir = obnam.obj.DirObject(id=obnam.obj.object_id_new(), - name=name, - stat=os.lstat(dirname), - dirrefs=dirrefs, - filegrouprefs=filegrouprefs) - - unsolved = obnam.io.unsolve(self.get_context(), dirname) - old_dir = self.get_dir_in_previous_generation(unsolved) - if old_dir and self.dir_is_unchanged(old_dir, dir): - logging.debug("Dir is unchanged: %s" % dirname) - return old_dir - else: - logging.debug("Dir has changed: %s" % dirname) - self.get_store().queue_object(dir) - return dir - - def backup_one_root(self, root): - """Backup one root for the next generation.""" - - logging.debug("Backing up root %s" % root) - - resolved = obnam.io.resolve(self._context, root) - logging.debug("Root resolves to %s" % resolved) - - if not os.path.isdir(resolved): - raise obnam.ObnamException("Not a directory: %s" % root) - # FIXME: This needs to be able to handle non-directories, too! - - subdirs_for_dir = {} - root_object = None - - for tuple in obnam.walk.depth_first(resolved, prune=self.prune): - dirname, dirnames, filenames = tuple - filenames.sort() - logging.debug("Walked to directory %s" % dirname) - logging.debug(" with dirnames: %s" % dirnames) - logging.debug(" and filenames: %s" % filenames) - self.get_context().progress.update_current_action(dirname) - - subdirs = subdirs_for_dir.get(dirname, []) - - is_root = (dirname == resolved) - - dir = self.backup_one_dir(dirname, subdirs, filenames, - is_root=is_root) - - if not is_root: - parent = os.path.dirname(dirname) - if parent not in subdirs_for_dir: - subdirs_for_dir[parent] = [] - subdirs_for_dir[parent].append(dir) - else: - root_object = dir - - if dirname in subdirs_for_dir: - del subdirs_for_dir[dirname] - - self._total += 1 + len(filenames) - self.get_context().progress.update_total_files(self._total) - - return root_object - - def backup(self, roots): - """Backup all the roots.""" - - start = int(time.time()) - root_objs = [] - self._total = 0 - for root in roots: - root_objs.append(self.backup_one_root(root)) - end = int(time.time()) - - dirrefs = [o.get_id() for o in root_objs] - gen = obnam.obj.GenerationObject(id=obnam.obj.object_id_new(), - dirrefs=dirrefs, start=start, - end=end) - self.get_store().queue_object(gen) - return gen |