Diffstat (limited to 'obnam/backend.py')
-rw-r--r--  obnam/backend.py  388
1 file changed, 0 insertions, 388 deletions
diff --git a/obnam/backend.py b/obnam/backend.py
deleted file mode 100644
index 73c812ad..00000000
--- a/obnam/backend.py
+++ /dev/null
@@ -1,388 +0,0 @@
-# Copyright (C) 2006, 2007 Lars Wirzenius <liw@iki.fi>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-
-"""Backup program backend for communicating with the backup server."""
-
-
-import logging
-import os
-import pwd
-import stat
-import urlparse
-
-import paramiko
-
-import uuid
-import obnam.cache
-import obnam.cmp
-import obnam.gpg
-import obnam.map
-import obnam.obj
-
-
-# Block filenames are created using the following scheme:
-#
-# For each backup run, we create one directory, named by a UUID. Inside
-# this directory we create sub-directories, named by sequential integers,
-# up to a certain number of levels. The actual block files are created at
-# the lowest level, and we create the next lowest-level directory once
-# the current one has reached the maximum number of files.
-#
-# The rationale is that having too many files in one directory makes all
-# operations involving that directory slow, in many filesystems, because
-# of linear searches. By putting, say, only 256 files per directory, we
-# can keep things reasonably fast. However, if we create a lot of blocks,
-# we'll end up creating a lot of directories, too. Thus, several levels of
-# directories are needed.
-#
-# With 256 files per directory, three levels of directories, and one
-# megabyte per block file, we can create 16 terabytes of backup data
-# without exceeding these constraints. After that, we get more than 256
-# entries per directory, making things slow, but it'll still work.
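-#
-# As an illustration (the UUID here is invented; the paths follow from
-# generate_block_id below): with MAX_BLOCKS_PER_DIR = 256 and LEVELS = 3,
-# the blocks of one run are stored as
-#
-#   5f3c.../0/0/1
-#   5f3c.../0/0/2
-#   ...
-#   5f3c.../0/0/256
-#   5f3c.../0/1/0     (the lowest counter rolled over)
-#
-# and 256**3 one-megabyte files add up to the 16 terabytes quoted above.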
-
-MAX_BLOCKS_PER_DIR = 256
-LEVELS = 3
-
-
-def parse_store_url(url):
- """Parse a store url
-
- The url must either be a plain pathname, or it starts with sftp://
- and specifies a remote store. Return a tuple username, host, port,
- path, where elements can be None if they are meant to be the default
- or are not relevant.
-
- Note that we follow the bzr (and lftp?) syntax: sftp://foo/bar is an
- absolute path, /foo, and sftp://foo/~/bar is "bar" relative to the
- user's home directory.
-
- """
-
- user = host = port = path = None
- (scheme, netloc, path, query, fragment) = urlparse.urlsplit(url)
-
- if scheme == "sftp":
- if "@" in netloc:
- (user, netloc) = netloc.split("@", 1)
- if ":" in netloc:
- (host, port) = netloc.split(":", 1)
- port = int(port)
- else:
- host = netloc
- if path.startswith("/~/"):
- path = path[3:]
- else:
- path = url
-
- return user, host, port, path
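-
-# A few illustrative calls (the results follow from the rules in the
-# docstring; the user and host names are invented):
-#
-#   parse_store_url("sftp://alice@example.com:2222/~/backups")
-#     => ("alice", "example.com", 2222, "backups")
-#   parse_store_url("sftp://example.com/backups")
-#     => (None, "example.com", None, "/backups")
-#   parse_store_url("/var/backups")
-#     => (None, None, None, "/var/backups")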
-
-
-class DummyProgressReporter:
-
-    """A progress reporter that quietly ignores all updates."""
-
- def nop(self, *args):
- pass
-
- update_current_action = nop
- update_uploaded = nop
- update_downloaded = nop
-
-
-class Backend:
-
- def __init__(self, config, cache):
- self.config = config
- self.url = config.get("backup", "store")
-
- self.user, self.host, self.port, self.path = parse_store_url(self.url)
- if self.user is None:
- self.user = get_default_user()
- if self.port is None:
- self.port = 22 # 22 is the default port for ssh
-
-        self.blockdir = str(uuid.uuid4())
- self.dircounts = [0] * LEVELS
- self.sftp_transport = None
- self.sftp_client = None
- self.bytes_read = 0
- self.bytes_written = 0
- self.set_progress_reporter(DummyProgressReporter())
- self.cache = cache
-
- def set_progress_reporter(self, progress):
- """Set progress reporter to be used"""
- self.progress = progress
-
- def get_bytes_read(self):
- """Return number of bytes read from the store during this run"""
- return self.bytes_read
-
- def get_bytes_written(self):
- """Return number of bytes written to the store during this run"""
- return self.bytes_written
-
- def increment_dircounts(self):
- """Increment the counter for lowest dir level, and more if need be"""
- level = len(self.dircounts) - 1
- while level >= 0:
- self.dircounts[level] += 1
- if self.dircounts[level] <= MAX_BLOCKS_PER_DIR:
- break
- self.dircounts[level] = 0
- level -= 1
-
-    def generate_block_id(self):
-        """Generate an identifier for a new block in the store"""
-        self.increment_dircounts()
-        block_id = self.blockdir
-        for i in self.dircounts:
-            block_id = os.path.join(block_id, "%d" % i)
-        return block_id
-
- def block_remote_pathname(self, block_id):
- """Return pathname on server for a given block id"""
- return os.path.join(self.path, block_id)
-
-    def use_gpg(self):
-        """Should we use gpg to encrypt/decrypt blocks?"""
-        if self.config.getboolean("backup", "no-gpg"):
-            return False
-        # Encryption is on when a gpg-encrypt-to key id has been set.
-        encrypt_to = self.config.get("backup", "gpg-encrypt-to").strip()
-        return encrypt_to != ""
-
- def upload_block(self, block_id, block, to_cache):
- """Upload block to server, and possibly to cache as well."""
- logging.debug("Uploading block %s" % block_id)
- if self.use_gpg():
- logging.debug("Encrypting block %s before upload" % block_id)
- block = obnam.gpg.encrypt(self.config, block)
- logging.debug("Uploading block %s (%d bytes)" % (block_id, len(block)))
- self.progress.update_current_action("Uploading block")
- self.really_upload_block(block_id, block)
- if to_cache and self.config.get("backup", "cache"):
- logging.debug("Putting uploaded block to cache, as well")
- self.cache.put_block(block_id, block)
-
- def download_block(self, block_id):
- """Download a block from the remote server
-
- Return the unparsed block (a string), or raise an exception for errors.
-
- """
-
- logging.debug("Downloading block %s" % block_id)
- self.progress.update_current_action("Downloading block")
- block = self.really_download_block(block_id)
-
- if self.use_gpg():
- logging.debug("Decrypting downloaded block %s before using it" %
- block_id)
- block = obnam.gpg.decrypt(self.config, block)
-
- return block
-
- def remove(self, block_id):
- """Remove a block from the remote server"""
- pathname = self.block_remote_pathname(block_id)
- try:
- self.remove_pathname(pathname)
- except IOError:
- # We ignore any errors in removing a file.
- pass
-
-
-class SftpBackend(Backend):
-
-    io_size = 64 * 1024   # transfer data in 64 KiB chunks
-
- def load_key(self, filename):
- """Load an SSH private key from a file."""
- try:
- return paramiko.DSSKey.from_private_key_file(filename)
- except paramiko.SSHException:
- return paramiko.RSAKey.from_private_key_file(filename)
-
- def connect_sftp(self):
- """Connect to the server, unless already connected"""
- if self.sftp_transport is None:
- ssh_key_file = self.config.get("backup", "ssh-key")
- logging.debug("Getting private key from %s" % ssh_key_file)
- pkey = self.load_key(ssh_key_file)
-
- logging.debug("Connecting to sftp server: host=%s, port=%d" %
- (self.host, self.port))
- self.sftp_transport = paramiko.Transport((self.host, self.port))
-
- logging.debug("Authenticating as user %s" % self.user)
- self.sftp_transport.connect(username=self.user, pkey=pkey)
-
- logging.debug("Opening sftp client")
- self.sftp_client = self.sftp_transport.open_sftp_client()
-
- def close(self):
- """Close the connection, if any."""
- if self.sftp_transport:
- self.sftp_transport.close()
-
- def sftp_makedirs(self, dirname, mode=0777):
- """Create dirname, if it doesn't exist, and all its parents, too"""
- stack = []
- while dirname:
- stack.append(dirname)
- dirname2 = os.path.dirname(dirname)
- if dirname2 == dirname:
- dirname = None
- else:
- dirname = dirname2
-
- while stack:
-            dirname = stack.pop()
-            try:
-                self.sftp_client.lstat(dirname)
- except IOError:
- exists = False
- else:
- exists = True
- if not exists:
- logging.debug("Creating remote directory %s" % dirname)
- self.sftp_client.mkdir(dirname, mode=mode)
-
- def really_upload_block(self, block_id, block):
- self.connect_sftp()
- pathname = self.block_remote_pathname(block_id)
- self.sftp_makedirs(os.path.dirname(pathname))
- f = self.sftp_client.file(pathname, "w")
- self.sftp_client.chmod(pathname, 0600)
- for offset in range(0, len(block), self.io_size):
- block_part = block[offset:offset+self.io_size]
- f.write(block_part)
- self.bytes_written += len(block_part)
- self.progress.update_uploaded(self.bytes_written)
- f.close()
-
- def really_download_block(self, block_id):
- try:
- self.connect_sftp()
- f = self.sftp_client.file(self.block_remote_pathname(block_id),
- "r")
- block_parts = []
- while True:
- block_part = f.read(self.io_size)
- if not block_part:
- break
- block_parts.append(block_part)
- self.bytes_read += len(block_part)
- self.progress.update_downloaded(self.bytes_read)
- block = "".join(block_parts)
- f.close()
- if self.config.get("backup", "cache"):
- self.cache.put_block(block_id, block)
-        except IOError, e:
-            logging.warning("I/O error: %s" % str(e))
-            raise
- return block
-
- def sftp_listdir_abs(self, dirname):
- """Like SFTPClient's listdir_attr, but absolute pathnames"""
- items = self.sftp_client.listdir_attr(dirname)
- for item in items:
- item.filename = os.path.join(dirname, item.filename)
- return items
-
- def sftp_recursive_listdir(self, dirname="."):
- """Similar to SFTPClient's listdir_attr, but recursively"""
-        result = []
- logging.debug("sftp: listing files in %s" % dirname)
- unprocessed = self.sftp_listdir_abs(dirname)
- while unprocessed:
- item, unprocessed = unprocessed[0], unprocessed[1:]
- if stat.S_ISDIR(item.st_mode):
- logging.debug("sftp: listing files in %s" % item.filename)
- unprocessed += self.sftp_listdir_abs(item.filename)
- elif stat.S_ISREG(item.st_mode):
-                result.append(item.filename)
-        return result
-
- def list(self):
- """Return list of all files on the remote server"""
- return self.sftp_recursive_listdir(self.path)
-
-    def remove_pathname(self, pathname):
-        """Remove a file from the sftp server"""
-        self.sftp_client.remove(pathname)
-
-
-class FileBackend(Backend):
-
- def close(self):
- pass
-
- def really_upload_block(self, block_id, block):
- dir_full = os.path.join(self.path, os.path.dirname(block_id))
- if not os.path.isdir(dir_full):
- os.makedirs(dir_full, 0700)
- fd = os.open(self.block_remote_pathname(block_id),
- os.O_WRONLY | os.O_TRUNC | os.O_CREAT,
- 0600)
- f = os.fdopen(fd, "w")
- f.write(block)
- self.bytes_written += len(block)
- self.progress.update_uploaded(self.bytes_written)
- f.close()
-
-    def really_download_block(self, block_id):
-        f = open(self.block_remote_pathname(block_id), "r")
-        block = f.read()
-        self.bytes_read += len(block)
-        self.progress.update_downloaded(self.bytes_read)
-        f.close()
-        return block
-
- def list(self):
- """Return list of all files on the remote server"""
-        result = []
- for dirpath, _, filenames in os.walk(self.path):
- if dirpath.startswith(self.path):
- dirpath = dirpath[len(self.path):]
- if dirpath.startswith(os.sep):
- dirpath = dirpath[len(os.sep):]
-            result += [os.path.join(dirpath, x) for x in filenames]
-        return result
-
- def remove_pathname(self, pathname):
- """Remove a block from the remote server"""
- if os.path.exists(pathname):
- os.remove(pathname)
-
-
-def get_default_user():
- """Return the username of the current user"""
- if "LOGNAME" in os.environ:
- return os.environ["LOGNAME"]
- else:
- return pwd.getpwuid(os.getuid())[0]
-
-
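-# For illustration (the store values are invented): a plain path like
-# "/var/backups" has no host, so init() picks FileBackend; a URL like
-# "sftp://alice@example.com/backups" has one, so it picks SftpBackend.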
-def init(config, cache):
- """Initialize the subsystem and return an opaque backend object"""
- _, host, _, _ = parse_store_url(config.get("backup", "store"))
- if host is None:
- return FileBackend(config, cache)
- else:
- return SftpBackend(config, cache)
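-
-
-# For illustration, the "[backup]" options this module reads (the option
-# names are taken from the code above; the values are invented):
-#
-#   [backup]
-#   store = sftp://alice@example.com/backups
-#   ssh-key = /home/alice/.ssh/id_rsa
-#   cache = /home/alice/.cache/obnam
-#   no-gpg = false
-#   gpg-encrypt-to = DEADBEEF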