summaryrefslogtreecommitdiff
path: root/obnamlib/metadata.py
blob: ba76cf6b39ab59caaf55c486cecd0f947e79af8e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
# Copyright (C) 2009-2016  Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import errno
import grp
import logging
import os
import pwd
import stat
import struct
import sys
import tracing

import obnamlib


# Subset of the metadata field names; the full list below extends it.
# NOTE(review): presumably these are the fields compared when verifying a
# backup -- confirm against the callers.
metadata_verify_fields = (
    'st_mode',
    'st_mtime_sec',
    'st_mtime_nsec',
    'st_nlink',
    'st_size',
    'st_uid',
    'groupname',
    'username',
    'target',
    'xattr',
)

# Every field name a Metadata object carries.  The order matters to
# anything that walks the fields in sequence, so it is left untouched.
metadata_fields = metadata_verify_fields + (
    'st_blocks',
    'st_dev',
    'st_gid',
    'st_ino',
    'st_atime_sec',
    'st_atime_nsec',
    'md5',
    'sha224',
    'sha256',
    'sha384',
    'sha512',
    'test',
)


class Metadata(object):

    '''Metadata for one filesystem entry (file, directory, device, ...).

    An instance has one attribute for every name in metadata_fields.
    Attributes that were never given a value are None.

    Only some stat(2) fields are kept. The reasoning, field by field:

        field           stored? why

        st_atime_sec    yes     mutt compares atime, mtime to see if msg is new
        st_atime_nsec   yes     mutt compares atime, mtime to see if msg is new
        st_blksize      no      no way to restore, not useful backed up
        st_blocks       yes     should restore create holes in file?
        st_ctime        no      no way to restore, not useful backed up
        st_dev          yes     used to restore hardlinks
        st_gid          yes     used to restore group ownership
        st_ino          yes     used to restore hardlinks
        st_mode         yes     used to restore permissions
        st_mtime_sec    yes     used to restore mtime
        st_mtime_nsec   yes     used to restore mtime
        st_nlink        yes     used to restore hardlinks
        st_rdev         no      no use (correct me if I'm wrong about this)
        st_size         yes     user needs it to see size of file in backup
        st_uid          yes     used to restore ownership

    Fields that do not come from stat(2):

        target      the target of a symlink
        groupname   textual name corresponding to st_gid
        username    textual name corresponding to st_uid
        md5         optional whole-file checksum
        xattr       optional extended attributes, encoded as a binary blob

    When restoring, the textual names are preferred by default over the
    numeric ids.

    '''

    def __init__(self, **kwargs):
        '''Start with every field None, then apply the keyword arguments.'''
        # Spelled out only so that pylint knows these attributes exist.
        self.md5 = self.st_size = self.st_mode = None
        self.st_uid = self.st_gid = None
        for name in metadata_fields:
            setattr(self, name, None)
        for name, value in kwargs.iteritems():
            setattr(self, name, value)

    def isdir(self):
        '''Is this a directory? False when st_mode is unset.'''
        if self.st_mode is None:
            return False
        return stat.S_ISDIR(self.st_mode)

    def islink(self):
        '''Is this a symbolic link? False when st_mode is unset.'''
        if self.st_mode is None:
            return False
        return stat.S_ISLNK(self.st_mode)

    def isfile(self):
        '''Is this a regular file? False when st_mode is unset.'''
        if self.st_mode is None:
            return False
        return stat.S_ISREG(self.st_mode)

    def __repr__(self):  # pragma: no cover
        pairs = ['%s=%s' % (name, getattr(self, name))
                 for name in metadata_fields]
        return 'Metadata(%s)' % ', '.join(pairs)

    def __cmp__(self, other):
        '''Compare field by field, in metadata_fields order.

        The first field that differs decides the result (-1 or +1);
        0 means no field decided the ordering.

        '''
        for name in metadata_fields:
            mine, yours = getattr(self, name), getattr(other, name)
            if mine == yours:
                continue
            if mine < yours:
                return -1
            if mine > yours:
                return 1
        return 0


# Caching versions of username/groupname lookups.
# These work on the assumption that the mappings from uid/gid do not
# change during the runtime of the backup.

# Remembered pwd.getpwuid() results, keyed by uid.
_uid_to_username = {}


def _cached_getpwuid(uid):  # pragma: no cover
    '''Return pwd.getpwuid(uid), asking the system only once per uid.

    A failed lookup is not remembered: whatever pwd.getpwuid raises
    propagates to the caller and the cache is left as it was.

    '''
    try:
        return _uid_to_username[uid]
    except KeyError:
        pass
    entry = pwd.getpwuid(uid)
    _uid_to_username[uid] = entry
    return entry


# Remembered grp.getgrgid() results, keyed by gid.
_gid_to_groupname = {}


def _cached_getgrgid(gid):  # pragma: no cover
    '''Return grp.getgrgid(gid), asking the system only once per gid.

    A failed lookup is not remembered: whatever grp.getgrgid raises
    propagates to the caller and the cache is left as it was.

    '''
    try:
        return _gid_to_groupname[gid]
    except KeyError:
        pass
    entry = grp.getgrgid(gid)
    _gid_to_groupname[gid] = entry
    return entry


def get_xattrs_as_blob(fs, filename):  # pragma: no cover
    '''Return the extended attributes of filename packed into one string.

    The attributes are read with fs.llistxattr and fs.lgetxattr.

    Return None when listing the attributes fails with EOPNOTSUPP or
    EACCES, or when there are no attribute names. Any other error from
    listing, and any error other than ENODATA from reading a value, is
    re-raised.

    Layout of the returned blob (all integers are struct format '!Q'):

        length of the name blob
        name blob: every name followed by a NUL byte
        one length per value, in name order
        the values themselves, concatenated in name order

    '''
    tracing.trace('filename=%s' % filename)

    try:
        names = fs.llistxattr(filename)
    except (OSError, IOError) as e:
        if e.errno in (errno.EOPNOTSUPP, errno.EACCES):
            return None
        raise
    tracing.trace('names=%s' % repr(names))
    if not names:
        return None

    # Names and values are collected side by side, so that a name whose
    # value cannot be read is simply never recorded. This keeps the two
    # lists aligned without editing the list the filesystem gave us.
    kept_names = []
    values = []
    for name in names:
        tracing.trace('trying name %s' % repr(name))
        try:
            value = fs.lgetxattr(filename, name)
        except OSError as e:
            # On btrfs, at least, this can happen: the filesystem returns
            # a list of attribute names, but then fails when looking up
            # the value for one or more of the names. We pretend that the
            # name was never returned in that case.
            #
            # Obviously this can happen due to race conditions as well.
            if e.errno != errno.ENODATA:
                raise
            logging.warning(
                '%s has extended attribute named %s without value, '
                'ignoring attribute',
                filename, name)
        else:
            tracing.trace('lgetxattr(%s)=%s' % (name, value))
            kept_names.append(name)
            values.append(value)

    name_blob = ''.join('%s\0' % name for name in kept_names)

    lengths = [len(v) for v in values]
    fmt = '!' + 'Q' * len(values)
    value_blob = struct.pack(fmt, *lengths) + ''.join(values)

    return ('%s%s%s' %
            (struct.pack('!Q', len(name_blob)),
             name_blob,
             value_blob))


def set_xattrs_from_blob(fs, filename, blob, user_only):  # pragma: no cover
    '''Unpack an extended-attribute blob and apply it to filename.

    The blob starts with a '!Q' integer giving the size of the name
    blob, followed by the NUL-terminated names, then one '!Q' length
    per value, then the values back to back.

    Each attribute is set with fs.lsetxattr. When user_only is true,
    only names starting with 'user.' are set; the others are skipped
    with a logged warning.

    '''
    word = struct.calcsize('!Q')
    (names_size,) = struct.unpack('!Q', blob[:word])
    names_end = word + names_size
    name_blob = blob[word:names_end]
    value_blob = blob[names_end:]

    # Every name is NUL-terminated, so the final split piece is empty.
    names = name_blob.split('\0')[:-1]
    table_size = word * len(names)
    lengths = struct.unpack('!' + 'Q' * len(names), value_blob[:table_size])

    # The values follow directly after the table of lengths.
    offset = table_size
    for name, length in zip(names, lengths):
        value = value_blob[offset:offset + length]
        offset += length
        if user_only and not name.startswith('user.'):
            logging.warning(
                '%s: Not setting extended attribute %s due to not being root',
                filename, name)
            continue
        fs.lsetxattr(filename, name, value)


def read_metadata(fs, filename, st=None, getpwuid=None, getgrgid=None):
    '''Build a Metadata object describing a filesystem entry.

    st, if given, is used as the stat result instead of calling
    fs.lstat(filename). getpwuid and getgrgid, if given, replace the
    cached uid and gid lookups used by default.

    The result gets every st_* field the stat result provides, the
    symlink target ('' for anything that is not a symlink), the owner's
    user and group names (None when the lookup raises KeyError), and
    the extended attributes as a blob.

    '''
    result = Metadata()
    stat_result = st or fs.lstat(filename)

    for name in metadata_fields:
        if not name.startswith('st_'):
            continue
        # The stat result need not provide every st_* field we know of.
        if hasattr(stat_result, name):
            setattr(result, name, getattr(stat_result, name))

    is_link = stat.S_ISLNK(stat_result.st_mode)
    result.target = fs.readlink(filename) if is_link else ''

    lookup_group = getgrgid or _cached_getgrgid
    try:
        result.groupname = lookup_group(result.st_gid)[0]
    except KeyError:
        result.groupname = None

    lookup_user = getpwuid or _cached_getpwuid
    try:
        result.username = lookup_user(result.st_uid)[0]
    except KeyError:
        result.username = None

    result.xattr = get_xattrs_as_blob(fs, filename)

    return result


class SetMetadataError(obnamlib.ObnamError):

    # Error raised when applying one piece of metadata to a file fails.
    # The placeholder names in the template match the keyword arguments
    # that _set_something passes when it raises this error.
    # NOTE(review): presumably obnamlib.ObnamError fills in the template
    # from those keyword arguments -- confirm in obnamlib.
    msg = "{filename}: Couldn't set metadata {metadata}: {errno}: {strerror}"


def _set_something(filename, what, func):  # pragma: no cover
    '''Call func() and turn an OSError into SetMetadataError.

    filename and what (a short description of the metadata being set)
    are only used for the error report. The OSError is logged, with
    traceback, before SetMetadataError is raised. Exceptions other than
    OSError pass through untouched.

    '''
    try:
        func()
    except OSError as err:
        logging.error(str(err), exc_info=True)
        details = {
            'filename': filename,
            'metadata': what,
            'errno': err.errno,
            'strerror': err.strerror,
        }
        raise SetMetadataError(**details)


def set_metadata(fs, filename, metadata,
                 getuid=None, always_set_id_bits=False):
    '''Apply stored metadata to a filesystem entry.

    Only metadata that can sensibly be set is set: st_atime, st_mode,
    st_mtime, and extended attributes if any were stored. For a symlink
    the link itself is created first, from metadata.target.

    Ownership (st_uid, st_gid) is set when running as root. Otherwise
    only st_gid is attempted, and a failure to set it is ignored.

    The username and groupname fields are ignored: the caller is
    expected to have adjusted st_uid and st_gid already if they want
    something other than the stored ids. That leaves error handling and
    user preferences for name lookups with the caller.

    getuid, if given, replaces os.getuid. The setuid and setgid bits
    are restored only when running as root or as the file's owner,
    unless always_set_id_bits is true.

    Raise SetMetadataError if any step other than the unprivileged
    group change fails with an OSError.

    '''

    is_symlink = stat.S_ISLNK(metadata.st_mode)
    if is_symlink:
        def make_symlink():
            fs.symlink(metadata.target, filename)

        _set_something(filename, 'symlink target', make_symlink)

    # Set owner before mode, so that a setuid bit does not get reset.
    getuid = getuid or os.getuid
    if getuid() != 0:
        # A normal user can change the group to one they belong to, so
        # try that, and carry on regardless if it does not work.
        try:
            fs.lchown(filename, -1, metadata.st_gid)  # -1: keep the owner
        except OSError:
            sys.exc_clear()
    else:
        def restore_owner():
            fs.lchown(filename, metadata.st_uid, metadata.st_gid)

        _set_something(filename, 'uid and gid', restore_owner)

    # If we are not the owner, and not root, do not restore setuid/setgid,
    # unless explicitly told to do so.
    mode = metadata.st_mode
    keep_id_bits = always_set_id_bits or (getuid() in (0, metadata.st_uid))
    if not keep_id_bits:  # pragma: no cover
        mode &= ~(stat.S_ISUID | stat.S_ISGID)

    if is_symlink:
        def restore_mode():
            fs.chmod_symlink(filename, mode)

        _set_something(filename, 'symlink chmod', restore_mode)
    else:
        def restore_mode():
            fs.chmod_not_symlink(filename, mode)

        _set_something(filename, 'chmod', restore_mode)

    if metadata.xattr:  # pragma: no cover
        # Without root, only attributes in the 'user.' namespace are set.
        user_only = getuid() != 0

        def restore_xattrs():
            set_xattrs_from_blob(fs, filename, metadata.xattr, user_only)

        _set_something(filename, 'xattrs', restore_xattrs)

    def restore_timestamps():
        fs.lutimes(
            filename, metadata.st_atime_sec, metadata.st_atime_nsec,
            metadata.st_mtime_sec, metadata.st_mtime_nsec)

    _set_something(filename, 'timestamps', restore_timestamps)