trunk/dimbola/copier.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236

# Copyright (C) 2009  Lars Wirzenius <liw@liw.fi>
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import optparse
import os
import pwd
import re
import shutil

import gnomevfs
import pyexiv2

import dimbola


# The default template for renaming files. It is filled in with the "%"
# operator from an ImageDict (see Copier.output_name), so every
# "%(name)s" placeholder must be a key that ImageDict provides. It is
# used as the default value of the --template command line option.
TEMPLATE = "%(date)s/%(username)s-%(date)s-%(counter)s%(suffix)s"


class ImageDict:

    """Collect the values used to fill in an output filename template.

    An output filename template is an ordinary Python format string
    that is expanded with the % operator, e.g. "%(year)s-%(counter)s".
    The substitution data has to come from a single mapping, but the
    values originate from several places: the input pathname, the
    current user, a running counter, and the image meta data (EXIF
    headers). This class gathers all of them behind dictionary style
    lookup (``obj[key]`` and ``key in obj``).

    The constructor takes:

    * pathname -- pathname of the input image
    * image -- a pyexiv2.Image instance whose meta data has already
      been read; it is only accessed via image[key] and
      image.exifKeys(), so no file I/O on the image happens here
    * counter -- a number the caller increments for every image

    The following keys are always set: username, suffix,
    cameracounter, counter, date (as YYYY-MM-DD), year, month, day,
    hour, min, sec. The date related values are taken from the
    "Exif.Image.DateTime" header. After that every EXIF header is
    added under its full key, and headers whose key starts with
    "Exif.Image." are additionally added under the key with that
    prefix removed.

    """

    def __init__(self, pathname, image, counter):
        stamp = image["Exif.Image.DateTime"]

        # Fill in the values in the same order as the keys are listed
        # in the class docstring.
        values = {}
        values["username"] = self.get_username()
        values["suffix"] = self.get_input_suffix(pathname)
        values["cameracounter"] = self.get_camera_counter(pathname)
        values["counter"] = counter
        values["date"] = "%04d-%02d-%02d" % (stamp.year, stamp.month,
                                             stamp.day)
        values["year"] = stamp.year
        values["month"] = stamp.month
        values["day"] = stamp.day
        values["hour"] = stamp.hour
        values["min"] = stamp.minute
        values["sec"] = stamp.second
        self.dict = values

        # EXIF headers go in last, so they win over the values above
        # if a key happens to be the same.
        short_prefix = "Exif.Image."
        strip = len(short_prefix)
        for exif_key in image.exifKeys():
            self.dict[exif_key] = image[exif_key]
            if not exif_key.startswith(short_prefix):
                continue
            self.dict[exif_key[strip:]] = image[exif_key]

    def __getitem__(self, key):
        """Return the value stored for key (KeyError if there is none)."""
        return self.dict[key]

    def __contains__(self, key):
        """Tell whether a value is stored for key."""
        return key in self.dict

    def get_input_suffix(self, pathname):
        """Return the filename suffix (with the dot) of pathname, or ""."""
        return os.path.splitext(pathname)[1]

    def get_camera_counter(self, pathname):
        """Return the first run of digits in the basename, or "".

        The directory part and the filename suffix of pathname are
        ignored when looking for digits.

        """
        filename = os.path.basename(pathname)
        stem = os.path.splitext(filename)[0]
        found = re.search(r"\d+", stem)
        if found is None:
            return ""
        return found.group()

    def get_username(self): # pragma: no cover
        """Return the login name for the user id of this process."""
        entry = pwd.getpwuid(os.getuid())
        return entry.pw_name


class Copier:

    """Copy digital photographs from memory card into desired location.
    
    We scan the input location recursively for files that have one
    of the desired MIME types. Each image that we find, we copy to the
    desired output location. Optionally, we delete the original.
    The output filenames may be identical to the basenames of the 
    originals, or they may be constructed based on a template that
    gets filled in with data from the input pathname, EXIF headers,
    and other places.
    
    An existing output file is never overwritten: when renaming, the
    counter is incremented until an unused name is found, and if the
    name cannot be varied that way the input file is skipped with an
    error message.
    
    """

    # MIME types of the files that are treated as images.
    known_image_types = set([
            "image/x-canon-cr2",
            "image/x-nikon-nef",
            "image/jpeg",
        ])

    def __init__(self):
        # Incremented for every output name that copy_file tries; it is
        # available to filename templates as %(counter)s.
        self.counter = 0
        # Progress bookkeeping. run() sets these again before copying;
        # they are initialised here so that copy_file() with the verbose
        # option also works when it is called without run().
        self.input_files = []
        self.total_bytes = 0
        self.copied_files = 0
        self.copied_bytes = 0
        self.this_file_bytes = 0
        
    def is_image_file(self, pathname): # pragma: no cover
        """Determine whether a given file is a (supported) image file.
        
        The decision is based on the MIME type that gnomevfs reports
        for the file, compared against known_image_types.
        
        """
        uri = gnomevfs.get_uri_from_local_path(os.path.abspath(pathname))
        mime_type = gnomevfs.get_mime_type(uri)
        return mime_type in self.known_image_types

    def find_input_files(self, root): # pragma: no cover
        """Return sorted list of the image files in a directory tree."""
        all_names = []
        for dummy, dummy2, names in dimbola.filterabswalk(self.is_image_file, 
                                                          root):
            all_names += names
        all_names.sort()
        return all_names
                
    def read_exif(self, pathname): # pragma: no cover
        """Read the EXIF data from a given file.
        
        Return a pyexiv2.Image whose meta data has been read.
        
        """
        image = pyexiv2.Image(pathname)
        image.readMetadata()
        return image
                
    def output_name(self, input_name, options):
        """Return the output name for a given input file.
        
        With options.rename set, the name below options.output is made
        by filling in options.template from an ImageDict built from
        the file's EXIF data and the current value of self.counter.
        Otherwise the basename of the input file is used as is.
        
        """
        
        if options.rename:
            image = self.read_exif(input_name)
            image_dict = ImageDict(input_name, image, self.counter)
            basename = options.template % image_dict
        else:
            basename = os.path.basename(input_name)

        return os.path.join(options.output, basename)

    def create_option_parser(self): # pragma: no cover
        """Create an OptionParser instance for this app."""
        parser = optparse.OptionParser()
        parser.add_option("-i", "--input", metavar="DIR", default=".",
                          help="Scan DIR for files to import. "
                               "(Default: %default)")
        parser.add_option("-o", "--output", metavar="DIR", default=".",
                          help="Write output to DIR. (Default: %default)")
        parser.add_option("-t", "--template", metavar="TEMPLATE",
                          default=TEMPLATE,
                          help="Use TEMPLATE when renaming files. "
                               "(Default: %default)")
        parser.add_option("-r", "--rename", action="store_true",
                          help="Rename files when copying.")
        parser.add_option("--move", action="store_true",
                          help="Move files: delete originals after they "
                               "have been copied.")
        parser.add_option("--verbose", action="store_true",
                          help="provide some progress output")
        return parser

    def parse_command_line(self): # pragma: no cover
        """Parse the command line for this app.
        
        Return the parsed options. Raise Exception if there are any
        non-option arguments on the command line.
        
        """
        parser = self.create_option_parser()
        options, args = parser.parse_args()
        if args:
            raise Exception("No non-option command line arguments allowed.")
        return options

    def copy_file(self, input_name, options): # pragma: no cover
        """Copy (or move) an input file according to options.
        
        The output name is computed with output_name(). If a file with
        that name exists already, the counter is incremented and a new
        name is computed, until an unused one is found.
        
        The file is skipped, with an error message on stdout, if
        computing the name fails with KeyError, AttributeError, or
        IOError, or if the name is already taken and does not change
        when the counter does.
        
        """
        previous_name = None
        while True:
            self.counter += 1
            try:
                output_name = self.output_name(input_name, options)
            except (KeyError, AttributeError, IOError):
                print('ERROR: exif problem with %s' % input_name)
                return
            if not os.path.exists(output_name):
                break
            if output_name == previous_name:
                # The name did not change although the counter did
                # (no renaming, or a template without the counter), so
                # trying again would loop forever. Skip the file rather
                # than overwrite an existing one.
                print('ERROR: output file %s already exists, skipping %s' %
                      (output_name, input_name))
                return
            previous_name = output_name
        output_dir = os.path.dirname(output_name) or "."
        # Sub-directories coming from the template are created on demand,
        # but only when the output directory itself exists already.
        # NOTE(review): if options.output is missing, nothing is created
        # here and the copy below presumably fails -- confirm that this
        # is intended.
        if os.path.exists(options.output) and not os.path.exists(output_dir):
            os.makedirs(output_dir)
        if options.verbose:
            i = self.copied_files + 1
            n = len(self.input_files)
            print("%d/%d: %s -> %s" % (i, n, input_name, output_name))
        if options.move:
            # shutil.move instead of os.rename: a plain rename fails when
            # input and output are on different filesystems, which is
            # the normal case when importing from a memory card.
            shutil.move(input_name, output_name)
        else:
            # NOTE(review): the third argument is presumably an optional
            # progress callback -- confirm against dimbola.safe_copy.
            dimbola.safe_copy(input_name, output_name, None)

    def find_total_bytes(self, pathnames): # pragma: no cover
        """Find the total number of bytes in the given files."""
        return sum(os.stat(x).st_size for x in pathnames)

    def run(self): # pragma: no cover
        """Main program of the application.
        
        Parse the command line, find the input files, and process them
        one by one with copy_file().
        
        """
        options = self.parse_command_line()
        self.input_files = self.find_input_files(options.input)
        self.total_bytes = self.find_total_bytes(self.input_files)
        self.copied_files = 0
        self.copied_bytes = 0
        for input_name in self.input_files:
            # Nothing in this class changes this_file_bytes after it is
            # reset here, so copied_bytes currently stays at zero.
            self.this_file_bytes = 0
            self.copy_file(input_name, options)
            # Counts every processed file, including skipped ones; it is
            # used as the position shown in the verbose progress output.
            self.copied_files += 1
            self.copied_bytes += self.this_file_bytes