summaryrefslogtreecommitdiff
path: root/musictomp3
blob: 08e59c8f7cbc9163207bf79532938343f9d6fb5e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
#!/usr/bin/python


'''Make music available as MP3 files.

Given a directory tree of music, convert files in FLAC format to
MP3. The MP3 files will be put in a parallel tree. Existing MP3
files will be copied or hard-linked over as well.

The use case for this is that we collect all our master music files
in a central location, /xxxx/content/music. The music is in either FLAC
format (when ripped from CDs, sometimes when bought as downloads), or
as MP3 format. Many portable devices can only handle MP3, or at least 
there's no point in storing huge FLAC files on them. Thus converting
everything to MP3 seems like a reasonable thing to do.

This script does that.

Usage: musictomp3 /path/to/master /path/to/output

A FLAC file (*.flac) will be converted to MP3. An existing MP3 file
will be hard-linked (if possible) or copied to the output directory.
The relative paths beneath the master directory will be kept in the
output directory.

If an output file already exists, conversion or copying of the input
file will be skipped. Files in the output directory that have no
corresponding input file are deleted. It is thus possible to run this 
script from a cron job to make sure the output directory is synced
with the master directory.

'''


import logging
import multiprocessing
import optparse
import os
import Queue
import shutil
import subprocess
import sys
import tempfile


def mktemp(filename):
    '''Create an empty temporary file beside filename; return its path.

    The file is made with tempfile.mkstemp in the directory part of
    filename, or in the current directory when filename has no
    directory part. The descriptor returned by mkstemp is closed right
    away, so the caller only gets the name of an existing, empty file.

    '''
    target_dir = os.path.dirname(filename)
    if not target_dir:
        target_dir = '.'
    handle, temp_path = tempfile.mkstemp(dir=target_dir)
    os.close(handle)
    return temp_path


def mp3_filename(filename):
    '''Return filename with its extension (if any) replaced by '.mp3'.

    Only the last extension is replaced; a name without an extension
    simply gets '.mp3' appended.

    '''
    return os.path.splitext(filename)[0] + '.mp3'


def make_dirs_for_file(filename):
    '''Make sure the directory that is to contain filename exists.

    Missing parent directories are created as needed. It is not an
    error if the directory already exists (another worker process may
    have created it in the meantime). A filename without a directory
    part needs no directory, so nothing is done for it.

    Raises OSError if the directory cannot be created, for example
    because of missing permissions or because a regular file is in
    the way.

    '''
    dirname = os.path.dirname(filename)
    if not dirname:
        return
    try:
        os.makedirs(dirname)
    except OSError:
        # Only "it is already there" is harmless; anything else would
        # just resurface later as a more confusing error.
        if not os.path.isdir(dirname):
            raise


def convert_to_mp3(input_root, output_root, filename):
    '''Convert one input file to MP3 in the output tree.

    filename is relative to input_root. The result is written to the
    same relative path beneath output_root, with the extension
    replaced by '.mp3'. Missing output directories are created.

    The conversion is done by running gst-launch with a
    filesrc ! decodebin ! lame ! filesink pipeline. The pipeline
    writes to a temporary file in the output directory, which is
    renamed to its final name only after gst-launch succeeds, so a
    half-written MP3 never appears under the final name.

    Raises Exception if gst-launch exits with a non-zero status. The
    temporary file is removed whenever the conversion does not
    complete.

    '''
    input_path = os.path.join(input_root, filename)
    outputname = mp3_filename(filename)
    output_path = os.path.join(output_root, outputname)

    logging.info('Converting: %s -> %s' % (input_path, output_path))

    make_dirs_for_file(output_path)

    tempname = mktemp(output_path)
    finished = False
    try:
        argv = ['gst-launch',
                '-q',
                'filesrc',
                'location=%s' % input_path,
                '!',
                'decodebin',
                '!',
                'lame',
                '!',
                'filesink',
                'location=%s' % tempname]
        logging.debug('argv: %s' % argv)
        p = subprocess.Popen(argv, stdout=subprocess.PIPE)
        p.communicate()
        if p.returncode != 0:
            raise Exception('gst-launch failed')

        os.rename(tempname, output_path)
        finished = True
    finally:
        # Do not leave the temporary file behind in the output tree:
        # a later run would see it as an output file with no
        # corresponding input.
        if not finished and os.path.exists(tempname):
            os.remove(tempname)


def hardlink_or_copy(input_root, output_root, filename):
    '''Make an input file available unchanged in the output tree.

    filename is relative to input_root; the file appears at the same
    relative path beneath output_root. Missing output directories are
    created.

    A hard link is tried first. If os.link fails with OSError (for
    example because the trees are on different filesystems), the file
    is copied to a temporary file in the output directory and then
    renamed to its final name, so a partial copy never appears under
    the final name. The temporary file is removed if the copy does
    not complete.

    '''
    input_path = os.path.join(input_root, filename)
    output_path = os.path.join(output_root, filename)

    logging.info('Hardlink or copy: %s -> %s' % (input_path, output_path))

    make_dirs_for_file(output_path)

    try:
        os.link(input_path, output_path)
        return
    except OSError as e:
        logging.debug('Hard link failed (%s), copying instead' % e)

    tempname = mktemp(output_path)
    copied = False
    try:
        shutil.copy(input_path, tempname)
        os.rename(tempname, output_path)
        copied = True
    finally:
        if not copied and os.path.exists(tempname):
            os.remove(tempname)

    
def remove(root, filename):
    '''Delete the file named filename beneath root, logging the removal.'''
    victim = os.path.join(root, filename)
    logging.info('Removing %s', victim)
    os.remove(victim)


def do_job(job_queue, result_queue):
    '''Worker loop: run jobs from job_queue until a None is received.

    Each job is a (func, args) tuple. func(*args) is called and its
    return value is put on result_queue, so exactly one result is
    posted for every job taken.

    If the job raises an exception, the failure is logged and the
    exception object is posted as the result instead. Without this the
    worker would die without posting anything, and whoever is counting
    results on result_queue would wait forever.

    '''
    for func, args in iter(job_queue.get, None):
        try:
            result = func(*args)
        except Exception as e:
            logging.exception('Job %s%r failed' %
                              (getattr(func, '__name__', func), args))
            result = e
        result_queue.put(result)


class ProgressReporter(object):

    '''Show a single, continuously rewritten progress line on stdout.

    When stdout is a terminal, each new status text replaces the
    previous one by backspacing over it. When stdout is not a
    terminal, status updates are discarded. Messages given to notify
    are always written.

    '''

    def __init__(self):
        self.written = ''       # status text currently on the screen
        self.max_width = 79     # status texts are cut to this length
        if not sys.stdout.isatty():
            self.output = self.dummy_output
        else:
            self.output = self.real_output

    def dummy_output(self, text):
        '''Discard text; used when stdout is not a terminal.'''

    def real_output(self, text):
        '''Replace the status text on the terminal with text.'''
        # Backspace, blank, backspace for every character shown so far.
        eraser = '\b \b' * len(self.written)
        sys.stdout.write(eraser)
        shown = text[:self.max_width]
        sys.stdout.write(shown)
        sys.stdout.flush()
        self.written = shown

    def update(self, done, total, msg):
        '''Show how many percent done is of total, followed by msg.

        A total of zero or less is shown as 0%.

        '''
        percent = 100.0 * float(done) / float(total) if total > 0 else 0
        status = '%.0f%% done %s' % (percent, msg)
        self.output(status)

    def notify(self, msg):
        '''Clear the status text and write msg on a line of its own.'''
        self.output('')
        sys.stdout.write('%s\n' % msg)

    def finish(self):
        '''Clear the status text.'''
        self.output('')


class MusicToMp3(object):

    '''The musictomp3 command line application.

    Compares an input (master) directory tree with an output tree and
    brings the output tree up to date: FLAC files are converted to
    MP3, MP3 and M4A files are hard-linked or copied, other input
    files are ignored, and output files without a corresponding input
    file are removed. The work is spread over one worker process per
    CPU.

    '''

    def create_option_parser(self):
        '''Return the optparse parser for the command line.'''
        parser = optparse.OptionParser()

        parser.add_option('--no-act', action='store_true',
                          help='do not actually convert or modify anything')

        return parser

    def parse_command_line(self):
        '''Parse sys.argv and return (options, [input_dir, output_dir]).

        Raises Exception unless exactly two arguments are given and
        both name existing directories.

        '''
        p = self.create_option_parser()
        opts, args = p.parse_args()
        if len(args) != 2:
            raise Exception('Must give exactly two directories as arguments.')

        if not os.path.isdir(args[0]):
            raise Exception('Input directory %s is not a directory.' % args[0])
        if not os.path.isdir(args[1]):
            raise Exception('Output directory %s is not a directory.' %
                            args[1])

        return opts, args

    def find_files(self, root):
        '''Return the set of all files beneath root, relative to root.

        The result does not depend on how root is spelled; in
        particular, a trailing directory separator on root does not
        matter.

        '''
        pathnames = set()
        for dirname, subdirs, filenames in os.walk(root):
            # Slicing a fixed number of characters off the front goes
            # wrong when root ends in a separator, so compute the
            # relative directory properly, once per directory.
            reldir = os.path.relpath(dirname, root)
            if reldir == os.curdir:
                reldir = ''
            for basename in filenames:
                pathnames.add(os.path.join(reldir, basename))
        return pathnames

    def needs_converting(self, filename):
        '''Is filename a FLAC file, by its (case-insensitive) suffix?'''
        return filename.lower().endswith('.flac')

    def needs_copying(self, filename):
        '''Is filename an MP3 or M4A file, by its extension?'''
        dummy, ext = os.path.splitext(filename)
        return ext.lower() in ['.mp3', '.m4a']

    def _plan_jobs(self, input_root, output_root, inputs, outputs):
        '''Return (jobs, ignored) needed to sync output with input.

        inputs and outputs are sets of relative pathnames, as returned
        by find_files. jobs is a list of (function, arguments) tuples;
        ignored is the number of input files that are neither
        converted nor copied.

        '''
        jobs = []
        ignored = 0
        stale = set(outputs)    # outputs not yet matched to any input

        for filename in inputs:
            if self.needs_converting(filename):
                output = mp3_filename(filename)
                job = (convert_to_mp3, (input_root, output_root, filename))
            elif self.needs_copying(filename):
                output = filename
                job = (hardlink_or_copy, (input_root, output_root, filename))
            else:
                ignored += 1
                continue
            if not os.path.exists(os.path.join(output_root, output)):
                jobs.append(job)
            stale.discard(output)

        for filename in stale:
            jobs.append((remove, (output_root, filename)))

        return jobs, ignored

    def _report_jobs(self, jobs):
        '''Tell the user what jobs would do, without running them.'''
        for func, func_args in jobs:
            msg = 'Would run %s on %s' % (func.__name__, func_args[-1])
            logging.info(msg)
            self.progress.notify(msg)

    def _run_jobs(self, jobs):
        '''Run jobs in worker processes, reporting progress.'''
        total = len(jobs)
        job_queue = multiprocessing.Queue()
        result_queue = multiprocessing.Queue()
        for job in jobs:
            job_queue.put(job)

        # No point in starting more workers than there are jobs.
        workers = []
        for i in range(min(multiprocessing.cpu_count(), total)):
            p = multiprocessing.Process(target=do_job,
                                        args=(job_queue, result_queue))
            p.start()
            workers.append(p)

        for done in range(total):
            result_queue.get()
            self.progress.update(done + 1, total, 'Processing')

        # One None per worker tells each of them to stop.
        for p in workers:
            job_queue.put(None)
        for p in workers:
            p.join()

    def run(self):
        '''Run the application: parse arguments, plan and do the work.

        With --no-act, the planned operations are listed but nothing
        in the output directory is created, changed, or removed.

        '''
        logging.basicConfig(filename='musictomp3.log', level=logging.DEBUG)
        opts, args = self.parse_command_line()
        input_root, output_root = args

        self.progress = ProgressReporter()
        self.progress.update(0, 0, 'Finding files')

        inputs = self.find_files(input_root)
        outputs = self.find_files(output_root)

        jobs, ignored = self._plan_jobs(input_root, output_root,
                                        inputs, outputs)
        logging.info('%d jobs to do, %d input files ignored' %
                     (len(jobs), ignored))

        if opts.no_act:
            self._report_jobs(jobs)
        else:
            self._run_jobs(jobs)

        self.progress.finish()


if __name__ == '__main__':
    # Executed as a script: build the application object and run it.
    application = MusicToMp3()
    application.run()