#!/usr/bin/python
'''Make music available as MP3 files.

Given a directory tree of music, convert files in FLAC format to MP3.
The MP3 files will be put in a parallel tree. Existing MP3 files will
be copied or hard-linked over as well.

The use case for this is that we collect all our master music files
in a central location, /xxxx/content/music. The music is in either
FLAC format (when ripped from CDs, sometimes when bought as downloads),
or in MP3 format. Many portable devices can only handle MP3, or at
least there's no point in storing huge FLAC files on them. Thus
converting everything to MP3 seems like a reasonable thing to do.
This script does that.

Usage:

    musictomp3 [--no-act] /path/to/master /path/to/output

A FLAC file (*.flac, matched case-insensitively) will be converted to
MP3. An existing MP3 or M4A file will be hard-linked (if possible) or
copied to the output directory. The relative paths beneath the master
directory will be kept in the output directory.

If an output file already exists, conversion or copying of the input
file will be skipped. Files in the output directory that have no
corresponding input file are deleted. It is thus possible to run this
script from a cron job to make sure the output directory is synced
with the master directory.

With --no-act the planned operations are only listed; nothing is
converted, copied or deleted.

'''


import errno
import logging
import multiprocessing
import optparse
import os
import shutil
import subprocess
import sys
import tempfile

try:
    import Queue
except ImportError:
    # Python 3 renamed the module; keep the historical name available.
    import queue as Queue


# Extensions (lower case, with the dot) that are transcoded to MP3.
CONVERT_EXTENSIONS = ('.flac',)

# Extensions (lower case, with the dot) that are hard-linked or copied as is.
COPY_EXTENSIONS = ('.mp3', '.m4a')


def mktemp(filename):
    '''Create an empty temporary file next to filename and return its name.

    The file lives in the same directory as filename (or the current
    directory if filename has no directory part), so it can later be
    renamed over filename.

    '''

    fd, tempname = tempfile.mkstemp(dir=os.path.dirname(filename) or '.')
    os.close(fd)
    return tempname


def mp3_filename(filename):
    '''Return filename with its extension replaced by '.mp3'.'''

    prefix, _ext = os.path.splitext(filename)
    return prefix + '.mp3'


def make_dirs_for_file(filename):
    '''Make sure the directory that will contain filename exists.

    An already existing directory is fine. Any other failure (for
    example a permission problem) is raised as OSError instead of being
    silently ignored.

    '''

    dirname = os.path.dirname(filename)
    if not dirname:
        # The file goes into the current directory; nothing to create.
        return
    try:
        os.makedirs(dirname)
    except OSError as e:
        # Another worker may have created the directory concurrently.
        if e.errno != errno.EEXIST or not os.path.isdir(dirname):
            raise


def _discard_file(pathname):
    '''Remove pathname, ignoring failures (used only for cleanup).'''

    try:
        os.remove(pathname)
    except OSError:
        # Best effort: the original error is about to be re-raised by
        # the caller and is the one worth reporting.
        pass


def convert_to_mp3(input_root, output_root, filename):
    '''Convert input_root/filename to an MP3 file below output_root.

    The output keeps the relative path of the input, with the extension
    replaced by '.mp3'. The conversion is written to a temporary file
    that is renamed into place only on success, so a partial file never
    appears under the final name. The temporary file is removed if the
    conversion fails.

    Raises Exception if gst-launch exits with a non-zero status, and
    OSError if gst-launch cannot be started.

    '''

    input_path = os.path.join(input_root, filename)
    output_path = os.path.join(output_root, mp3_filename(filename))
    logging.info('Converting: %s -> %s', input_path, output_path)
    make_dirs_for_file(output_path)
    tempname = mktemp(output_path)
    try:
        argv = ['gst-launch',
                '-q',
                'filesrc',
                'location=%s' % input_path,
                '!',
                'decodebin',
                '!',
                'lame',
                '!',
                'filesink',
                'location=%s' % tempname]
        logging.debug('argv: %s', argv)
        p = subprocess.Popen(argv, stdout=subprocess.PIPE)
        p.communicate()
        if p.returncode != 0:
            raise Exception('gst-launch failed')
        os.rename(tempname, output_path)
    except Exception:
        _discard_file(tempname)
        raise


def hardlink_or_copy(input_root, output_root, filename):
    '''Make input_root/filename available as output_root/filename.

    A hard link is tried first. If linking fails with OSError, the file
    is copied to a temporary file which is then renamed into place. The
    temporary file is removed if the copy fails.

    '''

    input_path = os.path.join(input_root, filename)
    output_path = os.path.join(output_root, filename)
    logging.info('Hardlink or copy: %s -> %s', input_path, output_path)
    make_dirs_for_file(output_path)
    try:
        os.link(input_path, output_path)
    except OSError:
        tempname = mktemp(output_path)
        try:
            shutil.copy(input_path, tempname)
            os.rename(tempname, output_path)
        except Exception:
            _discard_file(tempname)
            raise


def remove(root, filename):
    '''Delete the file root/filename.'''

    pathname = os.path.join(root, filename)
    logging.info('Removing %s', pathname)
    os.remove(pathname)


def do_job(job_queue, result_queue):
    '''Run jobs from job_queue until a None sentinel is read.

    Each job is a (func, args) pair. Exactly one item is put on
    result_queue per job: the return value of func(*args), or, if the
    call raised an exception, a string describing the failure. Posting
    a result even on failure matters because the consumer waits for one
    result per job.

    '''

    for func, args in iter(job_queue.get, None):
        try:
            result = func(*args)
        except Exception as e:
            name = getattr(func, '__name__', repr(func))
            logging.exception('Job failed: %s%r', name, tuple(args))
            result = '%s%r failed: %s' % (name, tuple(args), e)
        result_queue.put(result)


class ProgressReporter(object):

    '''Show a single, self-overwriting progress line on a terminal.

    When stdout is not a terminal the progress line is suppressed;
    messages given to notify() are still written.

    '''

    def __init__(self):
        self.max_width = 79
        # Text currently on the progress line, so it can be erased.
        self.written = ''
        if sys.stdout.isatty():
            self.output = self.real_output
        else:
            self.output = self.dummy_output

    def dummy_output(self, text):
        '''Discard the progress text (stdout is not a terminal).'''

    def real_output(self, text):
        '''Erase the previous progress text and write the new one.'''
        sys.stdout.write('\b \b' * len(self.written))
        text = text[:self.max_width]
        sys.stdout.write(text)
        sys.stdout.flush()
        self.written = text

    def update(self, done, total, msg):
        '''Show "<percent>% done <msg>"; total == 0 is shown as 0%.'''
        if total > 0:
            percent = 100.0 * float(done) / float(total)
        else:
            percent = 0
        self.output('%.0f%% done %s' % (percent, msg))

    def notify(self, msg):
        '''Clear the progress line and print msg on a line of its own.'''
        self.output('')
        sys.stdout.write('%s\n' % msg)

    def finish(self):
        '''Clear the progress line.'''
        self.output('')


class MusicToMp3(object):

    '''Command line application syncing an MP3 tree with a master tree.'''

    def create_option_parser(self):
        '''Return the optparse parser for the command line.'''
        parser = optparse.OptionParser(
            usage='%prog [options] /path/to/master /path/to/output')
        parser.add_option('--no-act', action='store_true',
                          help='do not actually convert or modify anything')
        return parser

    def parse_command_line(self):
        '''Parse sys.argv and return (opts, [input_dir, output_dir]).

        Raises Exception unless exactly two existing directories are
        given.

        '''
        p = self.create_option_parser()
        opts, args = p.parse_args()
        if len(args) != 2:
            raise Exception('Must give exactly two directories as arguments.')
        if not os.path.isdir(args[0]):
            raise Exception('Input directory %s is not a directory.' %
                            args[0])
        if not os.path.isdir(args[1]):
            raise Exception('Output directory %s is not a directory.' %
                            args[1])
        return opts, args

    def find_files(self, root):
        '''Return the set of all files below root, as paths relative to it.

        os.path.relpath is used so that the result is the same whether
        or not root is written with a trailing path separator.

        '''
        pathnames = set()
        for dirname, _subdirs, filenames in os.walk(root):
            for basename in filenames:
                pathname = os.path.join(dirname, basename)
                pathnames.add(os.path.relpath(pathname, root))
        return pathnames

    def needs_converting(self, filename):
        '''Return True if filename must be transcoded to MP3.'''
        _prefix, ext = os.path.splitext(filename)
        return ext.lower() in CONVERT_EXTENSIONS

    def needs_copying(self, filename):
        '''Return True if filename is copied/hard-linked unchanged.'''
        _prefix, ext = os.path.splitext(filename)
        return ext.lower() in COPY_EXTENSIONS

    def _plan_jobs(self, input_root, output_root):
        '''Return the list of (func, args) jobs needed to sync the trees.'''
        inputs = self.find_files(input_root)
        # Starts as every existing output; whatever is left once all
        # inputs have been matched has no source and is deleted.
        stale = self.find_files(output_root)

        jobs = []
        claimed = set()
        # Sorted so the plan is deterministic, including which input
        # wins when two of them map to the same output name.
        for filename in sorted(inputs):
            if self.needs_converting(filename):
                output = mp3_filename(filename)
                func = convert_to_mp3
            elif self.needs_copying(filename):
                output = filename
                func = hardlink_or_copy
            else:
                continue

            if output in claimed:
                logging.warning('Skipping %s: %s is already produced from '
                                'another input', filename, output)
                continue
            claimed.add(output)
            stale.discard(output)

            if not os.path.exists(os.path.join(output_root, output)):
                jobs.append((func, (input_root, output_root, filename)))

        for filename in sorted(stale):
            jobs.append((remove, (output_root, filename)))
        return jobs

    def _run_jobs(self, jobs):
        '''Run jobs in worker processes; return the number that failed.'''
        if not jobs:
            return 0

        job_queue = multiprocessing.Queue()
        result_queue = multiprocessing.Queue()
        for job in jobs:
            job_queue.put(job)

        # One None sentinel per worker, queued after all the jobs, makes
        # each worker exit once the real work has been handed out.
        num_workers = min(multiprocessing.cpu_count(), len(jobs))
        for _ in range(num_workers):
            job_queue.put(None)

        workers = []
        for _ in range(num_workers):
            p = multiprocessing.Process(target=do_job,
                                        args=(job_queue, result_queue))
            p.start()
            workers.append(p)

        failures = 0
        total = len(jobs)
        for done in range(total):
            result = result_queue.get()
            # The job functions return None; do_job posts a string
            # describing the error when a job raised.
            if result is not None:
                failures += 1
                self.progress.notify(str(result))
            self.progress.update(done + 1, total, 'Processing')

        for p in workers:
            p.join()
        return failures

    def run(self):
        '''Sync the output tree with the master tree given in sys.argv.'''
        logging.basicConfig(filename='musictomp3.log', level=logging.DEBUG)

        opts, args = self.parse_command_line()
        input_root, output_root = args

        self.progress = ProgressReporter()
        self.progress.update(0, 0, 'Finding files')
        jobs = self._plan_jobs(input_root, output_root)

        if opts.no_act:
            # Dry run: only report what would be done.
            for func, func_args in jobs:
                self.progress.notify('Would run %s: %s' %
                                     (func.__name__, func_args[-1]))
            self.progress.finish()
            return

        failures = self._run_jobs(jobs)
        self.progress.finish()
        if failures:
            self.progress.notify('%d of %d jobs failed' %
                                 (failures, len(jobs)))


if __name__ == '__main__':
    MusicToMp3().run()