diff options
author | Lars Wirzenius <liw@liw.fi> | 2010-04-18 08:06:17 +1200 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2010-04-18 08:06:17 +1200 |
commit | 5446c802c3ca0c751b3c49488f61fa315d9700a5 (patch) | |
tree | a442542a369e6ce3b4553831f29cc0fb9b3ff848 /dupfiles | |
parent | bedcaf5add86438c1fcd715070053e1e5370fdfa (diff) | |
download | dupfiles-5446c802c3ca0c751b3c49488f61fa315d9700a5.tar.gz |
Add option to replace names with hardlinks to duplicate content.
Add Makefile to run 'make check'.
Diffstat (limited to 'dupfiles')
-rwxr-xr-x | dupfiles | 27 |
1 files changed, 24 insertions, 3 deletions
@@ -18,6 +18,7 @@
 
 
 import hashlib
+import optparse
 import os
 import sys
 
@@ -56,13 +57,33 @@ class DuplicateFileFinder(object):
         return hashlib.md5(file(pathname, 'rb').read()).digest()
 
 
+def make_hardlinks(duplicates):
+    canonical = duplicates.pop()
+    for pathname in duplicates:
+        os.remove(pathname)
+        os.link(canonical, pathname)
+
+
+def report(duplicates):
+    sys.stdout.write('\n'.join(duplicates))
+    sys.stdout.write('\n')
+
+
 def main():
+    parser = optparse.OptionParser()
+    parser.add_option('--make-hardlinks', action='store_true',
+                      help='hardlink duplicate files to each other')
+
+    opts, args = parser.parse_args()
+
     dupfinder = DuplicateFileFinder()
-    for dirname in sys.argv[1:]:
+    for dirname in args:
         dupfinder.collect(dirname)
     for duplicates in dupfinder.duplicates():
-        print '\n'.join(duplicates)
-        print
+        if opts.make_hardlinks:
+            make_hardlinks(duplicates)
+        else:
+            report(duplicates)
 
 
 if __name__ == '__main__':