summary | refs | log | tree | commit | diff
path: root/dupfiles
diff options
context:
space:
mode:
author: Lars Wirzenius <liw@liw.fi>, 2010-04-18 08:06:17 +1200
committer: Lars Wirzenius <liw@liw.fi>, 2010-04-18 08:06:17 +1200
commit: 5446c802c3ca0c751b3c49488f61fa315d9700a5 (patch)
tree: a442542a369e6ce3b4553831f29cc0fb9b3ff848 /dupfiles
parent: bedcaf5add86438c1fcd715070053e1e5370fdfa (diff)
download: dupfiles-5446c802c3ca0c751b3c49488f61fa315d9700a5.tar.gz
Add option to replace names with hardlinks to duplicate content.
Add Makefile to run 'make check'.
Diffstat (limited to 'dupfiles')
-rwxr-xr-x dupfiles 27
1 files changed, 24 insertions, 3 deletions
diff --git a/dupfiles b/dupfiles
index 6c0d2b8..950c170 100755
--- a/dupfiles
+++ b/dupfiles
@@ -18,6 +18,7 @@
import hashlib
+import optparse
import os
import sys
@@ -56,13 +57,33 @@ class DuplicateFileFinder(object):
return hashlib.md5(file(pathname, 'rb').read()).digest()
+def make_hardlinks(duplicates):
+    """Replace all but one of the given pathnames with hardlinks.
+
+    ``duplicates`` is a collection of pathnames that the caller treats
+    as duplicates of each other. One of them is picked as the canonical
+    copy and every other pathname is turned into a hardlink to it. The
+    caller's collection is left unmodified; an empty collection is a
+    no-op.
+
+    Each new link is first created under a temporary name and then
+    renamed over the old file. If os.link fails (cross-device link,
+    permission problem, link count limit) the old file is still in
+    place, instead of having been removed beforehand and lost.
+
+    Raises OSError if a link cannot be created or moved into place.
+    """
+    import tempfile
+
+    pathnames = list(duplicates)
+    if not pathnames:
+        return
+    canonical = pathnames.pop()
+    for pathname in pathnames:
+        if os.path.samefile(canonical, pathname):
+            # Already a link to the canonical file. rename() between two
+            # names of the same file does nothing and would leave the
+            # temporary link behind.
+            continue
+        # A private directory next to the target keeps the temporary
+        # name on the same filesystem and safe from name collisions.
+        tempdir = tempfile.mkdtemp(dir=os.path.dirname(pathname) or '.')
+        try:
+            temp = os.path.join(tempdir, 'link')
+            os.link(canonical, temp)
+            try:
+                os.rename(temp, pathname)
+            except OSError:
+                os.remove(temp)
+                raise
+        finally:
+            os.rmdir(tempdir)
+
+
+def report(duplicates):
+    """Write the pathnames in duplicates to stdout.
+
+    The names are separated by newlines and the output ends with a
+    newline.
+    """
+    listing = '\n'.join(duplicates) + '\n'
+    sys.stdout.write(listing)
+
+
def main():
+ # Command-line entry point. Parses the options, passes every
+ # positional argument to dupfinder.collect(), and then handles each
+ # group yielded by dupfinder.duplicates().
+ parser = optparse.OptionParser()
+ parser.add_option('--make-hardlinks', action='store_true',
+ help='hardlink duplicate files to each other')
+
+ opts, args = parser.parse_args()
+
dupfinder = DuplicateFileFinder()
- for dirname in sys.argv[1:]:
+ # Use the arguments left over after option parsing, so that option
+ # flags are not treated as names to scan.
+ for dirname in args:
dupfinder.collect(dirname)
for duplicates in dupfinder.duplicates():
- print '\n'.join(duplicates)
- print
+ # With --make-hardlinks the group is hardlinked together; otherwise
+ # its pathnames are written to stdout.
+ if opts.make_hardlinks:
+ make_hardlinks(duplicates)
+ else:
+ report(duplicates)
if __name__ == '__main__':