From 25936f0ccea8bfa7bfc6226f47ea0ef651a8ef7e Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Sat, 12 Feb 2011 22:13:18 +0000
Subject: Make hardlinks only when uid, gid, mode match.

---
 dupfiles   | 20 +++++++++++++++-----
 dupfiles.1 |  3 +++
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/dupfiles b/dupfiles
index b6f3df1..c2c7dc3 100755
--- a/dupfiles
+++ b/dupfiles
@@ -217,12 +217,22 @@ class Dupfiles(cliapp.Application):
         else:
             self.report(duplicates)
 
-    def make_hardlinks(self, duplicates):
-        canonical = duplicates.pop()
-        for pathname in duplicates:
-            os.remove(pathname)
-            os.link(canonical, pathname)
+    def get_meta(self, name):
+        st = os.lstat(name)
+        return st.st_uid, st.st_gid, st.st_mode
 
+    def make_hardlinks(self, duplicates):
+        dups = [(name, self.get_meta(name)) for name in duplicates]
+
+        while dups:
+            canonical, meta = dups[0]
+            dups = dups[1:]
+            same = [x for x, y in dups if y == meta]
+            dups = [(x,y) for x, y in dups if y != meta]
+            for pathname in same:
+                os.remove(pathname)
+                os.link(canonical, pathname)
+
     def remove_all_but_one(self, duplicates):
         keep = duplicates.pop()
         for pathname in duplicates:
diff --git a/dupfiles.1 b/dupfiles.1
index ee66d9b..ef0eaa8 100644
--- a/dupfiles.1
+++ b/dupfiles.1
@@ -36,6 +36,9 @@ Note that after hardlinking, the files are the same file: if you
 modify one, both names show the modified file.
 Thus, if you the files should not be the same file, but just copies of
 each other, do not hardlink them together.
+.PP
+Files must have same owner, same group, and same permissions bits
+to be hardlinked together.
 .SH OPTIONS
 .TP
 .BR --progress
-- 
cgit v1.2.1