From 58398ff7e4b47ec39a9106e193c2ea54eda4f723 Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Fri, 26 Nov 2010 18:29:48 +0200
Subject: Optionally run program under profiling.

---
 dupfiles | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dupfiles b/dupfiles
index b1de7fc..1956c1d 100755
--- a/dupfiles
+++ b/dupfiles
@@ -228,4 +228,9 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
+    profname = os.environ.get('DUPFILES_PROFILE')
+    if profname:
+        import cProfile
+        cProfile.run('main()', profname)
+    else:
+        main()
-- 
cgit v1.2.1


From 279879c20a82614f3c323be77e1d4af601bdb70f Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Sun, 28 Nov 2010 00:14:37 +0200
Subject: Use os.read for speed.

---
 dupfiles | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/dupfiles b/dupfiles
index 1956c1d..1c2550d 100755
--- a/dupfiles
+++ b/dupfiles
@@ -58,30 +58,31 @@ class File(object):
 
     def read(self, num_bytes):
         if self.pathname in filepool:
-            f = filepool[self.pathname]
+            fd = filepool[self.pathname]
             filestats.hit_count += 1
         else:
             try:
-                f = file(self.pathname)
-            except IOError, e:
+                fd = os.open(self.pathname, os.O_RDONLY)
+            except OSError, e:
                 if e.errno != errno.EMFILE:
                     raise
                 victim = random.choice(filepool.keys())
-                filepool[victim].close()
+                os.close(filepool[victim])
                 del filepool[victim]
                 filestats.close_count += 1
-                f = file(self.pathname)
-            f.seek(self.offset)
-            filepool[self.pathname] = f
+                fd = os.open(self.pathname, os.O_RDONLY)
+            os.lseek(fd, self.offset, os.SEEK_SET)
+            filepool[self.pathname] = fd
             filestats.open_count += 1
-        data = f.read(num_bytes)
+        data = os.read(fd, num_bytes)
         self.offset += len(data)
         return data
 
     def close(self):
         if self.pathname in filepool:
-            filepool[self.pathname].close()
+            os.close(filepool[self.pathname])
             del filepool[self.pathname]
+            filestats.close_count += 1
 
 
 class DuplicateFileFinder(object):
@@ -220,6 +221,7 @@ def main():
 
     for dirname in sorted(args):
         dupfinder.collect(dirname)
+
    for duplicates in dupfinder.duplicates():
         if opts.make_hardlinks:
             make_hardlinks(duplicates)
-- 
cgit v1.2.1


From ee06c931432ed23019e34baf66627ae3269ed998 Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Sun, 28 Nov 2010 10:16:20 +0200
Subject: Read only 4 KiB, for speed.

---
 dupfiles | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dupfiles b/dupfiles
index 1c2550d..7c8af79 100755
--- a/dupfiles
+++ b/dupfiles
@@ -185,7 +185,7 @@ class DuplicateFileFinder(object):
 
         done = []
         not_done = []
-        chunk_size = 64 * 1024
+        chunk_size = 4 * 1024
         for pathname, f in group:
             data = f.read(chunk_size)
             if not data:
-- 
cgit v1.2.1
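
The first patch makes profiling opt-in through the DUPFILES_PROFILE environment variable. The following standalone sketch shows the same pattern outside dupfiles; the body of main() here is only a placeholder workload, while the DUPFILES_PROFILE/cProfile wiring mirrors the hunk above.

    # Minimal sketch of the environment-variable-gated profiling hook from the
    # first patch. main() is a stand-in; only the profiling wiring comes from
    # the patch itself.
    import cProfile
    import os


    def main():
        # Placeholder workload so the profile has something to record.
        print(sum(i * i for i in range(100000)))


    if __name__ == '__main__':
        profname = os.environ.get('DUPFILES_PROFILE')
        if profname:
            # Write profiling data to the named file; examine it later with
            # the pstats module.
            cProfile.run('main()', profname)
        else:
            main()

Running with DUPFILES_PROFILE=dupfiles.prof set writes the profile data to dupfiles.prof, which can then be browsed with python -m pstats dupfiles.prof.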
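
The second patch replaces buffered file objects with raw file descriptors. Here is a small self-contained illustration of that os.open/os.lseek/os.read pattern, assuming an arbitrary example file and the 4 KiB chunk size the third patch settles on; read_in_chunks is not part of dupfiles.

    # Illustrative only: read_in_chunks and the example pathname are not part
    # of dupfiles; the os-level calls are the ones the patch switches to.
    import os

    CHUNK_SIZE = 4 * 1024  # 4 KiB, as in the third patch


    def read_in_chunks(pathname, offset=0):
        fd = os.open(pathname, os.O_RDONLY)    # unbuffered file descriptor
        try:
            os.lseek(fd, offset, os.SEEK_SET)  # resume from a saved offset
            while True:
                data = os.read(fd, CHUNK_SIZE)
                if not data:                   # os.read returns b'' at EOF
                    break
                yield data
        finally:
            os.close(fd)


    if __name__ == '__main__':
        print(sum(len(chunk) for chunk in read_in_chunks('/etc/hostname')))

Unlike File.read in the patch, this sketch closes the descriptor itself rather than keeping it in a pool keyed by pathname; the pool plus the EMFILE handler is what lets dupfiles work with more files than the process's open-file limit allows at once.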