From 279879c20a82614f3c323be77e1d4af601bdb70f Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Sun, 28 Nov 2010 00:14:37 +0200
Subject: Use os.read for speed.

---
 dupfiles | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/dupfiles b/dupfiles
index 1956c1d..1c2550d 100755
--- a/dupfiles
+++ b/dupfiles
@@ -58,30 +58,31 @@ class File(object):
 
     def read(self, num_bytes):
         if self.pathname in filepool:
-            f = filepool[self.pathname]
+            fd = filepool[self.pathname]
             filestats.hit_count += 1
         else:
             try:
-                f = file(self.pathname)
-            except IOError, e:
+                fd = os.open(self.pathname, os.O_RDONLY)
+            except OSError, e:
                 if e.errno != errno.EMFILE:
                     raise
                 victim = random.choice(filepool.keys())
-                filepool[victim].close()
+                os.close(filepool[victim])
                 del filepool[victim]
                 filestats.close_count += 1
-                f = file(self.pathname)
-            f.seek(self.offset)
-            filepool[self.pathname] = f
+                fd = os.open(self.pathname, os.O_RDONLY)
+            os.lseek(fd, self.offset, os.SEEK_SET)
+            filepool[self.pathname] = fd
             filestats.open_count += 1
-        data = f.read(num_bytes)
+        data = os.read(fd, num_bytes)
         self.offset += len(data)
         return data
 
     def close(self):
         if self.pathname in filepool:
-            filepool[self.pathname].close()
+            os.close(filepool[self.pathname])
             del filepool[self.pathname]
+            filestats.close_count += 1
 
 
 class DuplicateFileFinder(object):
@@ -220,6 +221,7 @@ def main():
 
     for dirname in sorted(args):
         dupfinder.collect(dirname)
+
     for duplicates in dupfinder.duplicates():
         if opts.make_hardlinks:
             make_hardlinks(duplicates)
-- 
cgit v1.2.1