diff options
author | Lars Wirzenius <liw@liw.fi> | 2010-12-22 09:06:46 +0000 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2010-12-22 09:06:46 +0000 |
commit | 4d09413cba113e424e4227752a2b6d905fcadeab (patch) | |
tree | 94592738de2080391850ee5dcba8bed8f839aad7 /dupfiles | |
parent | 9a0b263b7fc9d43e6acd16729ebfc2120ae501d6 (diff) | |
parent | ee06c931432ed23019e34baf66627ae3269ed998 (diff) | |
download | dupfiles-4d09413cba113e424e4227752a2b6d905fcadeab.tar.gz |
Merged from master repo.
Diffstat (limited to 'dupfiles')
-rwxr-xr-x | dupfiles | 29 |
1 file changed, 18 insertions, 11 deletions
@@ -46,30 +46,31 @@ class File(object):
 
     def read(self, num_bytes):
         if self.pathname in filepool:
-            f = filepool[self.pathname]
+            fd = filepool[self.pathname]
             filestats.hit_count += 1
         else:
             try:
-                f = file(self.pathname)
-            except IOError, e:
+                fd = os.open(self.pathname, os.O_RDONLY)
+            except OSError, e:
                 if e.errno != errno.EMFILE:
                     raise
                 victim = random.choice(filepool.keys())
-                filepool[victim].close()
+                os.close(filepool[victim])
                 del filepool[victim]
                 filestats.close_count += 1
-                f = file(self.pathname)
-            f.seek(self.offset)
-            filepool[self.pathname] = f
+                fd = os.open(self.pathname, os.O_RDONLY)
+            os.lseek(fd, self.offset, os.SEEK_SET)
+            filepool[self.pathname] = fd
             filestats.open_count += 1
-        data = f.read(num_bytes)
+        data = os.read(fd, num_bytes)
         self.offset += len(data)
         return data
 
     def close(self):
         if self.pathname in filepool:
-            filepool[self.pathname].close()
+            os.close(filepool[self.pathname])
             del filepool[self.pathname]
+            filestats.close_count += 1
 
 
 class DuplicateFileFinder(object):
@@ -179,7 +180,7 @@ class DuplicateFileFinder(object):
 
         done = []
         not_done = []
-        chunk_size = 64 * 1024
+        chunk_size = 4 * 1024
         for pathname, f in group:
             data = f.read(chunk_size)
             if not data:
@@ -223,6 +224,7 @@ def main():
 
     for dirname in sorted(args):
         dupfinder.collect(dirname)
+
     for duplicates in dupfinder.duplicates():
         if opts.make_hardlinks:
             make_hardlinks(duplicates)
@@ -233,4 +235,9 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
+    profname = os.environ.get('DUPFILES_PROFILE')
+    if profname:
+        import cProfile
+        cProfile.run('main()', profname)
+    else:
+        main()