summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2010-12-22 09:06:46 +0000
committer Lars Wirzenius <liw@liw.fi> 2010-12-22 09:06:46 +0000
commit 4d09413cba113e424e4227752a2b6d905fcadeab (patch)
tree 94592738de2080391850ee5dcba8bed8f839aad7
parent 9a0b263b7fc9d43e6acd16729ebfc2120ae501d6 (diff)
parent ee06c931432ed23019e34baf66627ae3269ed998 (diff)
download dupfiles-4d09413cba113e424e4227752a2b6d905fcadeab.tar.gz
Merged from master repo.
-rwxr-xr-x dupfiles 29
1 file changed, 18 insertions, 11 deletions
diff --git a/dupfiles b/dupfiles
index 3d9e8db..78e4b5f 100755
--- a/dupfiles
+++ b/dupfiles
@@ -46,30 +46,31 @@ class File(object):
def read(self, num_bytes):
if self.pathname in filepool:
- f = filepool[self.pathname]
+ fd = filepool[self.pathname]
filestats.hit_count += 1
else:
try:
- f = file(self.pathname)
- except IOError, e:
+ fd = os.open(self.pathname, os.O_RDONLY)
+ except OSError, e:
if e.errno != errno.EMFILE:
raise
victim = random.choice(filepool.keys())
- filepool[victim].close()
+ os.close(filepool[victim])
del filepool[victim]
filestats.close_count += 1
- f = file(self.pathname)
- f.seek(self.offset)
- filepool[self.pathname] = f
+ fd = os.open(self.pathname, os.O_RDONLY)
+ os.lseek(fd, self.offset, os.SEEK_SET)
+ filepool[self.pathname] = fd
filestats.open_count += 1
- data = f.read(num_bytes)
+ data = os.read(fd, num_bytes)
self.offset += len(data)
return data
def close(self):
if self.pathname in filepool:
- filepool[self.pathname].close()
+ os.close(filepool[self.pathname])
del filepool[self.pathname]
+ filestats.close_count += 1
class DuplicateFileFinder(object):
@@ -179,7 +180,7 @@ class DuplicateFileFinder(object):
done = []
not_done = []
- chunk_size = 64 * 1024
+ chunk_size = 4 * 1024
for pathname, f in group:
data = f.read(chunk_size)
if not data:
@@ -223,6 +224,7 @@ def main():
for dirname in sorted(args):
dupfinder.collect(dirname)
+
for duplicates in dupfinder.duplicates():
if opts.make_hardlinks:
make_hardlinks(duplicates)
@@ -233,4 +235,9 @@ def main():
if __name__ == '__main__':
- main()
+ profname = os.environ.get('DUPFILES_PROFILE')
+ if profname:
+ import cProfile
+ cProfile.run('main()', profname)
+ else:
+ main()