summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lars Wirzenius <liw@xander> 2010-11-28 00:14:37 +0200
committer: Lars Wirzenius <liw@xander> 2010-11-28 00:14:37 +0200
commit: 279879c20a82614f3c323be77e1d4af601bdb70f (patch)
tree: 8c4a19ca775b759f332a49c5c7ce3c02f12e423f
parent: 58398ff7e4b47ec39a9106e193c2ea54eda4f723 (diff)
download: dupfiles-279879c20a82614f3c323be77e1d4af601bdb70f.tar.gz
Use os.read for speed.
-rwxr-xr-x dupfiles 20
1 files changed, 11 insertions, 9 deletions
diff --git a/dupfiles b/dupfiles
index 1956c1d..1c2550d 100755
--- a/dupfiles
+++ b/dupfiles
@@ -58,30 +58,31 @@ class File(object):
def read(self, num_bytes):
if self.pathname in filepool:
- f = filepool[self.pathname]
+ fd = filepool[self.pathname]
filestats.hit_count += 1
else:
try:
- f = file(self.pathname)
- except IOError, e:
+ fd = os.open(self.pathname, os.O_RDONLY)
+ except OSError, e:
if e.errno != errno.EMFILE:
raise
victim = random.choice(filepool.keys())
- filepool[victim].close()
+ os.close(filepool[victim])
del filepool[victim]
filestats.close_count += 1
- f = file(self.pathname)
- f.seek(self.offset)
- filepool[self.pathname] = f
+ fd = os.open(self.pathname, os.O_RDONLY)
+ os.lseek(fd, self.offset, os.SEEK_SET)
+ filepool[self.pathname] = fd
filestats.open_count += 1
- data = f.read(num_bytes)
+ data = os.read(fd, num_bytes)
self.offset += len(data)
return data
def close(self):
if self.pathname in filepool:
- filepool[self.pathname].close()
+ os.close(filepool[self.pathname])
del filepool[self.pathname]
+ filestats.close_count += 1
class DuplicateFileFinder(object):
@@ -220,6 +221,7 @@ def main():
for dirname in sorted(args):
dupfinder.collect(dirname)
+
for duplicates in dupfinder.duplicates():
if opts.make_hardlinks:
make_hardlinks(duplicates)