summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Lars Wirzenius <liw@liw.fi> 2010-04-18 14:26:32 +1200
committer: Lars Wirzenius <liw@liw.fi> 2010-04-18 14:26:32 +1200
commit: 92dc0a5dc50ea0c10a8b6120af60f94b196408d3 (patch)
tree: 8af7e480d8b6416d2222e951c49ac132d8655434
parent: 3c1fb0d8afbb56e28732f71ad2c86bc16efd1068 (diff)
download: dupfiles-92dc0a5dc50ea0c10a8b6120af60f94b196408d3.tar.gz
Do not read the whole file into memory when doing a checksum.
-rwxr-xr-x dupfiles 10
1 files changed, 9 insertions, 1 deletions
diff --git a/dupfiles b/dupfiles
index 0f9dd48..d95237c 100755
--- a/dupfiles
+++ b/dupfiles
@@ -115,7 +115,15 @@ class DuplicateFileFinder(object):
return '0 B'
def file_checksum(self, pathname):
- return hashlib.md5(file(pathname, 'rb').read()).digest()
+ cs = hashlib.md5()
+ f = file(pathname, 'rb')
+ while True:
+ data = f.read(64*1024)
+ if not data:
+ break
+ cs.update(data)
+ f.close()
+ return cs.digest()
def make_hardlinks(duplicates):