Remove old FIXME, add new one.

author: Lars Wirzenius <liw@liw.fi> 2010-04-25 06:34:49 +1200
committer: Lars Wirzenius <liw@liw.fi> 2010-04-25 06:34:49 +1200
commit: 626248d5a81c7bdc76655da0aafa02ed3ae64186 (patch)
tree: f24238d645ac9f8a7cc684bf625bfde40f97d666
parent: f8b8501cba8546ecad422a6a41dca944bb3e94a5 (diff)
download: dupfiles-626248d5a81c7bdc76655da0aafa02ed3ae64186.tar.gz
1 files changed, 7 insertions, 6 deletions
diff --git a/dupfiles b/dupfiles
index abcd67d..35742f1 100755
--- a/dupfiles
+++ b/dupfiles
@@ -66,12 +66,6 @@ class DuplicateFileFinder(object):
                     self.by_size[stat.st_size] = [t]
         self.progress.finished()
 
-    # FIXME: This computed the checksum for each file, the full file.
-    # It might be faster to read all files (of the same size) in parallel,
-    # in suitably small chunks, and compute checksums for all of them in
-    # parallel. When checksums diverge, split the files into smaller
-    # groups. If a group becomes just a single file, that file is not
-    # a duplicate and can be dropped completely.
     def duplicates(self):
         skip = [size for size in self.by_size if len(self.by_size[size]) == 1]
         for size in skip:
@@ -98,6 +92,13 @@ class DuplicateFileFinder(object):
         of pathnames to files that are identical.
         
         '''
+        
+        # FIXME: This assumes it can open every file at the same time.
+        # If there are a lot of files, that's not going to be possible.
+        # This might work: keep a pool of open files, and record how
+        # far you got with each file. Then close and re-open files as
+        # necessary, if the pool is too small. When re-opening, seek
+        # to the remembered position.
 
         result = []
         identical_groups = [[(x, file(x)) for x in pathnames]]
author	Lars Wirzenius <liw@liw.fi>	2010-04-25 06:34:49 +1200
committer	Lars Wirzenius <liw@liw.fi>	2010-04-25 06:34:49 +1200
commit	626248d5a81c7bdc76655da0aafa02ed3ae64186 (patch)
tree	f24238d645ac9f8a7cc684bf625bfde40f97d666
parent	f8b8501cba8546ecad422a6a41dca944bb3e94a5 (diff)
download	dupfiles-626248d5a81c7bdc76655da0aafa02ed3ae64186.tar.gz