From b264520f1baf681f05fb837f82c11a57ae5256d3 Mon Sep 17 00:00:00 2001 From: Lars Wirzenius Date: Sat, 25 Mar 2017 12:16:52 +0200 Subject: Avoid loading all tickets, just a list of their ids This should avoid loading a lot of data that isn't needed for importing. --- distixlib/plugins/import_mail_plugin.py | 48 ++++++++++++++++++++------------- distixlib/ticket_store.py | 16 +++++------ 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/distixlib/plugins/import_mail_plugin.py b/distixlib/plugins/import_mail_plugin.py index 58f9627..6e3fb84 100644 --- a/distixlib/plugins/import_mail_plugin.py +++ b/distixlib/plugins/import_mail_plugin.py @@ -61,10 +61,10 @@ class ImportMailPlugin(cliapp.Plugin): repo = distixlib.Repository(repo_dirname) repo.require_clean_working_tree() ticket_store = repo.open_ticket_store(distixlib.tickets_dir_name) - all_tickets = ticket_store.get_tickets() + all_ticket_ids = ticket_store.get_ticket_ids() cache = _MessageIdCache() filenames = self._import_msg_to_ticket_store( - repo, ticket_store, all_tickets, msg, cache, key, value) + repo, ticket_store, all_ticket_ids, msg, cache, key, value) if filenames: repo.commit_changes(filenames, self.commit_msg) @@ -88,27 +88,29 @@ class ImportMailPlugin(cliapp.Plugin): return email.message_from_file(f) def _import_msg_to_ticket_store( - self, repo, ticket_store, all_tickets, msg, cache, key, value): + self, repo, ticket_store, all_ticket_ids, msg, cache, key, value): referenced_tickets = self._find_tickets_with_mails_referenced_by_msg( - all_tickets, msg, cache) + ticket_store, all_ticket_ids, msg, cache) msg_ids = self._get_message_ids(msg) filenames = [] if referenced_tickets: for ticket in referenced_tickets: - if not self._contains_message(ticket_store, ticket, msg): + if not self._contains_message( + ticket_store, ticket.get_ticket_id(), msg): ticket.add_message(msg) self._set_key_value(ticket, key, value) cache.add_msg_ids_for_ticket_id( ticket.get_ticket_id(), msg_ids) else: - if not 
self._is_already_imported(ticket_store, msg, all_tickets): + if not self._is_already_imported( + ticket_store, msg, all_ticket_ids): new_ticket = self._create_ticket_from_msg(repo, msg) self._set_key_value(new_ticket, key, value) cache.add_msg_ids_for_ticket_id( new_ticket.get_ticket_id(), msg_ids) - all_tickets.append(new_ticket) + all_ticket_ids.append(new_ticket.get_ticket_id()) filenames = ticket_store.add_ticket(new_ticket) filenames += ticket_store.save_changes() @@ -122,24 +124,32 @@ class ImportMailPlugin(cliapp.Plugin): metadata.add(key, value) ticket.set_ticket_metadata(metadata) - def _contains_message(self, store, ticket, msg): - return store.ticket_has_message_with_text(ticket, msg.as_string()) + def _contains_message(self, store, ticket_id, msg): + return store.ticket_has_message_with_text( + ticket_id, msg.as_string()) - def _is_already_imported(self, store, msg, all_tickets): - for ticket in all_tickets: - if self._contains_message(store, ticket, msg): + def _is_already_imported(self, store, msg, all_ticket_ids): + for ticket_id in all_ticket_ids: + if self._contains_message(store, ticket_id, msg): return True return False def _find_tickets_with_mails_referenced_by_msg( - self, all_tickets, msg, cache): + self, store, all_ticket_ids, msg, cache): tickets = [] msg_ids = self._get_message_ids(msg) - for ticket in all_tickets: - ticket_msg_ids = self._get_message_ids_for_ticket(ticket, cache) - if msg_ids.intersection(ticket_msg_ids): - tickets.append(ticket) + + for ticket_id in all_ticket_ids: + filenames = store.get_message_filenames(ticket_id) + for filename in filenames: + with open(filename) as f: + other_msg = email.message_from_file(f) + other_ids = self._get_message_ids(other_msg) + if other_ids.intersection(msg_ids): + ticket = store.get_ticket(ticket_id) + tickets.append(ticket) + return tickets def _get_message_ids(self, msg): @@ -212,7 +222,7 @@ class ImportMailPlugin(cliapp.Plugin): repo = distixlib.Repository(repo_dirname) 
repo.require_clean_working_tree() ticket_store = repo.open_ticket_store(distixlib.tickets_dir_name) - all_tickets = ticket_store.get_tickets() + all_ticket_ids = ticket_store.get_ticket_ids() cache = _MessageIdCache() filenames = [] if self.app.settings['quiet']: @@ -223,7 +233,7 @@ class ImportMailPlugin(cliapp.Plugin): for msg in folder: progress.next_msg() filenames += self._import_msg_to_ticket_store( - repo, ticket_store, all_tickets, msg, cache, key, value) + repo, ticket_store, all_ticket_ids, msg, cache, key, value) filenames += ticket_store.save_changes() if filenames: repo.commit_changes(filenames, self.commit_msg) diff --git a/distixlib/ticket_store.py b/distixlib/ticket_store.py index 8f38084..3c6a09d 100644 --- a/distixlib/ticket_store.py +++ b/distixlib/ticket_store.py @@ -152,11 +152,11 @@ class TicketStore(object): os.mkdir(self._dirname) def _create_ticket(self, ticket): - ticket_dir = self._get_dir_for_ticket(ticket) + ticket_dir = self._get_dir_for_ticket(ticket.get_ticket_id()) return self._saver.create_ticket_on_disk(ticket, ticket_dir) - def _get_dir_for_ticket(self, ticket): - return os.path.join(self._dirname, ticket.get_ticket_id()) + def _get_dir_for_ticket(self, ticket_id): + return os.path.join(self._dirname, ticket_id) def _get_ticket_id(self, ticket): # pragma: no cover ticket_id = ticket.get_ticket_id() @@ -173,19 +173,19 @@ class TicketStore(object): return filenames def _save_ticket(self, ticket): - ticket_dir = self._get_dir_for_ticket(ticket) + ticket_dir = self._get_dir_for_ticket(ticket.get_ticket_id()) return self._saver.save_changes_to_ticket(ticket, ticket_dir) def ticket_has_message_with_text( - self, ticket, msg_text): # pragma: no cover - filenames = self.get_message_filenames(ticket) + self, ticket_id, msg_text): # pragma: no cover + filenames = self.get_message_filenames(ticket_id) for filename in filenames: if self._file_contains(filename, msg_text): return True return False - def get_message_filenames(self, ticket): # 
pragma: no cover - ticket_dir = self._get_dir_for_ticket(ticket) + def get_message_filenames(self, ticket_id): # pragma: no cover + ticket_dir = self._get_dir_for_ticket(ticket_id) maildir_pathname = self._saver._get_maildir_pathname(ticket_dir) message_filenames = [] -- cgit v1.2.1