From 2ff64cae65415907007b0d474461af38fd41b073 Mon Sep 17 00:00:00 2001
From: Lars Wirzenius
Date: Sat, 25 Mar 2017 10:34:28 +0200
Subject: Add TicketStore.ticket_has_message_with_text

This violates abstractions to make the check much faster, without
having to cache anything.
---
# Review notes (annotation only; not part of the commit message):
#
# * import_mail_plugin.py: _contains_message() no longer iterates
#   ticket.get_messages(); it passes msg.as_string() to
#   TicketStore.ticket_has_message_with_text(), so the ticket store is
#   now handed down through _is_already_imported() as well.
# * ticket_store.py: get_message_filenames() lists every file found by
#   os.walk() under the ticket's maildir, removing '.empty' from subdirs
#   so that a directory of that name is not descended into.  Only
#   subdirs is filtered; filenames is not.
# * _file_contains() returns False without opening the file when
#   os.lstat().st_size differs from len(data); otherwise it reads the
#   whole file and compares it with data.
#   NOTE(review): st_size is a byte count, while len(data) is a byte
#   count only if data is a byte string -- confirm which string type
#   msg.as_string() yields on the interpreter this runs under.
# * get_message_filenames() calls self._saver._get_maildir_pathname(),
#   an underscore-prefixed method of another object; presumably this is
#   the abstraction violation the commit message mentions.
# * Layout: line breaks and indentation in this copy were reconstructed
#   from a whitespace-collapsed export.  Hunk line counts agree with the
#   @@ headers and the diffstat, but blank-line placement and context
#   indentation are inferred -- TODO confirm against the repository
#   before applying.

 NEWS                                    |  2 ++
 distixlib/plugins/import_mail_plugin.py | 17 ++++++-----------
 distixlib/ticket_store.py               | 29 +++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/NEWS b/NEWS
index 120af27..f4011b3 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,8 @@ distix, a distributed ticketing system.
 Version 0.12+git, not yet released
 ----------------------------------
 
+* Speed up imports by improving check for whether a message has
+  already been imported.
 
 Version 0.12, released 2017-03-22
 ----------------------------------
diff --git a/distixlib/plugins/import_mail_plugin.py b/distixlib/plugins/import_mail_plugin.py
index 3ef19c6..58f9627 100644
--- a/distixlib/plugins/import_mail_plugin.py
+++ b/distixlib/plugins/import_mail_plugin.py
@@ -97,13 +97,13 @@ class ImportMailPlugin(cliapp.Plugin):
 
         if referenced_tickets:
             for ticket in referenced_tickets:
-                if not self._contains_message(ticket, msg):
+                if not self._contains_message(ticket_store, ticket, msg):
                     ticket.add_message(msg)
                     self._set_key_value(ticket, key, value)
                     cache.add_msg_ids_for_ticket_id(
                         ticket.get_ticket_id(), msg_ids)
         else:
-            if not self._is_already_imported(msg, all_tickets):
+            if not self._is_already_imported(ticket_store, msg, all_tickets):
                 new_ticket = self._create_ticket_from_msg(repo, msg)
                 self._set_key_value(new_ticket, key, value)
                 cache.add_msg_ids_for_ticket_id(
@@ -122,17 +122,12 @@ class ImportMailPlugin(cliapp.Plugin):
             metadata.add(key, value)
             ticket.set_ticket_metadata(metadata)
 
-    def _contains_message(self, ticket, msg):
-        return any(
-            self._equal_messages(existing, msg)
-            for existing in ticket.get_messages())
+    def _contains_message(self, store, ticket, msg):
+        return store.ticket_has_message_with_text(ticket, msg.as_string())
 
-    def _equal_messages(self, msg1, msg2):
-        return msg1.as_string() == msg2.as_string()
-
-    def _is_already_imported(self, msg, all_tickets):
+    def _is_already_imported(self, store, msg, all_tickets):
         for ticket in all_tickets:
-            if self._contains_message(ticket, msg):
+            if self._contains_message(store, ticket, msg):
                 return True
         return False
 
diff --git a/distixlib/ticket_store.py b/distixlib/ticket_store.py
index 73f7257..a174b02 100644
--- a/distixlib/ticket_store.py
+++ b/distixlib/ticket_store.py
@@ -172,6 +172,35 @@ class TicketStore(object):
         ticket_dir = self._get_dir_for_ticket(ticket)
         return self._saver.save_changes_to_ticket(ticket, ticket_dir)
 
+    def ticket_has_message_with_text(
+            self, ticket, msg_text):  # pragma: no cover
+        filenames = self.get_message_filenames(ticket)
+        for filename in filenames:
+            if self._file_contains(filename, msg_text):
+                return True
+        return False
+
+    def get_message_filenames(self, ticket):  # pragma: no cover
+        ticket_dir = self._get_dir_for_ticket(ticket)
+        maildir_pathname = self._saver._get_maildir_pathname(ticket_dir)
+
+        message_filenames = []
+
+        for dirname, subdirs, filenames in os.walk(maildir_pathname):
+            if '.empty' in subdirs:
+                subdirs.remove('.empty')
+            for filename in filenames:
+                message_filenames.append(os.path.join(dirname, filename))
+
+        return message_filenames
+
+    def _file_contains(self, filename, data):  # pragma: no cover
+        st = os.lstat(filename)
+        if st.st_size != len(data):
+            return False
+        with open(filename) as f:
+            return f.read() == data
+
 
 class _TicketCache(object):
 
-- 
cgit v1.2.1