summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lars Wirzenius <liw@liw.fi> 2017-03-25 10:34:28 +0200
committer: Lars Wirzenius <liw@liw.fi> 2017-03-25 11:07:53 +0200
commit: 2ff64cae65415907007b0d474461af38fd41b073 (patch)
tree: 064139c163c8b1f1bb7a9f4533bbb61a25dc974a
parent: 3f9f27b6eef32c9528df3397836fba141a7d1e56 (diff)
download: distix-2ff64cae65415907007b0d474461af38fd41b073.tar.gz
Add TicketStore.ticket_has_message_with_text
This violates abstractions to make the check much faster, without having to cache anything.
-rw-r--r-- NEWS 2
-rw-r--r-- distixlib/plugins/import_mail_plugin.py 17
-rw-r--r-- distixlib/ticket_store.py 29
3 files changed, 37 insertions, 11 deletions
diff --git a/NEWS b/NEWS
index 120af27..f4011b3 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,8 @@ distix, a distributed ticketing system.
Version 0.12+git, not yet released
----------------------------------
+* Speed up imports by improving check for whether a message has
+ already been imported.
Version 0.12, released 2017-03-22
----------------------------------
diff --git a/distixlib/plugins/import_mail_plugin.py b/distixlib/plugins/import_mail_plugin.py
index 3ef19c6..58f9627 100644
--- a/distixlib/plugins/import_mail_plugin.py
+++ b/distixlib/plugins/import_mail_plugin.py
@@ -97,13 +97,13 @@ class ImportMailPlugin(cliapp.Plugin):
if referenced_tickets:
for ticket in referenced_tickets:
- if not self._contains_message(ticket, msg):
+ if not self._contains_message(ticket_store, ticket, msg):
ticket.add_message(msg)
self._set_key_value(ticket, key, value)
cache.add_msg_ids_for_ticket_id(
ticket.get_ticket_id(), msg_ids)
else:
- if not self._is_already_imported(msg, all_tickets):
+ if not self._is_already_imported(ticket_store, msg, all_tickets):
new_ticket = self._create_ticket_from_msg(repo, msg)
self._set_key_value(new_ticket, key, value)
cache.add_msg_ids_for_ticket_id(
@@ -122,17 +122,12 @@ class ImportMailPlugin(cliapp.Plugin):
metadata.add(key, value)
ticket.set_ticket_metadata(metadata)
- def _contains_message(self, ticket, msg):
- return any(
- self._equal_messages(existing, msg)
- for existing in ticket.get_messages())
+ def _contains_message(self, store, ticket, msg):
+ return store.ticket_has_message_with_text(ticket, msg.as_string())
- def _equal_messages(self, msg1, msg2):
- return msg1.as_string() == msg2.as_string()
-
- def _is_already_imported(self, msg, all_tickets):
+ def _is_already_imported(self, store, msg, all_tickets):
for ticket in all_tickets:
- if self._contains_message(ticket, msg):
+ if self._contains_message(store, ticket, msg):
return True
return False
diff --git a/distixlib/ticket_store.py b/distixlib/ticket_store.py
index 73f7257..a174b02 100644
--- a/distixlib/ticket_store.py
+++ b/distixlib/ticket_store.py
@@ -172,6 +172,35 @@ class TicketStore(object):
ticket_dir = self._get_dir_for_ticket(ticket)
return self._saver.save_changes_to_ticket(ticket, ticket_dir)
+ def ticket_has_message_with_text(
+ self, ticket, msg_text): # pragma: no cover
+ filenames = self.get_message_filenames(ticket)
+ for filename in filenames:
+ if self._file_contains(filename, msg_text):
+ return True
+ return False
+
+ def get_message_filenames(self, ticket): # pragma: no cover
+ ticket_dir = self._get_dir_for_ticket(ticket)
+ maildir_pathname = self._saver._get_maildir_pathname(ticket_dir)
+
+ message_filenames = []
+
+ for dirname, subdirs, filenames in os.walk(maildir_pathname):
+ if '.empty' in subdirs:
+ subdirs.remove('.empty')
+ for filename in filenames:
+ message_filenames.append(os.path.join(dirname, filename))
+
+ return message_filenames
+
+ def _file_contains(self, filename, data): # pragma: no cover
+ st = os.lstat(filename)
+ if st.st_size != len(data):
+ return False
+ with open(filename) as f:
+ return f.read() == data
+
class _TicketCache(object):