summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lars Wirzenius <liw@liw.fi> 2017-03-25 12:16:52 +0200
committer: Lars Wirzenius <liw@liw.fi> 2017-03-25 12:56:27 +0200
commit: b264520f1baf681f05fb837f82c11a57ae5256d3 (patch)
tree: 519de9f0a5ea3f1435a11bd032ff9d54aa4a2ef9
parent: 3320e6e7132899ffb74478db1f2e9454ccbc0435 (diff)
download: distix-b264520f1baf681f05fb837f82c11a57ae5256d3.tar.gz
Avoid loading all tickets, just the list of their ids
This should avoid loading a lot of data that isn't needed for importing.
-rw-r--r-- distixlib/plugins/import_mail_plugin.py 48
-rw-r--r-- distixlib/ticket_store.py 16
2 files changed, 37 insertions, 27 deletions
diff --git a/distixlib/plugins/import_mail_plugin.py b/distixlib/plugins/import_mail_plugin.py
index 58f9627..6e3fb84 100644
--- a/distixlib/plugins/import_mail_plugin.py
+++ b/distixlib/plugins/import_mail_plugin.py
@@ -61,10 +61,10 @@ class ImportMailPlugin(cliapp.Plugin):
repo = distixlib.Repository(repo_dirname)
repo.require_clean_working_tree()
ticket_store = repo.open_ticket_store(distixlib.tickets_dir_name)
- all_tickets = ticket_store.get_tickets()
+ all_ticket_ids = ticket_store.get_ticket_ids()
cache = _MessageIdCache()
filenames = self._import_msg_to_ticket_store(
- repo, ticket_store, all_tickets, msg, cache, key, value)
+ repo, ticket_store, all_ticket_ids, msg, cache, key, value)
if filenames:
repo.commit_changes(filenames, self.commit_msg)
@@ -88,27 +88,29 @@ class ImportMailPlugin(cliapp.Plugin):
return email.message_from_file(f)
def _import_msg_to_ticket_store(
- self, repo, ticket_store, all_tickets, msg, cache, key, value):
+ self, repo, ticket_store, all_ticket_ids, msg, cache, key, value):
referenced_tickets = self._find_tickets_with_mails_referenced_by_msg(
- all_tickets, msg, cache)
+ ticket_store, all_ticket_ids, msg, cache)
msg_ids = self._get_message_ids(msg)
filenames = []
if referenced_tickets:
for ticket in referenced_tickets:
- if not self._contains_message(ticket_store, ticket, msg):
+ if not self._contains_message(
+ ticket_store, ticket.get_ticket_id(), msg):
ticket.add_message(msg)
self._set_key_value(ticket, key, value)
cache.add_msg_ids_for_ticket_id(
ticket.get_ticket_id(), msg_ids)
else:
- if not self._is_already_imported(ticket_store, msg, all_tickets):
+ if not self._is_already_imported(
+ ticket_store, msg, all_ticket_ids):
new_ticket = self._create_ticket_from_msg(repo, msg)
self._set_key_value(new_ticket, key, value)
cache.add_msg_ids_for_ticket_id(
new_ticket.get_ticket_id(), msg_ids)
- all_tickets.append(new_ticket)
+ all_ticket_ids.append(new_ticket.get_ticket_id())
filenames = ticket_store.add_ticket(new_ticket)
filenames += ticket_store.save_changes()
@@ -122,24 +124,32 @@ class ImportMailPlugin(cliapp.Plugin):
metadata.add(key, value)
ticket.set_ticket_metadata(metadata)
- def _contains_message(self, store, ticket, msg):
- return store.ticket_has_message_with_text(ticket, msg.as_string())
+ def _contains_message(self, store, ticket_id, msg):
+ return store.ticket_has_message_with_text(
+ ticket_id, msg.as_string())
- def _is_already_imported(self, store, msg, all_tickets):
- for ticket in all_tickets:
- if self._contains_message(store, ticket, msg):
+ def _is_already_imported(self, store, msg, all_ticket_ids):
+ for ticket_id in all_ticket_ids:
+ if self._contains_message(store, ticket_id, msg):
return True
return False
def _find_tickets_with_mails_referenced_by_msg(
- self, all_tickets, msg, cache):
+ self, store, all_ticket_ids, msg, cache):
tickets = []
msg_ids = self._get_message_ids(msg)
- for ticket in all_tickets:
- ticket_msg_ids = self._get_message_ids_for_ticket(ticket, cache)
- if msg_ids.intersection(ticket_msg_ids):
- tickets.append(ticket)
+
+ for ticket_id in all_ticket_ids:
+ filenames = store.get_message_filenames(ticket_id)
+ for filename in filenames:
+ with open(filename) as f:
+ other_msg = email.message_from_file(f)
+ other_ids = self._get_message_ids(other_msg)
+ if other_ids.intersection(msg_ids):
+ ticket = store.get_ticket(ticket_id)
+ tickets.append(ticket)
+
return tickets
def _get_message_ids(self, msg):
@@ -212,7 +222,7 @@ class ImportMailPlugin(cliapp.Plugin):
repo = distixlib.Repository(repo_dirname)
repo.require_clean_working_tree()
ticket_store = repo.open_ticket_store(distixlib.tickets_dir_name)
- all_tickets = ticket_store.get_tickets()
+ all_ticket_ids = ticket_store.get_ticket_ids()
cache = _MessageIdCache()
filenames = []
if self.app.settings['quiet']:
@@ -223,7 +233,7 @@ class ImportMailPlugin(cliapp.Plugin):
for msg in folder:
progress.next_msg()
filenames += self._import_msg_to_ticket_store(
- repo, ticket_store, all_tickets, msg, cache, key, value)
+ repo, ticket_store, all_ticket_ids, msg, cache, key, value)
filenames += ticket_store.save_changes()
if filenames:
repo.commit_changes(filenames, self.commit_msg)
diff --git a/distixlib/ticket_store.py b/distixlib/ticket_store.py
index 8f38084..3c6a09d 100644
--- a/distixlib/ticket_store.py
+++ b/distixlib/ticket_store.py
@@ -152,11 +152,11 @@ class TicketStore(object):
os.mkdir(self._dirname)
def _create_ticket(self, ticket):
- ticket_dir = self._get_dir_for_ticket(ticket)
+ ticket_dir = self._get_dir_for_ticket(ticket.get_ticket_id())
return self._saver.create_ticket_on_disk(ticket, ticket_dir)
- def _get_dir_for_ticket(self, ticket):
- return os.path.join(self._dirname, ticket.get_ticket_id())
+ def _get_dir_for_ticket(self, ticket_id):
+ return os.path.join(self._dirname, ticket_id)
def _get_ticket_id(self, ticket): # pragma: no cover
ticket_id = ticket.get_ticket_id()
@@ -173,19 +173,19 @@ class TicketStore(object):
return filenames
def _save_ticket(self, ticket):
- ticket_dir = self._get_dir_for_ticket(ticket)
+ ticket_dir = self._get_dir_for_ticket(ticket.get_ticket_id())
return self._saver.save_changes_to_ticket(ticket, ticket_dir)
def ticket_has_message_with_text(
- self, ticket, msg_text): # pragma: no cover
- filenames = self.get_message_filenames(ticket)
+ self, ticket_id, msg_text): # pragma: no cover
+ filenames = self.get_message_filenames(ticket_id)
for filename in filenames:
if self._file_contains(filename, msg_text):
return True
return False
- def get_message_filenames(self, ticket): # pragma: no cover
- ticket_dir = self._get_dir_for_ticket(ticket)
+ def get_message_filenames(self, ticket_id): # pragma: no cover
+ ticket_dir = self._get_dir_for_ticket(ticket_id)
maildir_pathname = self._saver._get_maildir_pathname(ticket_dir)
message_filenames = []