# Copyright 2014  Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see http://www.gnu.org/licenses/.
#
# =*= License: GPL-3+ =*=


import contextlib
import email
import email.header
import email.utils
import mailbox
import os

import cliapp
import ttystatus

import distixlib


class WrongArguments(distixlib.StructuredError):

    """Raised when a subcommand gets neither two nor three arguments."""

    msg = 'Wrong number of arguments: got {count}, wanted 2 or 3'


class NotKeywordArgument(distixlib.StructuredError):

    """Raised when the optional third argument contains no '='."""

    msg = '{arg} is not a keyword argument (KEY=VALUE)'


class ImportMailPlugin(cliapp.Plugin):

    """Subcommands that import mail into the tickets of a repository.

    A message is attached to every existing ticket that shares a
    message id with it (via the Message-Id, In-Reply-To or References
    headers). A message that is related to no ticket, and whose text
    is not already stored in any ticket, becomes a new ticket.

    """

    commit_msg = 'imported mails'

    def enable(self):
        """Register the import-mail, import-mbox, import-maildir commands."""
        self.app.add_subcommand(
            'import-mail', self.import_mail,
            arg_synopsis='REPO FILE [KEY=VALUE]')
        self.app.add_subcommand(
            'import-mbox', self.import_mbox,
            arg_synopsis='REPO FILE [KEY=VALUE]')
        # _import_folder parses the optional KEY=VALUE for maildirs as
        # well, so the synopsis advertises it here too.
        self.app.add_subcommand(
            'import-maildir', self.import_maildir,
            arg_synopsis='REPO MAILDIR [KEY=VALUE]')

    def import_mail(self, args):
        """Import the single mail message stored in a file.

        args is [REPO, FILE] or [REPO, FILE, KEY=VALUE]. Changes are
        committed only if the import touched any files.

        """
        repo_dirname, mail_filename, keyvalue = self._parse_command_line(args)
        key, value = self._parse_keyvalue(keyvalue)
        msg = self._read_mail_message(mail_filename)

        repo = distixlib.Repository(repo_dirname)
        repo.require_clean_working_tree()
        ticket_store = repo.open_ticket_store(distixlib.tickets_dir_name)
        all_ticket_ids = ticket_store.get_ticket_ids()
        cache = _MessageIdCache()

        filenames = self._import_msg_to_ticket_store(
            repo, ticket_store, all_ticket_ids, msg, cache, key, value)
        if filenames:
            repo.commit_changes(filenames, self.commit_msg)

    def _parse_command_line(self, args):
        """Return (repo, filename, keyvalue) from the argument list.

        keyvalue is None when only two arguments were given. Raises
        NotKeywordArgument if a third argument lacks '=', and
        WrongArguments for any other argument count.

        """
        if len(args) == 2:
            return args[0], args[1], None
        elif len(args) == 3:
            if '=' not in args[2]:
                raise NotKeywordArgument(arg=args[2])
            return args[0], args[1], args[2]
        else:
            raise WrongArguments(count=len(args))

    def _parse_keyvalue(self, keyvalue):
        """Split 'KEY=VALUE' at the first '='; None gives (None, None)."""
        if keyvalue is None:
            return None, None
        return keyvalue.split('=', 1)

    def _read_mail_message(self, mail_filename):
        """Parse the named file into an email message object."""
        with open(mail_filename) as f:
            return email.message_from_file(f)

    def _import_msg_to_ticket_store(
            self, repo, ticket_store, all_ticket_ids, msg, cache,
            key, value):
        """Attach msg to related tickets, or create a new ticket for it.

        all_ticket_ids and cache are updated in place so that later
        messages in the same run can refer to what was imported here.
        Returns the list of filenames reported by the ticket store
        for the added ticket and the saved changes.

        """
        referenced_tickets = self._find_tickets_with_mails_referenced_by_msg(
            ticket_store, all_ticket_ids, msg, cache)
        msg_ids = self._get_message_ids(msg)

        filenames = []
        if referenced_tickets:
            for ticket in referenced_tickets:
                ticket_id = ticket.get_ticket_id()
                if not self._contains_message(ticket_store, ticket_id, msg):
                    ticket.add_message(msg)
                    self._set_key_value(ticket, key, value)
                    cache.add_msg_ids_for_ticket_id(ticket_id, msg_ids)
        elif not self._is_already_imported(
                ticket_store, msg, all_ticket_ids):
            new_ticket = self._create_ticket_from_msg(repo, msg)
            new_ticket_id = new_ticket.get_ticket_id()
            self._set_key_value(new_ticket, key, value)
            cache.add_msg_ids_for_ticket_id(new_ticket_id, msg_ids)
            all_ticket_ids.append(new_ticket_id)
            # Copy into our own list instead of extending whatever
            # list object the ticket store handed back.
            filenames.extend(ticket_store.add_ticket(new_ticket))

        filenames.extend(ticket_store.save_changes())
        return filenames

    def _set_key_value(self, ticket, key, value):
        """Replace all values of metadata key on ticket with value.

        Does nothing unless both key and value are given.

        """
        if key is not None and value is not None:
            metadata = ticket.get_ticket_metadata()
            if key in metadata:
                metadata.remove_all_values(key)
            metadata.add(key, value)
            ticket.set_ticket_metadata(metadata)

    def _contains_message(self, store, ticket_id, msg):
        """Ask the store whether the ticket already holds msg's text."""
        return store.ticket_has_message_with_text(
            ticket_id, msg.as_string())

    def _is_already_imported(self, store, msg, all_ticket_ids):
        """Return True if any listed ticket already holds msg's text."""
        for ticket_id in all_ticket_ids:
            if self._contains_message(store, ticket_id, msg):
                return True
        return False

    def _find_tickets_with_mails_referenced_by_msg(
            self, store, all_ticket_ids, msg, cache):
        """Return the tickets that share at least one message id with msg.

        Message ids of tickets not yet in cache are read from the
        ticket's message files and added to cache.

        """
        tickets = []
        msg_ids = self._get_message_ids(msg)
        for ticket_id in all_ticket_ids:
            if ticket_id not in cache:
                # Register the ticket even if it has no message files,
                # so that it is not scanned again for the next message.
                cache.add_msg_ids_for_ticket_id(ticket_id, set())
                for filename in store.get_message_filenames(ticket_id):
                    with open(filename) as f:
                        other_msg = email.message_from_file(f)
                    cache.add_msg_ids_for_ticket_id(
                        ticket_id, self._get_message_ids(other_msg))

            # Always compare against the ids of ALL messages in the
            # ticket, not just those of the message file read last.
            other_ids = cache.get_msg_ids_for_ticket_id(ticket_id)
            if other_ids.intersection(msg_ids):
                tickets.append(store.get_ticket(ticket_id))
        return tickets

    def _get_message_ids(self, msg):
        """Return the set of ids msg carries or refers to.

        The ids are collected from the Message-Id, In-Reply-To and
        References headers.

        """
        header_names = ('Message-Id', 'In-Reply-To', 'References')
        msg_ids = set()
        for header_name in header_names:
            msg_ids.update(
                self._get_message_ids_from_header(msg, header_name))
        return msg_ids

    def _get_message_ids_from_header(self, msg, header_name):
        """Return the set of message ids found in one header of msg.

        The result is empty if the header is missing.

        """
        values = msg.get_all(header_name)
        if values is None:
            return set()
        # getaddresses yields an empty address for parts it cannot
        # parse. An empty id must be dropped, or unrelated mails with
        # malformed headers would all appear to reference each other.
        return set(
            msg_id
            for real_name, msg_id in email.utils.getaddresses(values)
            if msg_id)

    def _get_message_ids_for_ticket(self, ticket, cache):
        """Return the cached message ids of ticket, filling cache if needed."""
        ticket_id = ticket.get_ticket_id()
        if ticket_id not in cache:
            for msg in ticket.get_messages():
                msg_ids = self._get_message_ids(msg)
                cache.add_msg_ids_for_ticket_id(ticket_id, msg_ids)
        return cache.get_msg_ids_for_ticket_id(ticket_id)

    def _create_ticket_from_msg(self, repo, msg):
        """Return a new ticket titled by msg's Subject and holding msg."""
        ticket_id = repo.invent_new_ticket_id()
        subject = self._get_header(msg, 'Subject')
        ticket = self._create_ticket(ticket_id, subject)
        ticket.add_message(msg)
        return ticket

    def _get_header(self, msg, name):
        """Return the decoded value of a header as one text string.

        A missing header gives an empty string.

        """
        raw = msg[name]
        if raw is None:
            # Without this, a mail lacking the header would end up
            # with the literal text 'None' (or fail outright).
            return u''
        decoded = email.header.decode_header(raw)
        return u' '.join(
            self._safe_decode(value, encoding)
            for value, encoding in decoded)

    def _safe_decode(self, text, encoding):
        """Decode one header chunk, falling back to repr() on failure.

        The encoding defaults to us-ascii. A chunk that is not a byte
        string is returned unchanged.

        """
        if not isinstance(text, bytes):
            # decode_header may hand back already decoded text.
            return text
        try:
            return text.decode(encoding or 'us-ascii')
        except (LookupError, UnicodeDecodeError):
            # Unknown charset name, or bytes invalid in that charset.
            return repr(text)

    def _create_ticket(self, ticket_id, title):
        """Return a new ticket with the given id and title."""
        ticket = distixlib.Ticket()
        ticket.set_ticket_id(ticket_id)
        ticket.set_title(title)
        return ticket

    def import_mbox(self, args):
        """Import every message in an mbox file."""
        self._import_folder(args, mailbox.mbox)

    def import_maildir(self, args):
        """Import every message in a Maildir directory."""

        def maildir_factory(filename):
            return mailbox.Maildir(filename, factory=None)

        self._import_folder(args, maildir_factory)

    def _import_folder(self, args, folder_factory):
        """Import all messages of the folder that folder_factory opens.

        args is parsed like for import_mail. folder_factory is called
        with the folder's filename. Changes are committed once, after
        all messages are processed, and only if any files changed.

        """
        repo_dirname, folder_filename, keyvalue = self._parse_command_line(
            args)
        key, value = self._parse_keyvalue(keyvalue)

        folder = folder_factory(folder_filename)
        # Close the folder on every path, including a failure while
        # the repository is being opened and checked.
        with contextlib.closing(folder):
            repo = distixlib.Repository(repo_dirname)
            repo.require_clean_working_tree()
            ticket_store = repo.open_ticket_store(
                distixlib.tickets_dir_name)
            all_ticket_ids = ticket_store.get_ticket_ids()
            cache = _MessageIdCache()

            if self.app.settings['quiet']:
                progress = _QuietProgressReporter()
            else:
                progress = _MboxProgressReporter(len(folder))

            filenames = []
            with progress:
                for msg in folder:
                    progress.next_msg()
                    filenames += self._import_msg_to_ticket_store(
                        repo, ticket_store, all_ticket_ids, msg, cache,
                        key, value)

        filenames += ticket_store.save_changes()
        if filenames:
            repo.commit_changes(filenames, self.commit_msg)


class _MessageIdCache(object):

    """Remember, per ticket id, the message ids seen for that ticket."""

    def __init__(self):
        self._dict = {}

    def __contains__(self, ticket_id):
        return ticket_id in self._dict

    def get_msg_ids_for_ticket_id(self, ticket_id):
        """Return the set of ids stored for ticket_id, or an empty set."""
        return self._dict.get(ticket_id, set())

    def add_msg_ids_for_ticket_id(self, ticket_id, msg_ids):
        """Merge msg_ids into the set stored for ticket_id."""
        old = self.get_msg_ids_for_ticket_id(ticket_id)
        self._dict[ticket_id] = old.union(msg_ids)


class _MboxProgressReporter(object):

    """Context manager that shows import progress using ttystatus."""

    def __init__(self, total):
        self._ts = ttystatus.TerminalStatus()
        self._ts.format(
            '%ElapsedTime() '
            'importing message %Integer(current) of %Integer(total) '
            '(%PercentDone(current,total))')
        self._ts['current'] = 0
        self._ts['total'] = total

    def next_msg(self):
        """Count one more message."""
        self._ts['current'] += 1

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self._ts.clear()
        self._ts.finish()


class _QuietProgressReporter(object):

    """Stand-in for _MboxProgressReporter that reports nothing."""

    def next_msg(self):
        pass

    def __enter__(self):
        # Return self, as _MboxProgressReporter does.
        return self

    def __exit__(self, *args):
        pass