summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Lars Wirzenius <liw@liw.fi> 2016-10-19 20:34:25 +0300
committer Lars Wirzenius <liw@liw.fi> 2016-10-19 20:34:25 +0300
commit 1d0afe6a1d427a978b4e4d1668398b2a32acbd56 (patch)
tree 6d5455bd5a168ff2545e634452fc3158b7191c07
parent ca113c0f0e1e1d2cf1844796de62fda6f7f29f6e (diff)
download distix-1d0afe6a1d427a978b4e4d1668398b2a32acbd56.tar.gz
Render parts of mails more safely
We can't safely assume that mails have correct charsets declared, including for headers. If using the declared charset fails, just use repr for now.
-rw-r--r-- distixlib/message_renderer.py 10
1 files changed, 8 insertions, 2 deletions
diff --git a/distixlib/message_renderer.py b/distixlib/message_renderer.py
index 340e40b..9c8d900 100644
--- a/distixlib/message_renderer.py
+++ b/distixlib/message_renderer.py
@@ -36,10 +36,16 @@ class MessageRenderer(object):
def _get_header(self, msg, name):
decoded = email.header.decode_header(msg[name])
combined = u' '.join(
- value.decode(encoding or 'us-ascii')
+ self._decode_safely(value, encoding)
for value, encoding in decoded)
return combined
+ def _decode_safely(self, s, encoding):
+ try:
+ return s.decode(encoding or 'us-ascii')
+ except UnicodeDecodeError:
+ return repr(s)
+
def _plain_text_body(self, msg):
body = self._find_first_plain_text_part(msg)
if body is None:
@@ -56,7 +62,7 @@ class MessageRenderer(object):
def _get_payload_as_unicode(self, body):
charset = body.get_param('charset', 'us-ascii')
text = body.get_payload(decode=True)
- return text.decode(charset)
+ return self._decode_safely(text, charset)
def _strip_signature(self, text):
sigsep = u'\n-- \n'