diff options
author | Lars Wirzenius <liw@liw.fi> | 2016-10-19 20:34:25 +0300 |
---|---|---|
committer | Lars Wirzenius <liw@liw.fi> | 2016-10-19 20:34:25 +0300 |
commit | 1d0afe6a1d427a978b4e4d1668398b2a32acbd56 (patch) | |
tree | 6d5455bd5a168ff2545e634452fc3158b7191c07 | |
parent | ca113c0f0e1e1d2cf1844796de62fda6f7f29f6e (diff) | |
download | distix-1d0afe6a1d427a978b4e4d1668398b2a32acbd56.tar.gz |
Render parts of mails more safely
We can't safely assume that mails have correct charsets declared,
including for headers. If using the declared charset fails, just
use repr for now.
-rw-r--r-- | distixlib/message_renderer.py | 10 |
1 files changed, 8 insertions, 2 deletions
```diff
diff --git a/distixlib/message_renderer.py b/distixlib/message_renderer.py
index 340e40b..9c8d900 100644
--- a/distixlib/message_renderer.py
+++ b/distixlib/message_renderer.py
@@ -36,10 +36,16 @@ class MessageRenderer(object):
     def _get_header(self, msg, name):
         decoded = email.header.decode_header(msg[name])
         combined = u' '.join(
-            value.decode(encoding or 'us-ascii')
+            self._decode_safely(value, encoding)
             for value, encoding in decoded)
         return combined
 
+    def _decode_safely(self, s, encoding):
+        try:
+            return s.decode(encoding or 'us-ascii')
+        except UnicodeDecodeError:
+            return repr(s)
+
     def _plain_text_body(self, msg):
         body = self._find_first_plain_text_part(msg)
         if body is None:
@@ -56,7 +62,7 @@ class MessageRenderer(object):
     def _get_payload_as_unicode(self, body):
         charset = body.get_param('charset', 'us-ascii')
         text = body.get_payload(decode=True)
-        return text.decode(charset)
+        return self._decode_safely(text, charset)
 
     def _strip_signature(self, text):
         sigsep = u'\n-- \n'
```