From c719e606bf6f815d9fa3bf0c5b85234cf51188de Mon Sep 17 00:00:00 2001 From: c Date: Mon, 14 Oct 2024 22:26:09 +0200 Subject: [PATCH 1/2] Add .eml example with odd behavior --- resources/eml/malformed/018.eml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 resources/eml/malformed/018.eml diff --git a/resources/eml/malformed/018.eml b/resources/eml/malformed/018.eml new file mode 100644 index 0000000..3d23f83 --- /dev/null +++ b/resources/eml/malformed/018.eml @@ -0,0 +1,13 @@ +Content-Type: text/plain; + charset="Windows-1252" +Content-Transfer-Encoding: quoted-printable +MIME-Version: 1.0 + +Best +Enviado desde mi BlackBerry=AE de Vodafone= + + + + + + \ No newline at end of file From b06d9ab59f965a9923e4574af53f57da60d31a1b Mon Sep 17 00:00:00 2001 From: c Date: Mon, 21 Oct 2024 14:00:29 +0200 Subject: [PATCH 2/2] Parse malformed, non-multipart, single text/plain messages best-effort as if text is not malformed --- resources/eml/malformed/012.crlf.json | 8 ++-- resources/eml/malformed/012.json | 8 ++-- resources/eml/malformed/018.crlf.json | 58 +++++++++++++++++++++++++++ resources/eml/malformed/018.json | 58 +++++++++++++++++++++++++++ src/parsers/message.rs | 12 +++++- 5 files changed, 137 insertions(+), 7 deletions(-) create mode 100644 resources/eml/malformed/018.crlf.json create mode 100644 resources/eml/malformed/018.json diff --git a/resources/eml/malformed/012.crlf.json b/resources/eml/malformed/012.crlf.json index a0c5da6..bc5661e 100644 --- a/resources/eml/malformed/012.crlf.json +++ b/resources/eml/malformed/012.crlf.json @@ -146,11 +146,13 @@ "is_encoding_problem": false, "body": { "Message": { - "html_body": [], - "text_body": [], - "attachments": [ + "html_body": [ 0 ], + "text_body": [ + 0 + ], + "attachments": [], "parts": [ { "headers": [ diff --git a/resources/eml/malformed/012.json b/resources/eml/malformed/012.json index 6a2dc94..e368ae3 100644 --- a/resources/eml/malformed/012.json +++ b/resources/eml/malformed/012.json @@ -146,11 +146,13 @@ "is_encoding_problem": false, "body": { "Message": { - "html_body": [], - "text_body": [], - "attachments": [ + "html_body": [ 0 ], + "text_body": [ + 0 + ], + "attachments": [], "parts": [ { "headers": [ diff --git a/resources/eml/malformed/018.crlf.json b/resources/eml/malformed/018.crlf.json new file mode 100644 index 0000000..4d424f6 --- /dev/null +++ b/resources/eml/malformed/018.crlf.json @@ -0,0 +1,58 @@ +{ + "html_body": [ + 0 + ], + "text_body": [ + 0 + ], + "attachments": [], + "parts": [ + { + "headers": [ + { + "name": "content_type", + "value": { + "ContentType": { + "c_type": "text", + "c_subtype": "plain", + "attributes": [ + [ + "charset", + "Windows-1252" + ] + ] + } + }, + "offset_field": 0, + "offset_start": 13, + "offset_end": 52 + }, + { + "name": "content_transfer_encoding", + "value": { + "Text": "quoted-printable" + }, + "offset_field": 52, + "offset_start": 78, + "offset_end": 97 + }, + { + "name": "mime_version", + "value": { + "Text": "1.0" + }, + "offset_field": 97, + "offset_start": 110, + "offset_end": 116 + } + ], + "is_encoding_problem": true, + "body": { + "Text": "Best\r\nEnviado desde mi BlackBerry=AE de Vodafone=\r\n\r\n\r\n\r\n\r\n\r\n" + }, + "offset_header": 0, + "offset_body": 118, + "offset_end": 361 + } + ] +} \ No newline at end of file diff --git a/resources/eml/malformed/018.json b/resources/eml/malformed/018.json new file mode 100644 index 0000000..076a0f1 --- /dev/null +++ b/resources/eml/malformed/018.json @@ -0,0 +1,58 @@ +{ + "html_body": [ + 0 + ], + "text_body": [ + 0 + ], + "attachments": [], + "parts": [ + { + "headers": [ + { + "name": "content_type", + "value": { + "ContentType": { + "c_type": "text", + "c_subtype": "plain", + "attributes": [ + [ + "charset", + "Windows-1252" + ] + ] + } + }, + "offset_field": 0, + "offset_start": 13, + "offset_end": 52 + }, + { + "name": "content_transfer_encoding", + "value": { + "Text": "quoted-printable" + }, + "offset_field": 52, + "offset_start": 78, + "offset_end": 97 + }, + { + "name": "mime_version", + "value": { + "Text": "1.0" + }, + "offset_field": 97, + "offset_start": 110, + "offset_end": 116 + } + ], + "is_encoding_problem": true, + "body": { + "Text": "Best\nEnviado desde mi BlackBerry=AE de Vodafone=\n\n\n\n\n\n" + }, + "offset_header": 0, + "offset_body": 118, + "offset_end": 354 + } + ] +} \ No newline at end of file diff --git a/src/parsers/message.rs b/src/parsers/message.rs index 6a56b83..19a3694 100644 --- a/src/parsers/message.rs +++ b/src/parsers/message.rs @@ -250,7 +250,9 @@ impl MessageParser { let mut is_encoding_problem = offset_end == usize::MAX; if is_encoding_problem { encoding = Encoding::None; - mime_type = MimeType::TextOther; + if mime_type != MimeType::TextPlain { + mime_type = MimeType::TextOther; + } is_inline = false; is_text = true; @@ -278,6 +280,14 @@ impl MessageParser { && (mime_type == MimeType::Inline || content_type.map_or(true, |c| !c.has_attribute("name")))); + // if message consists of single text/plain part, classify as text regardless + // of encoding issues: see malformed/018.eml + let is_inline = is_inline + || state.parts == 1 + && state.mime_type == MimeType::Message + && mime_type == MimeType::TextPlain + && is_encoding_problem; + let (add_to_html, add_to_text) = if let MimeType::MultipartAlternative = state.mime_type { match mime_type {