From 4368c8e0647804a33cd029f35158914a5736b183 Mon Sep 17 00:00:00 2001 From: Michael Stapelberg Date: Fri, 13 Apr 2012 13:25:29 +0200 Subject: [PATCH] Support 'charset = "UTF-8"' (with spaces) This does not conform to the RFC, but PHPMailer in older versions does that. Therefore, we should support it :-/. --- lib/heliotrope/message.rb | 2 +- test/broken_charset.msg | 57 +++++++++++++++++++++++++++++++++++++++ test/test_heliotrope.rb | 12 +++++++++ 3 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 test/broken_charset.msg diff --git a/lib/heliotrope/message.rb b/lib/heliotrope/message.rb index 0c928cd..c49af66 100644 --- a/lib/heliotrope/message.rb +++ b/lib/heliotrope/message.rb @@ -257,7 +257,7 @@ def mime_content_for mime_part, preferred_type mt = mime_type_for(mime_part) || "text/plain" # i guess content_type = if mt =~ /^(.+);/ then $1.downcase else mt end - source_charset = if mt =~ /charset="?(.*?)"?(;|$)/i then $1 else "US-ASCII" end + source_charset = if mt =~ /charset\s*=\s*"?(.*?)"?(;|$)/i then $1 else "US-ASCII" end content = mime_part.decode converted_content, converted_charset = if(converter = CONVERSIONS[[content_type, preferred_type]]) diff --git a/test/broken_charset.msg b/test/broken_charset.msg new file mode 100644 index 0000000..b549bd8 --- /dev/null +++ b/test/broken_charset.msg @@ -0,0 +1,57 @@ +Return-Path: +Date: Fri, 13 Apr 2012 12:00:02 +0200 +Return-Path: moodle@bar.invalid +To: "Michael Stapelberg" +From: "=?utf-8?B?THV0eiBTdHLDvG5nbWFubg==?=" +Subject: =?utf-8?B?QUZTMjAxMjogQXVmZ2FiZW46IFB1bmt0ZSDDnGJ1bmcgMQ==?= +Message-ID: <0ba81486243611670b534d4564ee3718@localhost.localdomain> +X-Priority: 3 +X-Mailer: PHPMailer [version Moodle 2007101590] +MIME-Version: 1.0 +Content-Type: multipart/alternative; + boundary="b1_0ba81486243611670b534d4564ee3718" +X-DSPAM-Result: Innocent +X-DSPAM-Processed: Fri Apr 13 12:08:16 2012 +X-DSPAM-Confidence: 0.9899 +X-DSPAM-Probability: 0.0000 +X-DSPAM-Signature: 4f87fb10319744089468961 + + 
+--b1_0ba81486243611670b534d4564ee3718 +Content-Type: text/plain; charset = "UTF-8" +Content-Transfer-Encoding: quoted-printable + + +AFS2012 -> Aufgaben -> Punkte =C3=9Cbung 1 +--------------------------------------------------------------------- +Lutz Str=C3=BCngmann hat einen Kommentar zu Ihrer eingereichten Aufgabe = +'Punkte +=C3=9Cbung 1' verfasst. + +Sie finden ihn im Anhang Ihrer abgegebenen Aufgabe: + +http://moodle.hs-mannheim.de/mod/assignment/view.php?id=3D11205 +--------------------------------------------------------------------- + + + +--b1_0ba81486243611670b534d4564ee3718 +Content-Type: text/html; charset = "UTF-8" +Content-Transfer-Encoding: quoted-printable + +

AFS2012 = +->Aufgaben ->Punkte =C3=9Cbung 1


Lutz Str=C3=BCngmann hat einen Kommentar zu Ihrer = +abgegebenen Aufgabe verfasst.'Punkte =C3=9Cbung 1'

+Sie finden ihn im Anhang Ihrer=20 +eingereichten Aufgabe:.


+
+
+
+--b1_0ba81486243611670b534d4564ee3718--
+
+
diff --git a/test/test_heliotrope.rb b/test/test_heliotrope.rb
index e396b91..c480d29 100644
--- a/test/test_heliotrope.rb
+++ b/test/test_heliotrope.rb
@@ -1,3 +1,5 @@
+# encoding: UTF-8
+
 require 'test/unit'
 require 'fileutils'
 require 'digest/md5'
@@ -394,6 +396,16 @@ def test_labellist_pruning_removes_labels_without_corresponding_threads
     assert_does_not_include "potato", @metaindex.all_labels
   end
 
+  # Older PHPMailer versions write 'charset = "UTF-8"' with spaces around '=';
+  # the part must still be decoded with that charset, not the US-ASCII fallback.
+  def test_recognize_charset
+    msg = File.open('test/broken_charset.msg', 'r') { |f| Heliotrope::Message.new(f).parse! }
+    assert_not_nil msg
+    expected_body = "\nAFS2012 -> Aufgaben -> Punkte Übung 1\n---------------------------------------------------------------------\nLutz Strüngmann hat einen Kommentar zu Ihrer eingereichten Aufgabe 'Punkte\nÜbung 1' verfasst.\n\nSie finden ihn im Anhang Ihrer abgegebenen Aufgabe:\n\nhttp://moodle.hs-mannheim.de/mod/assignment/view.php?id=11205\n---------------------------------------------------------------------\n\n\n"
+    expected = [["text/plain; charset = \"utf-8\"", nil, nil, expected_body]]
+    assert_equal expected, msg.mime_parts("text/plain") # Test::Unit order is (expected, actual)
+  end
+
   private
 
   def assert_includes v, set # standard one seems to have these things reversed