From: don <> Date: Sat, 30 Jul 2005 10:22:36 +0000 (-0800) Subject: [project @ 2005-07-30 03:22:36 by don] X-Git-Tag: release/2.6.0~670 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=543cf5ecf8bd735b5e7ca26199b11fd5de47fd4a;p=debbugs.git [project @ 2005-07-30 03:22:36 by don] The "FUNNY FERRIGN THINGIES DON'T WORK" Memorial Commit * All messages that originate from the BTS and either go to .log files or out to users are now properly RFC1522 encoded. (closes: #306068) * Use Encode.pm to convert from other charsets to UTF8; this introduces an (as yet unexpressed) dependency on a newish version of perl. [And decodes more charsets to boot.] * Stop depending on libunicode-maputf8-perl because of the above. * Really make encode_rfc1522() do what it is supposed to do without stripping out spaces and doing other funny things; A patch for MIME::Words was ripped out and modified to make this work: http://rt.cpan.org/NoAuth/Bug.html?id=13027 * Decode every single header, not just some of them --- diff --git a/Debbugs/MIME.pm b/Debbugs/MIME.pm index 1aae3cd..8b2deed 100644 --- a/Debbugs/MIME.pm +++ b/Debbugs/MIME.pm @@ -16,7 +16,7 @@ use MIME::Parser; # for decode_rfc1522 use MIME::WordDecoder qw(); -use Unicode::MapUTF8 qw(to_utf8 utf8_supported_charset); +use Encode qw(decode encode is_utf8); # for encode_rfc1522 use MIME::Words qw(); @@ -107,14 +107,20 @@ sub parse ($) sub convert_to_utf8 { my ($data, $charset) = @_; - $charset =~ s/^(UTF)\-(\d+)/$1$2/i; - # XXX HACK UNTIL #320406 IS FIXED - return $data if $charset =~ /BIG5/i; - return $data unless utf8_supported_charset($charset); - return to_utf8({ - -string => $data, - -charset => $charset, - }); + # raw data just gets returned (that's the charset WordDecorder + # uses when it doesn't know what to do) + return $data if $charset eq 'raw' or is_utf8($data,1); + my $result; + eval { + # this encode/decode madness is to make sure that the data + # really is valid utf8 and that the is_utf8 flag is off. 
+ $result = encode("utf8",decode($charset,$data)) + }; + if ($@) { + warn "Unable to decode charset; '$charset' and '$data': $@"; + return $data; + } + return $result; } @@ -145,9 +151,72 @@ sub decode_rfc1522 ($) sub encode_rfc1522 ($) { - my ($string) = @_; +# my ($string) = @_; +# +# return MIME::Words::encode_mimewords($string, Charset => 'UTF-8'); + +# This function was stolen brazenly from a patched version of +# MIME::Words (fix for http://rt.cpan.org/NoAuth/Bug.html?id=13027) +# +# The patch has been modified slightly to only encode things that +# should be encoded, and not eat up every single character. + + my ($rawstr) = @_; + my $charset = 'UTF-8'; + my $encoding = 'q'; + + my $NONPRINT = "\\x00-\\x1F\\x7F-\\xFF"; + + my $result = ""; + my $current = $rawstr; + + while ($current ne "") { + if ($current =~ s/^(([^$NONPRINT]|\s)+)//) { + # safe chars (w/spaces) are handled as-is + $result .= $1; + next; + } elsif ($current =~ s/^(([$NONPRINT]|\s)+)//) { + # unsafe chars (w/spaces) are encoded + my $unsafe_chars = $1; + CHUNK75: + while ($unsafe_chars ne "") { + + my $full_len = length($unsafe_chars); + my $len = 1; + my $prev_encoded = ""; + + while ($len <= $full_len) { + # we try to encode next beginning of unsafe string + my $possible = substr $unsafe_chars, 0, $len; + my $encoded = MIME::Words::encode_mimeword($possible, $encoding, $charset); + + if (length($encoded) < 75) { + # if it could be encoded in specified maximum length, try + # bigger beginning... 
+ $prev_encoded = $encoded; + } else { + # + # ...otherwise, add encoded chunk which still fits, and + # restart with rest of unsafe string + $result .= $prev_encoded; + $prev_encoded = ""; + substr $unsafe_chars, 0, $len - 1, ""; + next CHUNK75; + } + + # if we have reached the end of the string, add final + # encoded chunk + if ($len == $full_len) { + $result .= $encoded; + last CHUNK75; + } - return MIME::Words::encode_mimewords($string, Charset => 'UTF-8'); + $len++; + } + } + } + } + return $result; } 1; diff --git a/debian/changelog b/debian/changelog index e6f037e..5c9aff7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -65,7 +65,7 @@ debbugs (2.4.2) UNRELEASED; urgency=low - All RFC1522 subject lines are decoded, both in the html information and the message headers. All messages are converted to UTF-8 wherever possible; all bugreport.cgi pages are now completely in UTF-8 to the - degree possible. (closes: #46848, #238984) + degree possible using Encode.pm (closes: #46848,#238984) - Add a convert_to_utf8 function to Debbugs::MIME to make the above possible; abstracts functionality that was already present in the decode_rfc1522 functionality. @@ -76,6 +76,8 @@ debbugs (2.4.2) UNRELEASED; urgency=low - Fix postfix instructions in README.mail (thanks to Jeff Teunissen) (closes: #134166) - Display old severity when changing severity (closes: #196947) + - All messages that originate from the BTS and either go to .log files + or out to users are now properly RFC1522 encoded. 
(closes: #306068) -- Colin Watson Fri, 20 Jun 2003 18:57:25 +0100 diff --git a/debian/control b/debian/control index c66c468..c681a8e 100644 --- a/debian/control +++ b/debian/control @@ -8,7 +8,7 @@ Build-Depends-Indep: debhelper Package: debbugs Architecture: all -Depends: perl5 | perl, exim4 | mail-transport-agent, libmailtools-perl, ed, libmime-perl, libio-stringy-perl, libmldbm-perl, libunicode-maputf8-perl, liburi-perl +Depends: perl5 | perl, exim4 | mail-transport-agent, libmailtools-perl, ed, libmime-perl, libio-stringy-perl, libmldbm-perl, liburi-perl Recommends: httpd, links | lynx Suggests: spamassassin (>= 3.0) Description: The bug tracking system based on the active Debian BTS diff --git a/scripts/process.in b/scripts/process.in index cdc1d10..fe311f5 100755 --- a/scripts/process.in +++ b/scripts/process.in @@ -1,5 +1,5 @@ #!/usr/bin/perl -# $Id: process.in,v 1.99 2005/07/29 04:34:11 don Exp $ +# $Id: process.in,v 1.100 2005/07/30 03:22:36 don Exp $ # # Usage: process nn # Temps: incoming/Pnn @@ -101,6 +101,7 @@ if ($entity and $entity->head->tags) { } for my $hdr (@headerlines) { + $hdr = decode_rfc1522($hdr); $_ = $hdr; s/\n\s/ /g; &finish if m/^x-loop: (\S+)$/i && $1 eq "$gMaintainerEmail"; @@ -111,7 +112,7 @@ for my $hdr (@headerlines) { if (s/^(\S+):\s*//) { my $v = lc $1; print DEBUG ">$v=$_<\n"; - $header{$v} = decode_rfc1522($_); + $header{$v} = $_; } else { print DEBUG "!>$_<\n"; } @@ -1019,6 +1020,12 @@ sub sendmessage { $recips = ['-t']; } $msg = "X-Loop: $gMaintainerEmail\n" . $msg; + # The original message received is written out in appendlog, so + # before writing out the other messages we've sent out, we need to + # RFC1522 encode the header. + my ($header,$body) = split /\n\n/, $msg, 2; + $header = encode_rfc1522($header); + $msg = $header . qq(\n\n). 
$body; my $hash = get_hashname($ref); #save email to the log diff --git a/scripts/service.in b/scripts/service.in index fdcaa68..693caec 100755 --- a/scripts/service.in +++ b/scripts/service.in @@ -1,12 +1,12 @@ #!/usr/bin/perl -# $Id: service.in,v 1.110 2005/07/29 20:32:30 cjwatson Exp $ +# $Id: service.in,v 1.111 2005/07/30 03:22:36 don Exp $ # # Usage: service .nn # Temps: incoming/P.nn use File::Copy; use MIME::Parser; -use Debbugs::MIME qw(decode_rfc1522); +use Debbugs::MIME qw(decode_rfc1522 encode_rfc1522); $config_path = '/etc/debbugs'; $lib_path = '/usr/lib/debbugs'; @@ -75,12 +75,13 @@ if ($entity and $entity->head->tags) { } for (@headerlines) { + $_ = decode_rfc1522($_); s/\n\s/ /g; print ">$_<\n" if $debug; if (s/^(\S+):\s*//) { my $v = lc $1; print ">$v=$_<\n" if $debug; - $header{$v} = decode_rfc1522($_); + $header{$v} = $_; } else { print "!>$_<\n" if $debug; } @@ -864,6 +865,13 @@ unlink("incoming/P$nn") || &quit("unlinking incoming/P$nn: $!"); sub sendmailmessage { local ($message,@recips) = @_; $message = "X-Loop: $gMaintainerEmail\n" . $message; + # The original message received is written out above, so before + # writing out the other messages we've sent out, we need to + # RFC1522 encode the header. + my ($header,$body) = split /\n\n/, $message, 2; + $header = encode_rfc1522($header); + $message = $header . qq(\n\n). $body; + print "mailing to >@recips<\n" if $debug; $c= open(D,"|-"); defined($c) || &quit("mailing forking for sendmail: $!");