# for decode_rfc1522
use MIME::WordDecoder qw();
-use Unicode::MapUTF8 qw(to_utf8 utf8_supported_charset);
+use Encode qw(decode encode is_utf8);
# for encode_rfc1522
use MIME::Words qw();
sub convert_to_utf8 {
my ($data, $charset) = @_;
- $charset =~ s/^(UTF)\-(\d+)/$1$2/i;
- # XXX HACK UNTIL #320406 IS FIXED
- return $data if $charset =~ /BIG5/i;
- return $data unless utf8_supported_charset($charset);
- return to_utf8({
- -string => $data,
- -charset => $charset,
- });
+ # raw data just gets returned ('raw' is the charset WordDecoder uses when
+ # it doesn't know what to do); so does data is_utf8($data,1) already accepts
+ return $data if $charset eq 'raw' or is_utf8($data,1);
+ my $result;
+ eval {
+ # decode from $charset and encode again: this encode/decode madness is to
+ # make sure the data really is valid utf8 and that the is_utf8 flag is off.
+ $result = encode("utf8",decode($charset,$data))
+ };
+ if ($@) { # the decode/encode round trip died: warn and hand back the input unchanged
+ warn "Unable to decode charset; '$charset' and '$data': $@";
+ return $data;
+ }
+ return $result;
}
sub encode_rfc1522 ($)
{
- my ($string) = @_;
+# my ($string) = @_;
+#
+# return MIME::Words::encode_mimewords($string, Charset => 'UTF-8');
+
+# This function was stolen brazenly from a patched version of
+# MIME::Words (fix for http://rt.cpan.org/NoAuth/Bug.html?id=13027)
+#
+# The patch has been modified slightly to only encode things that
+# should be encoded, and not eat up every single character.
+
+ my ($rawstr) = @_;
+ my $charset = 'UTF-8'; # charset argument handed to encode_mimeword below
+ my $encoding = 'q'; # encoding argument handed to encode_mimeword; presumably selects "Q" encoding -- confirm
+
+ my $NONPRINT = "\\x00-\\x1F\\x7F-\\xFF"; # character-class body: \x00-\x1F and \x7F-\xFF are "unsafe"
+
+ my $result = "";
+ my $current = $rawstr;
+
+ while ($current ne "") {
+ if ($current =~ s/^(([^$NONPRINT]|\s)+)//) {
+ # a leading run of safe chars (and any whitespace) is copied through as-is
+ $result .= $1;
+ next;
+ } elsif ($current =~ s/^(([$NONPRINT]|\s)+)//) {
+ # a leading run of unsafe chars (and whitespace within/after it) is encoded
+ my $unsafe_chars = $1;
+ CHUNK75:
+ while ($unsafe_chars ne "") {
+
+ my $full_len = length($unsafe_chars);
+ my $len = 1;
+ my $prev_encoded = "";
+
+ while ($len <= $full_len) {
+ # try to encode the first $len chars of the unsafe string; $len grows by one per pass
+ my $possible = substr $unsafe_chars, 0, $len; # NOTE(review): cut may fall inside a multi-byte sequence if input is octets -- confirm
+ my $encoded = MIME::Words::encode_mimeword($possible, $encoding, $charset);
+
+ if (length($encoded) < 75) {
+ # the encoded form is still shorter than 75 characters: remember
+ # it and try a bigger beginning...
+ $prev_encoded = $encoded;
+ } else {
+ # ...otherwise, add the last encoded chunk that still fit, and restart
+ # with the rest of the unsafe string.  NOTE(review): chunks are appended
+ # with no whitespace between them; check against RFC 2047 -- TODO confirm
+ $result .= $prev_encoded;
+ $prev_encoded = "";
+ substr $unsafe_chars, 0, $len - 1, ""; # drop the $len - 1 chars covered by $prev_encoded
+ next CHUNK75;
+ }
+
+ # if we have reached the end of the string, add final
+ # encoded chunk and stop processing this unsafe run
+ if ($len == $full_len) {
+ $result .= $encoded;
+ last CHUNK75;
+ }
- return MIME::Words::encode_mimewords($string, Charset => 'UTF-8');
+ $len++;
+ }
+ }
+ }
+ }
+ return $result;
}
1;
- All RFC1522 subject lines are decoded, both in the html information
and the message headers. All messages are converted to UTF-8 wherever
possible; all bugreport.cgi pages are now completely in UTF-8 to the
- degree possible. (closes: #46848, #238984)
+ degree possible using Encode.pm (closes: #46848,#238984)
- Add a convert_to_utf8 function to Debbugs::Mime to make the above
possible; abstracts functionality that was already present in the
decode_rfc1522 functionality.
- Fix postfix instructions in README.mail (thanks to Jeff Teunissen)
(closes: #134166)
- Display old severity when changing severity (closes: #196947)
+ - All messages that originate from the BTS and either go to .log files
+ or out to users are now properly RFC1522 encoded. (closes: #306068)
-- Colin Watson <cjwatson@debian.org> Fri, 20 Jun 2003 18:57:25 +0100
Package: debbugs
Architecture: all
-Depends: perl5 | perl, exim4 | mail-transport-agent, libmailtools-perl, ed, libmime-perl, libio-stringy-perl, libmldbm-perl, libunicode-maputf8-perl, liburi-perl
+Depends: perl5 | perl, exim4 | mail-transport-agent, libmailtools-perl, ed, libmime-perl, libio-stringy-perl, libmldbm-perl, liburi-perl
Recommends: httpd, links | lynx
Suggests: spamassassin (>= 3.0)
Description: The bug tracking system based on the active Debian BTS
#!/usr/bin/perl
-# $Id: process.in,v 1.99 2005/07/29 04:34:11 don Exp $
+# $Id: process.in,v 1.100 2005/07/30 03:22:36 don Exp $
#
# Usage: process nn
# Temps: incoming/Pnn
}
for my $hdr (@headerlines) {
+ $hdr = decode_rfc1522($hdr);
$_ = $hdr;
s/\n\s/ /g;
&finish if m/^x-loop: (\S+)$/i && $1 eq "$gMaintainerEmail";
if (s/^(\S+):\s*//) {
my $v = lc $1;
print DEBUG ">$v=$_<\n";
- $header{$v} = decode_rfc1522($_);
+ $header{$v} = $_;
} else {
print DEBUG "!>$_<\n";
}
$recips = ['-t'];
}
$msg = "X-Loop: $gMaintainerEmail\n" . $msg;
+ # The original message received is written out in appendlog, so
+ # before writing out the other messages we've sent out, we need to
+ # RFC1522 encode the header.
+ my ($header,$body) = split /\n\n/, $msg, 2;
+ $header = encode_rfc1522($header);
+ $msg = $header . qq(\n\n). $body;
my $hash = get_hashname($ref);
#save email to the log
#!/usr/bin/perl
-# $Id: service.in,v 1.110 2005/07/29 20:32:30 cjwatson Exp $
+# $Id: service.in,v 1.111 2005/07/30 03:22:36 don Exp $
#
# Usage: service <code>.nn
# Temps: incoming/P<code>.nn
use File::Copy;
use MIME::Parser;
-use Debbugs::MIME qw(decode_rfc1522);
+use Debbugs::MIME qw(decode_rfc1522 encode_rfc1522);
$config_path = '/etc/debbugs';
$lib_path = '/usr/lib/debbugs';
}
for (@headerlines) {
+ $_ = decode_rfc1522($_);
s/\n\s/ /g;
print ">$_<\n" if $debug;
if (s/^(\S+):\s*//) {
my $v = lc $1;
print ">$v=$_<\n" if $debug;
- $header{$v} = decode_rfc1522($_);
+ $header{$v} = $_;
} else {
print "!>$_<\n" if $debug;
}
sub sendmailmessage {
local ($message,@recips) = @_;
$message = "X-Loop: $gMaintainerEmail\n" . $message;
+ # The original message received is written out above, so before
+ # writing out the other messages we've sent out, we need to
+ # RFC1522 encode the header.
+ my ($header,$body) = split /\n\n/, $message, 2;
+ $header = encode_rfc1522($header);
+ $message = $header . qq(\n\n). $body;
+
print "mailing to >@recips<\n" if $debug;
$c= open(D,"|-");
defined($c) || &quit("mailing forking for sendmail: $!");