[project @ 2005-07-30 03:22:36 by don]

author don <>

Sat, 30 Jul 2005 10:22:36 +0000 (02:22 -0800)

committer don <>

Sat, 30 Jul 2005 10:22:36 +0000 (02:22 -0800)
author don <>
Sat, 30 Jul 2005 10:22:36 +0000 (02:22 -0800)
committer don <>
Sat, 30 Jul 2005 10:22:36 +0000 (02:22 -0800)
diff --git a/Debbugs/MIME.pm b/Debbugs/MIME.pm

index 1aae3cd53ba47895b6adeb9cc898234c81099a95..8b2deed386f827a28f236e4e4e646e51e56e9721 100644 (file)
--- a/Debbugs/MIME.pm
+++ b/Debbugs/MIME.pm
@@ -16,7 +16,7 @@ use MIME::Parser;
  
  # for decode_rfc1522
  use MIME::WordDecoder qw();
-use Unicode::MapUTF8 qw(to_utf8 utf8_supported_charset);
+use Encode qw(decode encode is_utf8);
  
  # for encode_rfc1522
  use MIME::Words qw();
@@ -107,14 +107,20 @@ sub parse ($)
  
  sub convert_to_utf8 {
       my ($data, $charset) = @_;
-     $charset =~ s/^(UTF)\-(\d+)/$1$2/i;
-     # XXX HACK UNTIL #320406 IS FIXED
-     return $data if $charset =~ /BIG5/i;
-     return $data unless utf8_supported_charset($charset);
-     return to_utf8({
-                    -string  => $data,
-                    -charset => $charset,
-                   });
+     # raw data just gets returned (that's the charset WordDecoder
+     # uses when it doesn't know what to do)
+     return $data if $charset eq 'raw' or is_utf8($data,1);
+     my $result;
+     eval {
+         # this encode/decode madness is to make sure that the data
+         # really is valid utf8 and that the is_utf8 flag is off.
+         $result = encode("utf8",decode($charset,$data))
+     };
+     if ($@) {
+         warn "Unable to decode charset; '$charset' and '$data': $@";
+         return $data;
+     }
+     return $result;
  }
  
  
@@ -145,9 +151,72 @@ sub decode_rfc1522 ($)
  
  sub encode_rfc1522 ($)
  {
-    my ($string) = @_;
+#    my ($string) = @_;
+#
+#    return MIME::Words::encode_mimewords($string, Charset => 'UTF-8');
+
+# This function was stolen brazenly from a patched version of
+# MIME::Words (fix for http://rt.cpan.org/NoAuth/Bug.html?id=13027)
+#
+# The patch has been modified slightly to only encode things that
+# should be encoded, and not eat up every single character.
+
+    my ($rawstr) = @_;
+    my $charset  = 'UTF-8';
+    my $encoding = 'q';
+
+    my $NONPRINT = "\\x00-\\x1F\\x7F-\\xFF"; 
+
+    my $result = "";
+    my $current = $rawstr;
+
+    while ($current ne "") {
+      if ($current =~ s/^(([^$NONPRINT]|\s)+)//) {
+       # safe chars (w/spaces) are handled as-is
+       $result .= $1;
+       next;
+      } elsif ($current =~ s/^(([$NONPRINT]|\s)+)//) {
+       # unsafe chars (w/spaces) are encoded
+       my $unsafe_chars = $1;
+      CHUNK75:
+       while ($unsafe_chars ne "") {
+
+         my $full_len = length($unsafe_chars);
+         my $len = 1;
+         my $prev_encoded = "";
+
+         while ($len <= $full_len) {
+           # we try to encode next beginning of unsafe string
+           my $possible = substr $unsafe_chars, 0, $len;
+           my $encoded = MIME::Words::encode_mimeword($possible, $encoding, $charset);
+
+           if (length($encoded) < 75) {
+             # if it could be encoded in specified maximum length, try
+             # bigger beginning...
+             $prev_encoded = $encoded;
+           } else {
+             #
+             # ...otherwise, add encoded chunk which still fits, and
+             # restart with rest of unsafe string
+             $result .= $prev_encoded;
+             $prev_encoded = "";
+             substr $unsafe_chars, 0, $len - 1, "";
+             next CHUNK75;
+           }
+
+           # if we have reached the end of the string, add final
+           # encoded chunk
+           if ($len == $full_len) {
+             $result .= $encoded;
+             last CHUNK75;
+           }
  
-    return MIME::Words::encode_mimewords($string, Charset => 'UTF-8');
+           $len++;
+         }
+       }
+      }
+    }
+    return $result;
  }
  
  1;
diff --git a/debian/changelog b/debian/changelog

index e6f037e55392ab3ed64aac34fe6dc01c07f9600d..5c9aff723a092ee544cfc18de8d9640743065705 100644 (file)
--- a/debian/changelog
+++ b/debian/changelog
@@ -65,7 +65,7 @@ debbugs (2.4.2) UNRELEASED; urgency=low
      - All RFC1522 subject lines are decoded, both in the html information
        and the message headers. All messages are converted to UTF-8 whereever
        possible; all bugreport.cgi pages are now completely in UTF-8 to the
-      degree possible. (closes: #46848, #238984)
+      degree possible using Encode.pm (closes: #46848,#238984)
      - Add a convert_to_utf8 function to Debbugs::Mime to make the above
        possible; abstracts functionality that was already present in the
        decode_rfc1522 fucntionality.
@@ -76,6 +76,8 @@ debbugs (2.4.2) UNRELEASED; urgency=low
      - Fix postfix instructions in README.mail (thanks to Jeff Teunissen)
        (closes: #134166)
      - Display old severity when changing severity (closes: #196947)
+    - All messages that originate from the BTS and either go to .log files
+      or out to users are now properly RFC1522 encoded. (closes: #306068)
  
   -- Colin Watson <cjwatson@debian.org>  Fri, 20 Jun 2003 18:57:25 +0100
  
diff --git a/debian/control b/debian/control

index c66c4683aa72660fb116400dcec09f0e082f77e8..c681a8e661239481c49c45c71be7b70248dc1fb5 100644 (file)
--- a/debian/control
+++ b/debian/control
@@ -8,7 +8,7 @@ Build-Depends-Indep: debhelper
  
  Package: debbugs
  Architecture: all
-Depends: perl5 | perl, exim4 | mail-transport-agent, libmailtools-perl, ed, libmime-perl, libio-stringy-perl, libmldbm-perl, libunicode-maputf8-perl, liburi-perl
+Depends: perl5 | perl, exim4 | mail-transport-agent, libmailtools-perl, ed, libmime-perl, libio-stringy-perl, libmldbm-perl, liburi-perl
  Recommends: httpd, links | lynx
  Suggests: spamassassin (>= 3.0)
  Description: The bug tracking system based on the active Debian BTS
diff --git a/scripts/process.in b/scripts/process.in

index cdc1d1098619fea03c89fe5c20e9722d50937bc2..fe311f5527160fc894b88993a5f6a6ed898c6b8f 100755 (executable)
--- a/scripts/process.in
+++ b/scripts/process.in
@@ -1,5 +1,5 @@
  #!/usr/bin/perl
-# $Id: process.in,v 1.99 2005/07/29 04:34:11 don Exp $
+# $Id: process.in,v 1.100 2005/07/30 03:22:36 don Exp $
  #
  # Usage: process nn
  # Temps:  incoming/Pnn
@@ -101,6 +101,7 @@ if ($entity and $entity->head->tags) {
  }
  
  for my $hdr (@headerlines) {
+    $hdr = decode_rfc1522($hdr);
      $_ = $hdr;
      s/\n\s/ /g;
      &finish if m/^x-loop: (\S+)$/i && $1 eq "$gMaintainerEmail";
@@ -111,7 +112,7 @@ for my $hdr (@headerlines) {
      if (s/^(\S+):\s*//) {
         my $v = lc $1;
         print DEBUG ">$v=$_<\n";
-       $header{$v} = decode_rfc1522($_);
+       $header{$v} = $_;
      } else {
         print DEBUG "!>$_<\n";
      }
@@ -1019,6 +1020,12 @@ sub sendmessage {
          $recips = ['-t'];
      }
      $msg = "X-Loop: $gMaintainerEmail\n" . $msg;
+    # The original message received is written out in appendlog, so
+    # before writing out the other messages we've sent out, we need to
+    # RFC1522 encode the header.
+    my ($header,$body) = split /\n\n/, $msg, 2;
+    $header = encode_rfc1522($header);
+    $msg = $header . qq(\n\n). $body;
  
      my $hash = get_hashname($ref);
      #save email to the log
diff --git a/scripts/service.in b/scripts/service.in

index fdcaa68b0e00800552f1ca996987805036acea15..693caec7477f605b297b4604037b9bc6efca6c7d 100755 (executable)
--- a/scripts/service.in
+++ b/scripts/service.in
@@ -1,12 +1,12 @@
  #!/usr/bin/perl
-# $Id: service.in,v 1.110 2005/07/29 20:32:30 cjwatson Exp $
+# $Id: service.in,v 1.111 2005/07/30 03:22:36 don Exp $
  #
  # Usage: service <code>.nn
  # Temps:  incoming/P<code>.nn
  
  use File::Copy;
  use MIME::Parser;
-use Debbugs::MIME qw(decode_rfc1522);
+use Debbugs::MIME qw(decode_rfc1522 encode_rfc1522);
  
  $config_path = '/etc/debbugs';
  $lib_path = '/usr/lib/debbugs';
@@ -75,12 +75,13 @@ if ($entity and $entity->head->tags) {
  }
  
  for (@headerlines) {
+    $_ = decode_rfc1522($_);
      s/\n\s/ /g;
      print ">$_<\n" if $debug;
      if (s/^(\S+):\s*//) {
         my $v = lc $1;
         print ">$v=$_<\n" if $debug;
-       $header{$v} = decode_rfc1522($_);
+       $header{$v} = $_;
      } else {
         print "!>$_<\n" if $debug;
      }
@@ -864,6 +865,13 @@ unlink("incoming/P$nn") || &quit("unlinking incoming/P$nn: $!");
  sub sendmailmessage {
      local ($message,@recips) = @_;
      $message = "X-Loop: $gMaintainerEmail\n" . $message;
+    # The original message received is written out above, so before
+    # writing out the other messages we've sent out, we need to
+    # RFC1522 encode the header.
+    my ($header,$body) = split /\n\n/, $message, 2;
+    $header = encode_rfc1522($header);
+    $message = $header . qq(\n\n). $body;
+
      print "mailing to >@recips<\n" if $debug;
      $c= open(D,"|-");
      defined($c) || &quit("mailing forking for sendmail: $!");
author	don <>
	Sat, 30 Jul 2005 10:22:36 +0000 (02:22 -0800)
committer	don <>
	Sat, 30 Jul 2005 10:22:36 +0000 (02:22 -0800)
Debbugs/MIME.pm		patch \| blob \| history
debian/changelog		patch \| blob \| history
debian/control		patch \| blob \| history
scripts/process.in		patch \| blob \| history
scripts/service.in		patch \| blob \| history