merge libravatar support into master

author Don Armstrong <don@donarmstrong.com>

Thu, 14 Mar 2013 20:13:56 +0000 (13:13 -0700)

committer Don Armstrong <don@donarmstrong.com>

Thu, 14 Mar 2013 20:13:56 +0000 (13:13 -0700)
author Don Armstrong <don@donarmstrong.com>
Thu, 14 Mar 2013 20:13:56 +0000 (13:13 -0700)
committer Don Armstrong <don@donarmstrong.com>
Thu, 14 Mar 2013 20:13:56 +0000 (13:13 -0700)
diff --git a/Debbugs/CGI/Bugreport.pm b/Debbugs/CGI/Bugreport.pm

index 59d88b07f5dedbdcb112574881a2085f084405be..29602c58c39f1088bc17208e49ab28c1561ac152 100644 (file)
--- a/Debbugs/CGI/Bugreport.pm
+++ b/Debbugs/CGI/Bugreport.pm
@@ -34,9 +34,10 @@ use IO::Scalar;
  use Params::Validate qw(validate_with :types);
  use Digest::MD5 qw(md5_hex);
  use Debbugs::Mail qw(get_addresses);
-use Debbugs::MIME qw(convert_to_utf8 decode_rfc1522 create_mime_message);
+use Debbugs::MIME qw(decode_rfc1522 create_mime_message);
  use Debbugs::CGI qw(:url :html :util);
  use Debbugs::Common qw(globify_scalar english_join);
+use Debbugs::UTF8;
  use Debbugs::Config qw(:config);
  use POSIX qw(strftime);
  use Encode qw(decode_utf8);
@@ -342,7 +343,7 @@ sub handle_record{
       local $_ = $record->{type};
       if (/html/) {
          # $record->{text} is not in perl's internal encoding; convert it
-        my $text = decode_utf8(decode_rfc1522($record->{text}));
+        my $text = decode_rfc1522(decode_utf8($record->{text}));
           my ($time) = $text =~ /<!--\s+time:(\d+)\s+-->/;
           my $class = $text =~ /^<strong>(?:Acknowledgement|Reply|Information|Report|Notification)/m ? 'infmessage':'msgreceived';
           $output .= $text;
diff --git a/Debbugs/Common.pm b/Debbugs/Common.pm

index f3c8218ea66cf1cc8e63e6ac77e22fd31d5a2f47..1331ac003cb8108a4b3f46b8564b334162f37e84 100644 (file)
--- a/Debbugs/Common.pm
+++ b/Debbugs/Common.pm
@@ -50,7 +50,6 @@ BEGIN{
                                 qw(cleanup_eval_fail),
                                 qw(hash_slice),
                                ],
-                    utf8   => [qw(encode_utf8_structure)],
                      date   => [qw(secs_to_english)],
                      quit   => [qw(quit)],
                      lock   => [qw(filelock unfilelock lockpid)],
@@ -71,7 +70,6 @@ use IO::Scalar;
  use Debbugs::MIME qw(decode_rfc1522);
  use Mail::Address;
  use Cwd qw(cwd);
-use Encode qw(encode_utf8 is_utf8);
  use Storable qw(dclone);
  
  use Params::Validate qw(validate_with :types);
@@ -827,7 +825,7 @@ sub globify_scalar {
           if (defined ref($scalar)) {
                if (ref($scalar) eq 'SCALAR' and
                    not UNIVERSAL::isa($scalar,'GLOB')) {
-                   open $handle, '>:scalar:utf8', $scalar;
+                   open $handle, '>:scalar:encoding(UTF-8)', $scalar;
                     return $handle;
                }
                else {
@@ -841,7 +839,7 @@ sub globify_scalar {
                carp "Given a non-scalar reference, non-glob to globify_scalar; returning /dev/null handle";
           }
       }
-     return IO::File->new('/dev/null','>:utf8');
+     return IO::File->new('/dev/null','>:encoding(UTF-8)');
  }
  
  =head2 cleanup_eval_fail()
@@ -900,55 +898,6 @@ sub hash_slice(\%@) {
  }
  
  
-=head1 UTF-8
-
-These functions are exported with the :utf8 tag
-
-=head2 encode_utf8_structure
-
-     %newdata = encode_utf8_structure(%newdata);
-
-Takes a complex data structure and encodes any strings with is_utf8
-set into their constituent octets.
-
-=cut
-
-our $depth = 0;
-sub encode_utf8_structure {
-    ++$depth;
-    my @ret;
-    for my $_ (@_) {
-       if (ref($_) eq 'HASH') {
-           push @ret, {encode_utf8_structure(%{$depth == 1 ? dclone($_):$_})};
-       }
-       elsif (ref($_) eq 'ARRAY') {
-           push @ret, [encode_utf8_structure(@{$depth == 1 ? dclone($_):$_})];
-       }
-       elsif (ref($_)) {
-           # we don't know how to handle non hash or non arrays
-           push @ret,$_;
-       }
-       else {
-           push @ret,__encode_utf8($_);
-       }
-    }
-    --$depth;
-    return @ret;
-}
-
-sub __encode_utf8 {
-    my @ret;
-    for my $r (@_) {
-       if (not ref($r) and is_utf8($r)) {
-           $r = encode_utf8($r);
-       }
-       push @ret,$r;
-    }
-    return @ret;
-}
-
-
-
  1;
  
  __END__
diff --git a/Debbugs/Control.pm b/Debbugs/Control.pm

index 5739734b06f24f623ff6ddfc37391451c226fcdf..4b3a02aa37daea67e05c609e2a44f35fd25a4747 100644 (file)
--- a/Debbugs/Control.pm
+++ b/Debbugs/Control.pm
@@ -110,7 +110,8 @@ BEGIN{
  }
  
  use Debbugs::Config qw(:config);
-use Debbugs::Common qw(:lock buglog :misc get_hashname sort_versions :utf8);
+use Debbugs::Common qw(:lock buglog :misc get_hashname sort_versions);
+use Debbugs::UTF8;
  use Debbugs::Status qw(bug_archiveable :read :hook writebug new_bug splitpackages split_status_fields get_bug_status);
  use Debbugs::CGI qw(html_escape);
  use Debbugs::Log qw(:misc :write);
@@ -125,7 +126,7 @@ use IO::File;
  
  use Debbugs::Text qw(:templates);
  
-use Debbugs::Mail qw(rfc822_date send_mail_message default_headers);
+use Debbugs::Mail qw(rfc822_date send_mail_message default_headers encode_headers);
  use Debbugs::MIME qw(create_mime_message);
  
  use Mail::RFC822::Address qw();
@@ -3437,25 +3438,25 @@ sub append_action_to_log{
       }
       my $msg = join('',
                     (exists $param{command} ?
-                    "<!-- command:".html_escape(encode_utf8($param{command}))." -->\n":""
+                    "<!-- command:".html_escape(encode_utf8_safely($param{command}))." -->\n":""
                     ),
                     (length $param{requester} ?
-                    "<!-- requester: ".html_escape(encode_utf8($param{requester}))." -->\n":""
+                    "<!-- requester: ".html_escape(encode_utf8_safely($param{requester}))." -->\n":""
                     ),
                     (length $param{request_addr} ?
-                    "<!-- request_addr: ".html_escape(encode_utf8($param{request_addr}))." -->\n":""
+                    "<!-- request_addr: ".html_escape(encode_utf8_safely($param{request_addr}))." -->\n":""
                     ),
                     "<!-- time:".time()." -->\n",
                     $data_diff,
-                   "<strong>".html_escape(encode_utf8($param{action}))."</strong>\n");
+                   "<strong>".html_escape(encode_utf8_safely($param{action}))."</strong>\n");
       if (length $param{requester}) {
-          $msg .= "Request was from <code>".html_escape(encode_utf8($param{requester}))."</code>\n";
+          $msg .= "Request was from <code>".html_escape(encode_utf8_safely($param{requester}))."</code>\n";
       }
       if (length $param{request_addr}) {
-          $msg .= "to <code>".html_escape(encode_utf8($param{request_addr}))."</code>";
+          $msg .= "to <code>".html_escape(encode_utf8_safely($param{request_addr}))."</code>";
       }
       if (length $param{desc}) {
-         $msg .= ":<br>\n".encode_utf8($param{desc})."\n";
+         $msg .= ":<br>\n".encode_utf8_safely($param{desc})."\n";
       }
       else {
           $msg .= ".\n";
@@ -3590,13 +3591,14 @@ sub __return_append_to_log_options{
       }
       if (not exists $param{message}) {
           my $date = rfc822_date();
-         $param{message} = fill_in_template(template  => 'mail/fake_control_message',
-                                            variables => {request_addr => $param{request_addr},
-                                                          requester    => $param{requester},
-                                                          date         => $date,
-                                                          action       => $action
-                                                         },
-                                           );
+         $param{message} =
+              encode_headers(fill_in_template(template  => 'mail/fake_control_message',
+                                              variables => {request_addr => $param{request_addr},
+                                                            requester    => $param{requester},
+                                                            date         => $date,
+                                                            action       => $action
+                                                           },
+                                             ));
       }
       if (not defined $action) {
           carp "Undefined action!";
diff --git a/Debbugs/Log.pm b/Debbugs/Log.pm

index 2ae7af7b919705b1e859f55cfc686a0741ab980c..8b99b7de7a097207ffbe6abff3848edeae63fe52 100644 (file)
--- a/Debbugs/Log.pm
+++ b/Debbugs/Log.pm
@@ -39,7 +39,7 @@ use Carp;
  
  use Debbugs::Common qw(getbuglocation getbugcomponent make_list);
  use Params::Validate qw(:types validate_with);
-use Encode qw(encode is_utf8);
+use Encode qw(encode encode_utf8 is_utf8);
  
  =head1 NAME
  
@@ -390,8 +390,12 @@ sub write_log_records
         croak "record type '$type' with no text field" unless defined $record->{text};
         # I am not sure if we really want to croak here; but this is
         # almost certainly a bug if is_utf8 is on.
-       # croak "probably wrong encoding" if is_utf8($record->{text});
-       my ($text) = escape_log($record->{text});
+        my $text = $record->{text};
+        if (is_utf8($text)) {
+            carp('Record text was in the wrong encoding (perl internal instead of utf8 octets)');
+            $text = encode_utf8($text)
+        }
+       ($text) = escape_log($text);
         if ($type eq 'autocheck') {
             print {$logfh} "\01\n$text\03\n" or
                 die "Unable to write to logfile: $!";
diff --git a/Debbugs/MIME.pm b/Debbugs/MIME.pm

index 481be7bf9573f758ff331292b8f5a61fe66b75a6..afc077649939784e4cedf95a81d7b75efb04ded8 100644 (file)
--- a/Debbugs/MIME.pm
+++ b/Debbugs/MIME.pm
@@ -41,7 +41,6 @@ BEGIN {
  
      %EXPORT_TAGS = (mime => [qw(parse create_mime_message getmailbody)],
                     rfc1522 => [qw(decode_rfc1522 encode_rfc1522)],
-                   utf8 => [qw(convert_to_utf8)],
                    );
      @EXPORT_OK=();
      Exporter::export_ok_tags(keys %EXPORT_TAGS);
@@ -55,11 +54,11 @@ use MIME::Parser;
  use POSIX qw(strftime);
  use List::MoreUtils qw(apply);
  
-# for decode_rfc1522
-use MIME::WordDecoder qw();
-use Encode qw(decode encode encode_utf8 decode_utf8 is_utf8);
+# for convert_to_utf8
+use Debbugs::UTF8 qw(convert_to_utf8);
  
-# for encode_rfc1522
+# for decode_rfc1522 and encode_rfc1522
+use Encode qw(decode encode encode_utf8 decode_utf8 is_utf8);
  use MIME::Words qw();
  
  sub getmailbody
@@ -69,7 +68,7 @@ sub getmailbody
      if ($type eq 'text/plain' or
             ($type =~ m#text/?# and $type ne 'text/html') or
             $type eq 'application/pgp') {
-       return $entity->bodyhandle;
+       return $entity;
      } elsif ($type eq 'multipart/alternative') {
         # RFC 2046 says we should use the last part we recognize.
         for my $part (reverse $entity->parts) {
@@ -101,14 +100,24 @@ sub parse
         @headerlines = @{$entity->head->header};
         chomp @headerlines;
  
-       my $entity_body = getmailbody($entity);
-       @bodylines = $entity_body ? $entity_body->as_lines() : ();
+        my $entity_body = getmailbody($entity);
+       my $entity_body_handle;
+        my $charset;
+        if (defined $entity_body) {
+            $entity_body_handle = $entity_body->bodyhandle();
+            $charset = $entity_body->head()->mime_attr('content-type.charset');
+        }
+       @bodylines = $entity_body_handle ? $entity_body_handle->as_lines() : ();
+        @bodylines = map {convert_to_utf8($_,$charset)} @bodylines;
         chomp @bodylines;
      } else {
         # Legacy pre-MIME code, kept around in case MIME::Parser fails.
         my @msg = split /\n/, $_[0];
         my $i;
  
+        # assume us-ascii unless charset is set; probably bad, but we
+        # really shouldn't get to this point anyway
+        my $charset = 'us-ascii';
         for ($i = 0; $i <= $#msg; ++$i) {
             $_ = $msg[$i];
             last unless length;
@@ -116,10 +125,12 @@ sub parse
                 ++$i;
                 $_ .= "\n" . $msg[$i];
             }
+            if (/charset=\"([^\"]+)\"/) {
+                $charset = $1;
+            }
             push @headerlines, $_;
         }
-
-       @bodylines = @msg[$i .. $#msg];
+       @bodylines = map {convert_to_utf8($_,$charset)} @msg[$i .. $#msg];
      }
  
      rmtree $tempdir, 0, 1;
@@ -193,8 +204,8 @@ sub create_mime_message{
       # MIME::Entity is stupid, and doesn't rfc1522 encode its headers, so we do it for it.
       my $msg = MIME::Entity->build('Content-Type' => 'text/plain; charset=utf-8',
                                    'Encoding'     => 'quoted-printable',
-                                  (map{encode_rfc1522($_)} @{$headers}),
-                                  Data    => $body
+                                  (map{encode_rfc1522(encode_utf8($_))} @{$headers}),
+                                  Data    => encode_utf8($body),
                                   );
  
       # Attach the attachments
@@ -229,23 +240,6 @@ sub create_mime_message{
  }
  
  
-# Bug #61342 et al.
-
-sub convert_to_utf8 {
-     my ($data, $charset) = @_;
-     # raw data just gets returned (that's the charset WordDecorder
-     # uses when it doesn't know what to do)
-     return $data if $charset eq 'raw' or is_utf8($data,1);
-     my $result;
-     eval {
-        $result = decode($charset,$data);
-     };
-     if ($@) {
-         warn "Unable to decode charset; '$charset' and '$data': $@";
-         return $data;
-     }
-     return $result;
-}
  
  
  =head2 decode_rfc1522
@@ -256,24 +250,27 @@ Turn RFC-1522 names into the UTF-8 equivalent.
  
  =cut
  
-BEGIN {
-    # Set up the default RFC1522 decoder, which turns all charsets that
-    # are supported into the appropriate UTF-8 charset.
-    MIME::WordDecoder->default(new MIME::WordDecoder(
-       ['*' => \&convert_to_utf8,
-       ]));
-}
-
  sub decode_rfc1522 {
      my ($string) = @_;
  
      # this is craptacular, but leading space is hacked off by unmime.
      # Save it.
      my $leading_space = '';
-    $leading_space = $1 if $string =~ s/^(\s+)//;
-    # unmime calls the default MIME::WordDecoder handler set up at
-    # initialization time.
-    return $leading_space . MIME::WordDecoder::unmime($string);
+    $leading_space = $1 if $string =~ s/^(\ +)//;
+    # we must do this to switch off the utf8 flag before calling decode_mimewords
+    $string = encode_utf8($string);
+    my @mime_words = MIME::Words::decode_mimewords($string);
+    my $tmp = $leading_space .
+        join('',
+             (map {
+                 if (@{$_} > 1) {
+                     convert_to_utf8(${$_}[0],${$_}[1]);
+                 } else {
+                     decode_utf8(${$_}[0]);
+                 }
+             } @mime_words)
+            );
+    return $tmp;
  }
  
  =head2 encode_rfc1522
@@ -293,9 +290,10 @@ sub encode_rfc1522 {
  
       # handle being passed undef properly
       return undef if not defined $rawstr;
-     if (is_utf8($rawstr)) {
-        $rawstr= encode_utf8($rawstr);
-     }
+
+     # convert to octets if we are given a string in perl's internal
+     # encoding
+     $rawstr= encode_utf8($rawstr) if is_utf8($rawstr);
       # We process words in reverse so we can preserve spacing between
       # encoded words. This regex splits on word|nonword boundaries and
       # nonword|nonword boundaries. We also consider parenthesis and "
diff --git a/Debbugs/Mail.pm b/Debbugs/Mail.pm

index d26c860a612f5866576e56944e5c2c529e7eda0c..9fa282b1b53689447621513908d3092d57ebc65b 100644 (file)
--- a/Debbugs/Mail.pm
+++ b/Debbugs/Mail.pm
@@ -49,6 +49,7 @@ use Debbugs::MIME qw(encode_rfc1522);
  use Debbugs::Config qw(:config);
  use Params::Validate qw(:types validate_with);
  use Encode qw(encode is_utf8);
+use Debbugs::UTF8 qw(encode_utf8_safely);
  
  use Debbugs::Packages;
  
@@ -398,7 +399,7 @@ sub encode_headers{
  
       my ($header,$body) = split /\n\n/, $message, 2;
       $header = encode_rfc1522($header);
-     return $header . qq(\n\n). $body;
+     return $header . qq(\n\n). encode_utf8_safely($body);
  }
  
  =head2 rfc822_date
diff --git a/Debbugs/SOAP.pm b/Debbugs/SOAP.pm

index 5cb08bb0f11eff7de6ab49dea880731a13dcb8b3..c1fc85f9b84fb5465bbab56ab916b4b445f1073b 100644 (file)
--- a/Debbugs/SOAP.pm
+++ b/Debbugs/SOAP.pm
@@ -41,7 +41,8 @@ BEGIN{
  
  use IO::File;
  use Debbugs::Status qw(get_bug_status);
-use Debbugs::Common qw(make_list getbuglocation getbugcomponent :utf8);
+use Debbugs::Common qw(make_list getbuglocation getbugcomponent);
+use Debbugs::UTF8;
  use Debbugs::Packages;
  
  use Storable qw(nstore retrieve dclone);
diff --git a/Debbugs/Status.pm b/Debbugs/Status.pm

index a2aeabef1879cfcaa578fa1c6cc8573d82e868dd..7d72c032ec9b68c8678bca33acd2eacfd7707748 100644 (file)
--- a/Debbugs/Status.pm
+++ b/Debbugs/Status.pm
@@ -37,7 +37,8 @@ use vars qw($VERSION $DEBUG %EXPORT_TAGS @EXPORT_OK @EXPORT);
  use base qw(Exporter);
  
  use Params::Validate qw(validate_with :types);
-use Debbugs::Common qw(:util :lock :quit :misc :utf8);
+use Debbugs::Common qw(:util :lock :quit :misc);
+use Debbugs::UTF8;
  use Debbugs::Config qw(:config);
  use Debbugs::MIME qw(decode_rfc1522 encode_rfc1522);
  use Debbugs::Packages qw(makesourceversions make_source_versions getversions get_versions binary_to_source);
diff --git a/Debbugs/UTF8.pm b/Debbugs/UTF8.pm

new file mode 100644 (file)

index 0000000..74a4042
--- /dev/null
+++ b/Debbugs/UTF8.pm
@@ -0,0 +1,215 @@
+# This module is part of debbugs, and is released
+# under the terms of the GPL version 2, or any later
+# version at your option.
+# See the file README and COPYING for more information.
+#
+# Copyright 2013 by Don Armstrong <don@donarmstrong.com>.
+
+package Debbugs::UTF8;
+
+=head1 NAME
+
+Debbugs::UTF8 -- Routines for handling conversion of charsets to UTF8
+
+=head1 SYNOPSIS
+
+use Debbugs::UTF8;
+
+
+=head1 DESCRIPTION
+
+This module contains routines which convert from various different
+charsets to UTF8.
+
+=head1 FUNCTIONS
+
+=cut
+
+use warnings;
+use strict;
+use vars qw($VERSION $DEBUG %EXPORT_TAGS @EXPORT_OK @EXPORT);
+use base qw(Exporter);
+
+BEGIN{
+     $VERSION = 1.00;
+     $DEBUG = 0 unless defined $DEBUG;
+
+     %EXPORT_TAGS = (utf8   => [qw(encode_utf8_structure encode_utf8_safely),
+                                qw(convert_to_utf8 decode_utf8_safely)],
+                    );
+     @EXPORT = (@{$EXPORT_TAGS{utf8}});
+     @EXPORT_OK = ();
+     Exporter::export_ok_tags(keys %EXPORT_TAGS);
+     $EXPORT_TAGS{all} = [@EXPORT_OK];
+}
+
+use Carp;
+$Carp::Verbose = 1;
+
+use Encode qw(encode_utf8 is_utf8 decode decode_utf8);
+use Text::Iconv;
+use Storable qw(dclone);
+
+
+=head1 UTF-8
+
+These functions are exported with the :utf8 tag
+
+=head2 encode_utf8_structure
+
+     %newdata = encode_utf8_structure(%newdata);
+
+Takes a complex data structure and encodes any strings with is_utf8
+set into their constituent octets.
+
+=cut
+
+our $depth = 0;
+sub encode_utf8_structure {
+    ++$depth;
+    my @ret;
+    for my $_ (@_) {
+       if (ref($_) eq 'HASH') {
+           push @ret, {encode_utf8_structure(%{$depth == 1 ? dclone($_):$_})};
+       }
+       elsif (ref($_) eq 'ARRAY') {
+           push @ret, [encode_utf8_structure(@{$depth == 1 ? dclone($_):$_})];
+       }
+       elsif (ref($_)) {
+           # we don't know how to handle non hash or non arrays
+           push @ret,$_;
+       }
+       else {
+           push @ret,encode_utf8_safely($_);
+       }
+    }
+    --$depth;
+    return @ret;
+}
+
+=head2 encode_utf8_safely
+
+     $octets = encode_utf8_safely($string);
+
+Given a $string, returns the octet equivalent of $string if $string is
+in perl's internal encoding; otherwise returns $string.
+
+Silently returns REFs without encoding them. [If you want to deeply
+encode REFs, see encode_utf8_structure.]
+
+=cut
+
+
+sub encode_utf8_safely{
+    my @ret;
+    for my $r (@_) {
+        if (not ref($r) and is_utf8($r)) {
+           $r = encode_utf8($r);
+       }
+       push @ret,$r;
+    }
+    return wantarray ? @ret : (@_ > 1 ? @ret : $ret[0]);
+}
+
+=head2 decode_utf8_safely
+
+     $string = decode_utf8_safely($octets);
+
+Given $octets in UTF8, returns the perl-internal equivalent of $octets
+if $octets does not have is_utf8 set; otherwise returns $octets.
+
+Silently returns REFs without encoding them.
+
+=cut
+
+
+sub decode_utf8_safely{
+    my @ret;
+    for my $r (@_) {
+        if (not ref($r) and not is_utf8($r)) {
+           $r = decode_utf8($r);
+       }
+       push @ret, $r;
+    }
+    return wantarray ? @ret : (@_ > 1 ? @ret : $ret[0]);
+}
+
+
+
+
+=head2 convert_to_utf8
+
+    $utf8 = convert_to_utf8("text","charset");
+
+=cut
+
+our %iconv_converters;
+
+sub convert_to_utf8 {
+    my ($data,$charset) = @_;
+    if (is_utf8($data)) {
+        cluck("utf8 flag is set when calling convert_to_utf8");
+        return $data;
+    }
+    $charset = uc($charset);
+    if ($charset eq 'RAW') {
+        croak("Charset must not be raw when calling convert_to_utf8");
+    }
+    if (not defined $iconv_converters{$charset}) {
+        eval {
+            $iconv_converters{$charset} = Text::Iconv->new($charset,"UTF-8") or
+                die "Unable to create converter for '$charset'";
+        };
+        if ($@) {
+            warn $@;
+            # We weren't able to create the converter, so use Encode
+            # instead
+            return __fallback_convert_to_utf8($data,$charset);
+        }
+    }
+    if (not defined $iconv_converters{$charset}) {
+        warn "The converter for $charset wasn't created properly somehow!";
+        return __fallback_convert_to_utf8($data,$charset);
+    }
+    my $converted_data = $iconv_converters{$charset}->convert($data);
+    # if the conversion failed, retval will be undefined or perhaps
+    # -1.
+    my $retval = $iconv_converters{$charset}->retval();
+    if (not defined $retval or
+        $retval < 0
+       ) {
+        warn "failed to convert to utf8";
+        # Fallback to encode, which will probably also fail.
+        return __fallback_convert_to_utf8($data,$charset);
+    }
+    return decode("UTF-8",$converted_data);
+}
+
+# this returns data in perl's internal encoding
+sub __fallback_convert_to_utf8 {
+     my ($data, $charset) = @_;
+     # raw data just gets returned (that's the charset WordDecorder
+     # uses when it doesn't know what to do)
+     return $data if $charset eq 'raw';
+     if (not defined $charset and not is_utf8($data)) {
+         warn ("Undefined charset, and string '$data' is not in perl's internal encoding");
+         return $data;
+     }
+     # lets assume everything that doesn't have a charset is utf8
+     $charset //= 'utf8';
+     my $result;
+     eval {
+        $result = decode($charset,$data);
+     };
+     if ($@) {
+         warn "Unable to decode charset; '$charset' and '$data': $@";
+         return $data;
+     }
+     return $result;
+}
+
+
+
+1;
+
+__END__
diff --git a/cgi/bugreport.cgi b/cgi/bugreport.cgi

index 55617ae1f2a9abcd186bbf5007576a2e4572670e..70f42292644f437e1284a6ba0aee2eeb5aabb635 100755 (executable)
--- a/cgi/bugreport.cgi
+++ b/cgi/bugreport.cgi
@@ -8,8 +8,6 @@ BEGIN{
      delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
  }
  
-# STDOUT should be using the utf8 io layer
-binmode(STDOUT,':utf8');
  
  use POSIX qw(strftime);
  use MIME::Parser;
@@ -38,6 +36,8 @@ use List::Util qw(max);
  
  use CGI::Simple;
  my $q = new CGI::Simple;
+# STDOUT should be using the utf8 io layer
+binmode(STDOUT,':raw:encoding(UTF-8)');
  
  my %param = cgi_parameters(query => $q,
                            single => [qw(bug msg att boring terse),
diff --git a/cgi/pkgreport.cgi b/cgi/pkgreport.cgi

index d29c3abc7ff56943a7a0a2196be16e5560bda857..eb7a61a2101600c9196bed3ca1f8eafaf8e73293 100755 (executable)
--- a/cgi/pkgreport.cgi
+++ b/cgi/pkgreport.cgi
@@ -18,9 +18,7 @@ BEGIN{
      delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
  }
  
-# STDOUT should be in utf8 mode
-binmode(STDOUT,':utf8');
-
+binmode(STDOUT,':encoding(UTF-8)');
  use POSIX qw(strftime nice);
  
  use Debbugs::Config qw(:globals :text :config);
diff --git a/scripts/process b/scripts/process

index 0524c38c75de3f2039b621d5f929eeae5b7d82e9..73eebb0c19bf81a441d9876d848dfd1c06be4495 100755 (executable)
--- a/scripts/process
+++ b/scripts/process
@@ -79,6 +79,7 @@ my $debugfh = IO::File->new('/dev/null','w') or
  if ($DEBUG > 0) {
      $debugfh = \*STDERR;
  }
+binmode($debugfh,':raw:encoding(UTF-8)');
  
  # these are the valid bug addresses
  my %baddress = (B => 'submit',
@@ -174,13 +175,14 @@ my %header;
  
  my @common_headers;
  for my $hdr (@headerlines) {
+    my $orig_hdr = $hdr;
      $hdr = decode_rfc1522($hdr);
      $_ = $hdr;
      s/\n\s/ /g;
      finish() if m/^x-loop: (\S+)$/i && $1 eq "$gMaintainerEmail";
      my $ins = !m/^subject:/i && !m/^reply-to:/i && !m/^return-path:/i
             && !m/^From / && !m/^X-Debbugs-/i;
-    $fwd .= $hdr."\n" if $ins;
+    $fwd .= encode_utf8($hdr)."\n" if $ins;
      # print {$debugfh} ">$_<\n";
      if (s/^(\S+):\s*//) {
         my $v = lc $1;
@@ -248,7 +250,9 @@ my $i = 0;
  ++$i while $msg[$i] =~ /./;
  $fwd .= join("\n",@msg[$i..$#msg]);
  
+binmode($debugfh,':raw');
  print {$debugfh} "***\n$fwd\n***\n";
+binmode($debugfh,':raw:encoding(UTF-8)');
  
  if (defined $header{'resent-from'} && !defined $header{'from'}) {
      $header{'from'} = $header{'resent-from'};
diff --git a/t/01_mime.t b/t/01_mime.t

index d103eb19e445987ac40215102c755ea3d6fa9018..dcd3b7608f869c41d6f1eed8d666d8ba55832393 100644 (file)
--- a/t/01_mime.t
+++ b/t/01_mime.t
@@ -30,12 +30,12 @@ ok(Debbugs::MIME::decode_rfc1522(q(=?iso-8859-1?Q?D=F6n_Armstr=F3ng?= <don@donar
  
  
  # 2: test encode
-ok(Debbugs::MIME::decode_rfc1522(Debbugs::MIME::encode_rfc1522($test_str)) eq $test_str,
+ok(Debbugs::MIME::decode_rfc1522(Debbugs::MIME::encode_rfc1522(encode_utf8($test_str))) eq $test_str,
    "encode_rfc1522 encodes strings that decode_rfc1522 can decode");
-ok(Debbugs::MIME::decode_rfc1522(Debbugs::MIME::encode_rfc1522($test_str2)) eq $test_str2,
+ok(Debbugs::MIME::decode_rfc1522(Debbugs::MIME::encode_rfc1522(encode_utf8($test_str2))) eq $test_str2,
    "encode_rfc1522 encodes strings that decode_rfc1522 can decode");
-ok(Debbugs::MIME::decode_rfc1522(Debbugs::MIME::encode_rfc1522($test_str3)) eq $test_str3,
-  "encode_rfc1522 properly handles parentesis and \"");
+ok(Debbugs::MIME::decode_rfc1522(Debbugs::MIME::encode_rfc1522(encode_utf8($test_str3))) eq $test_str3,
+  "encode_rfc1522 properly handles parenthesis and \"");
  
  
  # Make sure that create_mime_message has encoded headers and doesn't enclude any 8-bit characters
diff --git a/t/02_common.t b/t/02_common.t

index d770d3fe090e83ab7bf971c17f4cf3aa282f6c8f..f7d949a833690d0d41da5e8062958466f15fcd45 100644 (file)
--- a/t/02_common.t
+++ b/t/02_common.t
@@ -1,10 +1,11 @@
  # -*- mode: cperl;-*-
  
-use Test::More tests => 2;
+use Test::More tests => 3;
  use Encode qw(decode_utf8);
  
  use_ok('Debbugs::Common');
-is_deeply(Debbugs::Common::encode_utf8_structure(
+use_ok('Debbugs::UTF8');
+is_deeply(Debbugs::UTF8::encode_utf8_structure(
            {a => decode_utf8('föö'),
            b => [map {decode_utf8($_)} qw(blëh bl♥h)],
           }),
diff --git a/t/05_mail.t b/t/05_mail.t

index 3d21049c579ef9e3c994ce261fc701e674ac7fac..53860d39e82406d411cd5078e1bdae6f0b58364f 100644 (file)
--- a/t/05_mail.t
+++ b/t/05_mail.t
@@ -25,7 +25,7 @@ blah blah blah
  END
  
  # 1: test decode
-ok(decode_rfc1522(Debbugs::Mail::encode_headers($test_str)) eq encode_utf8($test_str));
+ok(decode_rfc1522(Debbugs::Mail::encode_headers($test_str)) eq $test_str);
  
  # XXX Figure out a good way to test the send message bit of
  # Debbugs::Mail
diff --git a/t/13_utf8_mail.t b/t/13_utf8_mail.t

index 8ada76ab01b05a225987cd86c7e8947e357b23ab..4eda888df35cd0b67dbd0a8f5776342317d9f7f4 100644 (file)
--- a/t/13_utf8_mail.t
+++ b/t/13_utf8_mail.t
@@ -6,6 +6,8 @@ use Test::More tests => 12;
  use warnings;
  use strict;
  
+use utf8;
+
  # Here, we're going to shoot messages through a set of things that can
  # happen.
  
@@ -89,7 +91,7 @@ $SD_SIZE =
  
  send_message(to => '1@bugs.something',
              headers => [To   => '1@bugs.something',
-                        From => 'foo@bugs.something',
+                        From => 'föoﬀ@bugs.something',
                          Subject => 'Sending a message to a bug',
                         ],
              body => <<EOF) or fail('sending message to 1@bugs.someting failed');
@@ -107,7 +109,7 @@ $SD_SIZE =
  # just check to see that control doesn't explode
  send_message(to => 'control@bugs.something',
              headers => [To   => 'control@bugs.something',
-                        From => 'foo@bugs.something',
+                        From => 'föoﬀ@bugs.something',
                          Subject => 'Munging a bug',
                         ],
              body => <<EOF) or fail 'message to control@bugs.something failed';
@@ -127,7 +129,7 @@ ok(system('sh','-c','find '.$sendmail_dir.q( -type f | xargs grep -q "Subject: P
  # This is an eval because $ENV{DEBBUGS_CONFIG_FILE} isn't set at BEGIN{} time
  eval "use Debbugs::Status qw(read_bug writebug);";
  my $status = read_bug(bug=>1);
-ok($status->{subject} eq decode_utf8('ütﬀ8 title encoding test'),'bug 1 retitled');
+ok($status->{subject} eq 'ütﬀ8 title encoding test','bug 1 retitled');
  ok($status->{severity} eq 'wishlist','bug 1 wishlisted');
  ok(system('sh','-c','[ $(egrep "retitle.*encoding test" '.$spool_dir.'/db-h/01/1.log|grep -v "=C3=BCt=EF=AC=808"|wc -l) -eq 0 ]') == 0,
     'Control messages escaped properly');
author	Don Armstrong <don@donarmstrong.com>
	Thu, 14 Mar 2013 20:13:56 +0000 (13:13 -0700)
committer	Don Armstrong <don@donarmstrong.com>
	Thu, 14 Mar 2013 20:13:56 +0000 (13:13 -0700)
Debbugs/CGI/Bugreport.pm		patch \| blob \| history
Debbugs/Common.pm		patch \| blob \| history
Debbugs/Control.pm		patch \| blob \| history
Debbugs/Log.pm		patch \| blob \| history
Debbugs/MIME.pm		patch \| blob \| history
Debbugs/Mail.pm		patch \| blob \| history
Debbugs/SOAP.pm		patch \| blob \| history
Debbugs/Status.pm		patch \| blob \| history
Debbugs/UTF8.pm	[new file with mode: 0644]	patch \| blob
cgi/bugreport.cgi		patch \| blob \| history
cgi/pkgreport.cgi		patch \| blob \| history
scripts/process		patch \| blob \| history
t/01_mime.t		patch \| blob \| history
t/02_common.t		patch \| blob \| history
t/05_mail.t		patch \| blob \| history
t/13_utf8_mail.t		patch \| blob \| history