From: Don Armstrong <don@donarmstrong.com>
Date: Thu, 14 Mar 2013 19:59:41 +0000 (-0700)
Subject: Merge branch 'don/fix_encoding'
X-Git-Tag: release/2.6.0~313
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=04265c83beff7e1ccb28a88ff7257fe36b2fc45d;hp=0b63438d6beca43a78a941605db8fd446a3af7c2;p=debbugs.git

Merge branch 'don/fix_encoding'
---

diff --git a/Debbugs/CGI/Bugreport.pm b/Debbugs/CGI/Bugreport.pm
index 154ee047..707c3a0d 100644
--- a/Debbugs/CGI/Bugreport.pm
+++ b/Debbugs/CGI/Bugreport.pm
@@ -32,9 +32,10 @@ use base qw(Exporter);
 
 use IO::Scalar;
 use Params::Validate qw(validate_with :types);
-use Debbugs::MIME qw(convert_to_utf8 decode_rfc1522 create_mime_message);
+use Debbugs::MIME qw(decode_rfc1522 create_mime_message);
 use Debbugs::CGI qw(:url :html :util);
 use Debbugs::Common qw(globify_scalar english_join);
+use Debbugs::UTF8;
 use Debbugs::Config qw(:config);
 use POSIX qw(strftime);
 use Encode qw(decode_utf8);
@@ -334,7 +335,7 @@ sub handle_record{
      local $_ = $record->{type};
      if (/html/) {
 	 # $record->{text} is not in perl's internal encoding; convert it
-	 my $text = decode_utf8(decode_rfc1522($record->{text}));
+	 my $text = decode_rfc1522(decode_utf8($record->{text}));
 	  my ($time) = $text =~ /<!--\s+time:(\d+)\s+-->/;
 	  my $class = $text =~ /^<strong>(?:Acknowledgement|Reply|Information|Report|Notification)/m ? 'infmessage':'msgreceived';
 	  $output .= $text;
diff --git a/Debbugs/Common.pm b/Debbugs/Common.pm
index f3c8218e..1331ac00 100644
--- a/Debbugs/Common.pm
+++ b/Debbugs/Common.pm
@@ -50,7 +50,6 @@ BEGIN{
 				qw(cleanup_eval_fail),
 				qw(hash_slice),
 			       ],
-		     utf8   => [qw(encode_utf8_structure)],
 		     date   => [qw(secs_to_english)],
 		     quit   => [qw(quit)],
 		     lock   => [qw(filelock unfilelock lockpid)],
@@ -71,7 +70,6 @@ use IO::Scalar;
 use Debbugs::MIME qw(decode_rfc1522);
 use Mail::Address;
 use Cwd qw(cwd);
-use Encode qw(encode_utf8 is_utf8);
 use Storable qw(dclone);
 
 use Params::Validate qw(validate_with :types);
@@ -827,7 +825,7 @@ sub globify_scalar {
 	  if (defined ref($scalar)) {
 	       if (ref($scalar) eq 'SCALAR' and
 		   not UNIVERSAL::isa($scalar,'GLOB')) {
-		    open $handle, '>:scalar:utf8', $scalar;
+		    open $handle, '>:scalar:encoding(UTF-8)', $scalar;
 		    return $handle;
 	       }
 	       else {
@@ -841,7 +839,7 @@ sub globify_scalar {
 	       carp "Given a non-scalar reference, non-glob to globify_scalar; returning /dev/null handle";
 	  }
      }
-     return IO::File->new('/dev/null','>:utf8');
+     return IO::File->new('/dev/null','>:encoding(UTF-8)');
 }
 
 =head2 cleanup_eval_fail()
@@ -900,55 +898,6 @@ sub hash_slice(\%@) {
 }
 
 
-=head1 UTF-8
-
-These functions are exported with the :utf8 tag
-
-=head2 encode_utf8_structure
-
-     %newdata = encode_utf8_structure(%newdata);
-
-Takes a complex data structure and encodes any strings with is_utf8
-set into their constituent octets.
-
-=cut
-
-our $depth = 0;
-sub encode_utf8_structure {
-    ++$depth;
-    my @ret;
-    for my $_ (@_) {
-	if (ref($_) eq 'HASH') {
-	    push @ret, {encode_utf8_structure(%{$depth == 1 ? dclone($_):$_})};
-	}
-	elsif (ref($_) eq 'ARRAY') {
-	    push @ret, [encode_utf8_structure(@{$depth == 1 ? dclone($_):$_})];
-	}
-	elsif (ref($_)) {
-	    # we don't know how to handle non hash or non arrays
-	    push @ret,$_;
-	}
-	else {
-	    push @ret,__encode_utf8($_);
-	}
-    }
-    --$depth;
-    return @ret;
-}
-
-sub __encode_utf8 {
-    my @ret;
-    for my $r (@_) {
-	if (not ref($r) and is_utf8($r)) {
-	    $r = encode_utf8($r);
-	}
-	push @ret,$r;
-    }
-    return @ret;
-}
-
-
-
 1;
 
 __END__
diff --git a/Debbugs/Control.pm b/Debbugs/Control.pm
index 5739734b..95876245 100644
--- a/Debbugs/Control.pm
+++ b/Debbugs/Control.pm
@@ -110,7 +110,8 @@ BEGIN{
 }
 
 use Debbugs::Config qw(:config);
-use Debbugs::Common qw(:lock buglog :misc get_hashname sort_versions :utf8);
+use Debbugs::Common qw(:lock buglog :misc get_hashname sort_versions);
+use Debbugs::UTF8;
 use Debbugs::Status qw(bug_archiveable :read :hook writebug new_bug splitpackages split_status_fields get_bug_status);
 use Debbugs::CGI qw(html_escape);
 use Debbugs::Log qw(:misc :write);
@@ -3437,25 +3438,25 @@ sub append_action_to_log{
      }
      my $msg = join('',
 		    (exists $param{command} ?
-		     "<!-- command:".html_escape(encode_utf8($param{command}))." -->\n":""
+		     "<!-- command:".html_escape(encode_utf8_safely($param{command}))." -->\n":""
 		    ),
 		    (length $param{requester} ?
-		     "<!-- requester: ".html_escape(encode_utf8($param{requester}))." -->\n":""
+		     "<!-- requester: ".html_escape(encode_utf8_safely($param{requester}))." -->\n":""
 		    ),
 		    (length $param{request_addr} ?
-		     "<!-- request_addr: ".html_escape(encode_utf8($param{request_addr}))." -->\n":""
+		     "<!-- request_addr: ".html_escape(encode_utf8_safely($param{request_addr}))." -->\n":""
 		    ),
 		    "<!-- time:".time()." -->\n",
 		    $data_diff,
-		    "<strong>".html_escape(encode_utf8($param{action}))."</strong>\n");
+		    "<strong>".html_escape(encode_utf8_safely($param{action}))."</strong>\n");
      if (length $param{requester}) {
-          $msg .= "Request was from <code>".html_escape(encode_utf8($param{requester}))."</code>\n";
+          $msg .= "Request was from <code>".html_escape(encode_utf8_safely($param{requester}))."</code>\n";
      }
      if (length $param{request_addr}) {
-          $msg .= "to <code>".html_escape(encode_utf8($param{request_addr}))."</code>";
+          $msg .= "to <code>".html_escape(encode_utf8_safely($param{request_addr}))."</code>";
      }
      if (length $param{desc}) {
-	  $msg .= ":<br>\n".encode_utf8($param{desc})."\n";
+	  $msg .= ":<br>\n".encode_utf8_safely($param{desc})."\n";
      }
      else {
 	  $msg .= ".\n";
diff --git a/Debbugs/Log.pm b/Debbugs/Log.pm
index 2ae7af7b..8b99b7de 100644
--- a/Debbugs/Log.pm
+++ b/Debbugs/Log.pm
@@ -39,7 +39,7 @@ use Carp;
 
 use Debbugs::Common qw(getbuglocation getbugcomponent make_list);
 use Params::Validate qw(:types validate_with);
-use Encode qw(encode is_utf8);
+use Encode qw(encode encode_utf8 is_utf8);
 
 =head1 NAME
 
@@ -390,8 +390,12 @@ sub write_log_records
 	croak "record type '$type' with no text field" unless defined $record->{text};
 	# I am not sure if we really want to croak here; but this is
 	# almost certainly a bug if is_utf8 is on.
-	# croak "probably wrong encoding" if is_utf8($record->{text});
-	my ($text) = escape_log($record->{text});
+        my $text = $record->{text};
+        if (is_utf8($text)) {
+            carp('Record text was in the wrong encoding (perl internal instead of utf8 octets)');
+            $text = encode_utf8($text)
+        }
+	($text) = escape_log($text);
 	if ($type eq 'autocheck') {
 	    print {$logfh} "\01\n$text\03\n" or
 		die "Unable to write to logfile: $!";
diff --git a/Debbugs/MIME.pm b/Debbugs/MIME.pm
index 481be7bf..afc07764 100644
--- a/Debbugs/MIME.pm
+++ b/Debbugs/MIME.pm
@@ -41,7 +41,6 @@ BEGIN {
 
     %EXPORT_TAGS = (mime => [qw(parse create_mime_message getmailbody)],
 		    rfc1522 => [qw(decode_rfc1522 encode_rfc1522)],
-		    utf8 => [qw(convert_to_utf8)],
 		   );
     @EXPORT_OK=();
     Exporter::export_ok_tags(keys %EXPORT_TAGS);
@@ -55,11 +54,11 @@ use MIME::Parser;
 use POSIX qw(strftime);
 use List::MoreUtils qw(apply);
 
-# for decode_rfc1522
-use MIME::WordDecoder qw();
-use Encode qw(decode encode encode_utf8 decode_utf8 is_utf8);
+# for convert_to_utf8
+use Debbugs::UTF8 qw(convert_to_utf8);
 
-# for encode_rfc1522
+# for decode_rfc1522 and encode_rfc1522
+use Encode qw(decode encode encode_utf8 decode_utf8 is_utf8);
 use MIME::Words qw();
 
 sub getmailbody
@@ -69,7 +68,7 @@ sub getmailbody
     if ($type eq 'text/plain' or
 	    ($type =~ m#text/?# and $type ne 'text/html') or
 	    $type eq 'application/pgp') {
-	return $entity->bodyhandle;
+	return $entity;
     } elsif ($type eq 'multipart/alternative') {
 	# RFC 2046 says we should use the last part we recognize.
 	for my $part (reverse $entity->parts) {
@@ -101,14 +100,24 @@ sub parse
 	@headerlines = @{$entity->head->header};
 	chomp @headerlines;
 
-	my $entity_body = getmailbody($entity);
-	@bodylines = $entity_body ? $entity_body->as_lines() : ();
+        my $entity_body = getmailbody($entity);
+	my $entity_body_handle;
+        my $charset;
+        if (defined $entity_body) {
+            $entity_body_handle = $entity_body->bodyhandle();
+            $charset = $entity_body->head()->mime_attr('content-type.charset');
+        }
+	@bodylines = $entity_body_handle ? $entity_body_handle->as_lines() : ();
+        @bodylines = map {convert_to_utf8($_,$charset)} @bodylines;
 	chomp @bodylines;
     } else {
 	# Legacy pre-MIME code, kept around in case MIME::Parser fails.
 	my @msg = split /\n/, $_[0];
 	my $i;
 
+        # assume us-ascii unless charset is set; probably bad, but we
+        # really shouldn't get to this point anyway
+        my $charset = 'us-ascii';
 	for ($i = 0; $i <= $#msg; ++$i) {
 	    $_ = $msg[$i];
 	    last unless length;
@@ -116,10 +125,12 @@ sub parse
 		++$i;
 		$_ .= "\n" . $msg[$i];
 	    }
+            if (/charset=\"([^\"]+)\"/) {
+                $charset = $1;
+            }
 	    push @headerlines, $_;
 	}
-
-	@bodylines = @msg[$i .. $#msg];
+	@bodylines = map {convert_to_utf8($_,$charset)} @msg[$i .. $#msg];
     }
 
     rmtree $tempdir, 0, 1;
@@ -193,8 +204,8 @@ sub create_mime_message{
      # MIME::Entity is stupid, and doesn't rfc1522 encode its headers, so we do it for it.
      my $msg = MIME::Entity->build('Content-Type' => 'text/plain; charset=utf-8',
 				   'Encoding'     => 'quoted-printable',
-				   (map{encode_rfc1522($_)} @{$headers}),
-				   Data    => $body
+				   (map{encode_rfc1522(encode_utf8($_))} @{$headers}),
+				   Data    => encode_utf8($body),
 				  );
 
      # Attach the attachments
@@ -229,23 +240,6 @@ sub create_mime_message{
 }
 
 
-# Bug #61342 et al.
-
-sub convert_to_utf8 {
-     my ($data, $charset) = @_;
-     # raw data just gets returned (that's the charset WordDecorder
-     # uses when it doesn't know what to do)
-     return $data if $charset eq 'raw' or is_utf8($data,1);
-     my $result;
-     eval {
-	 $result = decode($charset,$data);
-     };
-     if ($@) {
-	  warn "Unable to decode charset; '$charset' and '$data': $@";
-	  return $data;
-     }
-     return $result;
-}
 
 
 =head2 decode_rfc1522
@@ -256,24 +250,27 @@ Turn RFC-1522 names into the UTF-8 equivalent.
 
 =cut
 
-BEGIN {
-    # Set up the default RFC1522 decoder, which turns all charsets that
-    # are supported into the appropriate UTF-8 charset.
-    MIME::WordDecoder->default(new MIME::WordDecoder(
-	['*' => \&convert_to_utf8,
-	]));
-}
-
 sub decode_rfc1522 {
     my ($string) = @_;
 
     # this is craptacular, but leading space is hacked off by unmime.
     # Save it.
     my $leading_space = '';
-    $leading_space = $1 if $string =~ s/^(\s+)//;
-    # unmime calls the default MIME::WordDecoder handler set up at
-    # initialization time.
-    return $leading_space . MIME::WordDecoder::unmime($string);
+    $leading_space = $1 if $string =~ s/^(\ +)//;
+    # we must do this to switch off the utf8 flag before calling decode_mimewords
+    $string = encode_utf8($string);
+    my @mime_words = MIME::Words::decode_mimewords($string);
+    my $tmp = $leading_space .
+        join('',
+             (map {
+                 if (@{$_} > 1) {
+                     convert_to_utf8(${$_}[0],${$_}[1]);
+                 } else {
+                     decode_utf8(${$_}[0]);
+                 }
+             } @mime_words)
+            );
+    return $tmp;
 }
 
 =head2 encode_rfc1522
@@ -293,9 +290,10 @@ sub encode_rfc1522 {
 
      # handle being passed undef properly
      return undef if not defined $rawstr;
-     if (is_utf8($rawstr)) {
-	 $rawstr= encode_utf8($rawstr);
-     }
+
+     # convert to octets if we are given a string in perl's internal
+     # encoding
+     $rawstr= encode_utf8($rawstr) if is_utf8($rawstr);
      # We process words in reverse so we can preserve spacing between
      # encoded words. This regex splits on word|nonword boundaries and
      # nonword|nonword boundaries. We also consider parenthesis and "
diff --git a/Debbugs/Mail.pm b/Debbugs/Mail.pm
index d26c860a..9fa282b1 100644
--- a/Debbugs/Mail.pm
+++ b/Debbugs/Mail.pm
@@ -49,6 +49,7 @@ use Debbugs::MIME qw(encode_rfc1522);
 use Debbugs::Config qw(:config);
 use Params::Validate qw(:types validate_with);
 use Encode qw(encode is_utf8);
+use Debbugs::UTF8 qw(encode_utf8_safely);
 
 use Debbugs::Packages;
 
@@ -398,7 +399,7 @@ sub encode_headers{
 
      my ($header,$body) = split /\n\n/, $message, 2;
      $header = encode_rfc1522($header);
-     return $header . qq(\n\n). $body;
+     return $header . qq(\n\n). encode_utf8_safely($body);
 }
 
 =head2 rfc822_date
diff --git a/Debbugs/SOAP.pm b/Debbugs/SOAP.pm
index 5cb08bb0..c1fc85f9 100644
--- a/Debbugs/SOAP.pm
+++ b/Debbugs/SOAP.pm
@@ -41,7 +41,8 @@ BEGIN{
 
 use IO::File;
 use Debbugs::Status qw(get_bug_status);
-use Debbugs::Common qw(make_list getbuglocation getbugcomponent :utf8);
+use Debbugs::Common qw(make_list getbuglocation getbugcomponent);
+use Debbugs::UTF8;
 use Debbugs::Packages;
 
 use Storable qw(nstore retrieve dclone);
diff --git a/Debbugs/Status.pm b/Debbugs/Status.pm
index a2aeabef..7d72c032 100644
--- a/Debbugs/Status.pm
+++ b/Debbugs/Status.pm
@@ -37,7 +37,8 @@ use vars qw($VERSION $DEBUG %EXPORT_TAGS @EXPORT_OK @EXPORT);
 use base qw(Exporter);
 
 use Params::Validate qw(validate_with :types);
-use Debbugs::Common qw(:util :lock :quit :misc :utf8);
+use Debbugs::Common qw(:util :lock :quit :misc);
+use Debbugs::UTF8;
 use Debbugs::Config qw(:config);
 use Debbugs::MIME qw(decode_rfc1522 encode_rfc1522);
 use Debbugs::Packages qw(makesourceversions make_source_versions getversions get_versions binary_to_source);
diff --git a/Debbugs/UTF8.pm b/Debbugs/UTF8.pm
new file mode 100644
index 00000000..74a40427
--- /dev/null
+++ b/Debbugs/UTF8.pm
@@ -0,0 +1,215 @@
+# This module is part of debbugs, and is released
+# under the terms of the GPL version 2, or any later
+# version at your option.
+# See the file README and COPYING for more information.
+#
+# Copyright 2013 by Don Armstrong <don@donarmstrong.com>.
+
+package Debbugs::UTF8;
+
+=head1 NAME
+
+Debbugs::UTF8 -- Routines for handling conversion of charsets to UTF8
+
+=head1 SYNOPSIS
+
+use Debbugs::UTF8;
+
+
+=head1 DESCRIPTION
+
+This module contains routines which convert from various different
+charsets to UTF8.
+
+=head1 FUNCTIONS
+
+=cut
+
+use warnings;
+use strict;
+use vars qw($VERSION $DEBUG %EXPORT_TAGS @EXPORT_OK @EXPORT);
+use base qw(Exporter);
+
+BEGIN{
+     $VERSION = 1.00;
+     $DEBUG = 0 unless defined $DEBUG;
+
+     %EXPORT_TAGS = (utf8   => [qw(encode_utf8_structure encode_utf8_safely),
+                                qw(convert_to_utf8 decode_utf8_safely)],
+                    );
+     @EXPORT = (@{$EXPORT_TAGS{utf8}});
+     @EXPORT_OK = ();
+     Exporter::export_ok_tags(keys %EXPORT_TAGS);
+     $EXPORT_TAGS{all} = [@EXPORT_OK];
+}
+
+use Carp;
+$Carp::Verbose = 1;
+
+use Encode qw(encode_utf8 is_utf8 decode decode_utf8);
+use Text::Iconv;
+use Storable qw(dclone);
+
+
+=head1 UTF-8
+
+These functions are exported with the :utf8 tag
+
+=head2 encode_utf8_structure
+
+     %newdata = encode_utf8_structure(%newdata);
+
+Takes a complex data structure and encodes any strings with is_utf8
+set into their constituent octets.
+
+=cut
+
+our $depth = 0;
+sub encode_utf8_structure {
+    ++$depth;
+    my @ret;
+    for my $_ (@_) {
+	if (ref($_) eq 'HASH') {
+	    push @ret, {encode_utf8_structure(%{$depth == 1 ? dclone($_):$_})};
+	}
+	elsif (ref($_) eq 'ARRAY') {
+	    push @ret, [encode_utf8_structure(@{$depth == 1 ? dclone($_):$_})];
+	}
+	elsif (ref($_)) {
+	    # we don't know how to handle non hash or non arrays
+	    push @ret,$_;
+	}
+	else {
+	    push @ret,encode_utf8_safely($_);
+	}
+    }
+    --$depth;
+    return @ret;
+}
+
+=head2 encode_utf8_safely
+
+     $octets = encode_utf8_safely($string);
+
+Given a $string, returns the octet equivalent of $string if $string is
+in perl's internal encoding; otherwise returns $string.
+
+Silently returns REFs without encoding them. [If you want to deeply
+encode REFs, see encode_utf8_structure.]
+
+=cut
+
+
+sub encode_utf8_safely{
+    my @ret;
+    for my $r (@_) {
+        if (not ref($r) and is_utf8($r)) {
+	    $r = encode_utf8($r);
+	}
+	push @ret,$r;
+    }
+    return wantarray ? @ret : (@_ > 1 ? @ret : $ret[0]);
+}
+
+=head2 decode_utf8_safely
+
+     $string = decode_utf8_safely($octets);
+
+Given $octets in UTF8, returns the perl-internal equivalent of $octets
+if $octets does not have is_utf8 set; otherwise returns $octets.
+
+Silently returns REFs without encoding them.
+
+=cut
+
+
+sub decode_utf8_safely{
+    my @ret;
+    for my $r (@_) {
+        if (not ref($r) and not is_utf8($r)) {
+	    $r = decode_utf8($r);
+	}
+	push @ret, $r;
+    }
+    return wantarray ? @ret : (@_ > 1 ? @ret : $ret[0]);
+}
+
+
+
+
+=head2 convert_to_utf8
+
+    $utf8 = convert_to_utf8("text","charset");
+
+=cut
+
+our %iconv_converters;
+
+sub convert_to_utf8 {
+    my ($data,$charset) = @_;
+    if (is_utf8($data)) {
+        cluck("utf8 flag is set when calling convert_to_utf8");
+        return $data;
+    }
+    $charset = uc($charset);
+    if ($charset eq 'RAW') {
+        croak("Charset must not be raw when calling convert_to_utf8");
+    }
+    if (not defined $iconv_converters{$charset}) {
+        eval {
+            $iconv_converters{$charset} = Text::Iconv->new($charset,"UTF-8") or
+                die "Unable to create converter for '$charset'";
+        };
+        if ($@) {
+            warn $@;
+            # We weren't able to create the converter, so use Encode
+            # instead
+            return __fallback_convert_to_utf8($data,$charset);
+        }
+    }
+    if (not defined $iconv_converters{$charset}) {
+        warn "The converter for $charset wasn't created properly somehow!";
+        return __fallback_convert_to_utf8($data,$charset);
+    }
+    my $converted_data = $iconv_converters{$charset}->convert($data);
+    # if the conversion failed, retval will be undefined or perhaps
+    # -1.
+    my $retval = $iconv_converters{$charset}->retval();
+    if (not defined $retval or
+        $retval < 0
+       ) {
+        warn "failed to convert to utf8";
+        # Fallback to encode, which will probably also fail.
+        return __fallback_convert_to_utf8($data,$charset);
+    }
+    return decode("UTF-8",$converted_data);
+}
+
+# this returns data in perl's internal encoding
+sub __fallback_convert_to_utf8 {
+     my ($data, $charset) = @_;
+     # raw data just gets returned (that's the charset WordDecorder
+     # uses when it doesn't know what to do)
+     return $data if $charset eq 'raw';
+     if (not defined $charset and not is_utf8($data)) {
+         warn ("Undefined charset, and string '$data' is not in perl's internal encoding");
+         return $data;
+     }
+     # lets assume everything that doesn't have a charset is utf8
+     $charset //= 'utf8';
+     my $result;
+     eval {
+	 $result = decode($charset,$data);
+     };
+     if ($@) {
+	  warn "Unable to decode charset; '$charset' and '$data': $@";
+	  return $data;
+     }
+     return $result;
+}
+
+
+
+1;
+
+__END__
diff --git a/cgi/bugreport.cgi b/cgi/bugreport.cgi
index 55617ae1..70f42292 100755
--- a/cgi/bugreport.cgi
+++ b/cgi/bugreport.cgi
@@ -8,8 +8,6 @@ BEGIN{
     delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
 }
 
-# STDOUT should be using the utf8 io layer
-binmode(STDOUT,':utf8');
 
 use POSIX qw(strftime);
 use MIME::Parser;
@@ -38,6 +36,8 @@ use List::Util qw(max);
 
 use CGI::Simple;
 my $q = new CGI::Simple;
+# STDOUT should be using the utf8 io layer
+binmode(STDOUT,':raw:encoding(UTF-8)');
 
 my %param = cgi_parameters(query => $q,
 			   single => [qw(bug msg att boring terse),
diff --git a/cgi/pkgreport.cgi b/cgi/pkgreport.cgi
index d29c3abc..eb7a61a2 100755
--- a/cgi/pkgreport.cgi
+++ b/cgi/pkgreport.cgi
@@ -18,9 +18,7 @@ BEGIN{
     delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
 }
 
-# STDOUT should be in utf8 mode
-binmode(STDOUT,':utf8');
-
+binmode(STDOUT,':encoding(UTF-8)');
 use POSIX qw(strftime nice);
 
 use Debbugs::Config qw(:globals :text :config);
diff --git a/t/01_mime.t b/t/01_mime.t
index d103eb19..dcd3b760 100644
--- a/t/01_mime.t
+++ b/t/01_mime.t
@@ -30,12 +30,12 @@ ok(Debbugs::MIME::decode_rfc1522(q(=?iso-8859-1?Q?D=F6n_Armstr=F3ng?= <don@donar
 
 
 # 2: test encode
-ok(Debbugs::MIME::decode_rfc1522(Debbugs::MIME::encode_rfc1522($test_str)) eq $test_str,
+ok(Debbugs::MIME::decode_rfc1522(Debbugs::MIME::encode_rfc1522(encode_utf8($test_str))) eq $test_str,
   "encode_rfc1522 encodes strings that decode_rfc1522 can decode");
-ok(Debbugs::MIME::decode_rfc1522(Debbugs::MIME::encode_rfc1522($test_str2)) eq $test_str2,
+ok(Debbugs::MIME::decode_rfc1522(Debbugs::MIME::encode_rfc1522(encode_utf8($test_str2))) eq $test_str2,
   "encode_rfc1522 encodes strings that decode_rfc1522 can decode");
-ok(Debbugs::MIME::decode_rfc1522(Debbugs::MIME::encode_rfc1522($test_str3)) eq $test_str3,
-  "encode_rfc1522 properly handles parentesis and \"");
+ok(Debbugs::MIME::decode_rfc1522(Debbugs::MIME::encode_rfc1522(encode_utf8($test_str3))) eq $test_str3,
+  "encode_rfc1522 properly handles parenthesis and \"");
 
 
 # Make sure that create_mime_message has encoded headers and doesn't enclude any 8-bit characters
diff --git a/t/02_common.t b/t/02_common.t
index d770d3fe..f7d949a8 100644
--- a/t/02_common.t
+++ b/t/02_common.t
@@ -1,10 +1,11 @@
 # -*- mode: cperl;-*-
 
-use Test::More tests => 2;
+use Test::More tests => 3;
 use Encode qw(decode_utf8);
 
 use_ok('Debbugs::Common');
-is_deeply(Debbugs::Common::encode_utf8_structure(
+use_ok('Debbugs::UTF8');
+is_deeply(Debbugs::UTF8::encode_utf8_structure(
           {a => decode_utf8('fÃ¶Ã¶'),
 	   b => [map {decode_utf8($_)} qw(blÃ«h blâ¥h)],
 	  }),
diff --git a/t/13_utf8_mail.t b/t/13_utf8_mail.t
index 8ada76ab..2491e14c 100644
--- a/t/13_utf8_mail.t
+++ b/t/13_utf8_mail.t
@@ -6,6 +6,8 @@ use Test::More tests => 12;
 use warnings;
 use strict;
 
+use utf8;
+
 # Here, we're going to shoot messages through a set of things that can
 # happen.
 
@@ -127,7 +129,7 @@ ok(system('sh','-c','find '.$sendmail_dir.q( -type f | xargs grep -q "Subject: P
 # This is an eval because $ENV{DEBBUGS_CONFIG_FILE} isn't set at BEGIN{} time
 eval "use Debbugs::Status qw(read_bug writebug);";
 my $status = read_bug(bug=>1);
-ok($status->{subject} eq decode_utf8('Ã¼tï¬8 title encoding test'),'bug 1 retitled');
+ok($status->{subject} eq 'Ã¼tï¬8 title encoding test','bug 1 retitled');
 ok($status->{severity} eq 'wishlist','bug 1 wishlisted');
 ok(system('sh','-c','[ $(egrep "retitle.*encoding test" '.$spool_dir.'/db-h/01/1.log|grep -v "=C3=BCt=EF=AC=808"|wc -l) -eq 0 ]') == 0,
    'Control messages escaped properly');