Merge branch 'don/processencoding' of git+ssh://git.donarmstrong.com/srv/git/debbugs...

author Don Armstrong <don@donarmstrong.com>

Sun, 8 Jul 2012 03:04:29 +0000 (20:04 -0700)

committer Don Armstrong <don@donarmstrong.com>

Sun, 8 Jul 2012 03:04:29 +0000 (20:04 -0700)
author Don Armstrong <don@donarmstrong.com>
Sun, 8 Jul 2012 03:04:29 +0000 (20:04 -0700)
committer Don Armstrong <don@donarmstrong.com>
Sun, 8 Jul 2012 03:04:29 +0000 (20:04 -0700)
diff --combined Debbugs/Log.pm

index 20e13503441df9521913a4ed7308e8c0a21ea08e,e58d84ae486bf9d2892fbd7b54940e50c2df09d9..96748b7ee401813e6a096bf825a33122caca9daf
--- 1/Debbugs/Log.pm
--- 2/Debbugs/Log.pm
+++ b/Debbugs/Log.pm
@@@ -39,7 -39,7 +39,7 @@@ use Carp
   
   use Debbugs::Common qw(getbuglocation getbugcomponent make_list);
   use Params::Validate qw(:types validate_with);
- -use Encode qw(encode);
+ +use Encode qw(encode is_utf8);
   
   =head1 NAME
   
@@@ -201,6 -201,7 +201,7 @@@ sub ne
               die "Unable to open bug log $bug_log for reading: $!";
       }
   
+     binmode($self->{logfh},':utf8');
       $self->{state} = 'kill-init';
       $self->{linenum} = 0;
       return $self;
@@@ -427,7 -428,7 +428,7 @@@ Applies the log escape regex to the pas
   
   sub escape_log {
         my @log = @_;
- -      return map { eval {$_ = encode("utf8",$_,Encode::FB_CROAK)}; s/^([\01-\07\030])/\030$1/gm; $_ } @log;
+ +      return map { eval {$_ = is_utf8($_)?encode("utf8",$_,Encode::FB_CROAK):$_;}; s/^([\01-\07\030])/\030$1/gm; $_ } @log;
   }
   
   
diff --combined Debbugs/MIME.pm

index e94733ee016db8de4213700a7581bb3b6a00e285,bc47ca3a7b0a94e549fafd49dd473505fa3d7157..05534e3e054c47d2cb281f683aaacf1a0ac7a52e
--- 1/Debbugs/MIME.pm
--- 2/Debbugs/MIME.pm
+++ b/Debbugs/MIME.pm
@@@ -229,9 -229,7 +229,7 @@@ sub convert_to_utf8 
        return $data if $charset eq 'raw' or is_utf8($data,1);
        my $result;
        eval {
-         # this encode/decode madness is to make sure that the data
-         # really is valid utf8 and that the is_utf8 flag is off.
-         $result = encode("utf8",decode($charset,$data))
+        $result = decode($charset,$data);
        };
        if ($@) {
           warn "Unable to decode charset; '$charset' and '$data': $@";
@@@ -286,6 -284,9 +284,9 @@@ sub encode_rfc1522 
   
        # handle being passed undef properly
        return undef if not defined $rawstr;
+      if (is_utf8($rawstr)) {
+        $rawstr= encode_utf8($rawstr);
+      }
        # We process words in reverse so we can preserve spacing between
        # encoded words. This regex splits on word|nonword boundaries and
        # nonword|nonword boundaries. We also consider parenthesis and "
@@@ -313,7 -314,7 +314,7 @@@
                if (length $encoded > 75) {
                     # Turn utf8 into the internal perl representation
                     # so . is a character, not a byte.
- -                  my $tempstr = decode_utf8($word,Encode::FB_DEFAULT);
+ +                  my $tempstr = is_utf8($word)?$word:decode_utf8($word,Encode::FB_DEFAULT);
                     my @encoded;
                     # Strip it into 10 character long segments, and encode
                     # the segments
author	Don Armstrong <don@donarmstrong.com>
	Sun, 8 Jul 2012 03:04:29 +0000 (20:04 -0700)
committer	Don Armstrong <don@donarmstrong.com>
	Sun, 8 Jul 2012 03:04:29 +0000 (20:04 -0700)
		1	2
Debbugs/Log.pm	patch |	diff1 |	diff2 |	blob | history
Debbugs/MIME.pm	patch |	diff1 |	diff2 |	blob | history