X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=Debbugs%2FUTF8.pm;h=0c4d33619dce695ab2de3b0a6c6bd1eba97fba94;hb=55e320bcc73b6ef5c40b70d02dc69049a7e4c1e8;hp=7ab8f9cb619954a1e1fbe6f3056fdc3f99397d3a;hpb=45c0f862895dfe537b2ed6e55cca35b4565abe63;p=debbugs.git diff --git a/Debbugs/UTF8.pm b/Debbugs/UTF8.pm index 7ab8f9c..0c4d336 100644 --- a/Debbugs/UTF8.pm +++ b/Debbugs/UTF8.pm @@ -68,7 +68,7 @@ our $depth = 0; sub encode_utf8_structure { ++$depth; my @ret; - for my $_ (@_) { + for $_ (@_) { if (ref($_) eq 'HASH') { push @ret, {encode_utf8_structure(%{$depth == 1 ? dclone($_):$_})}; } @@ -143,10 +143,9 @@ sub decode_utf8_safely{ =cut -our %iconv_converters; - sub convert_to_utf8 { - my ($data,$charset) = @_; + my ($data,$charset,$internal_call) = @_; + $internal_call //= 0; if (is_utf8($data)) { cluck("utf8 flag is set when calling convert_to_utf8"); return $data; @@ -155,29 +154,36 @@ sub convert_to_utf8 { if ($charset eq 'RAW') { croak("Charset must not be raw when calling convert_to_utf8"); } - if (not defined $iconv_converters{$charset}) { - eval { - $iconv_converters{$charset} = Text::Iconv->new($charset,"UTF-8") or - die "Unable to create converter for '$charset'"; - }; - if ($@) { - warn $@; - # We weren't able to create the converter, so use Encode - # instead - return __fallback_convert_to_utf8($data,$charset); - } - } - if (not defined $iconv_converters{$charset}) { - warn "The converter for $charset wasn't created properly somehow!"; + my $iconv_converter; + eval { + $iconv_converter = Text::Iconv->new($charset,"UTF-8") or + die "Unable to create converter for '$charset'"; + }; + if ($@) { + return undef if $internal_call; + warn $@; + # We weren't able to create the converter, so use Encode + # instead return __fallback_convert_to_utf8($data,$charset); } - my $converted_data = $iconv_converters{$charset}->convert($data); + my $converted_data = $iconv_converter->convert($data); # if the conversion failed, retval will be undefined or perhaps # -1. - my $retval = $iconv_converters{$charset}->retval(); + my $retval = $iconv_converter->retval(); if (not defined $retval or $retval < 0 ) { + # try iso8559-1 first + if (not $internal_call) { + my $call_back_data = convert_to_utf8($data,'ISO8859-1',1); + # if there's an à (0xC3), it's probably something + # horrible, and we shouldn't try to convert it. + if (defined $call_back_data and $call_back_data !~ /\x{C3}/) { + # this warning produces far too much useless output; elminating it + # warn "failed to convert to utf8 (charset: $charset, data: $data), but succeeded with ISO8859-1: ".encode_utf8($call_back_data); + return $call_back_data; + } + } warn "failed to convert to utf8 (charset: $charset, data: $data)"; # Fallback to encode, which will probably also fail. return __fallback_convert_to_utf8($data,$charset); @@ -199,7 +205,7 @@ sub __fallback_convert_to_utf8 { $charset //= 'utf8'; my $result; eval { - $result = decode($charset,$data); + $result = decode($charset,$data,0); }; if ($@) { warn "Unable to decode charset; '$charset' and '$data': $@";