X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=Debbugs%2FUTF8.pm;h=c90cedf48109f0c6301b1e87ebdc964e28d1a170;hb=aa087e76db76df3c8e8a9e052aefcb99371880f6;hp=74a40427412fe402bc81bb4b9cb631e6985995f6;hpb=dd24067d306b8fb6c1984bcc77ccc374ba9bc9ec;p=debbugs.git diff --git a/Debbugs/UTF8.pm b/Debbugs/UTF8.pm index 74a4042..c90cedf 100644 --- a/Debbugs/UTF8.pm +++ b/Debbugs/UTF8.pm @@ -146,12 +146,13 @@ sub decode_utf8_safely{ our %iconv_converters; sub convert_to_utf8 { - my ($data,$charset) = @_; + my ($data,$charset,$internal_call) = @_; + $internal_call //= 0; if (is_utf8($data)) { cluck("utf8 flag is set when calling convert_to_utf8"); return $data; } - $charset = uc($charset); + $charset = uc($charset//'UTF-8'); if ($charset eq 'RAW') { croak("Charset must not be raw when calling convert_to_utf8"); } @@ -161,6 +162,7 @@ sub convert_to_utf8 { die "Unable to create converter for '$charset'"; }; if ($@) { + return undef if $internal_call; warn $@; # We weren't able to create the converter, so use Encode # instead @@ -168,6 +170,7 @@ sub convert_to_utf8 { } } if (not defined $iconv_converters{$charset}) { + return undef if $internal_call; warn "The converter for $charset wasn't created properly somehow!"; return __fallback_convert_to_utf8($data,$charset); } @@ -178,7 +181,17 @@ sub convert_to_utf8 { if (not defined $retval or $retval < 0 ) { - warn "failed to convert to utf8"; + # try ISO8859-1 first + if (not $internal_call) { + my $call_back_data = convert_to_utf8($data,'ISO8859-1',1); + # if there's an Ã (0xC3), it's probably something + # horrible, and we shouldn't try to convert it. + if (defined $call_back_data and $call_back_data !~ /\x{C3}/) { + warn "failed to convert to utf8 (charset: $charset, data: $data), but succeeded with ISO8859-1: ".encode_utf8($call_back_data); + return $call_back_data; + } + } + warn "failed to convert to utf8 (charset: $charset, data: $data)"; # Fallback to encode, which will probably also fail. 
return __fallback_convert_to_utf8($data,$charset); } @@ -199,7 +212,7 @@ sub __fallback_convert_to_utf8 { $charset //= 'utf8'; my $result; eval { - $result = decode($charset,$data); + $result = decode($charset,$data,0); }; if ($@) { warn "Unable to decode charset; '$charset' and '$data': $@";