X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=Debbugs%2FCommon.pm;h=cf53b07e0e35344c35823496315585cd958df8ca;hb=32c86fe1d577f9710bdc1edb34714780ea145d0c;hp=eb068edbf4bc589a586989fda55b0ba9678fbacb;hpb=c70c019a5771d994f0022309c2800115ea97163e;p=debbugs.git diff --git a/Debbugs/Common.pm b/Debbugs/Common.pm index eb068ed..cf53b07 100644 --- a/Debbugs/Common.pm +++ b/Debbugs/Common.pm @@ -50,8 +50,6 @@ BEGIN{ qw(cleanup_eval_fail), qw(hash_slice), ], - utf8 => [qw(encode_utf8_structure encode_utf8_safely), - qw(convert_to_utf8)], date => [qw(secs_to_english)], quit => [qw(quit)], lock => [qw(filelock unfilelock lockpid)], @@ -72,13 +70,12 @@ use IO::Scalar; use Debbugs::MIME qw(decode_rfc1522); use Mail::Address; use Cwd qw(cwd); -use Encode qw(encode_utf8 is_utf8 decode); -use Text::Iconv; use Storable qw(dclone); use Params::Validate qw(validate_with :types); use Fcntl qw(:DEFAULT :flock); +use Encode qw(is_utf8 decode_utf8); our $DEBUG_FH = \*STDERR if not defined $DEBUG_FH; @@ -820,6 +817,10 @@ Will carp if given a scalar which isn't a scalarref or a glob (or globref), and return /dev/null. May return undef if IO::Scalar or IO::File fails. (Check $!) +The scalar will fill with octets, not perl's internal encoding, so you +must use decode_utf8() after on the scalar, and encode_utf8() on it +before. This appears to be a bug in the underlying modules. + =cut sub globify_scalar { @@ -829,6 +830,10 @@ sub globify_scalar { if (defined ref($scalar)) { if (ref($scalar) eq 'SCALAR' and not UNIVERSAL::isa($scalar,'GLOB')) { + if (is_utf8(${$scalar})) { + ${$scalar} = decode_utf8(${$scalar}); + carp(q(\$scalar must not be in perl's internal encoding)); + } open $handle, '>:scalar:utf8', $scalar; return $handle; } @@ -843,7 +848,7 @@ sub globify_scalar { carp "Given a non-scalar reference, non-glob to globify_scalar; returning /dev/null handle"; } } - return IO::File->new('/dev/null','>:utf8'); + return IO::File->new('/dev/null','>:encoding(UTF-8)'); } =head2 cleanup_eval_fail() @@ -902,147 +907,6 @@ sub hash_slice(\%@) { } -=head1 UTF-8 - -These functions are exported with the :utf8 tag - -=head2 encode_utf8_structure - - %newdata = encode_utf8_structure(%newdata); - -Takes a complex data structure and encodes any strings with is_utf8 -set into their constituent octets. - -=cut - -our $depth = 0; -sub encode_utf8_structure { - ++$depth; - my @ret; - for my $_ (@_) { - if (ref($_) eq 'HASH') { - push @ret, {encode_utf8_structure(%{$depth == 1 ? dclone($_):$_})}; - } - elsif (ref($_) eq 'ARRAY') { - push @ret, [encode_utf8_structure(@{$depth == 1 ? dclone($_):$_})]; - } - elsif (ref($_)) { - # we don't know how to handle non hash or non arrays - push @ret,$_; - } - else { - push @ret,encode_utf8_safely($_); - } - } - --$depth; - return @ret; -} - -=head2 encode_utf8_safely - - $octets = encode_utf8_safely($string); - -Given a $string, returns the octet equivalent of $string if $string is -in perl's internal encoding; otherwise returns $string. - -Silently returns REFs without encoding them. [If you want to deeply -encode REFs, see encode_utf8_structure.] - -=cut - - -sub encode_utf8_safely{ - my @ret; - for my $r (@_) { - if (not ref($r) and is_utf8($r)) { - $r = encode_utf8($r); - } - push @ret,$r; - } - return wantarray ? @ret : (length @_ > 1 ? @ret : $_[0]); -} - -=head2 convert_to_utf8 - - $utf8 = convert_to_utf8("text","charset"); - -=cut - -our %iconv_converters; - -sub convert_to_utf8 { - my ($data,$charset) = @_; - if (is_utf8($data)) { - return encode_utf8($data); - } - $charset = uc($charset); - if (not defined $iconv_converters{$charset}) { - eval { - $iconv_converters{$charset} = Text::Iconv->new($charset,"UTF-8") or - die "Unable to create converter for '$charset'"; - }; - if ($@) { - warn $@; - # We weren't able to create the converter, so use Encode - # instead - return __fallback_convert_to_utf8($data,$charset); - } - # It shouldn't be necessary when converting to UTF8, but lets - # allow for transliteration and silent discarding of broken - # sequences - eval { - $iconv_converters{$charset}->set_attr("transliterate"); - $iconv_converters{$charset}->set_attr("discard_ilseq"); - }; - # This shouldn't fail on Debian systems; we're warning here - # just in case we've made a mistake above. This warning should - # probably be disabled on non-GNU libc systems. - warn $@ if $@; - } - if (not defined $iconv_converters{$charset}) { - warn "The converter for $charset wasn't created properly somehow!"; - return __fallback_convert_to_utf8($data,$charset); - } - my $converted_data = $iconv_converters{$charset}->convert($data); - # if the conversion failed, retval will be undefined or perhaps - # -1. - if (not defined $iconv_converters{$charset}->retval() or - $iconv_converters{$charset}->retval() < 0 - ) { - # Fallback to encode, which will probably also fail. - return __fallback_convert_to_utf8($data,$charset); - } - return $converted_data; -} - -# Bug #61342 et al. -# we're switching this to return UTF8 octets instead of perl's internal -# encoding -sub __Fallback_convert_to_utf8 { - my ($data, $charset) = @_; - # raw data just gets returned (that's the charset WordDecorder - # uses when it doesn't know what to do) - return $data if $charset eq 'raw'; - if (not defined $charset and not is_utf8($data)) { - warn ("Undefined charset, and string '$data' is not in perl's internal encoding"); - return $data; - } - # lets assume everything that doesn't have a charset is utf8 - $charset //= 'utf8'; - my $result; - eval { - $result = decode($charset,$data) unless is_utf8($data); - $result = encode_utf8($result); - }; - if ($@) { - warn "Unable to decode charset; '$charset' and '$data': $@"; - return $data; - } - return $result; -} - - - 1; __END__