use Debbugs::Common qw(getbuglocation getbugcomponent make_list);
use Params::Validate qw(:types validate_with);
-use Encode qw(encode);
+use Encode qw(encode is_utf8);
=head1 NAME
die "Unable to open bug log $bug_log for reading: $!";
}
+ binmode($self->{logfh},':utf8');
$self->{state} = 'kill-init';
$self->{linenum} = 0;
return $self;
# escape_log(@log)
#
# Returns a list of escaped copies of the strings passed in.
#
# For every string, each line that begins with one of the bytes
# \01-\07 or \030 gets an extra \030 inserted in front of that byte
# (the substitution runs with /gm, so every line start inside a
# multi-line string is checked, not just the start of the string).
# NOTE(review): presumably these bytes are the control characters of
# the bug log format and \030 acts as the escape marker -- confirm
# against the log reader.
#
# The arguments are copied into @log first, so assigning to $_ inside
# the map changes the local copy and not the caller's variables.
sub escape_log {
my @log = @_;
# The two lines below are the removed (-) and added (+) versions of the
# same statement. The removed version passed every string through
# encode("utf8", ...) unconditionally; the added version only encodes
# when is_utf8() reports true for the string and otherwise keeps it
# as-is. In both versions the encode call is wrapped in eval, so an
# exception raised because of Encode::FB_CROAK is caught and ignored
# rather than propagated to the caller.
- return map { eval {$_ = encode("utf8",$_,Encode::FB_CROAK)}; s/^([\01-\07\030])/\030$1/gm; $_ } @log;
+ return map { eval {$_ = is_utf8($_)?encode("utf8",$_,Encode::FB_CROAK):$_;}; s/^([\01-\07\030])/\030$1/gm; $_ } @log;
}
return $data if $charset eq 'raw' or is_utf8($data,1);
my $result;
eval {
- # this encode/decode madness is to make sure that the data
- # really is valid utf8 and that the is_utf8 flag is off.
- $result = encode("utf8",decode($charset,$data))
+ $result = decode($charset,$data);
};
if ($@) {
warn "Unable to decode charset; '$charset' and '$data': $@";
# handle being passed undef properly
return undef if not defined $rawstr;
+ if (is_utf8($rawstr)) {
+ $rawstr= encode_utf8($rawstr);
+ }
# We process words in reverse so we can preserve spacing between
# encoded words. This regex splits on word|nonword boundaries and
# nonword|nonword boundaries. We also consider parenthesis and "
if (length $encoded > 75) {
# Turn utf8 into the internal perl representation
# so . is a character, not a byte.
- my $tempstr = decode_utf8($word,Encode::FB_DEFAULT);
+ my $tempstr = is_utf8($word)?$word:decode_utf8($word,Encode::FB_DEFAULT);
my @encoded;
# Strip it into 10 character long segments, and encode
# the segments