return $data if $charset eq 'raw' or is_utf8($data,1);
my $result;
eval {
- # this encode/decode madness is to make sure that the data
- # really is valid utf8 and that the is_utf8 flag is off.
- $result = encode("utf8",decode($charset,$data))
+ $result = decode($charset,$data);
};
if ($@) {
warn "Unable to decode charset; '$charset' and '$data': $@";
# handle being passed undef properly
return undef if not defined $rawstr;
+ if (is_utf8($rawstr)) {
+ $rawstr= encode_utf8($rawstr);
+ }
# We process words in reverse so we can preserve spacing between
# encoded words. This regex splits on word|nonword boundaries and
# nonword|nonword boundaries. We also consider parenthesis and "
if (length $encoded > 75) {
# Turn utf8 into the internal perl representation
# so . is a character, not a byte.
- my $tempstr = decode_utf8($word,Encode::FB_DEFAULT);
+ my $tempstr = is_utf8($word)?$word:decode_utf8($word,Encode::FB_DEFAULT);
my @encoded;
# Strip it into 10 character long segments, and encode
# the segments