qw(cleanup_eval_fail),
qw(hash_slice),
],
- utf8 => [qw(encode_utf8_structure encode_utf8_safely),
- qw(convert_to_utf8)],
date => [qw(secs_to_english)],
quit => [qw(quit)],
lock => [qw(filelock unfilelock lockpid)],
use Debbugs::MIME qw(decode_rfc1522);
use Mail::Address;
use Cwd qw(cwd);
-use Encode qw(encode_utf8 is_utf8 decode);
-use Text::Iconv;
use Storable qw(dclone);
use Params::Validate qw(validate_with :types);
}
-=head1 UTF-8
-
-These functions are exported with the :utf8 tag
-
-=head2 encode_utf8_structure
-
- %newdata = encode_utf8_structure(%newdata);
-
-Takes a complex data structure and encodes any strings with is_utf8
-set into their constituent octets.
-
-=cut
-
-our $depth = 0;
-sub encode_utf8_structure {
- ++$depth;
- my @ret;
- for my $_ (@_) {
- if (ref($_) eq 'HASH') {
- push @ret, {encode_utf8_structure(%{$depth == 1 ? dclone($_):$_})};
- }
- elsif (ref($_) eq 'ARRAY') {
- push @ret, [encode_utf8_structure(@{$depth == 1 ? dclone($_):$_})];
- }
- elsif (ref($_)) {
- # we don't know how to handle non hash or non arrays
- push @ret,$_;
- }
- else {
- push @ret,encode_utf8_safely($_);
- }
- }
- --$depth;
- return @ret;
-}
-
-=head2 encode_utf8_safely
-
- $octets = encode_utf8_safely($string);
-
-Given a $string, returns the octet equivalent of $string if $string is
-in perl's internal encoding; otherwise returns $string.
-
-Silently returns REFs without encoding them. [If you want to deeply
-encode REFs, see encode_utf8_structure.]
-
-=cut
-
-
-sub encode_utf8_safely{
- my @ret;
- for my $r (@_) {
- if (not ref($r) and is_utf8($r)) {
- $r = encode_utf8($r);
- }
- push @ret,$r;
- }
- return wantarray ? @ret : (length @_ > 1 ? @ret : $_[0]);
-}
-
-=head2 convert_to_utf8
-
- $utf8 = convert_to_utf8("text","charset");
-
-=cut
-
-our %iconv_converters;
-
-sub convert_to_utf8 {
- my ($data,$charset) = @_;
- if (is_utf8($data)) {
- return encode_utf8($data);
- }
- $charset = uc($charset);
- if (not defined $iconv_converters{$charset}) {
- eval {
- $iconv_converters{$charset} = Text::Iconv->new($charset,"UTF-8") or
- die "Unable to create converter for '$charset'";
- };
- if ($@) {
- warn $@;
- # We weren't able to create the converter, so use Encode
- # instead
- return __fallback_convert_to_utf8($data,$charset);
- }
- # It shouldn't be necessary when converting to UTF8, but lets
- # allow for transliteration and silent discarding of broken
- # sequences
- eval {
- $iconv_converters{$charset}->set_attr("transliterate");
- $iconv_converters{$charset}->set_attr("discard_ilseq");
- };
- # This shouldn't fail on Debian systems; we're warning here
- # just in case we've made a mistake above. This warning should
- # probably be disabled on non-GNU libc systems.
- warn $@ if $@;
- }
- if (not defined $iconv_converters{$charset}) {
- warn "The converter for $charset wasn't created properly somehow!";
- return __fallback_convert_to_utf8($data,$charset);
- }
- my $converted_data = $iconv_converters{$charset}->convert($data);
- # if the conversion failed, retval will be undefined or perhaps
- # -1.
- if (not defined $iconv_converters{$charset}->retval() or
- $iconv_converters{$charset}->retval() < 0
- ) {
- # Fallback to encode, which will probably also fail.
- return __fallback_convert_to_utf8($data,$charset);
- }
- return $converted_data;
-}
-
-# Bug #61342 et al.
-# we're switching this to return UTF8 octets instead of perl's internal
-# encoding
-sub __Fallback_convert_to_utf8 {
- my ($data, $charset) = @_;
- # raw data just gets returned (that's the charset WordDecorder
- # uses when it doesn't know what to do)
- return $data if $charset eq 'raw';
- if (not defined $charset and not is_utf8($data)) {
- warn ("Undefined charset, and string '$data' is not in perl's internal encoding");
- return $data;
- }
- # lets assume everything that doesn't have a charset is utf8
- $charset //= 'utf8';
- my $result;
- eval {
- $result = decode($charset,$data) unless is_utf8($data);
- $result = encode_utf8($result);
- };
- if ($@) {
- warn "Unable to decode charset; '$charset' and '$data': $@";
- return $data;
- }
- return $result;
-}
-
-
-
1;
__END__
}
use Debbugs::Config qw(:config);
-use Debbugs::Common qw(:lock buglog :misc get_hashname sort_versions :utf8);
+use Debbugs::Common qw(:lock buglog :misc get_hashname sort_versions);
+use Debbugs::UTF8;
use Debbugs::Status qw(bug_archiveable :read :hook writebug new_bug splitpackages split_status_fields get_bug_status);
use Debbugs::CGI qw(html_escape);
use Debbugs::Log qw(:misc :write);
use List::MoreUtils qw(apply);
# for convert_to_utf8
-use Debbugs::Common qw(convert_to_utf8);
+use Debbugs::UTF8 qw(convert_to_utf8);
# for decode_rfc1522
use MIME::WordDecoder qw();
use Debbugs::Config qw(:config);
use Params::Validate qw(:types validate_with);
use Encode qw(encode is_utf8);
+use Debbugs::UTF8 qw(encode_utf8_safely);
use Debbugs::Packages;
my ($header,$body) = split /\n\n/, $message, 2;
$header = encode_rfc1522($header);
- return $header . qq(\n\n). $body;
+ return $header . qq(\n\n). encode_utf8_safely($body);
}
=head2 rfc822_date
use base qw(Exporter);
use Params::Validate qw(validate_with :types);
-use Debbugs::Common qw(:util :lock :quit :misc :utf8);
+use Debbugs::Common qw(:util :lock :quit :misc);
+use Debbugs::UTF8;
use Debbugs::Config qw(:config);
use Debbugs::MIME qw(decode_rfc1522 encode_rfc1522);
use Debbugs::Packages qw(makesourceversions make_source_versions getversions get_versions binary_to_source);
--- /dev/null
+# This module is part of debbugs, and is released
+# under the terms of the GPL version 2, or any later
+# version at your option.
+# See the file README and COPYING for more information.
+#
+# Copyright 2013 by Don Armstrong <don@donarmstrong.com>.
+
+package Debbugs::UTF8;
+
+=head1 NAME
+
+Debbugs::UTF8 -- Routines for handling conversion of charsets to UTF8
+
+=head1 SYNOPSIS
+
+use Debbugs::UTF8;
+
+
+=head1 DESCRIPTION
+
+This module contains routines which convert from various different
+charsets to UTF8.
+
+=head1 FUNCTIONS
+
+=cut
+
+use warnings;
+use strict;
+use vars qw($VERSION $DEBUG %EXPORT_TAGS @EXPORT_OK @EXPORT);
+use base qw(Exporter);
+
+BEGIN{
+ $VERSION = 1.00;
+ $DEBUG = 0 unless defined $DEBUG;
+
+ %EXPORT_TAGS = (utf8 => [qw(encode_utf8_structure encode_utf8_safely),
+ qw(convert_to_utf8)],
+ );
+ @EXPORT = (@{$EXPORT_TAGS{utf8}});
+ @EXPORT_OK = ();
+ Exporter::export_ok_tags(keys %EXPORT_TAGS);
+ $EXPORT_TAGS{all} = [@EXPORT_OK];
+}
+
+use Carp;
+$Carp::Verbose = 1;
+
+use Encode qw(encode_utf8 is_utf8 decode);
+use Text::Iconv;
+use Storable qw(dclone);
+
+
+=head1 UTF-8
+
+These functions are exported with the :utf8 tag
+
+=head2 encode_utf8_structure
+
+ %newdata = encode_utf8_structure(%newdata);
+
+Takes a complex data structure and encodes any strings with is_utf8
+set into their constituent octets.
+
+=cut
+
+our $depth = 0;
+sub encode_utf8_structure {
+ ++$depth;
+ my @ret;
+ for my $_ (@_) {
+ if (ref($_) eq 'HASH') {
+ push @ret, {encode_utf8_structure(%{$depth == 1 ? dclone($_):$_})};
+ }
+ elsif (ref($_) eq 'ARRAY') {
+ push @ret, [encode_utf8_structure(@{$depth == 1 ? dclone($_):$_})];
+ }
+ elsif (ref($_)) {
+ # we don't know how to handle non hash or non arrays
+ push @ret,$_;
+ }
+ else {
+ push @ret,encode_utf8_safely($_);
+ }
+ }
+ --$depth;
+ return @ret;
+}
+
+=head2 encode_utf8_safely
+
+ $octets = encode_utf8_safely($string);
+
+Given a $string, returns the octet equivalent of $string if $string is
+in perl's internal encoding; otherwise returns $string.
+
+Silently returns REFs without encoding them. [If you want to deeply
+encode REFs, see encode_utf8_structure.]
+
+=cut
+
+
+sub encode_utf8_safely{
+ my @ret;
+ for my $r (@_) {
+ if (not ref($r) and is_utf8($r)) {
+ $r = encode_utf8($r);
+ }
+ push @ret,$r;
+ }
+ return wantarray ? @ret : (length @_ > 1 ? @ret : $_[0]);
+}
+
+=head2 convert_to_utf8
+
+ $utf8 = convert_to_utf8("text","charset");
+
+=cut
+
+our %iconv_converters;
+
+sub convert_to_utf8 {
+ my ($data,$charset) = @_;
+ if (is_utf8($data)) {
+ return encode_utf8($data);
+ }
+ $charset = uc($charset);
+ if ($charset eq 'RAW') {
+ return $data;
+ }
+ if (not defined $iconv_converters{$charset}) {
+ eval {
+ $iconv_converters{$charset} = Text::Iconv->new($charset,"UTF-8") or
+ die "Unable to create converter for '$charset'";
+ };
+ if ($@) {
+ warn $@;
+ # We weren't able to create the converter, so use Encode
+ # instead
+ return __fallback_convert_to_utf8($data,$charset);
+ }
+ }
+ if (not defined $iconv_converters{$charset}) {
+ warn "The converter for $charset wasn't created properly somehow!";
+ return __fallback_convert_to_utf8($data,$charset);
+ }
+ my $converted_data = $iconv_converters{$charset}->convert($data);
+ $converted_data = "$converted_data";
+ # if the conversion failed, retval will be undefined or perhaps
+ # -1.
+ my $retval = $iconv_converters{$charset}->retval();
+ if (not defined $retval or
+ $retval < 0
+ ) {
+ # Fallback to encode, which will probably also fail.
+ return __fallback_convert_to_utf8($data,$charset);
+ }
+ return $converted_data;
+}
+
+# Bug #61342 et al.
+# we're switching this to return UTF8 octets instead of perl's internal
+# encoding
+sub __fallback_convert_to_utf8 {
+ my ($data, $charset) = @_;
+ # raw data just gets returned (that's the charset WordDecorder
+ # uses when it doesn't know what to do)
+ return $data if $charset eq 'raw';
+ if (not defined $charset and not is_utf8($data)) {
+ warn ("Undefined charset, and string '$data' is not in perl's internal encoding");
+ return $data;
+ }
+ # lets assume everything that doesn't have a charset is utf8
+ $charset //= 'utf8';
+ my $result;
+ eval {
+ $result = decode($charset,$data) unless is_utf8($data);
+ $result = encode_utf8($result);
+ };
+ if ($@) {
+ warn "Unable to decode charset; '$charset' and '$data': $@";
+ return $data;
+ }
+ return $result;
+}
+
+
+
+1;
+
+__END__