move UTF8 routines out of Debbugs::Common and into Debbugs::UTF8 so Debbugs::Config...

author Don Armstrong <don@donarmstrong.com>

Fri, 1 Mar 2013 05:56:28 +0000 (21:56 -0800)

committer Don Armstrong <don@donarmstrong.com>

Fri, 1 Mar 2013 05:56:28 +0000 (21:56 -0800)
author Don Armstrong <don@donarmstrong.com>
Fri, 1 Mar 2013 05:56:28 +0000 (21:56 -0800)
committer Don Armstrong <don@donarmstrong.com>
Fri, 1 Mar 2013 05:56:28 +0000 (21:56 -0800)
diff --git a/Debbugs/Common.pm b/Debbugs/Common.pm

index eb068edbf4bc589a586989fda55b0ba9678fbacb..279893c135bb98a4fda11d8dbd652cb2ffc64e10 100644 (file)
--- a/Debbugs/Common.pm
+++ b/Debbugs/Common.pm
@@ -50,8 +50,6 @@ BEGIN{
                                 qw(cleanup_eval_fail),
                                 qw(hash_slice),
                                ],
-                    utf8   => [qw(encode_utf8_structure encode_utf8_safely),
-                                qw(convert_to_utf8)],
                      date   => [qw(secs_to_english)],
                      quit   => [qw(quit)],
                      lock   => [qw(filelock unfilelock lockpid)],
@@ -72,8 +70,6 @@ use IO::Scalar;
  use Debbugs::MIME qw(decode_rfc1522);
  use Mail::Address;
  use Cwd qw(cwd);
-use Encode qw(encode_utf8 is_utf8 decode);
-use Text::Iconv;
  use Storable qw(dclone);
  
  use Params::Validate qw(validate_with :types);
@@ -902,147 +898,6 @@ sub hash_slice(\%@) {
  }
  
  
-=head1 UTF-8
-
-These functions are exported with the :utf8 tag
-
-=head2 encode_utf8_structure
-
-     %newdata = encode_utf8_structure(%newdata);
-
-Takes a complex data structure and encodes any strings with is_utf8
-set into their constituent octets.
-
-=cut
-
-our $depth = 0;
-sub encode_utf8_structure {
-    ++$depth;
-    my @ret;
-    for my $_ (@_) {
-       if (ref($_) eq 'HASH') {
-           push @ret, {encode_utf8_structure(%{$depth == 1 ? dclone($_):$_})};
-       }
-       elsif (ref($_) eq 'ARRAY') {
-           push @ret, [encode_utf8_structure(@{$depth == 1 ? dclone($_):$_})];
-       }
-       elsif (ref($_)) {
-           # we don't know how to handle non hash or non arrays
-           push @ret,$_;
-       }
-       else {
-           push @ret,encode_utf8_safely($_);
-       }
-    }
-    --$depth;
-    return @ret;
-}
-
-=head2 encode_utf8_safely
-
-     $octets = encode_utf8_safely($string);
-
-Given a $string, returns the octet equivalent of $string if $string is
-in perl's internal encoding; otherwise returns $string.
-
-Silently returns REFs without encoding them. [If you want to deeply
-encode REFs, see encode_utf8_structure.]
-
-=cut
-
-
-sub encode_utf8_safely{
-    my @ret;
-    for my $r (@_) {
-        if (not ref($r) and is_utf8($r)) {
-           $r = encode_utf8($r);
-       }
-       push @ret,$r;
-    }
-    return wantarray ? @ret : (length @_ > 1 ? @ret : $_[0]);
-}
-
-=head2 convert_to_utf8
-
-    $utf8 = convert_to_utf8("text","charset");
-
-=cut
-
-our %iconv_converters;
-
-sub convert_to_utf8 {
-    my ($data,$charset) = @_;
-    if (is_utf8($data)) {
-        return encode_utf8($data);
-    }
-    $charset = uc($charset);
-    if (not defined $iconv_converters{$charset}) {
-        eval {
-            $iconv_converters{$charset} = Text::Iconv->new($charset,"UTF-8") or
-                die "Unable to create converter for '$charset'";
-        };
-        if ($@) {
-            warn $@;
-            # We weren't able to create the converter, so use Encode
-            # instead
-            return __fallback_convert_to_utf8($data,$charset);
-        }
-        # It shouldn't be necessary when converting to UTF8, but lets
-        # allow for transliteration and silent discarding of broken
-        # sequences
-        eval {
-            $iconv_converters{$charset}->set_attr("transliterate");
-            $iconv_converters{$charset}->set_attr("discard_ilseq");
-        };
-        # This shouldn't fail on Debian systems; we're warning here
-        # just in case we've made a mistake above. This warning should
-        # probably be disabled on non-GNU libc systems.
-        warn $@ if $@;
-    }
-    if (not defined $iconv_converters{$charset}) {
-        warn "The converter for $charset wasn't created properly somehow!";
-        return __fallback_convert_to_utf8($data,$charset);
-    }
-    my $converted_data = $iconv_converters{$charset}->convert($data);
-    # if the conversion failed, retval will be undefined or perhaps
-    # -1.
-    if (not defined $iconv_converters{$charset}->retval() or
-        $iconv_converters{$charset}->retval() < 0
-       ) {
-        # Fallback to encode, which will probably also fail.
-        return __fallback_convert_to_utf8($data,$charset);
-    }
-    return $converted_data;
-}
-
-# Bug #61342 et al.
-# we're switching this to return UTF8 octets instead of perl's internal
-# encoding
-sub __Fallback_convert_to_utf8 {
-     my ($data, $charset) = @_;
-     # raw data just gets returned (that's the charset WordDecorder
-     # uses when it doesn't know what to do)
-     return $data if $charset eq 'raw';
-     if (not defined $charset and not is_utf8($data)) {
-         warn ("Undefined charset, and string '$data' is not in perl's internal encoding");
-         return $data;
-     }
-     # lets assume everything that doesn't have a charset is utf8
-     $charset //= 'utf8';
-     my $result;
-     eval {
-        $result = decode($charset,$data) unless is_utf8($data);
-         $result = encode_utf8($result);
-     };
-     if ($@) {
-         warn "Unable to decode charset; '$charset' and '$data': $@";
-         return $data;
-     }
-     return $result;
-}
-
-
-
  1;
  
  __END__
diff --git a/Debbugs/Control.pm b/Debbugs/Control.pm

index 3119ef8940bf9f32a5e2593e6f338b0ecc84c157..44463ba271189f4b9efd3c3f8a55df7797e4eb66 100644 (file)
--- a/Debbugs/Control.pm
+++ b/Debbugs/Control.pm
@@ -110,7 +110,8 @@ BEGIN{
  }
  
  use Debbugs::Config qw(:config);
-use Debbugs::Common qw(:lock buglog :misc get_hashname sort_versions :utf8);
+use Debbugs::Common qw(:lock buglog :misc get_hashname sort_versions);
+use Debbugs::UTF8;
  use Debbugs::Status qw(bug_archiveable :read :hook writebug new_bug splitpackages split_status_fields get_bug_status);
  use Debbugs::CGI qw(html_escape);
  use Debbugs::Log qw(:misc :write);
diff --git a/Debbugs/MIME.pm b/Debbugs/MIME.pm

index 8c7b0cd5568305e5deaf2aabea903379fc1ce8c4..482a30b4d182e530bbcd3b780a5479dc53f4cae4 100644 (file)
--- a/Debbugs/MIME.pm
+++ b/Debbugs/MIME.pm
@@ -55,7 +55,7 @@ use POSIX qw(strftime);
  use List::MoreUtils qw(apply);
  
  # for convert_to_utf8
-use Debbugs::Common qw(convert_to_utf8);
+use Debbugs::UTF8 qw(convert_to_utf8);
  
  # for decode_rfc1522
  use MIME::WordDecoder qw();
diff --git a/Debbugs/Mail.pm b/Debbugs/Mail.pm

index d26c860a612f5866576e56944e5c2c529e7eda0c..9fa282b1b53689447621513908d3092d57ebc65b 100644 (file)
--- a/Debbugs/Mail.pm
+++ b/Debbugs/Mail.pm
@@ -49,6 +49,7 @@ use Debbugs::MIME qw(encode_rfc1522);
  use Debbugs::Config qw(:config);
  use Params::Validate qw(:types validate_with);
  use Encode qw(encode is_utf8);
+use Debbugs::UTF8 qw(encode_utf8_safely);
  
  use Debbugs::Packages;
  
@@ -398,7 +399,7 @@ sub encode_headers{
  
       my ($header,$body) = split /\n\n/, $message, 2;
       $header = encode_rfc1522($header);
-     return $header . qq(\n\n). $body;
+     return $header . qq(\n\n). encode_utf8_safely($body);
  }
  
  =head2 rfc822_date
diff --git a/Debbugs/Status.pm b/Debbugs/Status.pm

index a2aeabef1879cfcaa578fa1c6cc8573d82e868dd..7d72c032ec9b68c8678bca33acd2eacfd7707748 100644 (file)
--- a/Debbugs/Status.pm
+++ b/Debbugs/Status.pm
@@ -37,7 +37,8 @@ use vars qw($VERSION $DEBUG %EXPORT_TAGS @EXPORT_OK @EXPORT);
  use base qw(Exporter);
  
  use Params::Validate qw(validate_with :types);
-use Debbugs::Common qw(:util :lock :quit :misc :utf8);
+use Debbugs::Common qw(:util :lock :quit :misc);
+use Debbugs::UTF8;
  use Debbugs::Config qw(:config);
  use Debbugs::MIME qw(decode_rfc1522 encode_rfc1522);
  use Debbugs::Packages qw(makesourceversions make_source_versions getversions get_versions binary_to_source);
diff --git a/Debbugs/UTF8.pm b/Debbugs/UTF8.pm

new file mode 100644 (file)

index 0000000..e967340
--- /dev/null
+++ b/Debbugs/UTF8.pm
@@ -0,0 +1,191 @@
+# This module is part of debbugs, and is released
+# under the terms of the GPL version 2, or any later
+# version at your option.
+# See the file README and COPYING for more information.
+#
+# Copyright 2013 by Don Armstrong <don@donarmstrong.com>.
+
+package Debbugs::UTF8;
+
+=head1 NAME
+
+Debbugs::UTF8 -- Routines for handling conversion of charsets to UTF8
+
+=head1 SYNOPSIS
+
+use Debbugs::UTF8;
+
+
+=head1 DESCRIPTION
+
+This module contains routines which convert from various different
+charsets to UTF8.
+
+=head1 FUNCTIONS
+
+=cut
+
+use warnings;
+use strict;
+use vars qw($VERSION $DEBUG %EXPORT_TAGS @EXPORT_OK @EXPORT);
+use base qw(Exporter);
+
+BEGIN{
+     $VERSION = 1.00;
+     $DEBUG = 0 unless defined $DEBUG;
+
+     %EXPORT_TAGS = (utf8   => [qw(encode_utf8_structure encode_utf8_safely),
+                                qw(convert_to_utf8)],
+                    );
+     @EXPORT = (@{$EXPORT_TAGS{utf8}});
+     @EXPORT_OK = ();
+     Exporter::export_ok_tags(keys %EXPORT_TAGS);
+     $EXPORT_TAGS{all} = [@EXPORT_OK];
+}
+
+use Carp;
+$Carp::Verbose = 1;
+
+use Encode qw(encode_utf8 is_utf8 decode);
+use Text::Iconv;
+use Storable qw(dclone);
+
+
+=head1 UTF-8
+
+These functions are exported with the :utf8 tag
+
+=head2 encode_utf8_structure
+
+     %newdata = encode_utf8_structure(%newdata);
+
+Takes a complex data structure and encodes any strings with is_utf8
+set into their constituent octets.
+
+=cut
+
+our $depth = 0;
+sub encode_utf8_structure {
+    ++$depth;
+    my @ret;
+    for my $_ (@_) {
+       if (ref($_) eq 'HASH') {
+           push @ret, {encode_utf8_structure(%{$depth == 1 ? dclone($_):$_})};
+       }
+       elsif (ref($_) eq 'ARRAY') {
+           push @ret, [encode_utf8_structure(@{$depth == 1 ? dclone($_):$_})];
+       }
+       elsif (ref($_)) {
+           # we don't know how to handle non hash or non arrays
+           push @ret,$_;
+       }
+       else {
+           push @ret,encode_utf8_safely($_);
+       }
+    }
+    --$depth;
+    return @ret;
+}
+
+=head2 encode_utf8_safely
+
+     $octets = encode_utf8_safely($string);
+
+Given a $string, returns the octet equivalent of $string if $string is
+in perl's internal encoding; otherwise returns $string.
+
+Silently returns REFs without encoding them. [If you want to deeply
+encode REFs, see encode_utf8_structure.]
+
+=cut
+
+
+sub encode_utf8_safely{
+    my @ret;
+    for my $r (@_) {
+        if (not ref($r) and is_utf8($r)) {
+           $r = encode_utf8($r);
+       }
+       push @ret,$r;
+    }
+    return wantarray ? @ret : (length @_ > 1 ? @ret : $_[0]);
+}
+
+=head2 convert_to_utf8
+
+    $utf8 = convert_to_utf8("text","charset");
+
+=cut
+
+our %iconv_converters;
+
+sub convert_to_utf8 {
+    my ($data,$charset) = @_;
+    if (is_utf8($data)) {
+        return encode_utf8($data);
+    }
+    $charset = uc($charset);
+    if ($charset eq 'RAW') {
+        return $data;
+    }
+    if (not defined $iconv_converters{$charset}) {
+        eval {
+            $iconv_converters{$charset} = Text::Iconv->new($charset,"UTF-8") or
+                die "Unable to create converter for '$charset'";
+        };
+        if ($@) {
+            warn $@;
+            # We weren't able to create the converter, so use Encode
+            # instead
+            return __fallback_convert_to_utf8($data,$charset);
+        }
+    }
+    if (not defined $iconv_converters{$charset}) {
+        warn "The converter for $charset wasn't created properly somehow!";
+        return __fallback_convert_to_utf8($data,$charset);
+    }
+    my $converted_data = $iconv_converters{$charset}->convert($data);
+    $converted_data = "$converted_data";
+    # if the conversion failed, retval will be undefined or perhaps
+    # -1.
+    my $retval = $iconv_converters{$charset}->retval();
+    if (not defined $retval or
+        $retval < 0
+       ) {
+        # Fallback to encode, which will probably also fail.
+        return __fallback_convert_to_utf8($data,$charset);
+    }
+    return $converted_data;
+}
+
+# Bug #61342 et al.
+# we're switching this to return UTF8 octets instead of perl's internal
+# encoding
+sub __fallback_convert_to_utf8 {
+     my ($data, $charset) = @_;
+     # raw data just gets returned (that's the charset WordDecorder
+     # uses when it doesn't know what to do)
+     return $data if $charset eq 'raw';
+     if (not defined $charset and not is_utf8($data)) {
+         warn ("Undefined charset, and string '$data' is not in perl's internal encoding");
+         return $data;
+     }
+     # lets assume everything that doesn't have a charset is utf8
+     $charset //= 'utf8';
+     my $result;
+     eval {
+        $result = decode($charset,$data) unless is_utf8($data);
+         $result = encode_utf8($result);
+     };
+     if ($@) {
+         warn "Unable to decode charset; '$charset' and '$data': $@";
+         return $data;
+     }
+     return $result;
+}
+
+
+
+1;
+
+__END__
author	Don Armstrong <don@donarmstrong.com>
	Fri, 1 Mar 2013 05:56:28 +0000 (21:56 -0800)
committer	Don Armstrong <don@donarmstrong.com>
	Fri, 1 Mar 2013 05:56:28 +0000 (21:56 -0800)
Debbugs/Common.pm		patch \| blob \| history
Debbugs/Control.pm		patch \| blob \| history
Debbugs/MIME.pm		patch \| blob \| history
Debbugs/Mail.pm		patch \| blob \| history
Debbugs/Status.pm		patch \| blob \| history
Debbugs/UTF8.pm	[new file with mode: 0644]	patch \| blob