+=head2 cleanup_eval_fail()
+
+ print "Something failed with: ".cleanup_eval_fail($@);
+
+Does various bits of cleanup on the failure message from an eval (or
+any other die message)
+
+Takes at most two options; the first is the actual failure message
+(usually $@ and defaults to $@), the second is the debug level
+(defaults to $DEBUG).
+
+If debug is non-zero, the code at which the failure occured is output.
+
+=cut
+
+sub cleanup_eval_fail {
+ my ($error,$debug) = @_;
+ if (not defined $error or not @_) {
+ $error = $@ // 'unknown reason';
+ }
+ if (@_ <= 1) {
+ $debug = $DEBUG // 0;
+ }
+ $debug = 0 if not defined $debug;
+
+ if ($debug > 0) {
+ return $error;
+ }
+ # ditch the "at foo/bar/baz.pm line 5"
+ $error =~ s/\sat\s\S+\sline\s\d+//;
+ # ditch croak messages
+ $error =~ s/^\t+.+\n?//g;
+ # ditch trailing multiple periods in case there was a cascade of
+ # die messages.
+ $error =~ s/\.+$/\./;
+ return $error;
+}
+
+=head2 hash_slice
+
+ hash_slice(%hash,qw(key1 key2 key3))
+
+For each key, returns matching values and keys of the hash if they exist
+
+=cut
+
+
+# NB: We use prototypes here SPECIFICALLY so that we can be passed a
+# hash without uselessly making a reference to first. DO NOT USE
+# PROTOTYPES USELESSLY ELSEWHERE.
+sub hash_slice(\%@) {
+ my ($hashref,@keys) = @_;
+ return map {exists $hashref->{$_}?($_,$hashref->{$_}):()} @keys;
+}
+
+
+=head1 UTF-8
+
+These functions are exported with the :utf8 tag
+
+=head2 encode_utf8_structure
+
+ %newdata = encode_utf8_structure(%newdata);
+
+Takes a complex data structure and encodes any strings with is_utf8
+set into their constituent octets.
+
+=cut
+
+our $depth = 0;
+sub encode_utf8_structure {
+ ++$depth;
+ my @ret;
+ for my $_ (@_) {
+ if (ref($_) eq 'HASH') {
+ push @ret, {encode_utf8_structure(%{$depth == 1 ? dclone($_):$_})};
+ }
+ elsif (ref($_) eq 'ARRAY') {
+ push @ret, [encode_utf8_structure(@{$depth == 1 ? dclone($_):$_})];
+ }
+ elsif (ref($_)) {
+ # we don't know how to handle non hash or non arrays
+ push @ret,$_;
+ }
+ else {
+ push @ret,encode_utf8_safely($_);
+ }
+ }
+ --$depth;
+ return @ret;
+}
+
+=head2 encode_utf8_safely
+
+ $octets = encode_utf8_safely($string);
+
+Given a $string, returns the octet equivalent of $string if $string is
+in perl's internal encoding; otherwise returns $string.
+
+Silently returns REFs without encoding them. [If you want to deeply
+encode REFs, see encode_utf8_structure.]
+
+=cut
+
+
+sub encode_utf8_safely{
+ my @ret;
+ for my $r (@_) {
+ if (not ref($r) and is_utf8($r)) {
+ $r = encode_utf8($r);
+ }
+ push @ret,$r;
+ }
+ return wantarray ? @ret : (length @_ > 1 ? @ret : $_[0]);
+}
+
+=head2 convert_to_utf8
+
+ $utf8 = convert_to_utf8("text","charset");
+
+=cut
+
+our %iconv_converters;
+
+sub convert_to_utf8 {
+ my ($data,$charset) = @_;
+ if (is_utf8($data)) {
+ return encode_utf8($data);
+ }
+ $charset = uc($charset);
+ if (not defined $iconv_converters{$charset}) {
+ eval {
+ $iconv_converters{$charset} = Text::Iconv->new($charset,"UTF-8") or
+ die "Unable to create converter for '$charset'";
+ };
+ if ($@) {
+ warn $@;
+ # We weren't able to create the converter, so use Encode
+ # instead
+ return __fallback_convert_to_utf8($data,$charset);
+ }
+ # It shouldn't be necessary when converting to UTF8, but lets
+ # allow for transliteration and silent discarding of broken
+ # sequences
+ eval {
+ $iconv_converters{$charset}->set_attr("transliterate");
+ $iconv_converters{$charset}->set_attr("discard_ilseq");
+ };
+ # This shouldn't fail on Debian systems; we're warning here
+ # just in case we've made a mistake above. This warning should
+ # probably be disabled on non-GNU libc systems.
+ warn $@ if $@;
+ }
+ if (not defined $iconv_converters{$charset}) {
+ warn "The converter for $charset wasn't created properly somehow!";
+ return __fallback_convert_to_utf8($data,$charset);
+ }
+ my $converted_data = $iconv_converters{$charset}->convert($data);
+ # if the conversion failed, retval will be undefined or perhaps
+ # -1.
+ if (not defined $iconv_converters{$charset}->retval() or
+ $iconv_converters{$charset}->retval() < 0
+ ) {
+ # Fallback to encode, which will probably also fail.
+ return __fallback_convert_to_utf8($data,$charset);
+ }
+ return $converted_data;
+}
+
+# Bug #61342 et al.
+# we're switching this to return UTF8 octets instead of perl's internal
+# encoding
+sub __Fallback_convert_to_utf8 {
+ my ($data, $charset) = @_;
+ # raw data just gets returned (that's the charset WordDecorder
+ # uses when it doesn't know what to do)
+ return $data if $charset eq 'raw';
+ if (not defined $charset and not is_utf8($data)) {
+ warn ("Undefined charset, and string '$data' is not in perl's internal encoding");
+ return $data;
+ }
+ # lets assume everything that doesn't have a charset is utf8
+ $charset //= 'utf8';
+ my $result;
+ eval {
+ $result = decode($charset,$data) unless is_utf8($data);
+ $result = encode_utf8($result);
+ };
+ if ($@) {
+ warn "Unable to decode charset; '$charset' and '$data': $@";
+ return $data;
+ }
+ return $result;
+}
+
+