X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=Debbugs%2FCommon.pm;h=e892d701d411164e742ec47d0dd9242e445e0ce0;hb=07128905acc67b85d91ab2070dc172ac0152905f;hp=eb068edbf4bc589a586989fda55b0ba9678fbacb;hpb=c70c019a5771d994f0022309c2800115ea97163e;p=debbugs.git

diff --git a/Debbugs/Common.pm b/Debbugs/Common.pm
index eb068ed..e892d70 100644
--- a/Debbugs/Common.pm
+++ b/Debbugs/Common.pm
@@ -31,7 +31,7 @@ with equivalent (or better) functionality here.
 use warnings;
 use strict;
 use vars qw($VERSION $DEBUG %EXPORT_TAGS @EXPORT_OK @EXPORT);
-use base qw(Exporter);
+use Exporter qw(import);
 
 BEGIN{
      $VERSION = 1.00;
@@ -45,16 +45,15 @@ BEGIN{
 				qw(getpseudodesc),
 				qw(package_maintainer),
 				qw(sort_versions),
+				qw(open_compressed_file),
 			       ],
 		     misc   => [qw(make_list globify_scalar english_join checkpid),
 				qw(cleanup_eval_fail),
 				qw(hash_slice),
 			       ],
-		     utf8   => [qw(encode_utf8_structure encode_utf8_safely),
-                                qw(convert_to_utf8)],
 		     date   => [qw(secs_to_english)],
 		     quit   => [qw(quit)],
-		     lock   => [qw(filelock unfilelock lockpid)],
+		     lock   => [qw(filelock unfilelock lockpid simple_filelock simple_unlockfile)],
 		    );
      @EXPORT_OK = ();
      Exporter::export_ok_tags(keys %EXPORT_TAGS);
@@ -72,13 +71,13 @@ use IO::Scalar;
 use Debbugs::MIME qw(decode_rfc1522);
 use Mail::Address;
 use Cwd qw(cwd);
-use Encode qw(encode_utf8 is_utf8 decode);
-use Text::Iconv;
 use Storable qw(dclone);
+use Time::HiRes qw(usleep);
 
 use Params::Validate qw(validate_with :types);
 
 use Fcntl qw(:DEFAULT :flock);
+use Encode qw(is_utf8 decode_utf8);
 
 our $DEBUG_FH = \*STDERR if not defined $DEBUG_FH;
 
@@ -241,7 +240,40 @@ sub overwritefile {
 	    die "Unable to rename ${file}.new to $file: $!";
 }
 
+=head2 open_compressed_file
 
+     my $fh = open_compressed_file('foo.gz') or
+          die "Unable to open compressed file: $!";
+
+
+Opens a file; if the file ends in .gz, .xz, or .bz2, the appropriate
+decompression program is forked and output from it is read.
+
+This routine by default opens the file with the C<:encoding(UTF-8)> layer; to
+use a different PerlIO layer, pass it as the second argument.
+
+=cut
+sub open_compressed_file {
+    my ($file,$encoding) = @_;
+    $encoding //= ':encoding(UTF-8)';
+    my $fh;
+    my $mode = "<$encoding";
+    my @opts;
+    if ($file =~ /\.gz$/) {
+	$mode = "-|$encoding";
+	push @opts,'gzip','-dc';
+    }
+    if ($file =~ /\.xz$/) {
+	$mode = "-|$encoding";
+	push @opts,'xz','-dc';
+    }
+    if ($file =~ /\.bz2$/) {
+	$mode = "-|$encoding";
+	push @opts,'bzip2','-dc';
+    }
+    open($fh,$mode,@opts,$file);
+    return $fh;
+}
 
 
 
@@ -366,7 +398,7 @@ sub package_maintainer {
 	for my $fn (@config{('source_maintainer_file',
 			     'source_maintainer_file_override',
 			     'pseudo_maint_file')}) {
-	    next unless defined $fn;
+	    next unless defined $fn and length $fn;
 	    if (not -e $fn) {
 		warn "Missing source maintainer file '$fn'";
 		next;
@@ -382,7 +414,7 @@ sub package_maintainer {
 	for my $fn (@config{('maintainer_file',
 			     'maintainer_file_override',
 			     'pseudo_maint_file')}) {
-	    next unless defined $fn;
+	    next unless defined $fn and length $fn;
 	    if (not -e $fn) {
 		warn "Missing maintainer file '$fn'";
 		next;
@@ -433,12 +465,12 @@ sub __add_to_hash {
     }
     $type //= 'address';
     my $fh = IO::File->new($fn,'r') or
-	die "Unable to open $fn for reading: $!";
+	croak "Unable to open $fn for reading: $!";
     binmode($fh,':encoding(UTF-8)');
     while (<$fh>) {
 	chomp;
-	next unless m/^(\S+)\s+(\S.*\S)\s*$/;
-	my ($key,$value)=($1,$2);
+        next unless m/^(\S+)\s+(\S.*\S)\s*$/;
+        my ($key,$value)=($1,$2);
 	$key = lc $key;
 	$forward->{$key}= $value;
 	if (defined $reverse) {
@@ -473,7 +505,8 @@ sub getpseudodesc {
     return $_pseudodesc if defined $_pseudodesc;
     $_pseudodesc = {};
     __add_to_hash($config{pseudo_desc_file},$_pseudodesc) if
-	defined $config{pseudo_desc_file};
+	defined $config{pseudo_desc_file} and
+	length $config{pseudo_desc_file};
     return $_pseudodesc;
 }
 
@@ -576,35 +609,77 @@ sub filelock {
 	    confess "Locking already locked file: $lockfile\n".Data::Dumper->Dump([$lockfile,$locks],[qw(lockfile locks)]);
 	}
     }
-    my ($count,$errors);
-    $count= 10; $errors= '';
-    for (;;) {
-	my $fh = eval {
+    my ($fh,$t_lockfile,$errors) =
+        simple_filelock($lockfile,10,1);
+    if ($fh) {
+        push @filelocks, {fh => $fh, file => $lockfile};
+        if (defined $locks) {
+            $locks->{locks}{$lockfile}++;
+            push @{$locks->{lockorder}},$lockfile;
+        }
+    } else {
+        use Data::Dumper;
+        croak "failed to get lock on $lockfile -- $errors".
+            (defined $locks?Data::Dumper->Dump([$locks],[qw(locks)]):'');
+    }
+}
+
+=head2 simple_filelock
+
+    my ($fh,$t_lockfile,$errors) =
+        simple_filelock($lockfile,$count,$wait);
+
+Does a flock of C<$lockfile>. If C<$count> is zero (or negative), does a
+blocking lock. Otherwise, makes up to C<$count> non-blocking attempts, waiting
+C<$wait> seconds after each failed attempt.
+
+In list context, returns the lockfile filehandle, lockfile name, and
+any errors which occurred.
+
+When the lockfile filehandle is undef, locking failed.
+
+These lockfiles must be unlocked manually at process end.
+
+
+=cut
+
+sub simple_filelock {
+    my ($lockfile,$count,$wait) = @_;
+    if (not defined $count) {
+        $count = 10;
+    }
+    if ($count < 0) {
+        $count = 0;
+    }
+    if (not defined $wait) {
+        $wait = 1;
+    }
+    my $errors= '';
+    my $fh;
+    while (1) {
+        $fh = eval {
 	     my $fh2 = IO::File->new($lockfile,'w')
 		  or die "Unable to open $lockfile for writing: $!";
-	     flock($fh2,LOCK_EX|LOCK_NB)
+             # Do a blocking lock if count is zero
+	     flock($fh2,LOCK_EX|($count == 0?0:LOCK_NB))
 		  or die "Unable to lock $lockfile $!";
 	     return $fh2;
 	};
 	if ($@) {
 	     $errors .= $@;
 	}
-	if ($fh) {
-	     push @filelocks, {fh => $fh, file => $lockfile};
-	     if (defined $locks) {
-		 $locks->{locks}{$lockfile}++;
-		 push @{$locks->{lockorder}},$lockfile;
-	     }
-	     last;
-	}
-        if (--$count <=0) {
-            $errors =~ s/\n+$//;
-	    use Data::Dumper;
-            croak "failed to get lock on $lockfile -- $errors".
-		(defined $locks?Data::Dumper->Dump([$locks],[qw(locks)]):'');
+        if ($fh) {
+            last;
         }
-#        sleep 10;
+        # use usleep for fractional wait seconds
+        usleep($wait * 1_000_000);
+    } continue {
+        last unless (--$count > 0);
+    } 
+    if ($fh) {
+        return wantarray?($fh,$lockfile,$errors):$fh
     }
+    return wantarray?(undef,$lockfile,$errors):undef;
 }
 
 # clean up all outstanding locks at end time
@@ -614,6 +689,23 @@ END {
      }
 }
 
+=head2 simple_unlockfile
+
+     simple_unlockfile($fh,$lockfile);
+
+
+=cut
+
+sub simple_unlockfile {
+    my ($fh,$lockfile) = @_;
+    flock($fh,LOCK_UN)
+        or warn "Unable to unlock lockfile $lockfile: $!";
+    close($fh)
+        or warn "Unable to close lockfile $lockfile: $!";
+    unlink($lockfile)
+        or warn "Unable to unlink lockfile $lockfile: $!";
+}
+
 
 =head2 unfilelock
 
@@ -647,12 +739,7 @@ sub unfilelock {
 	delete $locks->{locks}{$lockfile};
     }
     my %fl = %{pop(@filelocks)};
-    flock($fl{fh},LOCK_UN)
-	 or warn "Unable to unlock lockfile $fl{file}: $!";
-    close($fl{fh})
-	 or warn "Unable to close lockfile $fl{file}: $!";
-    unlink($fl{file})
-	 or warn "Unable to unlink lockfile $fl{file}: $!";
+    simple_unlockfile($fl{fh},$fl{file});
 }
 
 
@@ -820,6 +907,10 @@ Will carp if given a scalar which isn't a scalarref or a glob (or
 globref), and return /dev/null. May return undef if IO::Scalar or
 IO::File fails. (Check $!)
 
+The scalar will be filled with octets, not perl's internal encoding, so
+you must call encode_utf8() on it beforehand and decode_utf8() on it
+afterwards. This appears to be a bug in the underlying modules.
+
 =cut
 
 sub globify_scalar {
@@ -829,6 +920,10 @@ sub globify_scalar {
 	  if (defined ref($scalar)) {
 	       if (ref($scalar) eq 'SCALAR' and
 		   not UNIVERSAL::isa($scalar,'GLOB')) {
+                   if (is_utf8(${$scalar})) {
+                       ${$scalar} = decode_utf8(${$scalar});
+                       carp(q(\$scalar must not be in perl's internal encoding));
+                   }
 		    open $handle, '>:scalar:utf8', $scalar;
 		    return $handle;
 	       }
@@ -843,7 +938,7 @@ sub globify_scalar {
 	       carp "Given a non-scalar reference, non-glob to globify_scalar; returning /dev/null handle";
 	  }
      }
-     return IO::File->new('/dev/null','>:utf8');
+     return IO::File->new('/dev/null','>:encoding(UTF-8)');
 }
 
 =head2 cleanup_eval_fail()
@@ -877,7 +972,7 @@ sub cleanup_eval_fail {
     # ditch the "at foo/bar/baz.pm line 5"
     $error =~ s/\sat\s\S+\sline\s\d+//;
     # ditch croak messages
-    $error =~ s/^\t+.+\n?//g;
+    $error =~ s/^\t+.+\n?//mg;
     # ditch trailing multiple periods in case there was a cascade of
     # die messages.
     $error =~ s/\.+$/\./;
@@ -902,147 +997,6 @@ sub hash_slice(\%@) {
 }
 
 
-=head1 UTF-8
-
-These functions are exported with the :utf8 tag
-
-=head2 encode_utf8_structure
-
-     %newdata = encode_utf8_structure(%newdata);
-
-Takes a complex data structure and encodes any strings with is_utf8
-set into their constituent octets.
-
-=cut
-
-our $depth = 0;
-sub encode_utf8_structure {
-    ++$depth;
-    my @ret;
-    for my $_ (@_) {
-	if (ref($_) eq 'HASH') {
-	    push @ret, {encode_utf8_structure(%{$depth == 1 ? dclone($_):$_})};
-	}
-	elsif (ref($_) eq 'ARRAY') {
-	    push @ret, [encode_utf8_structure(@{$depth == 1 ? dclone($_):$_})];
-	}
-	elsif (ref($_)) {
-	    # we don't know how to handle non hash or non arrays
-	    push @ret,$_;
-	}
-	else {
-	    push @ret,encode_utf8_safely($_);
-	}
-    }
-    --$depth;
-    return @ret;
-}
-
-=head2 encode_utf8_safely
-
-     $octets = encode_utf8_safely($string);
-
-Given a $string, returns the octet equivalent of $string if $string is
-in perl's internal encoding; otherwise returns $string.
-
-Silently returns REFs without encoding them. [If you want to deeply
-encode REFs, see encode_utf8_structure.]
-
-=cut
-
-
-sub encode_utf8_safely{
-    my @ret;
-    for my $r (@_) {
-        if (not ref($r) and is_utf8($r)) {
-	    $r = encode_utf8($r);
-	}
-	push @ret,$r;
-    }
-    return wantarray ? @ret : (length @_ > 1 ? @ret : $_[0]);
-}
-
-=head2 convert_to_utf8
-
-    $utf8 = convert_to_utf8("text","charset");
-
-=cut
-
-our %iconv_converters;
-
-sub convert_to_utf8 {
-    my ($data,$charset) = @_;
-    if (is_utf8($data)) {
-        return encode_utf8($data);
-    }
-    $charset = uc($charset);
-    if (not defined $iconv_converters{$charset}) {
-        eval {
-            $iconv_converters{$charset} = Text::Iconv->new($charset,"UTF-8") or
-                die "Unable to create converter for '$charset'";
-        };
-        if ($@) {
-            warn $@;
-            # We weren't able to create the converter, so use Encode
-            # instead
-            return __fallback_convert_to_utf8($data,$charset);
-        }
-        # It shouldn't be necessary when converting to UTF8, but lets
-        # allow for transliteration and silent discarding of broken
-        # sequences
-        eval {
-            $iconv_converters{$charset}->set_attr("transliterate");
-            $iconv_converters{$charset}->set_attr("discard_ilseq");
-        };
-        # This shouldn't fail on Debian systems; we're warning here
-        # just in case we've made a mistake above. This warning should
-        # probably be disabled on non-GNU libc systems.
-        warn $@ if $@;
-    }
-    if (not defined $iconv_converters{$charset}) {
-        warn "The converter for $charset wasn't created properly somehow!";
-        return __fallback_convert_to_utf8($data,$charset);
-    }
-    my $converted_data = $iconv_converters{$charset}->convert($data);
-    # if the conversion failed, retval will be undefined or perhaps
-    # -1.
-    if (not defined $iconv_converters{$charset}->retval() or
-        $iconv_converters{$charset}->retval() < 0
-       ) {
-        # Fallback to encode, which will probably also fail.
-        return __fallback_convert_to_utf8($data,$charset);
-    }
-    return $converted_data;
-}
-
-# Bug #61342 et al.
-# we're switching this to return UTF8 octets instead of perl's internal
-# encoding
-sub __Fallback_convert_to_utf8 {
-     my ($data, $charset) = @_;
-     # raw data just gets returned (that's the charset WordDecorder
-     # uses when it doesn't know what to do)
-     return $data if $charset eq 'raw';
-     if (not defined $charset and not is_utf8($data)) {
-         warn ("Undefined charset, and string '$data' is not in perl's internal encoding");
-         return $data;
-     }
-     # lets assume everything that doesn't have a charset is utf8
-     $charset //= 'utf8';
-     my $result;
-     eval {
-	 $result = decode($charset,$data) unless is_utf8($data);
-         $result = encode_utf8($result);
-     };
-     if ($@) {
-	  warn "Unable to decode charset; '$charset' and '$data': $@";
-	  return $data;
-     }
-     return $result;
-}
-
-
-
 1;
 
 __END__