X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=Debbugs%2FCommon.pm;h=e892d701d411164e742ec47d0dd9242e445e0ce0;hb=07128905acc67b85d91ab2070dc172ac0152905f;hp=eb068edbf4bc589a586989fda55b0ba9678fbacb;hpb=c70c019a5771d994f0022309c2800115ea97163e;p=debbugs.git diff --git a/Debbugs/Common.pm b/Debbugs/Common.pm index eb068ed..e892d70 100644 --- a/Debbugs/Common.pm +++ b/Debbugs/Common.pm @@ -31,7 +31,7 @@ with equivalent (or better) functionality here. use warnings; use strict; use vars qw($VERSION $DEBUG %EXPORT_TAGS @EXPORT_OK @EXPORT); -use base qw(Exporter); +use Exporter qw(import); BEGIN{ $VERSION = 1.00; @@ -45,16 +45,15 @@ BEGIN{ qw(getpseudodesc), qw(package_maintainer), qw(sort_versions), + qw(open_compressed_file), ], misc => [qw(make_list globify_scalar english_join checkpid), qw(cleanup_eval_fail), qw(hash_slice), ], - utf8 => [qw(encode_utf8_structure encode_utf8_safely), - qw(convert_to_utf8)], date => [qw(secs_to_english)], quit => [qw(quit)], - lock => [qw(filelock unfilelock lockpid)], + lock => [qw(filelock unfilelock lockpid simple_filelock simple_unlockfile)], ); @EXPORT_OK = (); Exporter::export_ok_tags(keys %EXPORT_TAGS); @@ -72,13 +71,13 @@ use IO::Scalar; use Debbugs::MIME qw(decode_rfc1522); use Mail::Address; use Cwd qw(cwd); -use Encode qw(encode_utf8 is_utf8 decode); -use Text::Iconv; use Storable qw(dclone); +use Time::HiRes qw(usleep); use Params::Validate qw(validate_with :types); use Fcntl qw(:DEFAULT :flock); +use Encode qw(is_utf8 decode_utf8); our $DEBUG_FH = \*STDERR if not defined $DEBUG_FH; @@ -241,7 +240,40 @@ sub overwritefile { die "Unable to rename ${file}.new to $file: $!"; } +=head2 open_compressed_file + my $fh = open_compressed_file('foo.gz') or + die "Unable to open compressed file: $!"; + + +Opens a file; if the file ends in .gz, .xz, or .bz2, the appropriate +decompression program is forked and output from it is read. + +This routine by default opens the file with UTF-8 encoding; if you want some +other encoding, specify it with the second option. + +=cut +sub open_compressed_file { + my ($file,$encoding) = @_; + $encoding //= ':encoding(UTF-8)'; + my $fh; + my $mode = "<$encoding"; + my @opts; + if ($file =~ /\.gz$/) { + $mode = "-|$encoding"; + push @opts,'gzip','-dc'; + } + if ($file =~ /\.xz$/) { + $mode = "-|$encoding"; + push @opts,'xz','-dc'; + } + if ($file =~ /\.bz2$/) { + $mode = "-|$encoding"; + push @opts,'bzip2','-dc'; + } + open($fh,$mode,@opts,$file); + return $fh; +} @@ -366,7 +398,7 @@ sub package_maintainer { for my $fn (@config{('source_maintainer_file', 'source_maintainer_file_override', 'pseudo_maint_file')}) { - next unless defined $fn; + next unless defined $fn and length $fn; if (not -e $fn) { warn "Missing source maintainer file '$fn'"; next; @@ -382,7 +414,7 @@ sub package_maintainer { for my $fn (@config{('maintainer_file', 'maintainer_file_override', 'pseudo_maint_file')}) { - next unless defined $fn; + next unless defined $fn and length $fn; if (not -e $fn) { warn "Missing maintainer file '$fn'"; next; @@ -433,12 +465,12 @@ sub __add_to_hash { } $type //= 'address'; my $fh = IO::File->new($fn,'r') or - die "Unable to open $fn for reading: $!"; + croak "Unable to open $fn for reading: $!"; binmode($fh,':encoding(UTF-8)'); while (<$fh>) { chomp; - next unless m/^(\S+)\s+(\S.*\S)\s*$/; - my ($key,$value)=($1,$2); + next unless m/^(\S+)\s+(\S.*\S)\s*$/; + my ($key,$value)=($1,$2); $key = lc $key; $forward->{$key}= $value; if (defined $reverse) { @@ -473,7 +505,8 @@ sub getpseudodesc { return $_pseudodesc if defined $_pseudodesc; $_pseudodesc = {}; __add_to_hash($config{pseudo_desc_file},$_pseudodesc) if - defined $config{pseudo_desc_file}; + defined $config{pseudo_desc_file} and + length $config{pseudo_desc_file}; return $_pseudodesc; } @@ -576,35 +609,77 @@ sub filelock { confess "Locking already locked file: $lockfile\n".Data::Dumper->Dump([$lockfile,$locks],[qw(lockfile locks)]); } } - my ($count,$errors); - $count= 10; $errors= ''; - for (;;) { - my $fh = eval { + my ($fh,$t_lockfile,$errors) = + simple_filelock($lockfile,10,1); + if ($fh) { + push @filelocks, {fh => $fh, file => $lockfile}; + if (defined $locks) { + $locks->{locks}{$lockfile}++; + push @{$locks->{lockorder}},$lockfile; + } + } else { + use Data::Dumper; + croak "failed to get lock on $lockfile -- $errors". + (defined $locks?Data::Dumper->Dump([$locks],[qw(locks)]):''); + } +} + +=head2 simple_filelock + + my ($fh,$t_lockfile,$errors) = + simple_filelock($lockfile,$count,$wait); + +Does a flock of lockfile. If C<$count> is zero, does a blocking lock. +Otherwise, does a non-blocking lock C<$count> times, waiting C<$wait> +seconds in between. + +In list context, returns the lockfile filehandle, lockfile name, and +any errors which occured. + +When the lockfile filehandle is undef, locking failed. + +These lockfiles must be unlocked manually at process end. + + +=cut + +sub simple_filelock { + my ($lockfile,$count,$wait) = @_; + if (not defined $count) { + $count = 10; + } + if ($count < 0) { + $count = 0; + } + if (not defined $wait) { + $wait = 1; + } + my $errors= ''; + my $fh; + while (1) { + $fh = eval { my $fh2 = IO::File->new($lockfile,'w') or die "Unable to open $lockfile for writing: $!"; - flock($fh2,LOCK_EX|LOCK_NB) + # Do a blocking lock if count is zero + flock($fh2,LOCK_EX|($count == 0?0:LOCK_NB)) or die "Unable to lock $lockfile $!"; return $fh2; }; if ($@) { $errors .= $@; } - if ($fh) { - push @filelocks, {fh => $fh, file => $lockfile}; - if (defined $locks) { - $locks->{locks}{$lockfile}++; - push @{$locks->{lockorder}},$lockfile; - } - last; - } - if (--$count <=0) { - $errors =~ s/\n+$//; - use Data::Dumper; - croak "failed to get lock on $lockfile -- $errors". - (defined $locks?Data::Dumper->Dump([$locks],[qw(locks)]):''); + if ($fh) { + last; } -# sleep 10; + # use usleep for fractional wait seconds + usleep($wait * 1_000_000); + } continue { + last unless (--$count > 0); + } + if ($fh) { + return wantarray?($fh,$lockfile,$errors):$fh } + return wantarray?(undef,$lockfile,$errors):undef; } # clean up all outstanding locks at end time @@ -614,6 +689,23 @@ END { } } +=head2 simple_unlockfile + + simple_unlockfile($fh,$lockfile); + + +=cut + +sub simple_unlockfile { + my ($fh,$lockfile) = @_; + flock($fh,LOCK_UN) + or warn "Unable to unlock lockfile $lockfile: $!"; + close($fh) + or warn "Unable to close lockfile $lockfile: $!"; + unlink($lockfile) + or warn "Unable to unlink lockfile $lockfile: $!"; +} + =head2 unfilelock @@ -647,12 +739,7 @@ sub unfilelock { delete $locks->{locks}{$lockfile}; } my %fl = %{pop(@filelocks)}; - flock($fl{fh},LOCK_UN) - or warn "Unable to unlock lockfile $fl{file}: $!"; - close($fl{fh}) - or warn "Unable to close lockfile $fl{file}: $!"; - unlink($fl{file}) - or warn "Unable to unlink lockfile $fl{file}: $!"; + simple_unlockfile($fl{fh},$fl{file}); } @@ -820,6 +907,10 @@ Will carp if given a scalar which isn't a scalarref or a glob (or globref), and return /dev/null. May return undef if IO::Scalar or IO::File fails. (Check $!) +The scalar will fill with octets, not perl's internal encoding, so you +must use decode_utf8() after on the scalar, and encode_utf8() on it +before. This appears to be a bug in the underlying modules. + =cut sub globify_scalar { @@ -829,6 +920,10 @@ sub globify_scalar { if (defined ref($scalar)) { if (ref($scalar) eq 'SCALAR' and not UNIVERSAL::isa($scalar,'GLOB')) { + if (is_utf8(${$scalar})) { + ${$scalar} = decode_utf8(${$scalar}); + carp(q(\$scalar must not be in perl's internal encoding)); + } open $handle, '>:scalar:utf8', $scalar; return $handle; } @@ -843,7 +938,7 @@ sub globify_scalar { carp "Given a non-scalar reference, non-glob to globify_scalar; returning /dev/null handle"; } } - return IO::File->new('/dev/null','>:utf8'); + return IO::File->new('/dev/null','>:encoding(UTF-8)'); } =head2 cleanup_eval_fail() @@ -877,7 +972,7 @@ sub cleanup_eval_fail { # ditch the "at foo/bar/baz.pm line 5" $error =~ s/\sat\s\S+\sline\s\d+//; # ditch croak messages - $error =~ s/^\t+.+\n?//g; + $error =~ s/^\t+.+\n?//mg; # ditch trailing multiple periods in case there was a cascade of # die messages. $error =~ s/\.+$/\./; @@ -902,147 +997,6 @@ sub hash_slice(\%@) { } -=head1 UTF-8 - -These functions are exported with the :utf8 tag - -=head2 encode_utf8_structure - - %newdata = encode_utf8_structure(%newdata); - -Takes a complex data structure and encodes any strings with is_utf8 -set into their constituent octets. - -=cut - -our $depth = 0; -sub encode_utf8_structure { - ++$depth; - my @ret; - for my $_ (@_) { - if (ref($_) eq 'HASH') { - push @ret, {encode_utf8_structure(%{$depth == 1 ? dclone($_):$_})}; - } - elsif (ref($_) eq 'ARRAY') { - push @ret, [encode_utf8_structure(@{$depth == 1 ? dclone($_):$_})]; - } - elsif (ref($_)) { - # we don't know how to handle non hash or non arrays - push @ret,$_; - } - else { - push @ret,encode_utf8_safely($_); - } - } - --$depth; - return @ret; -} - -=head2 encode_utf8_safely - - $octets = encode_utf8_safely($string); - -Given a $string, returns the octet equivalent of $string if $string is -in perl's internal encoding; otherwise returns $string. - -Silently returns REFs without encoding them. [If you want to deeply -encode REFs, see encode_utf8_structure.] - -=cut - - -sub encode_utf8_safely{ - my @ret; - for my $r (@_) { - if (not ref($r) and is_utf8($r)) { - $r = encode_utf8($r); - } - push @ret,$r; - } - return wantarray ? @ret : (length @_ > 1 ? @ret : $_[0]); -} - -=head2 convert_to_utf8 - - $utf8 = convert_to_utf8("text","charset"); - -=cut - -our %iconv_converters; - -sub convert_to_utf8 { - my ($data,$charset) = @_; - if (is_utf8($data)) { - return encode_utf8($data); - } - $charset = uc($charset); - if (not defined $iconv_converters{$charset}) { - eval { - $iconv_converters{$charset} = Text::Iconv->new($charset,"UTF-8") or - die "Unable to create converter for '$charset'"; - }; - if ($@) { - warn $@; - # We weren't able to create the converter, so use Encode - # instead - return __fallback_convert_to_utf8($data,$charset); - } - # It shouldn't be necessary when converting to UTF8, but lets - # allow for transliteration and silent discarding of broken - # sequences - eval { - $iconv_converters{$charset}->set_attr("transliterate"); - $iconv_converters{$charset}->set_attr("discard_ilseq"); - }; - # This shouldn't fail on Debian systems; we're warning here - # just in case we've made a mistake above. This warning should - # probably be disabled on non-GNU libc systems. - warn $@ if $@; - } - if (not defined $iconv_converters{$charset}) { - warn "The converter for $charset wasn't created properly somehow!"; - return __fallback_convert_to_utf8($data,$charset); - } - my $converted_data = $iconv_converters{$charset}->convert($data); - # if the conversion failed, retval will be undefined or perhaps - # -1. - if (not defined $iconv_converters{$charset}->retval() or - $iconv_converters{$charset}->retval() < 0 - ) { - # Fallback to encode, which will probably also fail. - return __fallback_convert_to_utf8($data,$charset); - } - return $converted_data; -} - -# Bug #61342 et al. -# we're switching this to return UTF8 octets instead of perl's internal -# encoding -sub __Fallback_convert_to_utf8 { - my ($data, $charset) = @_; - # raw data just gets returned (that's the charset WordDecorder - # uses when it doesn't know what to do) - return $data if $charset eq 'raw'; - if (not defined $charset and not is_utf8($data)) { - warn ("Undefined charset, and string '$data' is not in perl's internal encoding"); - return $data; - } - # lets assume everything that doesn't have a charset is utf8 - $charset //= 'utf8'; - my $result; - eval { - $result = decode($charset,$data) unless is_utf8($data); - $result = encode_utf8($result); - }; - if ($@) { - warn "Unable to decode charset; '$charset' and '$data': $@"; - return $data; - } - return $result; -} - - - 1; __END__