From: martinahansen Date: Mon, 21 Sep 2009 07:21:16 +0000 (+0000) Subject: cleaned up calc_fixedstep X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=1f312e4c394bca417990ee3b20353853ab1203cd;p=biopieces.git cleaned up calc_fixedstep git-svn-id: http://biopieces.googlecode.com/svn/trunk@679 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/calc_fixedstep b/bp_bin/calc_fixedstep index 17c3d0d..f39fd20 100755 --- a/bp_bin/calc_fixedstep +++ b/bp_bin/calc_fixedstep @@ -42,9 +42,8 @@ my ( $options, $in, $out, $record, $tmp_dir, $bed_file, $fh_in, $fh_out, $options = Maasha::Biopieces::parse_options( [ - { long => 'check', short => 'C', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'use_score', short => 'u', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'use_log10', short => 'l', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'check', short => 'C', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'log10', short => 'l', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, ] ); @@ -75,7 +74,7 @@ foreach $chr ( sort keys %{ $file_hash } ) { $bed_file = $file_hash->{ $chr }; - $fixedstep_file = Maasha::UCSC::Wiggle::fixedstep_calc( $bed_file, $chr, $options->{ 'use_score' }, $options->{ 'use_log10' } ); + $fixedstep_file = Maasha::UCSC::Wiggle::fixedstep_calc( $bed_file, $chr, $options->{ 'log10' } ); #$fixedstep_file = "$bed_file.fixedstep"; diff --git a/bp_bin/mate_pair_dist b/bp_bin/mate_pair_dist index d652679..0475f7d 100755 --- a/bp_bin/mate_pair_dist +++ b/bp_bin/mate_pair_dist @@ -52,16 +52,19 @@ while ( $record = Maasha::Biopieces::get_record( $in ) ) { ( $q_id, $mate ) = split "/", $record->{ 'Q_ID' }, 2; - if ( $mate == 2 ) + if ( $mate ) { - if ( $record->{ 'STRAND' } eq '+' ) { - $record->{ 'STRAND' } = '-'; - } else { - $record->{ 'STRAND' } = '+'; + if ( $mate == 2 ) + { + if ( $record->{ 'STRAND' } eq '+' ) { + $record->{ 'STRAND' } = '-'; + } else { + $record->{ 'STRAND' } = '+'; + } } - } - push @{ $data{ $record->{ 'S_ID' } }{ $record->{ 'STRAND' } }{ $q_id }{ $mate } }, $record; + push @{ $data{ $record->{ 'S_ID' } }{ $record->{ 'STRAND' } }{ $q_id }{ $mate } }, $record; + } } Maasha::Biopieces::put_record( $record, $out ); @@ -89,9 +92,16 @@ foreach $contig ( keys %data ) foreach $mate2 ( @mates2 ) { $new_record = { - S_ID => $contig, - Q_ID1 => $mate1->{ 'Q_ID' }, - Q_ID2 => $mate2->{ 'Q_ID' }, + S_ID => $contig, + Q_ID1 => $mate1->{ 'Q_ID' }, + Q_ID2 => $mate2->{ 'Q_ID' }, + S_BEG1 => $mate1->{ 'S_BEG' }, + S_BEG2 => $mate2->{ 'S_BEG' }, + S_END1 => $mate1->{ 'S_END' }, + S_END2 => $mate2->{ 'S_END' }, + SEQ_LEN1 => $mate1->{ 'SEQ_LEN' }, + SEQ_LEN2 => $mate2->{ 'SEQ_LEN' }, + STRAND => $mate1->{ 'STRAND' }, DIST => abs( $mate2->{ 'S_BEG' } - $mate1->{ 'S_BEG' } ), }; @@ -125,41 +135,3 @@ END __END__ - - -@records = sort { $a->{ 'S_ID' } cmp $b->{ 'S_ID' } or - $a->{ 'STRAND' } cmp $b->{ 'STRAND' } or - $a->{ 'Q_ID' } cmp $b->{ 'Q_ID' } or - $a->{ 'S_BEG' } <=> $b->{ 'S_BEG' } } @records; - -print STDERR "done.\n" if $options->{ 'verbose' }; - -# map { print join( "\t", $_->{ 'S_ID' }, $_->{ 'STRAND' }, $_->{ 'Q_ID' }, $_->{ 'S_BEG' } ), "\n" } @records; # DEBUG -print Dumper( \@records ); - -$i = 0; - -while ( $i < @records - 1 ) -{ - $j = $i + 1; - - while ( $j < @records and - $records[ $i ]->{ 'S_ID' } eq $records[ $j ]->{ 'S_ID' } and - $records[ $i ]->{ 'STRAND' } eq $records[ $j ]->{ 'STRAND' } - ) - { - ( $id1, $mate1 ) = split "/", $records[ $i ]->{ 'Q_ID' }, 2; - ( $id2, $mate2 ) = split "/", $records[ $j ]->{ 'Q_ID' }, 2; - - print Dumper( $i, $j, $id1, $mate1, $id1, $mate2 ); - - if ( $id1 eq $id2 and $mate1 == 1 and $mate2 == 2 ) - { - print "Dist: " . ( $records[ $j ]->{ 'S_BEG' } - $records[ $i ]->{ 'S_BEG' } ) . "\n"; - } - - $j++; - } - - $i = $j; -} diff --git a/code_perl/Maasha/Matrix.pm b/code_perl/Maasha/Matrix.pm index 641a9c2..4a29c4d 100644 --- a/code_perl/Maasha/Matrix.pm +++ b/code_perl/Maasha/Matrix.pm @@ -33,6 +33,7 @@ use strict; use Data::Dumper; use Storable qw( dclone ); use Maasha::Common; +use Maasha::Filesys; use Maasha::Calc; use vars qw ( @ISA @EXPORT ); use Exporter; @@ -838,7 +839,7 @@ sub list_check_numeric my ( $elem ); foreach $elem ( @{ $list } ) { - return 0 if not $elem =~ /^\d+$/; # how about scientific notation ala 123.2312e-03 ? + return 0 if not Maasha::Calc::is_a_number( $elem ); } return 1; @@ -905,6 +906,35 @@ sub list_check_sort } +sub list_shrink +{ + # Martin A. Hansen, September 2009. + + # Shrinks a list of values to a specified size + # and at the same time average the values. + + my ( $list, + $new_size, + ) = @_; + + # Returns nothing. + + my ( $old_size, $bucket_size, $i, @new_list ); + + $old_size = scalar @{ $list }; + + Maasha::Common::error( qq(can't shrink to a bigger list: $old_size < $new_size ) ) if $old_size < $new_size; + + $bucket_size = int( $old_size / $new_size ); + + for ( $i = 0; $i < $old_size - $bucket_size + 1; $i += $bucket_size ) { + push @new_list, Maasha::Calc::mean( [ splice @{ $list }, $i, $bucket_size ] ); + } + + $list = \@new_list; +} + + sub list_uniq { # Martin A. Hansen, April 2007. @@ -1076,16 +1106,11 @@ sub interval_search # print "num->$num low->$low high->$high try->$try int1->$matrix->[ $try ]->[ $col1 ] int2->$matrix->[ $try ]->[ $col2 ]\n"; - if ( $num < $matrix->[ $try ]->[ $col1 ] ) - { + if ( $num < $matrix->[ $try ]->[ $col1 ] ) { $high = $try; - } - elsif ( $num > $matrix->[ $try ]->[ $col2 ] ) - { + } elsif ( $num > $matrix->[ $try ]->[ $col2 ] ) { $low = $try + 1; - } - else - { + } else { return $try; } } @@ -1119,16 +1144,11 @@ sub list_search # print "num->$num low->$low high->$high try->$try int->$list->[ $try ]\n"; - if ( $num < $list->[ $try ] ) - { + if ( $num < $list->[ $try ] ) { $high = $try; - } - elsif ( $num > $list->[ $try ] ) - { + } elsif ( $num > $list->[ $try ] ) { $low = $try + 1; - } - else - { + } else { return $try; } } @@ -1159,7 +1179,7 @@ sub matrix_read $delimiter ||= "\t"; - $fh = Maasha::Common::read_open( $path ); + $fh = Maasha::Filesys::file_read_open( $path ); while ( $line = <$fh> ) { @@ -1193,7 +1213,7 @@ sub matrix_write my ( $fh, $row ); - $fh = Maasha::Common::write_open( $path ) if $path; + $fh = Maasha::Filesys::file_write_open( $path ) if $path; $delimiter ||= "\t"; @@ -1220,7 +1240,7 @@ sub matrix_store $matrix, # data structure ) = @_; - Maasha::Common::file_store( $path, $matrix ); + Maasha::Filesys::file_store( $path, $matrix ); } @@ -1233,7 +1253,7 @@ sub matrix_retrive my ( $path, # full path to file ) = @_; - my $matrix = Maasha::Common::file_retrieve( $path ); + my $matrix = Maasha::Filesys::file_retrieve( $path ); return wantarray ? @{ $matrix } : $matrix; } diff --git a/code_perl/Maasha/UCSC/Wiggle.pm b/code_perl/Maasha/UCSC/Wiggle.pm index b9a9dd9..374f9f2 100644 --- a/code_perl/Maasha/UCSC/Wiggle.pm +++ b/code_perl/Maasha/UCSC/Wiggle.pm @@ -198,8 +198,7 @@ sub fixedstep_calc my ( $bed_file, # path to BED file $chr, # chromosome name - $use_score, # flag indicating that the score field should be used - OPTIONAL - $use_log10, # flag indicating that the score should be in log10 - OPTIONAL + $log10, # flag indicating that the score should be in log10 - OPTIONAL ) = @_; # Returns a string @@ -210,7 +209,7 @@ sub fixedstep_calc $fh_in = Maasha::Filesys::file_read_open( $bed_file ); - $array = fixedstep_calc_array( $fh_in, $use_score ); + $array = fixedstep_calc_array( $fh_in ); close $fh_in; @@ -224,7 +223,7 @@ sub fixedstep_calc for ( $i = $beg; $i < $end; $i++ ) { - if ( $use_log10 ) { + if ( $log10 ) { push @block, sprintf "%.4f", Maasha::Calc::log10( $array->[ $i ] ); } else { push @block, $array->[ $i ]; @@ -249,27 +248,22 @@ sub fixedstep_calc_array # Martin A. Hansen, November 2008. # Given a filehandle to a BED file for a single - # chromosome fills an array with scores based on - # clone count (or sequence read count) for the - # begin position through the end position of each - # BED entry. + # chromosome fills an array with SCORE for the + # begin/end positions of each BED entry. - my ( $fh_in, # filehandle to BED file - $use_score, # flag indicating that the score field should be used - OPTIONAL + my ( $fh_in, # filehandle to BED file ) = @_; - # Returns a bitarray. + # Returns a bytearray. - my ( $bed, $clones, @begs, @lens, $i, @array ); + my ( $bed, $score, @begs, @lens, $i, @array ); while ( $bed = Maasha::UCSC::BED::bed_entry_get( $fh_in ) ) { - if ( $use_score ) { - $clones = $bed->[ score ]; - } elsif ( defined $bed->[ name ] and $bed->[ name ] =~ /_(\d+)$/ ) { - $clones = $1; + if ( defined $bed->[ score ] ) { + $score = $bed->[ score ]; } else { - $clones = 1; + $score = 1; } if ( $bed->[ blockCount ] and $bed->[ blockCount ] > 1 ) @@ -278,12 +272,12 @@ sub fixedstep_calc_array @lens = split ",", $bed->[ blockSizes ]; for ( $i = 0; $i < @begs; $i++ ) { - map { $array[ $_ ] += $clones } ( $bed->[ chromStart ] + $begs[ $i ] .. $bed->[ chromStart ] + $begs[ $i ] + $lens[ $i ] - 1 ); + map { $array[ $_ ] += $score } ( $bed->[ chromStart ] + $begs[ $i ] .. $bed->[ chromStart ] + $begs[ $i ] + $lens[ $i ] - 1 ); } } else { - map { $array[ $_ ] += $clones } ( $bed->[ chromStart ] .. $bed->[ chromEnd ] - 1 ); + map { $array[ $_ ] += $score } ( $bed->[ chromStart ] .. $bed->[ chromEnd ] - 1 ); } }