$options = Maasha::Biopieces::parse_options(
[
- { long => 'check', short => 'C', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
- { long => 'use_score', short => 'u', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
- { long => 'use_log10', short => 'l', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
+ { long => 'check', short => 'C', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
+ { long => 'log10', short => 'l', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
]
);
{
$bed_file = $file_hash->{ $chr };
- $fixedstep_file = Maasha::UCSC::Wiggle::fixedstep_calc( $bed_file, $chr, $options->{ 'use_score' }, $options->{ 'use_log10' } );
+ $fixedstep_file = Maasha::UCSC::Wiggle::fixedstep_calc( $bed_file, $chr, $options->{ 'log10' } );
#$fixedstep_file = "$bed_file.fixedstep";
{
( $q_id, $mate ) = split "/", $record->{ 'Q_ID' }, 2;
- if ( $mate == 2 )
+ if ( $mate )
{
- if ( $record->{ 'STRAND' } eq '+' ) {
- $record->{ 'STRAND' } = '-';
- } else {
- $record->{ 'STRAND' } = '+';
+ if ( $mate == 2 )
+ {
+ if ( $record->{ 'STRAND' } eq '+' ) {
+ $record->{ 'STRAND' } = '-';
+ } else {
+ $record->{ 'STRAND' } = '+';
+ }
}
- }
- push @{ $data{ $record->{ 'S_ID' } }{ $record->{ 'STRAND' } }{ $q_id }{ $mate } }, $record;
+ push @{ $data{ $record->{ 'S_ID' } }{ $record->{ 'STRAND' } }{ $q_id }{ $mate } }, $record;
+ }
}
Maasha::Biopieces::put_record( $record, $out );
foreach $mate2 ( @mates2 )
{
$new_record = {
- S_ID => $contig,
- Q_ID1 => $mate1->{ 'Q_ID' },
- Q_ID2 => $mate2->{ 'Q_ID' },
+ S_ID => $contig,
+ Q_ID1 => $mate1->{ 'Q_ID' },
+ Q_ID2 => $mate2->{ 'Q_ID' },
+ S_BEG1 => $mate1->{ 'S_BEG' },
+ S_BEG2 => $mate2->{ 'S_BEG' },
+ S_END1 => $mate1->{ 'S_END' },
+ S_END2 => $mate2->{ 'S_END' },
+ SEQ_LEN1 => $mate1->{ 'SEQ_LEN' },
+ SEQ_LEN2 => $mate2->{ 'SEQ_LEN' },
+ STRAND => $mate1->{ 'STRAND' },
DIST => abs( $mate2->{ 'S_BEG' } - $mate1->{ 'S_BEG' } ),
};
__END__
-
-
-@records = sort { $a->{ 'S_ID' } cmp $b->{ 'S_ID' } or
- $a->{ 'STRAND' } cmp $b->{ 'STRAND' } or
- $a->{ 'Q_ID' } cmp $b->{ 'Q_ID' } or
- $a->{ 'S_BEG' } <=> $b->{ 'S_BEG' } } @records;
-
-print STDERR "done.\n" if $options->{ 'verbose' };
-
-# map { print join( "\t", $_->{ 'S_ID' }, $_->{ 'STRAND' }, $_->{ 'Q_ID' }, $_->{ 'S_BEG' } ), "\n" } @records; # DEBUG
-print Dumper( \@records );
-
-$i = 0;
-
-while ( $i < @records - 1 )
-{
- $j = $i + 1;
-
- while ( $j < @records and
- $records[ $i ]->{ 'S_ID' } eq $records[ $j ]->{ 'S_ID' } and
- $records[ $i ]->{ 'STRAND' } eq $records[ $j ]->{ 'STRAND' }
- )
- {
- ( $id1, $mate1 ) = split "/", $records[ $i ]->{ 'Q_ID' }, 2;
- ( $id2, $mate2 ) = split "/", $records[ $j ]->{ 'Q_ID' }, 2;
-
- print Dumper( $i, $j, $id1, $mate1, $id1, $mate2 );
-
- if ( $id1 eq $id2 and $mate1 == 1 and $mate2 == 2 )
- {
- print "Dist: " . ( $records[ $j ]->{ 'S_BEG' } - $records[ $i ]->{ 'S_BEG' } ) . "\n";
- }
-
- $j++;
- }
-
- $i = $j;
-}
use Data::Dumper;
use Storable qw( dclone );
use Maasha::Common;
+use Maasha::Filesys;
use Maasha::Calc;
use vars qw ( @ISA @EXPORT );
use Exporter;
my ( $elem );
foreach $elem ( @{ $list } ) {
- return 0 if not $elem =~ /^\d+$/; # how about scientific notation ala 123.2312e-03 ?
+ return 0 if not Maasha::Calc::is_a_number( $elem );
}
return 1;
}
+sub list_shrink
+{
+ # Martin A. Hansen, September 2009.
+
+ # Shrinks a list of values to a specified size
+ # and at the same time average the values.
+
+ my ( $list,
+ $new_size,
+ ) = @_;
+
+ # Returns nothing.
+
+ my ( $old_size, $bucket_size, $i, @new_list );
+
+ $old_size = scalar @{ $list };
+
+ Maasha::Common::error( qq(can't shrink to a bigger list: $old_size < $new_size ) ) if $old_size < $new_size;
+
+ $bucket_size = int( $old_size / $new_size );
+
+ for ( $i = 0; $i < $old_size - $bucket_size + 1; $i += $bucket_size ) {
+ push @new_list, Maasha::Calc::mean( [ splice @{ $list }, $i, $bucket_size ] );
+ }
+
+ $list = \@new_list;
+}
+
+
sub list_uniq
{
# Martin A. Hansen, April 2007.
# print "num->$num low->$low high->$high try->$try int1->$matrix->[ $try ]->[ $col1 ] int2->$matrix->[ $try ]->[ $col2 ]\n";
- if ( $num < $matrix->[ $try ]->[ $col1 ] )
- {
+ if ( $num < $matrix->[ $try ]->[ $col1 ] ) {
$high = $try;
- }
- elsif ( $num > $matrix->[ $try ]->[ $col2 ] )
- {
+ } elsif ( $num > $matrix->[ $try ]->[ $col2 ] ) {
$low = $try + 1;
- }
- else
- {
+ } else {
return $try;
}
}
# print "num->$num low->$low high->$high try->$try int->$list->[ $try ]\n";
- if ( $num < $list->[ $try ] )
- {
+ if ( $num < $list->[ $try ] ) {
$high = $try;
- }
- elsif ( $num > $list->[ $try ] )
- {
+ } elsif ( $num > $list->[ $try ] ) {
$low = $try + 1;
- }
- else
- {
+ } else {
return $try;
}
}
$delimiter ||= "\t";
- $fh = Maasha::Common::read_open( $path );
+ $fh = Maasha::Filesys::file_read_open( $path );
while ( $line = <$fh> )
{
my ( $fh, $row );
- $fh = Maasha::Common::write_open( $path ) if $path;
+ $fh = Maasha::Filesys::file_write_open( $path ) if $path;
$delimiter ||= "\t";
$matrix, # data structure
) = @_;
- Maasha::Common::file_store( $path, $matrix );
+ Maasha::Filesys::file_store( $path, $matrix );
}
my ( $path, # full path to file
) = @_;
- my $matrix = Maasha::Common::file_retrieve( $path );
+ my $matrix = Maasha::Filesys::file_retrieve( $path );
return wantarray ? @{ $matrix } : $matrix;
}
my ( $bed_file, # path to BED file
$chr, # chromosome name
- $use_score, # flag indicating that the score field should be used - OPTIONAL
- $use_log10, # flag indicating that the score should be in log10 - OPTIONAL
+ $log10, # flag indicating that the score should be in log10 - OPTIONAL
) = @_;
# Returns a string
$fh_in = Maasha::Filesys::file_read_open( $bed_file );
- $array = fixedstep_calc_array( $fh_in, $use_score );
+ $array = fixedstep_calc_array( $fh_in );
close $fh_in;
for ( $i = $beg; $i < $end; $i++ )
{
- if ( $use_log10 ) {
+ if ( $log10 ) {
push @block, sprintf "%.4f", Maasha::Calc::log10( $array->[ $i ] );
} else {
push @block, $array->[ $i ];
# Martin A. Hansen, November 2008.
# Given a filehandle to a BED file for a single
- # chromosome fills an array with scores based on
- # clone count (or sequence read count) for the
- # begin position through the end position of each
- # BED entry.
+ # chromosome fills an array with SCORE for the
+ # begin/end positions of each BED entry.
- my ( $fh_in, # filehandle to BED file
- $use_score, # flag indicating that the score field should be used - OPTIONAL
+ my ( $fh_in, # filehandle to BED file
) = @_;
- # Returns a bitarray.
+ # Returns a bytearray.
- my ( $bed, $clones, @begs, @lens, $i, @array );
+ my ( $bed, $score, @begs, @lens, $i, @array );
while ( $bed = Maasha::UCSC::BED::bed_entry_get( $fh_in ) )
{
- if ( $use_score ) {
- $clones = $bed->[ score ];
- } elsif ( defined $bed->[ name ] and $bed->[ name ] =~ /_(\d+)$/ ) {
- $clones = $1;
+ if ( defined $bed->[ score ] ) {
+ $score = $bed->[ score ];
} else {
- $clones = 1;
+ $score = 1;
}
if ( $bed->[ blockCount ] and $bed->[ blockCount ] > 1 )
@lens = split ",", $bed->[ blockSizes ];
for ( $i = 0; $i < @begs; $i++ ) {
- map { $array[ $_ ] += $clones } ( $bed->[ chromStart ] + $begs[ $i ] .. $bed->[ chromStart ] + $begs[ $i ] + $lens[ $i ] - 1 );
+ map { $array[ $_ ] += $score } ( $bed->[ chromStart ] + $begs[ $i ] .. $bed->[ chromStart ] + $begs[ $i ] + $lens[ $i ] - 1 );
}
}
else
{
- map { $array[ $_ ] += $clones } ( $bed->[ chromStart ] .. $bed->[ chromEnd ] - 1 );
+ map { $array[ $_ ] += $score } ( $bed->[ chromStart ] .. $bed->[ chromEnd ] - 1 );
}
}