From: martinahansen Date: Tue, 2 Jun 2009 15:14:14 +0000 (+0000) Subject: 3 more again X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=b3ff97ce02fe6ec5c1f81c42f3759c7c85c8e3f4;p=biopieces.git 3 more again git-svn-id: http://biopieces.googlecode.com/svn/trunk@467 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/read_psl b/bp_bin/read_psl index fdf5bd2..e2aa437 100755 --- a/bp_bin/read_psl +++ b/bp_bin/read_psl @@ -1,6 +1,97 @@ -#!/usr/bin/env perl +#!/usr/bin/env perl -w + +# Copyright (C) 2007-2009 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Read PSL data (BLAT's native output) from one or more files. Records on the input stream are passed through first; --num caps the number of PSL entries emitted. 
+ +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + -use warnings; use strict; +use Maasha::Biopieces; +use Maasha::Filesys; +use Maasha::UCSC::PSL; + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +my ( $run_time_beg, $run_time_end, $options, $in, $out, $record, $data_in, $num ); + +$options = Maasha::Biopieces::parse_options( + [ + { long => 'data_in', short => 'i', type => 'files!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'num', short => 'n', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => '0' }, + ] +); + +$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); +$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); + +while ( $record = Maasha::Biopieces::get_record( $in ) ) { + Maasha::Biopieces::put_record( $record, $out ); +} + +if ( $options->{ 'data_in' } ) +{ + $data_in = Maasha::Filesys::files_read_open( $options->{ 'data_in' } ); + + $num = 1; + + while ( $record = Maasha::UCSC::PSL::psl_get_entry( $data_in ) ) + { + Maasha::Biopieces::put_record( $record, $out ); + + last if $options->{ "num" } and $num == $options->{ "num" }; + + $num++; + } + + close $data_in; +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +BEGIN +{ + $run_time_beg = Maasha::Biopieces::run_time(); + + Maasha::Biopieces::log_biopiece(); +} + +END +{ + Maasha::Biopieces::close_stream( $in ); + Maasha::Biopieces::close_stream( $out ); + + $run_time_end = Maasha::Biopieces::run_time(); + + Maasha::Biopieces::run_time_print( $run_time_beg, $run_time_end, $options ); +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + -use Maasha::BioRun; +__END__ diff --git a/bp_bin/read_soft b/bp_bin/read_soft index fdf5bd2..f83aab7 100755 --- a/bp_bin/read_soft +++ b/bp_bin/read_soft @@ 
-1,6 +1,131 @@ -#!/usr/bin/env perl +#!/usr/bin/env perl -w + +# Copyright (C) 2007-2009 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Read SOFT entries from one or more files, optionally restricted to the samples given with --samples. 
+# http://www.ncbi.nlm.nih.gov/geo/info/soft2.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + -use warnings; use strict; +use Maasha::Biopieces; +use Maasha::Filesys; +use Maasha::NCBI; + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +my ( $run_time_beg, $run_time_end, $options, $in, $out, $record, $num, $records, $soft_index, + $fh, @platforms, $plat_table, @samples, $sample, $old_end, $skip, $file ); + +$options = Maasha::Biopieces::parse_options( + [ + { long => 'data_in', short => 'i', type => 'files!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'samples', short => 's', type => 'list', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'num', short => 'n', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => '0' }, + ] +); + +$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); +$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); + +while ( $record = Maasha::Biopieces::get_record( $in ) ) { + Maasha::Biopieces::put_record( $record, $out ); +} + +$num = 1; + +foreach $file ( @{ $options->{ "data_in" } } ) +{ + print STDERR "Creating index for file: $file\n" if $options->{ "verbose" }; + + $soft_index = Maasha::NCBI::soft_index_file( $file ); + + $fh = Maasha::Filesys::file_read_open( $file ); + + @platforms = grep { $_->{ "SECTION" } =~ /PLATFORM/ } @{ $soft_index }; + + print STDERR "Getting platform tables for file: $file\n" if $options->{ "verbose" }; + + $plat_table = Maasha::NCBI::soft_get_platform( $fh, $platforms[ 0 ]->{ "LINE_BEG" }, $platforms[ -1 ]->{ "LINE_END" } ); + + @samples = grep { $_->{ "SECTION" } =~ /SAMPLE/ } @{ $soft_index }; + + $old_end = $platforms[ -1 ]->{ "LINE_END" }; + + foreach $sample ( @samples ) + { + # Skip this sample unless its SECTION matches at least one of the --samples patterns. + $skip = 0; + $skip = 1 if ( $options->{ "samples" } and not grep 
{ $sample->{ "SECTION" } =~ /$_/ } @{ $options->{ "samples" } } ); + + print STDERR "Getting samples for dataset: $sample->{ 'SECTION' }\n" if $options->{ "verbose" } and not $skip; + + $records = Maasha::NCBI::soft_get_sample( $fh, $plat_table, $sample->{ "LINE_BEG" } - $old_end - 1, $sample->{ "LINE_END" } - $old_end - 1, $skip ); + + foreach $record ( @{ $records } ) + { + Maasha::Biopieces::put_record( $record, $out ); + + goto NUM if $options->{ "num" } and $num == $options->{ "num" }; + + $num++; + } + + $old_end = $sample->{ "LINE_END" }; + } + + close $fh; +} + +NUM: + +close $fh if $fh; + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +BEGIN +{ + $run_time_beg = Maasha::Biopieces::run_time(); + + Maasha::Biopieces::log_biopiece(); +} + +END +{ + Maasha::Biopieces::close_stream( $in ); + Maasha::Biopieces::close_stream( $out ); + + $run_time_end = Maasha::Biopieces::run_time(); + + Maasha::Biopieces::run_time_print( $run_time_beg, $run_time_end, $options ); +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + -use Maasha::BioRun; +__END__ diff --git a/bp_bin/read_solexa b/bp_bin/read_solexa index fdf5bd2..34fd6e9 100755 --- a/bp_bin/read_solexa +++ b/bp_bin/read_solexa @@ -1,6 +1,117 @@ -#!/usr/bin/env perl +#!/usr/bin/env perl -w + +# Copyright (C) 2007-2009 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+ +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Read Solexa entries from one or more files. --format selects the octal or decimal entry parser; --quality is forwarded to solexa2biopiece. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + -use warnings; use strict; +use Maasha::Biopieces; +use Maasha::Filesys; +use Maasha::Solexa; + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +my ( $run_time_beg, $run_time_end, $options, $in, $out, $record, $data_in, $num, $entry ); + +$options = Maasha::Biopieces::parse_options( + [ + { long => 'data_in', short => 'i', type => 'files!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'num', short => 'n', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => '0' }, + { long => 'format', short => 'f', type => 'string', mandatory => 'no', default => 'octal', allowed => 'octal,decimal', disallowed => undef }, + { long => 'quality', short => 'q', type => 'uint', mandatory => 'no', default => 20, allowed => undef, disallowed => undef }, + ] +); + +$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); +$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); + +while ( $record = Maasha::Biopieces::get_record( $in ) ) { + Maasha::Biopieces::put_record( $record, $out ); +} + +if ( $options->{ 'data_in' } ) +{ + $data_in = Maasha::Filesys::files_read_open( $options->{ 'data_in' } ); + + $num = 1; + + if ( $options->{ "format" } eq "octal" ) + { + while ( $entry = Maasha::Solexa::solexa_get_entry_octal( $data_in ) ) + { + $record = Maasha::Solexa::solexa2biopiece( $entry, $options->{ "quality" } ); + + 
Maasha::Biopieces::put_record( $record, $out ); + + last if $options->{ "num" } and $num == $options->{ "num" }; + + $num++; + } + } + else + { + while ( $entry = Maasha::Solexa::solexa_get_entry_decimal( $data_in ) ) + { + $record = Maasha::Solexa::solexa2biopiece( $entry, $options->{ "quality" } ); + + Maasha::Biopieces::put_record( $record, $out ); + + last if $options->{ "num" } and $num == $options->{ "num" }; + + $num++; + } + } + + close $data_in; +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +BEGIN +{ + $run_time_beg = Maasha::Biopieces::run_time(); + + Maasha::Biopieces::log_biopiece(); +} + +END +{ + Maasha::Biopieces::close_stream( $in ); + Maasha::Biopieces::close_stream( $out ); + + $run_time_end = Maasha::Biopieces::run_time(); + + Maasha::Biopieces::run_time_print( $run_time_beg, $run_time_end, $options ); +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + -use Maasha::BioRun; +__END__ diff --git a/code_perl/Maasha/BioRun.pm b/code_perl/Maasha/BioRun.pm index 7dbd694..dbc5e8b 100644 --- a/code_perl/Maasha/BioRun.pm +++ b/code_perl/Maasha/BioRun.pm @@ -111,11 +111,8 @@ sub run_script $out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); if ( $script eq "print_usage" ) { script_print_usage( $in, $out, $options ) } - elsif ( $script eq "read_psl" ) { script_read_psl( $in, $out, $options ) } elsif ( $script eq "read_stockholm" ) { script_read_stockholm( $in, $out, $options ) } elsif ( $script eq "read_phastcons" ) { script_read_phastcons( $in, $out, $options ) } - elsif ( $script eq "read_soft" ) { script_read_soft( $in, $out, $options ) } - elsif ( $script eq "read_solexa" ) { script_read_solexa( $in, $out, $options ) } elsif ( $script eq "read_solid" ) { script_read_solid( $in, $out, $options ) } elsif ( $script eq "read_mysql" ) { script_read_mysql( $in, $out, $options ) } elsif ( $script eq "uppercase_seq" ) { 
script_uppercase_seq( $in, $out, $options ) } @@ -155,13 +152,6 @@ sub get_options data_in|i=s ); } - elsif ( $script eq "read_psl" ) - { - @options = qw( - data_in|i=s - num|n=s - ); - } elsif ( $script eq "read_stockholm" ) { @options = qw( @@ -180,23 +170,6 @@ sub get_options gap|g=s ); } - elsif ( $script eq "read_soft" ) - { - @options = qw( - data_in|i=s - samples|s=s - num|n=s - ); - } - elsif ( $script eq "read_solexa" ) - { - @options = qw( - data_in|i=s - num|n=s - format|f=s - quality|q=s - ); - } elsif ( $script eq "read_solid" ) { @options = qw( @@ -380,10 +353,6 @@ sub get_options { Maasha::Common::error( qq(Argument to --$opt must be AandB, AorB, BorA, AnotB, or BnotA - not "$options{ $opt }") ); } - elsif ( $opt eq "format" and $script eq "read_solexa" and $options{ $opt } !~ /octal|decimal/ ) - { - Maasha::Common::error( qq(Argument to --$opt must be octal or decimal - not "$options{ $opt }") ); - } } Maasha::Common::error( qq(no --database specified) ) if $script eq "remove_ucsc_tracks" and not $options{ "database" }; @@ -442,45 +411,6 @@ sub script_print_usage } -sub script_read_psl -{ - # Martin A. Hansen, August 2007. - - # Read psl table from stream or file. - - my ( $in, # handle to in stream - $out, # handle to out stream - $options, # options hash - ) = @_; - - # Returns nothing. - - my ( $record, $file, $data_in, $num ); - - while ( $record = Maasha::Biopieces::get_record( $in ) ) { - Maasha::Biopieces::put_record( $record, $out ); - } - - $num = 1; - - foreach $file ( @{ $options->{ "files" } } ) - { - $data_in = Maasha::Common::read_open( $file ); - - while ( $record = Maasha::UCSC::psl_get_entry( $data_in ) ) - { - Maasha::Biopieces::put_record( $record, $out ); - - goto NUM if $options->{ "num" } and $num == $options->{ "num" }; - - $num++; - } - } - - NUM: -} - - sub script_read_stockholm { # Martin A. Hansen, August 2007. @@ -602,141 +532,6 @@ sub script_read_phastcons } -sub script_read_soft -{ - # Martin A. Hansen, December 2007. 
- - # Read soft format. - # http://www.ncbi.nlm.nih.gov/geo/info/soft2.html - - my ( $in, # handle to in stream - $out, # handle to out stream - $options, # options hash - ) = @_; - - # Returns nothing. - - my ( $data_in, $file, $num, $records, $record, $soft_index, $fh, @platforms, $plat_table, @samples, $sample, $old_end, $skip ); - - while ( $record = Maasha::Biopieces::get_record( $in ) ) { - Maasha::Biopieces::put_record( $record, $out ); - } - - $num = 1; - - foreach $file ( @{ $options->{ "files" } } ) - { - print STDERR "Creating index for file: $file\n" if $options->{ "verbose" }; - - $soft_index = Maasha::NCBI::soft_index_file( $file ); - - $fh = Maasha::Common::read_open( $file ); - - @platforms = grep { $_->{ "SECTION" } =~ /PLATFORM/ } @{ $soft_index }; - - print STDERR "Getting platform tables for file: $file\n" if $options->{ "verbose" }; - - $plat_table = Maasha::NCBI::soft_get_platform( $fh, $platforms[ 0 ]->{ "LINE_BEG" }, $platforms[ -1 ]->{ "LINE_END" } ); - - @samples = grep { $_->{ "SECTION" } =~ /SAMPLE/ } @{ $soft_index }; - - $old_end = $platforms[ -1 ]->{ "LINE_END" }; - - foreach $sample ( @samples ) - { - $skip = 0; - $skip = 1 if ( $options->{ "samples" } and grep { $sample->{ "SECTION" } !~ /$_/ } @{ $options->{ "samples" } } ); - - print STDERR "Getting samples for dataset: $sample->{ 'SECTION' }\n" if $options->{ "verbose" } and not $skip; - - $records = Maasha::NCBI::soft_get_sample( $fh, $plat_table, $sample->{ "LINE_BEG" } - $old_end - 1, $sample->{ "LINE_END" } - $old_end - 1, $skip ); - - foreach $record ( @{ $records } ) - { - Maasha::Biopieces::put_record( $record, $out ); - - goto NUM if $options->{ "num" } and $num == $options->{ "num" }; - - $num++; - } - - $old_end = $sample->{ "LINE_END" }; - } - - close $fh; - } - - NUM: - - close $data_in if $data_in; - close $fh if $fh; -} - - -sub script_read_solexa -{ - # Martin A. Hansen, March 2008. - - # Read Solexa sequence reads from file. 
- - my ( $in, # handle to in stream - $out, # handle to out stream - $options, # options hash - ) = @_; - - # Returns nothing. - - my ( $record, $file, $data_in, $entry, $num, @seqs, @scores, $i ); - - $options->{ "format" } ||= "octal"; - $options->{ "quality" } ||= 20; - - while ( $record = Maasha::Biopieces::get_record( $in ) ) { - Maasha::Biopieces::put_record( $record, $out ); - } - - $num = 1; - - foreach $file ( @{ $options->{ "files" } } ) - { - $data_in = Maasha::Common::read_open( $file ); - - if ( $options->{ "format" } eq "octal" ) - { - while ( $entry = Maasha::Solexa::solexa_get_entry_octal( $data_in ) ) - { - $record = Maasha::Solexa::solexa2biopiece( $entry, $options->{ "quality" } ); - - Maasha::Biopieces::put_record( $record, $out ); - - goto NUM if $options->{ "num" } and $num == $options->{ "num" }; - - $num++; - } - } - else - { - while ( $entry = Maasha::Solexa::solexa_get_entry_decimal( $data_in ) ) - { - $record = Maasha::Solexa::solexa2biopiece( $entry, $options->{ "quality" } ); - - Maasha::Biopieces::put_record( $record, $out ); - - goto NUM if $options->{ "num" } and $num == $options->{ "num" }; - - $num++; - } - } - - close $data_in; - } - - NUM: - - close $data_in if $data_in; -} - - sub script_read_solid { # Martin A. Hansen, April 2008. diff --git a/code_perl/Maasha/UCSC.pm b/code_perl/Maasha/UCSC.pm index e4c275a..7445807 100644 --- a/code_perl/Maasha/UCSC.pm +++ b/code_perl/Maasha/UCSC.pm @@ -683,130 +683,6 @@ CREATE TABLE $table ( } -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PSL format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - -sub psl_get_entry -{ - # Martin A. Hansen, August 2008. - - # Reads PSL next entry from a PSL file and returns a record. - - my ( $fh, # file handle of PSL filefull path to PSL file - ) = @_; - - # Returns hashref. 
- - my ( $line, @fields, %record ); - - while ( $line = <$fh> ) - { - chomp $line; - - @fields = split "\t", $line; - - if ( scalar @fields == 21 ) - { - %record = ( - REC_TYPE => "PSL", - MATCHES => $fields[ 0 ], - MISMATCHES => $fields[ 1 ], - REPMATCHES => $fields[ 2 ], - NCOUNT => $fields[ 3 ], - QNUMINSERT => $fields[ 4 ], - QBASEINSERT => $fields[ 5 ], - SNUMINSERT => $fields[ 6 ], - SBASEINSERT => $fields[ 7 ], - STRAND => $fields[ 8 ], - Q_ID => $fields[ 9 ], - Q_LEN => $fields[ 10 ], - Q_BEG => $fields[ 11 ], - Q_END => $fields[ 12 ] - 1, - S_ID => $fields[ 13 ], - S_LEN => $fields[ 14 ], - S_BEG => $fields[ 15 ], - S_END => $fields[ 16 ] - 1, - BLOCK_COUNT => $fields[ 17 ], - BLOCK_LENS => $fields[ 18 ], - Q_BEGS => $fields[ 19 ], - S_BEGS => $fields[ 20 ], - ); - - $record{ "SCORE" } = $record{ "MATCHES" } + int( $record{ "REPMATCHES" } / 2 ) - $record{ "MISMATCHES" } - $record{ "QNUMINSERT" } - $record{ "SNUMINSERT" }; - $record{ "Q_SPAN" } = $fields[ 12 ] - $fields[ 11 ]; - $record{ "S_SPAN" } = $fields[ 16 ] - $fields[ 15 ]; - - return wantarray ? %record : \%record; - } - } - - return undef; -} - - -sub psl_get_entries -{ - # Martin A. Hansen, February 2008. - - # Reads PSL entries and returns a list of records. - - my ( $path, # full path to PSL file - ) = @_; - - # Returns hashref. 
- - my ( $fh, @lines, @fields, $i, %record, @records ); - - $fh = Maasha::Common::read_open( $path ); - - @lines = <$fh>; - - close $fh; - - chomp @lines; - - for ( $i = 5; $i < @lines; $i++ ) - { - @fields = split "\t", $lines[ $i ]; - - Maasha::Common::error( qq(Bad PSL format in file "$path") ) if not @fields == 21; - - undef %record; - - %record = ( - REC_TYPE => "PSL", - MATCHES => $fields[ 0 ], - MISMATCHES => $fields[ 1 ], - REPMATCHES => $fields[ 2 ], - NCOUNT => $fields[ 3 ], - QNUMINSERT => $fields[ 4 ], - QBASEINSERT => $fields[ 5 ], - SNUMINSERT => $fields[ 6 ], - SBASEINSERT => $fields[ 7 ], - STRAND => $fields[ 8 ], - Q_ID => $fields[ 9 ], - Q_LEN => $fields[ 10 ], - Q_BEG => $fields[ 11 ], - Q_END => $fields[ 12 ] - 1, - S_ID => $fields[ 13 ], - S_LEN => $fields[ 14 ], - S_BEG => $fields[ 15 ], - S_END => $fields[ 16 ] - 1, - BLOCK_COUNT => $fields[ 17 ], - BLOCK_LENS => $fields[ 18 ], - Q_BEGS => $fields[ 19 ], - S_BEGS => $fields[ 20 ], - ); - - $record{ "SCORE" } = $record{ "MATCHES" } + int( $record{ "REPMATCHES" } / 2 ) - $record{ "MISMATCHES" } - $record{ "QNUMINSERT" } - $record{ "SNUMINSERT" }; - - push @records, { %record }; - } - - return wantarray ? @records : \@records; -} - - sub psl_put_header { # Martin A. Hansen, September 2007. diff --git a/code_perl/Maasha/UCSC/PSL.pm b/code_perl/Maasha/UCSC/PSL.pm index 4dfaf84..75dfb59 100644 --- a/code_perl/Maasha/UCSC/PSL.pm +++ b/code_perl/Maasha/UCSC/PSL.pm @@ -189,8 +189,6 @@ sub psl_calc_score } - - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<