From 85603673e563a97338f0a69656e4fb8158761ebf Mon Sep 17 00:00:00 2001 From: martinahansen Date: Thu, 11 Dec 2008 08:06:42 +0000 Subject: [PATCH] lots of PSL related changes git-svn-id: http://biopieces.googlecode.com/svn/trunk@356 74ccb610-7750-0410-82ae-013aeee3265d --- code_perl/Maasha/UCSC.pm | 121 ++++++++++++++++---------------- code_perl/Maasha/UCSC/BED.pm | 25 +++++-- code_perl/Maasha/UCSC/PSL.pm | 48 ++++++------- code_perl/Maasha/UCSC/Wiggle.pm | 2 +- 4 files changed, 105 insertions(+), 91 deletions(-) diff --git a/code_perl/Maasha/UCSC.pm b/code_perl/Maasha/UCSC.pm index 557053e..179b8f1 100644 --- a/code_perl/Maasha/UCSC.pm +++ b/code_perl/Maasha/UCSC.pm @@ -52,9 +52,9 @@ use constant { STRAND => 5, THICK_BEG => 6, THICK_END => 7, - ITEMRGB => 8, - BLOCKCOUNT => 9, - BLOCKSIZES => 10, + COLOR => 8, + BLOCK_COUNT => 9, + BLOCK_LENS => 10, Q_BEGS => 11, }; @@ -171,18 +171,18 @@ sub bed_get_entry elsif ( $columns == 12 ) { %entry = ( - "CHR" => $fields[ 0 ], - "CHR_BEG" => $fields[ 1 ], - "CHR_END" => $fields[ 2 ] - 1, - "Q_ID" => $fields[ 3 ], - "SCORE" => $fields[ 4 ], - "STRAND" => $fields[ 5 ], - "THICK_BEG" => $fields[ 6 ], - "THICK_END" => $fields[ 7 ] - 1, - "ITEMRGB" => $fields[ 8 ], - "BLOCKCOUNT" => $fields[ 9 ], - "BLOCKSIZES" => $fields[ 10 ], - "Q_BEGS" => $fields[ 11 ], + "CHR" => $fields[ 0 ], + "CHR_BEG" => $fields[ 1 ], + "CHR_END" => $fields[ 2 ] - 1, + "Q_ID" => $fields[ 3 ], + "SCORE" => $fields[ 4 ], + "STRAND" => $fields[ 5 ], + "THICK_BEG" => $fields[ 6 ], + "THICK_END" => $fields[ 7 ] - 1, + "COLOR" => $fields[ 8 ], + "BLOCK_COUNT" => $fields[ 9 ], + "BLOCK_LENS" => $fields[ 10 ], + "Q_BEGS" => $fields[ 11 ], ); } else @@ -322,9 +322,9 @@ sub bed_put_entry push @fields, $record->{ "STRAND" }; push @fields, $record->{ "THICK_BEG" } if defined $record->{ "THICK_BEG" }; push @fields, $record->{ "THICK_END" } + 1 if defined $record->{ "THICK_END" }; - push @fields, $record->{ "ITEMRGB" } if defined $record->{ "ITEMRGB" }; - push @fields, $record->{ "BLOCKCOUNT" } if defined $record->{ "BLOCKCOUNT" }; - push @fields, $record->{ "BLOCKSIZES" } if defined $record->{ "BLOCKSIZES" }; + push @fields, $record->{ "COLOR" } if defined $record->{ "COLOR" }; + push @fields, $record->{ "BLOCK_COUNT" } if defined $record->{ "BLOCK_COUNT" }; + push @fields, $record->{ "BLOCK_LENS" } if defined $record->{ "BLOCK_LENS" }; push @fields, $record->{ "Q_BEGS" } if defined $record->{ "Q_BEGS" }; } @@ -371,12 +371,12 @@ sub bed_analyze $intron_max = 0; $intron_min = 9999999999; - $entry->{ "EXONS" } = $entry->{ "BLOCKCOUNT" }; + $entry->{ "EXONS" } = $entry->{ "BLOCK_COUNT" }; @begs = split /,/, $entry->{ "Q_BEGS" }; - @lens = split /,/, $entry->{ "BLOCKSIZES" }; + @lens = split /,/, $entry->{ "BLOCK_LENS" }; - for ( $i = 0; $i < $entry->{ "BLOCKCOUNT" }; $i++ ) + for ( $i = 0; $i < $entry->{ "BLOCK_COUNT" }; $i++ ) { $exon_len = @lens[ $i ]; @@ -393,12 +393,12 @@ sub bed_analyze $entry->{ "EXON_MIN_LEN" } = $exon_min; $entry->{ "EXON_MEAN_LEN" } = int( $exon_tot / $entry->{ "EXONS" } ); - $entry->{ "INTRONS" } = $entry->{ "BLOCKCOUNT" } - 1; + $entry->{ "INTRONS" } = $entry->{ "BLOCK_COUNT" } - 1; $entry->{ "INTRONS" } = 0 if $entry->{ "INTRONS" } < 0; if ( $entry->{ "INTRONS" } ) { - for ( $i = 1; $i < $entry->{ "BLOCKCOUNT" }; $i++ ) + for ( $i = 1; $i < $entry->{ "BLOCK_COUNT" }; $i++ ) { $intron_len = @begs[ $i ] - ( @begs[ $i - 1 ] + @lens[ $i - 1 ] ); @@ -510,7 +510,7 @@ sub bed_merge_entries if ( exists $entries->[ $i ]->{ "Q_BEGS" } ) { @q_begs = split ",", $entries->[ $i ]->{ "Q_BEGS" }; - @blocksizes = split ",", $entries->[ $i ]->{ "BLOCKSIZES" }; + @blocksizes = split ",", $entries->[ $i ]->{ "BLOCK_LENS" }; } else { @@ -527,21 +527,21 @@ sub bed_merge_entries map { $_ -= $entries->[ 0 ]->{ "CHR_BEG" } } @new_q_begs; %new_entry = ( - CHR => $entries->[ 0 ]->{ "CHR" }, - CHR_BEG => $entries->[ 0 ]->{ "CHR_BEG" }, - CHR_END => $entries->[ -1 ]->{ "CHR_END" }, - REC_TYPE => "BED", - BED_LEN => $entries->[ -1 ]->{ "CHR_END" } - $entries->[ 0 ]->{ "CHR_BEG" } + 1, - BED_COLS => 12, - Q_ID => join( ":", @q_ids ), - SCORE => 999, - STRAND => $entries->[ 0 ]->{ "STRAND" } || "+", - THICK_BEG => $entries->[ 0 ]->{ "THICK_BEG" } || $entries->[ 0 ]->{ "CHR_BEG" }, - THICK_END => $entries->[ -1 ]->{ "THICK_END" } || $entries->[ -1 ]->{ "CHR_END" }, - ITEMRGB => "0,0,0", - BLOCKCOUNT => scalar @new_q_begs, - BLOCKSIZES => join( ",", @new_blocksizes ), - Q_BEGS => join( ",", @new_q_begs ), + CHR => $entries->[ 0 ]->{ "CHR" }, + CHR_BEG => $entries->[ 0 ]->{ "CHR_BEG" }, + CHR_END => $entries->[ -1 ]->{ "CHR_END" }, + REC_TYPE => "BED", + BED_LEN => $entries->[ -1 ]->{ "CHR_END" } - $entries->[ 0 ]->{ "CHR_BEG" } + 1, + BED_COLS => 12, + Q_ID => join( ":", @q_ids ), + SCORE => 999, + STRAND => $entries->[ 0 ]->{ "STRAND" } || "+", + THICK_BEG => $entries->[ 0 ]->{ "THICK_BEG" } || $entries->[ 0 ]->{ "CHR_BEG" }, + THICK_END => $entries->[ -1 ]->{ "THICK_END" } || $entries->[ -1 ]->{ "CHR_END" }, + COLOR => 0, + BLOCK_COUNT => scalar @new_q_begs, + BLOCK_LENS => join( ",", @new_blocksizes ), + Q_BEGS => join( ",", @new_q_begs ), ); return wantarray ? %new_entry : \%new_entry; @@ -562,10 +562,10 @@ sub bed_split_entry my ( @q_begs, @blocksizes, $block, @blocks, $i ); - if ( exists $entry->{ "BLOCKCOUNT" } ) + if ( exists $entry->{ "BLOCK_COUNT" } ) { @q_begs = split ",", $entry->{ "Q_BEGS" }; - @blocksizes = split ",", $entry->{ "BLOCKSIZES" }; + @blocksizes = split ",", $entry->{ "BLOCK_LENS" }; for ( $i = 0; $i < @q_begs; $i++ ) { @@ -726,14 +726,15 @@ sub psl_get_entry S_LEN => $fields[ 14 ], S_BEG => $fields[ 15 ], S_END => $fields[ 16 ] - 1, - BLOCKCOUNT => $fields[ 17 ], - BLOCKSIZES => $fields[ 18 ], + BLOCK_COUNT => $fields[ 17 ], + BLOCK_LENS => $fields[ 18 ], Q_BEGS => $fields[ 19 ], S_BEGS => $fields[ 20 ], ); $record{ "SCORE" } = $record{ "MATCHES" } + int( $record{ "REPMATCHES" } / 2 ) - $record{ "MISMATCHES" } - $record{ "QNUMINSERT" } - $record{ "SNUMINSERT" }; - $record{ "SPAN" } = $fields[ 12 ] - $fields[ 11 ]; + $record{ "Q_SPAN" } = $fields[ 12 ] - $fields[ 11 ]; + $record{ "S_SPAN" } = $fields[ 16 ] - $fields[ 15 ]; return wantarray ? %record : \%record; } @@ -791,8 +792,8 @@ sub psl_get_entries S_LEN => $fields[ 14 ], S_BEG => $fields[ 15 ], S_END => $fields[ 16 ] - 1, - BLOCKCOUNT => $fields[ 17 ], - BLOCKSIZES => $fields[ 18 ], + BLOCK_COUNT => $fields[ 17 ], + BLOCK_LENS => $fields[ 18 ], Q_BEGS => $fields[ 19 ], S_BEGS => $fields[ 20 ], ); @@ -859,8 +860,8 @@ sub psl_put_entry push @output, $record->{ "S_LEN" }; push @output, $record->{ "S_BEG" }; push @output, $record->{ "S_END" } + 1; - push @output, $record->{ "BLOCKCOUNT" }; - push @output, $record->{ "BLOCKSIZES" }; + push @output, $record->{ "BLOCK_COUNT" }; + push @output, $record->{ "BLOCK_LENS" }; push @output, $record->{ "Q_BEGS" }; push @output, $record->{ "S_BEGS" }; @@ -1446,18 +1447,18 @@ sub phastcons_parse_entry $lens[ -1 ]++; push @entries, { - CHR => $super_block->[ 0 ]->{ "CHR" }, - CHR_BEG => $super_block->[ 0 ]->{ "CHR_BEG" }, - CHR_END => $super_block->[ -1 ]->{ "CHR_END" }, - Q_ID => "Q_ID", - SCORE => 100, - STRAND => "+", - THICK_BEG => $super_block->[ 0 ]->{ "CHR_BEG" }, - THICK_END => $super_block->[ -1 ]->{ "CHR_END" } + 1, - ITEMRGB => "0,200,100", - BLOCKCOUNT => scalar @{ $super_block }, - BLOCKSIZES => join( ",", @lens ), - Q_BEGS => join( ",", @begs ), + CHR => $super_block->[ 0 ]->{ "CHR" }, + CHR_BEG => $super_block->[ 0 ]->{ "CHR_BEG" }, + CHR_END => $super_block->[ -1 ]->{ "CHR_END" }, + Q_ID => "Q_ID", + SCORE => 100, + STRAND => "+", + THICK_BEG => $super_block->[ 0 ]->{ "CHR_BEG" }, + THICK_END => $super_block->[ -1 ]->{ "CHR_END" } + 1, + COLOR => 0, + BLOCK_COUNT => scalar @{ $super_block }, + BLOCK_LENS => join( ",", @lens ), + Q_BEGS => join( ",", @begs ), }; undef @begs; diff --git a/code_perl/Maasha/UCSC/BED.pm b/code_perl/Maasha/UCSC/BED.pm index 2fde7c9..b259086 100644 --- a/code_perl/Maasha/UCSC/BED.pm +++ b/code_perl/Maasha/UCSC/BED.pm @@ -471,7 +471,7 @@ sub biopiece2bed # Returns arrayref. - my ( @bed_entry ); + my ( @bed_entry, @begs ); $cols ||= 12; # max number of columns possible @@ -525,18 +525,31 @@ sub biopiece2bed $bed_entry[ thickStart ] = $bp_record->{ "THICK_BEG" }; $bed_entry[ thickEnd ] = $bp_record->{ "THICK_END" }; } + else + { + $bed_entry[ thickStart ] = $bed_entry[ chromStart ]; + $bed_entry[ thickEnd ] = $bed_entry[ chromEnd ] - 1; + } return wantarray ? @bed_entry : \@bed_entry if $cols == 8; - if ( exists $bp_record->{ "COLOR" } ) { - $bed_entry[ itemRgb ] = $bp_record->{ "COLOR" }; - } else { - return wantarray ? @bed_entry : \@bed_entry; - } + $bed_entry[ itemRgb ] = $bp_record->{ "COLOR" } || 0; return wantarray ? @bed_entry : \@bed_entry if $cols == 9; if ( defined $bp_record->{ "BLOCK_COUNT" } and + defined $bp_record->{ "BLOCK_LENS" } and + defined $bp_record->{ "S_BEGS" } ) + { + @begs = split ",", $bp_record->{ "S_BEGS" }; + map { $_ -= $bed_entry[ chromStart ] } @begs; + + $bed_entry[ blockCount ] = $bp_record->{ "BLOCK_COUNT" }; + $bed_entry[ blockSizes ] = $bp_record->{ "BLOCK_LENS" }; + $bed_entry[ blockStarts ] = join ",", @begs; + $bed_entry[ thickEnd ]++; + } + elsif ( defined $bp_record->{ "BLOCK_COUNT" } and defined $bp_record->{ "BLOCK_LENS" } and defined $bp_record->{ "Q_BEGS" } ) { diff --git a/code_perl/Maasha/UCSC/PSL.pm b/code_perl/Maasha/UCSC/PSL.pm index 4c64565..a45006e 100644 --- a/code_perl/Maasha/UCSC/PSL.pm +++ b/code_perl/Maasha/UCSC/PSL.pm @@ -84,8 +84,8 @@ use constant { S_LEN => 14, S_BEG => 15, S_END => 16, - BLOCKCOUNT => 17, - BLOCKSIZES => 18, + BLOCK_COUNT => 17, + BLOCK_LENS => 18, Q_BEGS => 19, S_BEGS => 20, }; @@ -134,28 +134,28 @@ sub psl2record my %record; %record = ( - REC_TYPE => "PSL", - BLOCKSIZES => $psl->{ 'blockSize' }, - SNUMINSERT => $psl->{ 'tNumInsert' }, - Q_END => $psl->{ 'qEnd' }, - SBASEINSERT => $psl->{ 'tBaseInsert' }, - S_END => $psl->{ 'tEnd' }, - QBASEINSERT => $psl->{ 'qBaseInsert' }, - REPMATCHES => $psl->{ 'repMatches' }, - QNUMINSERT => $psl->{ 'qNumInsert' }, - MISMATCHES => $psl->{ 'misMatches' }, - BLOCKCOUNT => $psl->{ 'blockCount' }, - Q_LEN => $psl->{ 'qSize' }, - S_ID => $psl->{ 'tName' }, - STRAND => $psl->{ 'strand' }, - Q_ID => $psl->{ 'qName' }, - MATCHES => $psl->{ 'matches' }, - S_LEN => $psl->{ 'tSize' }, - NCOUNT => $psl->{ 'nCount' }, - Q_BEGS => $psl->{ 'qStarts' }, - S_BEGS => $psl->{ 'tStarts' }, - S_BEG => $psl->{ 'tStart' }, - Q_BEG => $psl->{ 'qStart ' }, + REC_TYPE => "PSL", + BLOCK_LENS => $psl->{ 'blockSize' }, + SNUMINSERT => $psl->{ 'tNumInsert' }, + Q_END => $psl->{ 'qEnd' }, + SBASEINSERT => $psl->{ 'tBaseInsert' }, + S_END => $psl->{ 'tEnd' }, + QBASEINSERT => $psl->{ 'qBaseInsert' }, + REPMATCHES => $psl->{ 'repMatches' }, + QNUMINSERT => $psl->{ 'qNumInsert' }, + MISMATCHES => $psl->{ 'misMatches' }, + BLOCK_COUNT => $psl->{ 'blockCount' }, + Q_LEN => $psl->{ 'qSize' }, + S_ID => $psl->{ 'tName' }, + STRAND => $psl->{ 'strand' }, + Q_ID => $psl->{ 'qName' }, + MATCHES => $psl->{ 'matches' }, + S_LEN => $psl->{ 'tSize' }, + NCOUNT => $psl->{ 'nCount' }, + Q_BEGS => $psl->{ 'qStarts' }, + S_BEGS => $psl->{ 'tStarts' }, + S_BEG => $psl->{ 'tStart' }, + Q_BEG => $psl->{ 'qStart ' }, ); $record{ "SCORE" } = $record{ "MATCHES" } + int( $record{ "REPMATCHES" } / 2 ) - $record{ "MISMATCHES" } - $record{ "QNUMINSERT" } - $record{ "SNUMINSERT" }; diff --git a/code_perl/Maasha/UCSC/Wiggle.pm b/code_perl/Maasha/UCSC/Wiggle.pm index b8131bf..6103a49 100644 --- a/code_perl/Maasha/UCSC/Wiggle.pm +++ b/code_perl/Maasha/UCSC/Wiggle.pm @@ -275,7 +275,7 @@ sub fixedstep_calc_array @lens = split ",", $bed->[ blockSizes ]; for ( $i = 0; $i < @begs; $i++ ) { - map { $array[ $_ ] += $clones } ( $begs[ $i ] .. $begs[ $i ] + $lens[ $i ] - 1 ); + map { $array[ $_ ] += $clones } ( $bed->[ chromStart ] + $begs[ $i ] .. $bed->[ chromStart ] + $begs[ $i ] + $lens[ $i ] - 1 ); } } else -- 2.39.5