From 872cc26398670984c4e80fcad564a550290674a3 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Wed, 4 Nov 2009 08:06:24 +0000 Subject: [PATCH] fixed details in bowtie_seq git-svn-id: http://biopieces.googlecode.com/svn/trunk@743 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/bowtie_seq | 19 ++++++-- code_perl/Maasha/KISS/IO.pm | 94 ++++++++++++++++++++++++++++++++++++- 2 files changed, 108 insertions(+), 5 deletions(-) diff --git a/bp_bin/bowtie_seq b/bp_bin/bowtie_seq index 6c1659b..44d09ae 100755 --- a/bp_bin/bowtie_seq +++ b/bp_bin/bowtie_seq @@ -28,6 +28,7 @@ use warnings; use strict; +use Data::Dumper; use Maasha::Biopieces; use Maasha::Common; use Maasha::Fastq; @@ -150,16 +151,28 @@ sub bowtie2biopiece # Returns a hash. - my ( $record, @scores ); + my ( $record, $s_id, $s_len, $hits ); + + # S_ID: contig00005 length=75232 numreads=13115 + + if ( $entry->[ 2 ] =~ /^(.+) length=(\d+)\s+numreads=(\d+)$/ ) + { + $record->{ 'S_ID' } = $1; + # $record->{ 'S_LEN' } = $2; + $record->{ 'HITS' } = $3; + } + else + { + Maasha::Common::error( qq(BAD S_ID: "$entry->[ 2 ]") ); + } $record->{ 'Q_ID' } = $entry->[ 0 ]; $record->{ 'STRAND' } = $entry->[ 1 ]; - $record->{ 'S_ID' } = $entry->[ 2 ]; $record->{ 'S_BEG' } = $entry->[ 3 ]; $record->{ 'SEQ' } = $entry->[ 4 ]; $record->{ 'SCORES' } = $entry->[ 5 ]; $record->{ 'SCORE' } = $entry->[ 6 ] + 1; - $record->{ 'DESCRIPTOR' } = $entry->[ 7 ]; + $record->{ 'ALIGN' } = $entry->[ 7 ] || '.'; $record->{ 'S_LEN' } = length $entry->[ 4 ]; $record->{ 'SEQ_LEN' } = length $entry->[ 4 ]; $record->{ 'S_END' } = $record->{ 'S_BEG' } + $record->{ 'SEQ_LEN' } - 1; diff --git a/code_perl/Maasha/KISS/IO.pm b/code_perl/Maasha/KISS/IO.pm index 4c3ac14..a24f696 100644 --- a/code_perl/Maasha/KISS/IO.pm +++ b/code_perl/Maasha/KISS/IO.pm @@ -256,8 +256,6 @@ sub kiss_index_search { $try = int( ( $high + $low ) / 2 ); - # print "low: $low high: $high try: $try\n"; - if ( $num < $index->[ $try ]->[ 0 ] ) { $high = $try; } elsif ( $num > $index->[ $try ]->[ 1 ] ) { @@ -301,6 +299,98 @@ sub kiss_index_get } +sub kiss_align +{ + # S.aur_COL 41 75 5_vnlh2ywXsN1/1 1 - . 17:A>T,31:A>N . . . . + + my ( $s_seqref, # subject sequence reference + $entry, # KISS entry + ) = @_; + + # Returns a string. + + my ( $align, $pos, $s_symbol, $q_symbol, $s_seq, $q_seq ); + + $s_seq = substr ${ $s_seqref }, $entry->{ 'S_BEG' }, $entry->{ 'S_END' } - $entry->{ 'S_BEG' } + 1; + $q_seq = $s_seq; + + foreach $align ( split /,/, $entry->{ 'ALIGN' } ) + { + if ( $align =~ /(\d+):(.)>(.)/ ) + { + $pos = $1; + $s_symbol = $2; + $q_symbol = $3; + + if ( $s_symbol eq '-' ) # insertion + { + substr $s_seq, $pos, 0, $s_symbol; + substr $q_seq, $pos, 0, $q_symbol; + } + elsif ( $q_symbol eq '-' ) # deletion + { + substr $q_seq, $pos, 1, $q_symbol; + } + else # mismatch + { + substr $q_seq, $pos, 1, $q_symbol; + } + } + else + { + Maasha::Common::error( qq(Bad alignment descriptor: "$align") ); + } + } + + print Dumper( "s_seq: $s_seq", "q_seq: $q_seq" ) and exit; +} + + +sub sam2kiss +{ + my ( $sam_entry, # SAM entry + ) = @_; + + # Returns a hashref + + my ( $cigar, $offset, $op, $len, @descriptors, $kiss_entry ); + + $cigar = $sam_entry->{ 'CIGAR' }; + + $cigar =~ tr/\*//d; + + $offset = 0; + + while ( length $cigar > 0 ) + { + if ( $cigar =~ s/^([MINDSHP])(\d+)// ) + { + $op = $1; + $len = $2; + + print "CIGAR: $cigar OP: $op LEN: $len\n"; + + if ( $op eq 'I' ) + { + + } + elsif ( $op eq 'D' ) + { + + } + + $offset += $len; + } + else + { + Maasha::Common::error( qq(Bad CIGAR format: "$cigar") ); + } + } + + return wantarray ? %{ $kiss_entry } : $kiss_entry; +} + + sub kiss2biopiece { my ( $entry, # KISS entry -- 2.39.2