use warnings;
use strict;
+use Data::Dumper;
use Maasha::Biopieces;
use Maasha::Common;
use Maasha::Fastq;
# Returns a hash.
- my ( $record, @scores );
+ my ( $record, $s_id, $s_len, $hits );
+
+ # S_ID: contig00005 length=75232 numreads=13115
+
+ if ( $entry->[ 2 ] =~ /^(.+) length=(\d+)\s+numreads=(\d+)$/ )
+ {
+ $record->{ 'S_ID' } = $1;
+ # $record->{ 'S_LEN' } = $2;
+ $record->{ 'HITS' } = $3;
+ }
+ else
+ {
+ Maasha::Common::error( qq(BAD S_ID: "$entry->[ 2 ]") );
+ }
$record->{ 'Q_ID' } = $entry->[ 0 ];
$record->{ 'STRAND' } = $entry->[ 1 ];
- $record->{ 'S_ID' } = $entry->[ 2 ];
$record->{ 'S_BEG' } = $entry->[ 3 ];
$record->{ 'SEQ' } = $entry->[ 4 ];
$record->{ 'SCORES' } = $entry->[ 5 ];
$record->{ 'SCORE' } = $entry->[ 6 ] + 1;
- $record->{ 'DESCRIPTOR' } = $entry->[ 7 ];
+ $record->{ 'ALIGN' } = $entry->[ 7 ] || '.';
$record->{ 'S_LEN' } = length $entry->[ 4 ];
$record->{ 'SEQ_LEN' } = length $entry->[ 4 ];
$record->{ 'S_END' } = $record->{ 'S_BEG' } + $record->{ 'SEQ_LEN' } - 1;
{
$try = int( ( $high + $low ) / 2 );
- # print "low: $low high: $high try: $try\n";
-
if ( $num < $index->[ $try ]->[ 0 ] ) {
$high = $try;
} elsif ( $num > $index->[ $try ]->[ 1 ] ) {
}
+sub kiss_align
+{
+ # S.aur_COL 41 75 5_vnlh2ywXsN1/1 1 - . 17:A>T,31:A>N . . . .
+
+ my ( $s_seqref, # subject sequence reference
+ $entry, # KISS entry
+ ) = @_;
+
+ # Returns a string.
+
+ my ( $align, $pos, $s_symbol, $q_symbol, $s_seq, $q_seq );
+
+ $s_seq = substr ${ $s_seqref }, $entry->{ 'S_BEG' }, $entry->{ 'S_END' } - $entry->{ 'S_BEG' } + 1;
+ $q_seq = $s_seq;
+
+ foreach $align ( split /,/, $entry->{ 'ALIGN' } )
+ {
+ if ( $align =~ /(\d+):(.)>(.)/ )
+ {
+ $pos = $1;
+ $s_symbol = $2;
+ $q_symbol = $3;
+
+ if ( $s_symbol eq '-' ) # insertion
+ {
+ substr $s_seq, $pos, 0, $s_symbol;
+ substr $q_seq, $pos, 0, $q_symbol;
+ }
+ elsif ( $q_symbol eq '-' ) # deletion
+ {
+ substr $q_seq, $pos, 1, $q_symbol;
+ }
+ else # mismatch
+ {
+ substr $q_seq, $pos, 1, $q_symbol;
+ }
+ }
+ else
+ {
+ Maasha::Common::error( qq(Bad alignment descriptor: "$align") );
+ }
+ }
+
+ print Dumper( "s_seq: $s_seq", "q_seq: $q_seq" ) and exit;
+}
+
+
+sub sam2kiss
+{
+ my ( $sam_entry, # SAM entry
+ ) = @_;
+
+ # Returns a hashref
+
+ my ( $cigar, $offset, $op, $len, @descriptors, $kiss_entry );
+
+ $cigar = $sam_entry->{ 'CIGAR' };
+
+ $cigar =~ tr/\*//d;
+
+ $offset = 0;
+
+ while ( length $cigar > 0 )
+ {
+ if ( $cigar =~ s/^([MINDSHP])(\d+)// )
+ {
+ $op = $1;
+ $len = $2;
+
+ print "CIGAR: $cigar OP: $op LEN: $len\n";
+
+ if ( $op eq 'I' )
+ {
+
+ }
+ elsif ( $op eq 'D' )
+ {
+
+ }
+
+ $offset += $len;
+ }
+ else
+ {
+ Maasha::Common::error( qq(Bad CIGAR format: "$cigar") );
+ }
+ }
+
+ return wantarray ? %{ $kiss_entry } : $kiss_entry;
+}
+
+
sub kiss2biopiece
{
my ( $entry, # KISS entry