# Routines for parsing and emitting KISS records.
+# http://code.google.com/p/biopieces/wiki/KissFormat
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
BLOCK_TYPE => 11,
};
-# 0 1 2
-# 012345678901234567890
-# --------------------- S.aur complete genome
-# -===__===- TAG_000001
-# 0123456789
-#
-# S_ID = 'S.aur complete genome'
-# S_BEG = 3
-# S_END = 12
-# Q_ID = 'TAG_000001'
-# SCORE => 1
-# STRAND => +
-# HITS => 31
-# ALIGN => 0:A>T,3:G>C
-# BLOCK_COUNT => 2
-# BLOCK_BEGS => 1,6
-# BLOCK_LENS => 3,3
-# BLOCK_TYPE => 1,1
-#
-#
-# 'S.aur complete genome' 3 12 'TAG_000001' 1 + 31 2 1,6 3,3 1,1
-
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# Returns a string.
- my ( $align, $pos, $s_symbol, $q_symbol, $s_seq, $q_seq );
+ my ( $align, $pos, $s_symbol, $q_symbol, $s_seq, $q_seq, $insertions );
$s_seq = substr ${ $s_seqref }, $entry->{ 'S_BEG' }, $entry->{ 'S_END' } - $entry->{ 'S_BEG' } + 1;
$q_seq = $s_seq;
+ $insertions = 0;
+
foreach $align ( split /,/, $entry->{ 'ALIGN' } )
{
if ( $align =~ /(\d+):(.)>(.)/ )
if ( $s_symbol eq '-' ) # insertion
{
- substr $s_seq, $pos, 0, $s_symbol;
- substr $q_seq, $pos, 0, $q_symbol;
+ substr $s_seq, $pos + $insertions, 0, $s_symbol;
+ substr $q_seq, $pos + $insertions, 0, $q_symbol;
+
+ $insertions++;
}
elsif ( $q_symbol eq '-' ) # deletion
{
- substr $q_seq, $pos, 1, $q_symbol;
+ substr $q_seq, $pos + $insertions, 1, $q_symbol;
}
else # mismatch
{
- substr $q_seq, $pos, 1, $q_symbol;
+ substr $q_seq, $pos + $insertions, 1, $q_symbol;
}
}
else