int i = 0;
- for ( i + 0; i < strlen( scores ); i++ ) {
+ for ( i = 0; i < strlen( scores ); i++ ) {
scores[ i ] = solexa2phred( scores[ i ] );
}
}
int i = 0;
- for ( i + 0; i < strlen( scores ); i++ ) {
+ for ( i = 0; i < strlen( scores ); i++ ) {
scores[ i ] = phred2solexa( scores[ i ] );
}
}
+double phred_str_mean( char *scores )
+{
+ /* Martin A. Hansen, November 2009 */
+
+ /* Calculates the mean score as a float which is retuned. */
+
+ int len = 0;
+ int i = 0;
+ int sum = 0;
+ double mean = 0.0;
+
+ len = strlen( scores );
+
+ for ( i = 0; i < len; i++ ) {
+ sum += phred2dec( scores[ i ] );
+ }
+
+ mean = ( double ) sum / ( double ) len;
+
+ return mean;
+}
+
+
void softmask_solexa_str( char *seq, char *scores, int threshold )
{
/* Martin A. Hansen, July 2009 */
use warnings;
use strict;
use Maasha::Common;
+use Maasha::Fastq;
use Data::Dumper;
-use vars qw ( @ISA @EXPORT );
+use vars qw( @ISA @EXPORT );
@ISA = qw( Exporter );
ISIZE => 8,
SEQ => 9,
QUAL => 10,
- TAG => 11,
- VTYPE => 12,
- VALUE => 13,
};
# Returns a hashref.
- my ( $record );
+ my ( $record, $i, $tag, $vtype, $value );
+
+ return if $entry->[ RNAME ] eq '*';
$record->{ 'REC_TYPE' } = 'SAM';
$record->{ 'Q_ID' } = $entry->[ QNAME ];
$record->{ 'ISIZE' } = $entry->[ ISIZE ];
$record->{ 'SEQ' } = $entry->[ SEQ ];
$record->{ 'SCORES' } = $entry->[ QUAL ];
- $record->{ 'TAG' } = $entry->[ TAG ];
- $record->{ 'VTYPE' } = $entry->[ VTYPE ];
- $record->{ 'VALUE' } = $entry->[ VALUE ];
+ $record->{ 'SCORE' } = sprintf( "%.2f", Maasha::Fastq::phred_str_mean( $entry->[ QUAL ] ) );
$record->{ 'S_BEG' } -= 1 if $record->{ 'S_BEG' } != 0;
$record->{ 'S_BEG' } -= 1 if $record->{ 'S_BEG2' } != 0;
$record->{ 'READ_FAIL' } = $record->{ 'FLAG' } & 0x0200;
$record->{ 'READ_BAD' } = $record->{ 'FLAG' } & 0x0400;
+
+ # extra fields - (hate hate hate SAM!!!)
+
+ for ( $i = QUAL + 1; $i < @{ $entry }; $i++ )
+ {
+ ( $tag, $vtype, $value ) = split /:/, $entry->[ $i ];
+
+ if ( $tag eq "MD" or $tag eq "NM" ) {
+ $record->{ $tag } = $value;
+ }
+ }
+
+ if ( defined $record->{ "NM" } and $record->{ "NM" } > 0 ) {
+ $record->{ "ALIGN" } = sam2align( $record->{ 'SEQ' }, $record->{ 'CIGAR' }, $record->{ 'NM' }, $record->{ 'MD' } );
+ } else {
+ $record->{ "ALIGN" } = ".";
+ }
+
return wantarray ? %{ $record } : $record;
}
+sub sam2align
+{
+ my ( $seq,
+ $cigar,
+ $nm,
+ $md,
+ ) = @_;
+
+ # Returns a string.
+
+ my ( $offset, $len, $op, $i, $nt, $nt2, @align, $aln_str );
+
+ $offset = 0;
+
+ while ( length( $cigar ) > 0 )
+ {
+ if ( $cigar =~ s/^(\d+)(\w)// )
+ {
+ $len = $1;
+ $op = $2;
+
+ if ( $op eq 'I' )
+ {
+ for ( $i = 0; $i < $len; $i++ )
+ {
+ $nt = substr $seq, $offset + $i, 1;
+
+ push @align, [ $offset + $i, "->$nt" ];
+
+ $nm--;
+ }
+ }
+
+ $offset += $len;
+ }
+ }
+
+ $offset = 0;
+
+ while ( $nm > 0 )
+ {
+ if ( $md =~ s/^(\d+)// )
+ {
+ $offset += $1;
+ }
+ elsif ( $md =~ s/^(\w)// )
+ {
+ $nt = $1;
+
+ $nt2 = substr $seq, $offset, 1;
+
+ push @align, [ $offset, "$nt>$nt2" ];
+
+ $offset++;
+
+ $nm--;
+ }
+ elsif( $md =~ s/^\^// )
+ {
+ $nt = substr $seq, $offset, 1;
+
+ push @align, [ $offset, "$nt>-" ];
+
+ $offset++;
+
+ $nm--;
+ }
+ else
+ {
+ Maasha::Common::error( qq(Bad SAM field -> MD: "$md") );
+ }
+ }
+
+
+ $aln_str = "";
+
+ @align = sort { $a->[ 0 ] <=> $b->[ 0 ] } @align;
+
+ map { $aln_str .= "$_->[ 0 ]:$_->[ 1 ]," } @align;
+
+ $aln_str =~ s/,$//;
+
+ return $aln_str;
+}
+
+
sub biopiece2sam
{