# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+use warnings;
use strict;
+use POSIX qw( islower );
+use Maasha::Common;
use Data::Dumper;
use IPC::Open2;
use List::Util qw( shuffle );
}
if ( $count = $check_seq =~ tr/FLPQIEflpqie// and $count > 0 ) {
- return "protein";
+ return "PROTEIN";
} elsif ( $count = $check_seq =~ tr/Uu// and $count > 0 ) {
- return "rna";
+ return "RNA";
} else {
- return "dna";
+ return "DNA";
}
}
if ( $frame =~ /-?[1-3]/ )
{
if ( $frame < 0 ) {
- $dna = &Maasha::Seq::dna_revcomp( $dna );
+ $dna = Maasha::Seq::dna_revcomp( $dna );
}
$frame = abs( $frame ) - 1;
}
else
{
- &Maasha::Common::error( qq(Badly formated frame "$frame") );
+ Maasha::Common::error( qq(Badly formated frame "$frame") );
}
$pos = 0;
{
last if not length $codon == 3;
- $pep .= &codon2aa( $codon );
+ $pep .= codon2aa( $codon );
$pos += 3;
}
$pid = open2( $fh_out, $fh_in, "RNAfold -noPS" );
- &Maasha::Fasta::put_entry( [ "RNAfold", $seq ], $fh_in );
+ Maasha::Fasta::put_entry( [ "RNAfold", $seq ], $fh_in );
close $fh_in;
$out_file1 = "$tmp_dir/fold.out1";
$out_file2 = "$tmp_dir/fold.out2";
- &Maasha::Fasta::put_entries( [ [ "fold", $seq ] ], $tmp_file );
+ Maasha::Fasta::put_entries( [ [ "fold", $seq ] ], $tmp_file );
- &Maasha::Common::run( "contrafold", "predict --parens $out_file1 --bpseq $out_file2 $tmp_file" );
+ Maasha::Common::run( "contrafold", "predict --parens $out_file1 --bpseq $out_file2 $tmp_file" );
unlink $tmp_file;
- $fh = &Maasha::Common::read_open( $out_file1 );
+ $fh = Maasha::Common::read_open( $out_file1 );
while ( $line = <$fh> )
{
unlink $out_file1;
- $fh = &Maasha::Common::read_open( $out_file2 );
+ $fh = Maasha::Common::read_open( $out_file2 );
while ( $line = <$fh> )
{
{
last if $AoA[ $i ]->[ 0 ] > $AoA[ $i ]->[ 2 ];
- $temp += &base_pair_melting_temp( $AoA[ $i ]->[ 1 ] . $AoA[ $AoA[ $i ]->[ 2 ] - 1 ]->[ 1 ] );
+ $temp += base_pair_melting_temp( $AoA[ $i ]->[ 1 ] . $AoA[ $AoA[ $i ]->[ 2 ] - 1 ]->[ 1 ] );
}
}
# Generates all possible DNA oligos of a given wordsize.
- # alternative way: perl -MData::Dumper -e '@CONV = glob( "{T,C,A,G}" x 4 ); print Dumper( \@CONV )'
+ # alternative way: perl -MData::Dumper -e '@CONV = glob( "{A,T,C,G}" x 4 ); print Dumper( \@CONV )'
my ( $wordsize, # size of DNA oligos
$alph, # sequence alphabet
) = @_;
- # returns string
+ # Returns a string
my ( $alph_len, $i, $seq );
}
+sub seq_insert
+{
+ # Martin A. Hansen, June 2009.
+
+ # Randomly duplicates a given number of residues in a given sequence.
+
+ my ( $seq, # sequence to mutate
+ $insertions, # number of residues to insert
+ ) = @_;
+
+ # Returns a string.
+
+ my ( $i, $pos );
+
+ for ( $i = 0; $i < $insertions; $i++ )
+ {
+ $pos = int( rand( length $seq ) );
+
+ substr $seq, $pos, 0, substr $seq , $pos, 1;
+ }
+
+ return $seq;
+}
+
+
+sub seq_delete
+{
+ # Martin A. Hansen, June 2009.
+
+ # Randomly deletes a given number of residues from a given sequence.
+
+ my ( $seq, # sequence to mutate
+ $deletions, # number of residues to delete
+ ) = @_;
+
+ # Returns a string.
+
+ my ( $i, $pos );
+
+ for ( $i = 0; $i < $deletions; $i++ )
+ {
+ $pos = int( rand( length $seq ) );
+
+ substr $seq, $pos, 1, '';
+ }
+
+ return $seq;
+}
+
+
+sub seq_mutate
+{
+ # Martin A. Hansen, June 2009.
+
+ # Introduces a given number of random mutations in a
+ # given sequence of a specified alphabet.
+
+ my ( $seq, # sequence to mutate
+ $mutations, # number of mutations
+ $alph, # alphabet of sequence
+ ) = @_;
+
+ # Returns a string.
+
+ my ( $i, $pos, %lookup_hash );
+
+ $i = 0;
+
+ while ( $i < $mutations )
+ {
+ $pos = int( rand( length $seq ) );
+
+ if ( not exists $lookup_hash{ $pos } )
+ {
+ substr $seq, $pos, 1, res_mutate( substr( $seq , $pos, 1 ), $alph );
+
+ $lookup_hash{ $pos } = 1;
+
+ $i++;
+ }
+ }
+
+ return $seq;
+}
+
+
+sub res_mutate
+{
+ # Martin A. Hansen, June 2009.
+
+ # Mutates a given residue to another from a given alphabet.
+
+ my ( $res, # residue to mutate
+ $alph, # alphabet
+ ) = @_;
+
+ # Returns a char.
+
+ my ( $alph_len, $new );
+
+ $alph_len = scalar @{ $alph };
+ $new = $res;
+
+ while ( uc $new eq uc $res ) {
+ $new = $alph->[ int( rand( $alph_len ) ) ];
+ }
+
+ return POSIX::islower( $res ) ? lc $new : uc $new;
+}
+
+
sub seq_shuffle
{
# Martin A. Hansen, December 2007.
my ( $seq, # sequence string
) = @_;
- # Returns string.
+ # Returns a string.
my ( @list );
{
# Martin A. Hansen, May 2007.
- # returns a requested alphabet
+ # Returns a requested sequence alphabet.
my ( $type, # alphabet type
) = @_;
# returns list
- my ( @alph );
+ my ( %alph_hash, $alph );
+
+ %alph_hash = (
+ DNA => [ qw(A T C G) ],
+ DNA_AMBI => [ qw(A G C U T R Y W S M K H D V B N) ],
+ RNA => [ qw(A U C G) ],
+ RNA_AMBI => [ qw(A G C U T R Y W S M K H D V B N) ],
+ PROTEIN => [ qw(F L S Y C W P H Q R I M T N K V A D E G) ],
+ );
- if ( $type =~ /^dna$/i ) {
- @alph = qw( A T C G );
- } elsif ( $type =~ /^rna$/i ) {
- @alph = qw( A U C G );
- } elsif ( $type =~ /^prot/i ) {
- @alph = qw( F L S Y C W P H Q R I M T N K V A D E G );
+ if ( exists $alph_hash{ uc $type } ) {
+ $alph = $alph_hash{ uc $type };
} else {
die qq(ERROR: Unknown alphabet type: "$type"\n);
}
- return wantarray ? @alph : \@alph;
+ return wantarray ? @{ $alph } : $alph;
}
my ( %analysis, @chars, @chars_lc, $char, %char_hash, $gc, $at, $lc, $max, $res_sum, @indels, %indel_hash );
- $analysis{ "SEQ_TYPE" } = uc &Maasha::Seq::seq_guess_type( $seq );
+ $analysis{ "SEQ_TYPE" } = Maasha::Seq::seq_guess_type( $seq );
$analysis{ "SEQ_LEN" } = length $seq;
@indels = qw( - ~ . _ );
$char_hash{ $char } += $char_hash{ lc $char };
- $analysis{ "RES:$char" } = $char_hash{ $char };
+ $analysis{ "RES[$char]" } = $char_hash{ $char };
$max = $char_hash{ $char } if $char_hash{ $char } > $max;
$analysis{ "RES_SUM" } += $char_hash{ $char };
}
- map { $analysis{ "RES:$_" } = $indel_hash{ $_ } } @indels;
+ map { $analysis{ "RES[$_]" } = $indel_hash{ $_ } } @indels;
$analysis{ "MIX_INDEX" } = sprintf( "%.2f", $max / $analysis{ "SEQ_LEN" } );
- $analysis{ "MELT_TEMP" } = sprintf( "%.2f", 4 * $gc + 2 * $at );
+ #$analysis{ "MELT_TEMP" } = sprintf( "%.2f", 4 * $gc + 2 * $at );
return wantarray ? %analysis : \%analysis;
}
delete $char_hash{ "~" };
delete $char_hash{ "." };
- $bit_height = &seqlogo_calc_bit_height( \%char_hash, $char_tot );
+ $bit_height = seqlogo_calc_bit_height( \%char_hash, $char_tot );
$bit_diff = $bit_max - $bit_height;
- $char_heights = &seqlogo_calc_char_heights( \%char_hash, $char_tot, $bit_diff );
+ $char_heights = seqlogo_calc_char_heights( \%char_hash, $char_tot, $bit_diff );
push @logo, $char_heights;
}
return $word;
}
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ADAPTOR LOCATING <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+############################### REDUNDANT ##############################
+
+# these functions have been replaced by index_m and match_m in Common.pm
+
+sub find_adaptor
+{
+ # Martin A. Hansen & Selene Fernandez, August 2008
+
+ # Locates an adaptor sequence in a given sequence
+ # starting from a given offset position allowing a given
+ # number of mismatches (no indels).
+
+ my ( $adaptor, # list of chars
+ $tag, # list of chars
+ $adaptor_len, # length of adaptor sequence
+ $tag_len, # length of tag sequence
+ $tag_offset, # offset position
+ $max_match, # number of matches indicating success
+ $max_mismatch, # number of mismatches
+ ) = @_;
+
+ # Returns an integer.
+
+ my ( $i, $j, $match, $mismatch );
+
+ $i = $tag_offset;
+
+ while ( $i < $tag_len - ( $max_match + $max_mismatch ) + 1 )
+ {
+ $j = 0;
+
+ while ( $j < $adaptor_len - ( $max_match + $max_mismatch ) + 1 )
+ {
+ return $i if check_diag( $adaptor, $tag, $adaptor_len, $tag_len, $j, $i, $max_match, $max_mismatch );
+
+ $j++
+ }
+
+ $i++;
+ }
+
+ return -1;
+}
+
+
+sub check_diag
+{
+ # Martin A. Hansen & Selene Fernandez, August 2008
+
+ # Checks the diagonal starting at a given coordinate
+ # of a search space constituted by an adaptor and a tag sequence.
+ # Residues in the diagonal are compared between the sequences allowing
+ # for a given number of mismatches. We terminate when search space is
+ # exhausted or if max matches or mismatches is reached.
+
+ my ( $adaptor, # list of chars
+ $tag, # list of chars
+ $adaptor_len, # length of adaptor sequence
+ $tag_len, # length of tag sequence
+ $adaptor_beg, # adaptor begin coordinate
+ $tag_beg, # tag begin coordinate
+ $max_match, # number of matches indicating success
+ $max_mismatch, # number of mismatches
+ ) = @_;
+
+ # Returns boolean.
+
+ my ( $match, $mismatch );
+
+ $match = 0;
+ $mismatch = 0;
+
+ while ( $adaptor_beg <= $adaptor_len and $tag_beg <= $tag_len )
+ {
+ if ( $adaptor->[ $adaptor_beg ] eq $tag->[ $tag_beg ] )
+ {
+ $match++;
+
+ return 1 if $match >= $max_match;
+ }
+ else
+ {
+ $mismatch++;
+
+ return 0 if $mismatch > $max_mismatch;
+ }
+
+ $adaptor_beg++;
+ $tag_beg++;
+ }
+
+ return 0;
+}
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<