From 288c4e7fc6fedad07c7297dfb14be17c6a7fa364 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Mon, 15 Jun 2009 17:13:24 +0000 Subject: [PATCH] added mutate_seq git-svn-id: http://biopieces.googlecode.com/svn/trunk@527 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/mutate_seq | 93 +++++++++++++++++++++++++++++++++++++++++ code_perl/Maasha/Seq.pm | 67 ++++++++++++++++++++++++++++- 2 files changed, 158 insertions(+), 2 deletions(-) create mode 100755 bp_bin/mutate_seq diff --git a/bp_bin/mutate_seq b/bp_bin/mutate_seq new file mode 100755 index 0000000..5d6d6db --- /dev/null +++ b/bp_bin/mutate_seq @@ -0,0 +1,93 @@ +#!/usr/bin/env perl -w + +# Copyright (C) 2007-2009 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Introduce mutations into sequences in the stream. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +use strict; +use Maasha::Common; +use Maasha::Biopieces; +use Maasha::Seq; + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +my ( $options, $in, $out, $type, $alph, $mutations, $seq_len, $record ); + +$options = Maasha::Biopieces::parse_options( + [ + { long => 'number', short => 'n', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => 0 }, + { long => 'percent', short => 'p', type => 'float', mandatory => 'no', default => undef, allowed => undef, disallowed => 0 }, + { long => 'type', short => 't', type => 'string', mandatory => 'no', default => undef, allowed => 'rna,dna,protein', disallowed => undef }, + ] +); + +Maasha::Common::error( qq(both --number and --percent specified) ) if $options->{ "number" } and $options->{ "percent" }; +Maasha::Common::error( qq(no --number or --percent specified) ) if not $options->{ "number" } and not $options->{ "percent" }; + +$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); +$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); + +while ( $record = Maasha::Biopieces::get_record( $in ) ) +{ + if ( $record->{ "SEQ" } ) + { + $seq_len = length $record->{ "SEQ" }; + $type = $options->{ "type" } || Maasha::Seq::seq_guess_type( $record->{ "SEQ" } ); + $alph = Maasha::Seq::seq_alph( $type ); + $mutations = $options->{ "number" } || int( $seq_len * ( $options->{ "percent" } / 100 ) ); + + Maasha::Common::error( qq(mutations > sequence length: $mutations > $seq_len) ) if $mutations > $seq_len; + + $record->{ "SEQ" } = Maasha::Seq::seq_mutate( $record->{ "SEQ" }, $mutations, $alph ); + } + + Maasha::Biopieces::put_record( $record, $out ); +} + +Maasha::Biopieces::close_stream( $in ); +Maasha::Biopieces::close_stream( $out ); + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +BEGIN +{ + Maasha::Biopieces::status_set(); +} + + +END +{ + Maasha::Biopieces::status_log(); +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__ diff --git a/code_perl/Maasha/Seq.pm b/code_perl/Maasha/Seq.pm index 42df6d3..1283965 100644 --- a/code_perl/Maasha/Seq.pm +++ b/code_perl/Maasha/Seq.pm @@ -29,6 +29,8 @@ package Maasha::Seq; use strict; +use POSIX qw( islower ); +use Maasha::Common; use Data::Dumper; use IPC::Open2; use List::Util qw( shuffle ); @@ -763,7 +765,7 @@ sub seq_generate $alph, # sequence alphabet ) = @_; - # returns string + # Returns a string my ( $alph_len, $i, $seq ); @@ -777,6 +779,67 @@ sub seq_generate } +sub seq_mutate +{ + # Martin A. Hansen, June 2009. + + # Introduces a given number of random mutations in a + # given sequence of a specified alphabet. + + my ( $seq, # sequence to mutate + $mutations, # number of mutations + $alph, # alphabet of sequence + ) = @_; + + # Returns a string. + + my ( $i, $pos, %lookup_hash ); + + $i = 0; + + while ( $i < $mutations ) + { + $pos = int( rand( length $seq ) ); + + if ( not exists $lookup_hash{ $pos } ) + { + substr $seq, $pos, 1, res_mutate( substr( $seq , $pos, 1 ), $alph ); + + $lookup_hash{ $pos } = 1; + + $i++; + } + } + + return $seq; +} + + +sub res_mutate +{ + # Martin A. Hansen, June 2009. + + # Mutates a given residue to another from a given alphabet. + + my ( $res, # residue to mutate + $alph, # alphabet + ) = @_; + + # Returns a char. + + my ( $alph_len, $new ); + + $alph_len = scalar @{ $alph }; + $new = $res; + + while ( uc $new eq uc $res ) { + $new = $alph->[ int( rand( $alph_len ) ) ]; + } + + return POSIX::islower( $res ) ? lc $new : uc $new; +} + + sub seq_shuffle { # Martin A. Hansen, December 2007. @@ -786,7 +849,7 @@ sub seq_shuffle my ( $seq, # sequence string ) = @_; - # Returns string. + # Returns a string. my ( @list ); -- 2.39.2