git.donarmstrong.com Git - biopieces.git/commitdiff
updated remove_illumina_adaptor
author: martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 8 Mar 2011 11:21:41 +0000 (11:21 +0000)
committer: martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 8 Mar 2011 11:21:41 +0000 (11:21 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1286 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/remove_illumina_adaptor
code_ruby/Maasha/lib/seq.rb

index cb8179961e146ed5c5fb881b8d896f78dcc73149..7f74c8a90578b7047c7b096a1a7b748abe5a139d 100755 (executable)
@@ -33,6 +33,7 @@ require 'biopieces'
 require 'seq'
 
 casts = []
+casts << {:long=>'min',                :short=>'m', :type=>'uint',   :mandatory=>false, :default=>0,   :allowed=>nil, :disallowed=>nil}
 casts << {:long=>'right_adaptor',      :short=>'r', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
 casts << {:long=>'left_adaptor',       :short=>'l', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
 casts << {:long=>'right_hamming_dist', :short=>'R', :type=>'uint',   :mandatory=>false, :default=>25,  :allowed=>nil, :disallowed=>nil}
@@ -48,7 +49,7 @@ bp.each_record do |record|
 
     if options[:right_adaptor]
       pos_right = entry.adaptor_locate_right(options[:right_adaptor], options[:right_hamming_dist])
-      entry.subseq!(0, pos_right) if pos_right >= 0
+      entry.subseq!(0, pos_right) if pos_right >= 0 and entry.length - pos_right >= options[:min]
       record[:CLIP_ADAPTOR_RIGHT] = pos_right
     else
       record[:CLIP_ADAPTOR_RIGHT] = -1
@@ -56,7 +57,7 @@ bp.each_record do |record|
 
     if options[:left_adaptor]
       pos_left = entry.adaptor_locate_left(options[:left_adaptor], options[:left_hamming_dist])
-      entry.subseq!(pos_left + 1) if pos_left >= 0
+      entry.subseq!(pos_left + 1) if pos_left >= options[:min]
       record[:CLIP_ADAPTOR_LEFT] = pos_left
     else
       record[:CLIP_ADAPTOR_LEFT] = -1
index 1a7d698299c3259b87f9904fdbfd145506126e95..6d739b930850cf556cd1bd17ef0b0a5c448c5794 100644 (file)
@@ -22,7 +22,7 @@
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
-require 'bits'
+require 'amatch'
 
 # Residue alphabets
 DNA     = %w[a t c g]
@@ -38,6 +38,8 @@ SCORE_ILLUMINA = 64
 class SeqError < StandardError; end
 
 class Seq
+  include Amatch
+
   attr_accessor :seq_name, :seq, :type, :qual
 
   # Method that generates all possible oligos of a specified length and type.
@@ -318,8 +320,9 @@ class Seq
       len          = self.length - pos
       subseq       = self.seq[pos ... pos + len].upcase
       subadaptor   = adaptor[0 ... len].upcase
+      m            = Hamming.new(subseq)
+      hamming_dist = m.match(subadaptor)
       hamming_max  = (len * hd_percent * 0.01).round
-      hamming_dist = String.hamming_dist(subseq, subadaptor)
       return pos if hamming_dist <= hamming_max
 
       pos += 1
@@ -341,8 +344,9 @@ class Seq
       len          = pos
       subseq       = self.seq[0 ... len].upcase
       subadaptor   = adaptor[adaptor.length - len ... adaptor.length].upcase
+      m            = Hamming.new(subseq)
+      hamming_dist = m.match(subadaptor)
       hamming_max  = (len * hd_percent * 0.01).round
-      hamming_dist = String.hamming_dist(subseq, subadaptor)
 
       pos -= 1