]> git.donarmstrong.com Git - biopieces.git/blob - code_ruby/lib/maasha/seq/assemble.rb
e59e178b73df8ba45689836db02c852d5632f637
[biopieces.git] / code_ruby / lib / maasha / seq / assemble.rb
1 # Copyright (C) 2007-2013 Martin A. Hansen.
2
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
7
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU General Public License for more details.
12
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16
17 # http://www.gnu.org/copyleft/gpl.html
18
19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
20
21 # This software is part of the Biopieces framework (www.biopieces.org).
22
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
24
25 class Assemble
26   def self.pair(entry1, entry2, options = {})
27     assemble = self.new(entry1, entry2, options)
28     assemble.match
29   end
30
31   def initialize(entry1, entry2, options)
32     @entry1  = entry1
33     @entry2  = entry2
34     @options = options
35     @options[:mismatches_max] ||= 0
36     @options[:overlap_min]    ||= 1
37     @options[:overlap_max]    ||= entry1.length
38     @options[:overlap_max]      = [@options[:overlap_max], entry1.length, entry2.length].min
39   end
40
41   def match
42     overlap = @options[:overlap_max]
43
44     na_seq1 = NArray.to_na(@entry1.seq.downcase, "byte")
45     na_seq2 = NArray.to_na(@entry2.seq.downcase, "byte")
46
47     while overlap >= @options[:overlap_min]
48       hamming_dist = (na_seq1[-1 * overlap .. -1] ^ na_seq2[0 ... overlap]).count_true
49
50       if hamming_dist <= percent2real(overlap, @options[:mismatches_max])
51         merged = @entry1 + @entry2[overlap .. -1]
52
53         if @entry1.qual and @entry2.qual
54           qual1 = @entry1.qual[@entry1.length - overlap .. -1]
55           qual2 = @entry2.qual[0 ... overlap]
56
57           na_qual1 = NArray.to_na(qual1, "byte")
58           na_qual2 = NArray.to_na(qual2, "byte")
59
60           qual = ((na_qual1 + na_qual2) / 2).to_s
61
62           merged.seq_name = @entry1.seq_name + ":overlap=#{overlap}:hamming=#{hamming_dist}"
63           merged.qual[@entry1.length - overlap ... @entry1.length]  = qual
64         end
65
66         return merged
67       end
68
69       overlap -= 1
70     end
71   end
72
73   def percent2real(length, percent)
74       (length * percent * 0.01).round
75   end
76 end
77
78
79 __END__
80