From 5526cf56fc20602b9be197db8b1a9a44502791ab Mon Sep 17 00:00:00 2001 From: martinahansen Date: Mon, 2 Dec 2013 14:44:35 +0000 Subject: [PATCH] fixed bug in assemble_pairs git-svn-id: http://biopieces.googlecode.com/svn/trunk@2264 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/assemble_pairs | 17 +++-- bp_test/out/assemble_pairs.out.1 | 15 ++-- bp_test/out/assemble_pairs.out.2 | 9 ++- code_ruby/lib/maasha/seq/assemble.rb | 86 +++++++++++++++------- code_ruby/test/maasha/seq/test_assemble.rb | 34 ++++++++- 5 files changed, 117 insertions(+), 44 deletions(-) diff --git a/bp_bin/assemble_pairs b/bp_bin/assemble_pairs index a101f3e..df1fd03 100755 --- a/bp_bin/assemble_pairs +++ b/bp_bin/assemble_pairs @@ -54,9 +54,9 @@ def names_match(entry1, entry2) end casts = [] -casts << {:long=>'mismatches', :short=>'m', :type=>'uint', :mandatory=>false, :default=>5, :allowed=>nil, :disallowed=>nil} -casts << {:long=>'overlap_min', :short=>'o', :type=>'uint', :mandatory=>false, :default=>1, :allowed=>nil, :disallowed=>"0"} -casts << {:long=>'overlap_max', :short=>'p', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0"} +casts << {long: 'mismatches', short: 'm', type: 'uint', mandatory: false, default: 5, allowed: nil, disallowed: nil} +casts << {long: 'overlap_min', short: 'o', type: 'uint', mandatory: false, default: 1, allowed: nil, disallowed: "0"} +casts << {long: 'overlap_max', short: 'p', type: 'uint', mandatory: false, default: nil, allowed: nil, disallowed: "0"} options = Biopieces.options_parse(ARGV, casts) @@ -84,16 +84,17 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| merged = Assemble.pair( entry1, entry2, - mismatches_max:options[:mismatches], - overlap_min:options[:overlap_min], - overlap_max:options[:overlap_max] + mismatches_max: options[:mismatches], + overlap_min: options[:overlap_min], + overlap_max: options[:overlap_max] ) if merged new_record = merged.to_bp - if merged.seq_name =~ 
/overlap=(\d+)$/ - new_record[:OVERLAP_LEN] = $1 + if merged.seq_name =~ /overlap=(\d+):hamming=(\d+)$/ + new_record[:OVERLAP_LEN] = $1 + new_record[:HAMMING_DIST] = $2 end output.puts new_record diff --git a/bp_test/out/assemble_pairs.out.1 b/bp_test/out/assemble_pairs.out.1 index f9cf175..f804cb7 100644 --- a/bp_test/out/assemble_pairs.out.1 +++ b/bp_test/out/assemble_pairs.out.1 @@ -1,30 +1,35 @@ -SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:14862:1868 1:N:0:14:overlap=49 +SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:14862:1868 1:N:0:14:overlap=49:hamming=1 SEQ: tggggaatattggacaatgggggcaaccctgatccagcaataccgcgtgtgtgaagaaggcctgagggttgtaaagcactttcaattgtgaagaaaagttaacggttaataaccgttagccttgacgttaactttagaagaagcaccggctaactccgtgccagcagccgcggtaatacggagggtgcaagcgttaatcgGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTTTATTAAGTCAGATGTGaaagccccgggcttaacctgggaactgcatttgaaactggtcaactagagtatggtagaggaaagtggaatttctggtgtagcggtgaaatgcgtagatatcagaaggaacatcaatggcgaaggcagctttctggaccaatactgacgctgaggtacgaaagcgtgggtagcaaacagg SEQ_LEN: 429 SCORES: !??????BDDDDDDDDGGGGGGGHHIIIEHIHHFGGHFHHGHFHHDHEHEHHFAFFGFFHFHHFFHHHEFEEHHHHHHHHHHHHFFFHFHHHHHHHHHHHHGBDEGGGGGGGGGGGGGGGGEGEGGGGGCEGGGGECCECEEECGGG!ADGCGGGEGGEGGGGGEGCE8!2!DC!EEEGGC?!DGCCCEC:C?CCEGGGG??288<8B47>43,(195??=36)745<6.;:=?D?@6AB?@D8?@C=?AA;4'8D8?:::A?1*)=,,==EC==,,ACFCAAEBC=AEFCEBEDBDEEDED=EDD=?BFBFDFB!DFF@FFFHHHHHHHHHGGGDDHHHHFHDHIHHHHIIIIIHHFGCFFHHE!EEDFG?HIHGFGGFFFFF?CDCE?9FEHDHHGE;F;IHFFHFFFEEFDDDDDB!-!BB????? 
OVERLAP_LEN: 49 +HAMMING_DIST: 1 --- -SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:13906:2139 1:N:0:14:overlap=96 +SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:13906:2139 1:N:0:14:overlap=96:hamming=2 SEQ: tagggaatcttgcacaatggaggaaactctgatgcagcgatgccgcgtgagtgaagaaggcctttgggttgtaaagctctttcgtcggggaagaaaatgactgtacccgaataagaaggtccggctaacttcgtgccagcagccgcggtaatacgAAGGGACCTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGTTCGTAGGTGGTTAAAAAAGTTGGTGGTGAAATCCCAGAGCTTAACTCTGGAACTGccatcaaaactttttagctagagtatgatagaggaaagcagaatttctagtgtagaggtgaaattcgtagatattagaaagaataccgattgcgaaggcagctttctggatcattactgacgctgaggaacgaaagcatgggtagcgaaga SEQ_LEN: 402 SCORES: !???9?BBBDBDDBDDFFFFFFHHHIFHFHHIHHFHHHHEDDEGHHHCEHE?EFHFGHFHFHIHIIIHCECEHHHIFHIIIHGHFHHHHHHEEFFFFFEEFFFEFFFEEEEEFFFFFFFCEFBEEDEFFFFFFEFEFFEEFFFFDDDDA?EEEFFB@C?8B=:7785;660@?@FEB?7B;?2BBA?CED@@@B?=5@DEFD??;8@E0@BEC788>@?95*4-:=7BEB8B7BB2@B?8&+98>2CDBB>A=AEECCEEEEDB=FFEFEEEDEFFFBFFFFF?.HFDBDD?FFHFHFFFHFFFHHGGGGCBFAHHIHFHHHGCGGIIIHHHGE5!HFBDE@E@DGHHHFFFHIHHFFGFDC0?E?FHEHEHIIHHFHIHFHFFFFFFDDBDBADD?BB??! 
OVERLAP_LEN: 96 +HAMMING_DIST: 2 --- -SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:14865:2158 1:N:0:14:overlap=96 +SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:14865:2158 1:N:0:14:overlap=96:hamming=0 SEQ: tagggaatcttgcacaatggaggaaactctgatgcagcgatgccgcgtgagtgaagaaggcctttgggttgtaaagctctttcgtcggggaagaaaatgactgtacccgaataagaaggtccggctaacttcgtgccagcagccgcggtaatacGAAGGGACCTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGTTCGTAGGTGGTTAAAAAAGTTGGTGGTGAAATCCCAGAGCTTAACTCTGGAACTgccatcaaaactttttagctagagtatgatagaggaaagcagaatttctagtgtagaggtgaaattcgtagatattagaaagaataccgattgcgaaggcagctttctggatcattactgacactgaggaacgaaagcatgggtagcgaag SEQ_LEN: 401 SCORES: ?????BBBBBDDBDDBFFFFFFECHFHFHFHGHHFGD?!CFHHHHHH!DEEFFFFDFFHF@FHHHIFHEEHHHIIHHHIHIHHHDEHHHHFEEFFF?FEEEEFEEFFEE:!CEEFEFFFBEEEEEDEFE::AE:?AECEFEF?A!;D88;CEEEC@@668CCBC??C;8?+02>CEA>CB@;;?8@B@CCDE@D@?5@B>ABB:2::1>CDB=/@>BB@<19=4?6:7@A6@=?36875:8?A=DBDC@ACC;EEEEEEDEEDD;=DFB?FFDFFFB?C=FFFF?HHFDD@.DCFHHFHHHGFHHFHHGECBFFFEF?CHFGEA??HHHIHHHFC!E@DDEDDFHHHDHGGFBE?C=FF?CC?=F?CHFA9C0GBHFFHF;HFFF?FFBBBBB?BB?B??! 
OVERLAP_LEN: 96 +HAMMING_DIST: 0 --- -SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:17246:2253 1:N:0:14:overlap=96 +SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:17246:2253 1:N:0:14:overlap=96:hamming=1 SEQ: agggaatcttgcacaatgggggaaaccctgatgcagcgatgccgcgtgagtgaagaaggcccttgggttgtaaagctctttcgtcggggaagaaaatgactgtacccgaataagaaggtccggctaacttcgtgccagcagccgcggtaatacGAAGGGACCTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGTTCGTAGGTGGTTAAAAAAGTTGATGGTGAAATCCCAAGGCTCAACCTTGGAACTgccatcaaaactttttagctagagtatgatagaggaaagtggaatttctagtgtagaggtgaaattcgtagatattagaaagaacatcaaaagcgaaggcaactttctggatcattactgacactgaggaacgaaagcatgggtagcgaagagg SEQ_LEN: 403 SCORES: !???B-!-?!BBBBBFFF;!CEEECC;EFFH=EGBE=CEFHD@CDCE!+!5C=FBAEADHFEHHHHHHECCG=CD=CCF=DC=DDACEE5:DEEEECBECEC3?3;?B:@CEEECEEC?CC????E?CAAAE?ACEEEEEEEE????A;)::ABCEB8CA669967)2955CA@=FEB=??<2:>?@6@7B7>449*<<9<957:9D;AD88;60?CEB2@8:;:B?>@749-663=<2-6<>46>A=64.EFEEEEDDEFBDDFFFFFFHHHHFFCC.?HFFCFHFDFDDFFGBBHHHHFGDHHGGBHIHHIIHHHDGHHHFECHHIIHFHGEA+HHFC@EHIIHHGFEGHHFGDHGDDC0?GGFGC90A0DBFFIIHFHFFFFFBBBBBDBB??BB????? 
OVERLAP_LEN: 96 +HAMMING_DIST: 1 --- -SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:13072:2276 1:N:0:14:overlap=9 +SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:13072:2276 1:N:0:14:overlap=9:hamming=0 SEQ: gggaattttgcgcaatgggcgaaagcctgacgcagcaacgccgcgggatcgaagaagctctgcggagtgtaaagatctgtcataagggaagaataacgagtattctaacaaaatattcgtctgacggtaccttataagaaagccacggctaacttcgtgccagcagccgcggtaatacgaggggggcaagcgttatccggaatcattgggcgtaaaggggtcgtatgaggACTGATCAGttggaattaaaagaccgcggcttaaccgggggaacggtttcaatactgtcagtcttgagtgaggtagaggtaagcggaattcctggtgtagcggtgaaatgcgtagatatcaggaggaacatcaaaggcgaaggcagcttactgggcctatactgacgctgaggaacgaaagcgtggggagcaaac SEQ_LEN: 425 SCORES: ?????B?BB!-5@BC9CFFB!+C!C!E?@B=!,!5!FE,!77!CE)!!:CD=@DEFHDDBCFFEE8:=@,BD=,3;BEEEE?;BCCECCCE?B;?B3:AA88?:**:CAAA88:::::??8*)AC).8?)*:???:0?CEAAE8**4;8;AA:??:8:??8?EE:AC?;42.)4?*::8).'''..'.8C8A)8AAE:A;?'0:?CAEE?8''4A8?*8'0)''8*0*/:2?.?32;816666..//((6/'(.'''6(6;64-2'8(6///-'96;/(;8@;*78*;@@@@9**0*88;@0*2*9@3*D199;99DDD99D@EDECCDDDDDEEDD9EDDDDEEEDDEC!DE@AC5A=CCAE!5C9+E=@DB@DFFC=C+EEEA9.A999EC7@7-C!A/ECAAEEEEEEC@@@@@-5-!!!!! 
OVERLAP_LEN: 9 +HAMMING_DIST: 0 --- diff --git a/bp_test/out/assemble_pairs.out.2 b/bp_test/out/assemble_pairs.out.2 index a11b38c..1bf0ec5 100644 --- a/bp_test/out/assemble_pairs.out.2 +++ b/bp_test/out/assemble_pairs.out.2 @@ -1,18 +1,21 @@ -SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:13906:2139 1:N:0:14:overlap=96 +SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:13906:2139 1:N:0:14:overlap=96:hamming=2 SEQ: tagggaatcttgcacaatggaggaaactctgatgcagcgatgccgcgtgagtgaagaaggcctttgggttgtaaagctctttcgtcggggaagaaaatgactgtacccgaataagaaggtccggctaacttcgtgccagcagccgcggtaatacgAAGGGACCTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGTTCGTAGGTGGTTAAAAAAGTTGGTGGTGAAATCCCAGAGCTTAACTCTGGAACTGccatcaaaactttttagctagagtatgatagaggaaagcagaatttctagtgtagaggtgaaattcgtagatattagaaagaataccgattgcgaaggcagctttctggatcattactgacgctgaggaacgaaagcatgggtagcgaaga SEQ_LEN: 402 SCORES: !???9?BBBDBDDBDDFFFFFFHHHIFHFHHIHHFHHHHEDDEGHHHCEHE?EFHFGHFHFHIHIIIHCECEHHHIFHIIIHGHFHHHHHHEEFFFFFEEFFFEFFFEEEEEFFFFFFFCEFBEEDEFFFFFFEFEFFEEFFFFDDDDA?EEEFFB@C?8B=:7785;660@?@FEB?7B;?2BBA?CED@@@B?=5@DEFD??;8@E0@BEC788>@?95*4-:=7BEB8B7BB2@B?8&+98>2CDBB>A=AEECCEEEEDB=FFEFEEEDEFFFBFFFFF?.HFDBDD?FFHFHFFFHFFFHHGGGGCBFAHHIHFHHHGCGGIIIHHHGE5!HFBDE@E@DGHHHFFFHIHHFFGFDC0?E?FHEHEHIIHHFHIHFHFFFFFFDDBDBADD?BB??! 
OVERLAP_LEN: 96 +HAMMING_DIST: 2 --- -SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:14865:2158 1:N:0:14:overlap=96 +SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:14865:2158 1:N:0:14:overlap=96:hamming=0 SEQ: tagggaatcttgcacaatggaggaaactctgatgcagcgatgccgcgtgagtgaagaaggcctttgggttgtaaagctctttcgtcggggaagaaaatgactgtacccgaataagaaggtccggctaacttcgtgccagcagccgcggtaatacGAAGGGACCTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGTTCGTAGGTGGTTAAAAAAGTTGGTGGTGAAATCCCAGAGCTTAACTCTGGAACTgccatcaaaactttttagctagagtatgatagaggaaagcagaatttctagtgtagaggtgaaattcgtagatattagaaagaataccgattgcgaaggcagctttctggatcattactgacactgaggaacgaaagcatgggtagcgaag SEQ_LEN: 401 SCORES: ?????BBBBBDDBDDBFFFFFFECHFHFHFHGHHFGD?!CFHHHHHH!DEEFFFFDFFHF@FHHHIFHEEHHHIIHHHIHIHHHDEHHHHFEEFFF?FEEEEFEEFFEE:!CEEFEFFFBEEEEEDEFE::AE:?AECEFEF?A!;D88;CEEEC@@668CCBC??C;8?+02>CEA>CB@;;?8@B@CCDE@D@?5@B>ABB:2::1>CDB=/@>BB@<19=4?6:7@A6@=?36875:8?A=DBDC@ACC;EEEEEEDEEDD;=DFB?FFDFFFB?C=FFFF?HHFDD@.DCFHHFHHHGFHHFHHGECBFFFEF?CHFGEA??HHHIHHHFC!E@DDEDDFHHHDHGGFBE?C=FF?CC?=F?CHFA9C0GBHFFHF;HFFF?FFBBBBB?BB?B??! 
OVERLAP_LEN: 96 +HAMMING_DIST: 0 --- -SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:17246:2253 1:N:0:14:overlap=96 +SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:17246:2253 1:N:0:14:overlap=96:hamming=1 SEQ: agggaatcttgcacaatgggggaaaccctgatgcagcgatgccgcgtgagtgaagaaggcccttgggttgtaaagctctttcgtcggggaagaaaatgactgtacccgaataagaaggtccggctaacttcgtgccagcagccgcggtaatacGAAGGGACCTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGTTCGTAGGTGGTTAAAAAAGTTGATGGTGAAATCCCAAGGCTCAACCTTGGAACTgccatcaaaactttttagctagagtatgatagaggaaagtggaatttctagtgtagaggtgaaattcgtagatattagaaagaacatcaaaagcgaaggcaactttctggatcattactgacactgaggaacgaaagcatgggtagcgaagagg SEQ_LEN: 403 SCORES: !???B-!-?!BBBBBFFF;!CEEECC;EFFH=EGBE=CEFHD@CDCE!+!5C=FBAEADHFEHHHHHHECCG=CD=CCF=DC=DDACEE5:DEEEECBECEC3?3;?B:@CEEECEEC?CC????E?CAAAE?ACEEEEEEEE????A;)::ABCEB8CA669967)2955CA@=FEB=??<2:>?@6@7B7>449*<<9<957:9D;AD88;60?CEB2@8:;:B?>@749-663=<2-6<>46>A=64.EFEEEEDDEFBDDFFFFFFHHHHFFCC.?HFFCFHFDFDDFFGBBHHHHFGDHHGGBHIHHIIHHHDGHHHFECHHIIHFHGEA+HHFC@EHIIHHGFEGHHFGDHGDDC0?GGFGC90A0DBFFIIHFHFFFFFBBBBBDBB??BB????? OVERLAP_LEN: 96 +HAMMING_DIST: 1 --- diff --git a/code_ruby/lib/maasha/seq/assemble.rb b/code_ruby/lib/maasha/seq/assemble.rb index c6f528b..bab869c 100644 --- a/code_ruby/lib/maasha/seq/assemble.rb +++ b/code_ruby/lib/maasha/seq/assemble.rb @@ -39,6 +39,9 @@ class Assemble def initialize(entry1, entry2, options) @entry1 = entry1 @entry2 = entry2 + @overlap = 0 + @offset1 = 0 + @offset2 = 0 @options = options @options[:mismatches_max] ||= 0 @options[:overlap_min] ||= 1 @@ -47,41 +50,70 @@ class Assemble # Method to locate overlapping matches between two sequences. 
def match if @options[:overlap_max] - overlap = [@options[:overlap_max], @entry1.length, @entry2.length].min + @overlap = [@options[:overlap_max], @entry1.length, @entry2.length].min else - overlap = [@entry1.length, @entry2.length].min + @overlap = [@entry1.length, @entry2.length].min end - while overlap >= @options[:overlap_min] - mismatches_max = (overlap * @options[:mismatches_max] * 0.01).round - - offset1 = @entry2.length - overlap - offset2 = 0 + diff = @entry1.length - @entry2.length + diff = 0 if diff < 0 - if match_C(@entry1.seq, @entry2.seq, offset1, offset2, overlap, mismatches_max) - entry_left = @entry1[0 ... @entry1.length - overlap] - entry_right = @entry2[overlap .. -1] + @offset1 = @entry1.length - @overlap - diff - if @entry1.qual and @entry2.qual - entry_overlap1 = @entry1[-1 * overlap .. -1] - entry_overlap2 = @entry2[0 ... overlap] + while @overlap >= @options[:overlap_min] + mismatches_max = (@overlap * @options[:mismatches_max] * 0.01).round + + # puts "diff: #{diff} offset1: #{@offset1} offset2: #{@offset2} overlap: #{@overlap}" - entry_overlap = merge_overlap(entry_overlap1, entry_overlap2) - else - entry_overlap = @entry1[-1 * overlap .. -1] - end - - entry_left.seq.downcase! - entry_overlap.seq.upcase! - entry_right.seq.downcase! + if mismatches = match_C(@entry1.seq, @entry2.seq, @offset1, @offset2, @overlap, mismatches_max) and mismatches >= 0 entry_merged = entry_left + entry_overlap + entry_right - entry_merged.seq_name = @entry1.seq_name + ":overlap=#{overlap}" + entry_merged.seq_name = @entry1.seq_name + ":overlap=#{@overlap}:hamming=#{mismatches}" return entry_merged end - overlap -= 1 + if diff > 0 + diff -= 1 + else + @overlap -= 1 + end + + @offset1 += 1 + end + end + + # Method to extract and downcase the left part of an assembled pair. + def entry_left + entry = @entry1[0 ... @offset1] + entry.seq.downcase! + entry + end + + # Method to extract and downcase the right part of an assembled pair. 
+ def entry_right + if @entry1.length > @offset1 + @overlap + entry = @entry1[@offset1 + @overlap .. -1] + else + entry = @entry2[@offset2 + @overlap .. -1] + end + + entry.seq.downcase! + entry + end + + # Method to extract and upcase the overlapping part of an assembled pair. + def entry_overlap + if @entry1.qual and @entry2.qual + entry_overlap1 = @entry1[@offset1 ... @offset1 + @overlap] + entry_overlap2 = @entry2[@offset2 ... @offset2 + @overlap] + + entry = merge_overlap(entry_overlap1, entry_overlap2) + else + entry = @entry1[@offset1 ... @offset1 + @overlap] end + + entry.seq.upcase! + entry end # Method to merge sequence and quality scores in an overlap. @@ -111,7 +143,7 @@ class Assemble # C method for determining if two strings of equal length match # given a maximum allowed mismatches and allowing for IUPAC - # ambiguity codes. Returns true if match, else false. + # ambiguity codes. Returns the number of mismatches if match, else -1. builder.c %{ VALUE match_C( VALUE _string1, // String 1 @@ -141,7 +173,7 @@ class Assemble match++; if (match >= max_match) { - return Qtrue; + return UINT2NUM(mismatch); } } else @@ -149,12 +181,12 @@ class Assemble mismatch++; if (mismatch > max_mismatch) { - return Qfalse; + return INT2NUM(-1); } } } - return Qfalse; + return INT2NUM(-1); } } end diff --git a/code_ruby/test/maasha/seq/test_assemble.rb b/code_ruby/test/maasha/seq/test_assemble.rb index bd1baf0..9317408 100755 --- a/code_ruby/test/maasha/seq/test_assemble.rb +++ b/code_ruby/test/maasha/seq/test_assemble.rb @@ -18,6 +18,38 @@ class TestAssemble < Test::Unit::TestCase assert_equal("ATCG", Assemble.pair(Seq.new(seq_name: "t1", seq: "atcg"), Seq.new(seq_name: "t2", seq: "atcg")).seq) end + test "Assemble.pair with first sequence longer than second and terminal overlap returns correctly" do + e1 = Seq.new(seq_name: "t1", seq: "AGGCGT") + e2 = Seq.new(seq_name: "t2", seq: "GT") + a = Assemble.pair(e1, e2) + assert_equal("t1:overlap=2:hamming=0", 
a.seq_name) + assert_equal("aggcGT", a.seq) + end + + test "Assemble.pair with first sequence longer than second and internal overlap returns correctly" do + e1 = Seq.new(seq_name: "t1", seq: "AGGCGT") + e2 = Seq.new(seq_name: "t2", seq: "GC") + a = Assemble.pair(e1, e2) + assert_equal("t1:overlap=2:hamming=0", a.seq_name) + assert_equal("agGCgt", a.seq) + end + + test "Assemble.pair with first sequence longer than second and initial overlap returns correctly" do + e1 = Seq.new(seq_name: "t1", seq: "GTCAGA") + e2 = Seq.new(seq_name: "t2", seq: "GT") + a = Assemble.pair(e1, e2) + assert_equal("t1:overlap=2:hamming=0", a.seq_name) + assert_equal("GTcaga", a.seq) + end + + test "Assemble.pair with first sequence shorter than second and initial overlap returns correctly" do + e1 = Seq.new(seq_name: "t1", seq: "AG") + e2 = Seq.new(seq_name: "t2", seq: "AGTCAG") + a = Assemble.pair(e1, e2) + assert_equal("t1:overlap=2:hamming=0", a.seq_name) + assert_equal("AGtcag", a.seq) + end + test "Assemble.pair with overlap and overlap_min returns correctly" do assert_nil(Assemble.pair(Seq.new(seq: "atcg"), Seq.new(seq: "gatc"), :overlap_min => 2)) assert_equal("atCGat", Assemble.pair(Seq.new(seq_name: "t1", seq: "atcg"), Seq.new(seq_name: "t2", seq: "cgat"), :overlap_min => 2).seq) @@ -25,7 +57,7 @@ class TestAssemble < Test::Unit::TestCase test "Assemble.pair with overlap and overlap_max returns correctly" do assert_equal("aTCGa", Assemble.pair(Seq.new(seq_name: "t1", seq: "atcg"), Seq.new(seq_name: "t2", seq: "tcga"), :overlap_max => 3).seq) - assert_nil(Assemble.pair(Seq.new(seq: "atcg"), Seq.new(seq: "atcg"), :overlap_max => 3)) + assert_nil(Assemble.pair(Seq.new(seq_name: "t1", seq: "atcg"), Seq.new(seq_name: "t2", seq: "atcg"), :overlap_max => 3)) end test "Assemble.pair with overlap returns correct quality" do -- 2.39.2