end
casts = []
-casts << {:long=>'mismatches', :short=>'m', :type=>'uint', :mandatory=>false, :default=>5, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'overlap_min', :short=>'o', :type=>'uint', :mandatory=>false, :default=>1, :allowed=>nil, :disallowed=>"0"}
-casts << {:long=>'overlap_max', :short=>'p', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>"0"}
+casts << {long: 'mismatches', short: 'm', type: 'uint', mandatory: false, default: 5, allowed: nil, disallowed: nil}
+casts << {long: 'overlap_min', short: 'o', type: 'uint', mandatory: false, default: 1, allowed: nil, disallowed: "0"}
+casts << {long: 'overlap_max', short: 'p', type: 'uint', mandatory: false, default: nil, allowed: nil, disallowed: "0"}
options = Biopieces.options_parse(ARGV, casts)
merged = Assemble.pair(
entry1,
entry2,
- mismatches_max:options[:mismatches],
- overlap_min:options[:overlap_min],
- overlap_max:options[:overlap_max]
+ mismatches_max: options[:mismatches],
+ overlap_min: options[:overlap_min],
+ overlap_max: options[:overlap_max]
)
if merged
new_record = merged.to_bp
- if merged.seq_name =~ /overlap=(\d+)$/
- new_record[:OVERLAP_LEN] = $1
+ if merged.seq_name =~ /overlap=(\d+):hamming=(\d+)$/
+ new_record[:OVERLAP_LEN] = $1
+ new_record[:HAMMING_DIST] = $2
end
output.puts new_record
-SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:14862:1868 1:N:0:14:overlap=49
+SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:14862:1868 1:N:0:14:overlap=49:hamming=1
SEQ: tggggaatattggacaatgggggcaaccctgatccagcaataccgcgtgtgtgaagaaggcctgagggttgtaaagcactttcaattgtgaagaaaagttaacggttaataaccgttagccttgacgttaactttagaagaagcaccggctaactccgtgccagcagccgcggtaatacggagggtgcaagcgttaatcgGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTTTATTAAGTCAGATGTGaaagccccgggcttaacctgggaactgcatttgaaactggtcaactagagtatggtagaggaaagtggaatttctggtgtagcggtgaaatgcgtagatatcagaaggaacatcaatggcgaaggcagctttctggaccaatactgacgctgaggtacgaaagcgtgggtagcaaacagg
SEQ_LEN: 429
SCORES: !??????BDDDDDDDDGGGGGGGHHIIIEHIHHFGGHFHHGHFHHDHEHEHHFAFFGFFHFHHFFHHHEFEEHHHHHHHHHHHHFFFHFHHHHHHHHHHHHGBDEGGGGGGGGGGGGGGGGEGEGGGGGCEGGGGECCECEEECGGG!ADGCGGGEGGEGGGGGEGCE8!2!DC!EEEGGC?!DGCCCEC:C?CCEGGGG??288<8B47>43,(195??=36)745<6.;:=?D?@6AB?@D8?@C=?AA;4'8D8?:::A?1*)=,,==EC==,,ACFCAAEBC=AEFCEBEDBDEEDED=EDD=?BFBFDFB!DFF@FFFHHHHHHHHHGGGDDHHHHFHDHIHHHHIIIIIHHFGCFFHHE!EEDFG?HIHGFGGFFFFF?CDCE?9FEHDHHGE;F;IHFFHFFFEEFDDDDDB!-!BB?????
OVERLAP_LEN: 49
+HAMMING_DIST: 1
---
-SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:13906:2139 1:N:0:14:overlap=96
+SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:13906:2139 1:N:0:14:overlap=96:hamming=2
SEQ: tagggaatcttgcacaatggaggaaactctgatgcagcgatgccgcgtgagtgaagaaggcctttgggttgtaaagctctttcgtcggggaagaaaatgactgtacccgaataagaaggtccggctaacttcgtgccagcagccgcggtaatacgAAGGGACCTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGTTCGTAGGTGGTTAAAAAAGTTGGTGGTGAAATCCCAGAGCTTAACTCTGGAACTGccatcaaaactttttagctagagtatgatagaggaaagcagaatttctagtgtagaggtgaaattcgtagatattagaaagaataccgattgcgaaggcagctttctggatcattactgacgctgaggaacgaaagcatgggtagcgaaga
SEQ_LEN: 402
SCORES: !???9?BBBDBDDBDDFFFFFFHHHIFHFHHIHHFHHHHEDDEGHHHCEHE?EFHFGHFHFHIHIIIHCECEHHHIFHIIIHGHFHHHHHHEEFFFFFEEFFFEFFFEEEEEFFFFFFFCEFBEEDEFFFFFFEFEFFEEFFFFDDDDA?EEEFFB@C?8B=:7785;660@?@FEB?7B;?2BBA?CED@@@B?=5@DEFD??;8@E0@BEC788>@?95*4-:=7BEB8B7BB2@B?8&+98>2CDBB>A=AEECCEEEEDB=FFEFEEEDEFFFBFFFFF?.HFDBDD?FFHFHFFFHFFFHHGGGGCBFAHHIHFHHHGCGGIIIHHHGE5!HFBDE@E@DGHHHFFFHIHHFFGFDC0?E?FHEHEHIIHHFHIHFHFFFFFFDDBDBADD?BB??!
OVERLAP_LEN: 96
+HAMMING_DIST: 2
---
-SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:14865:2158 1:N:0:14:overlap=96
+SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:14865:2158 1:N:0:14:overlap=96:hamming=0
SEQ: tagggaatcttgcacaatggaggaaactctgatgcagcgatgccgcgtgagtgaagaaggcctttgggttgtaaagctctttcgtcggggaagaaaatgactgtacccgaataagaaggtccggctaacttcgtgccagcagccgcggtaatacGAAGGGACCTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGTTCGTAGGTGGTTAAAAAAGTTGGTGGTGAAATCCCAGAGCTTAACTCTGGAACTgccatcaaaactttttagctagagtatgatagaggaaagcagaatttctagtgtagaggtgaaattcgtagatattagaaagaataccgattgcgaaggcagctttctggatcattactgacactgaggaacgaaagcatgggtagcgaag
SEQ_LEN: 401
SCORES: ?????BBBBBDDBDDBFFFFFFECHFHFHFHGHHFGD?!CFHHHHHH!DEEFFFFDFFHF@FHHHIFHEEHHHIIHHHIHIHHHDEHHHHFEEFFF?FEEEEFEEFFEE:!CEEFEFFFBEEEEEDEFE::AE:?AECEFEF?A!;D88;CEEEC@@668CCBC??C;8?+02>CEA>CB@;;?8@B@CCDE@D@?5@B>ABB:2::1>CDB=/@>BB@<19=4?6:7@A6@=?36875:8?A=DBDC@ACC;EEEEEEDEEDD;=DFB?FFDFFFB?C=FFFF?HHFDD@.DCFHHFHHHGFHHFHHGECBFFFEF?CHFGEA??HHHIHHHFC!E@DDEDDFHHHDHGGFBE?C=FF?CC?=F?CHFA9C0GBHFFHF;HFFF?FFBBBBB?BB?B??!
OVERLAP_LEN: 96
+HAMMING_DIST: 0
---
-SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:17246:2253 1:N:0:14:overlap=96
+SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:17246:2253 1:N:0:14:overlap=96:hamming=1
SEQ: agggaatcttgcacaatgggggaaaccctgatgcagcgatgccgcgtgagtgaagaaggcccttgggttgtaaagctctttcgtcggggaagaaaatgactgtacccgaataagaaggtccggctaacttcgtgccagcagccgcggtaatacGAAGGGACCTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGTTCGTAGGTGGTTAAAAAAGTTGATGGTGAAATCCCAAGGCTCAACCTTGGAACTgccatcaaaactttttagctagagtatgatagaggaaagtggaatttctagtgtagaggtgaaattcgtagatattagaaagaacatcaaaagcgaaggcaactttctggatcattactgacactgaggaacgaaagcatgggtagcgaagagg
SEQ_LEN: 403
SCORES: !???B-!-?!BBBBBFFF;!CEEECC;EFFH=EGBE=CEFHD@CDCE!+!5C=FBAEADHFEHHHHHHECCG=CD=CCF=DC=DDACEE5:DEEEECBECEC3?3;?B:@CEEECEEC?CC????E?CAAAE?ACEEEEEEEE????A;)::ABCEB8CA669967)2955CA@=FEB=??<2:>?@6@7B7>449*<<9<957:9D;AD88;60?CEB2@8:;:B?>@749-663=<2-6<>46>A=64.EFEEEEDDEFBDDFFFFFFHHHHFFCC.?HFFCFHFDFDDFFGBBHHHHFGDHHGGBHIHHIIHHHDGHHHFECHHIIHFHGEA+HHFC@EHIIHHGFEGHHFGDHGDDC0?GGFGC90A0DBFFIIHFHFFFFFBBBBBDBB??BB?????
OVERLAP_LEN: 96
+HAMMING_DIST: 1
---
-SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:13072:2276 1:N:0:14:overlap=9
+SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:13072:2276 1:N:0:14:overlap=9:hamming=0
SEQ: gggaattttgcgcaatgggcgaaagcctgacgcagcaacgccgcgggatcgaagaagctctgcggagtgtaaagatctgtcataagggaagaataacgagtattctaacaaaatattcgtctgacggtaccttataagaaagccacggctaacttcgtgccagcagccgcggtaatacgaggggggcaagcgttatccggaatcattgggcgtaaaggggtcgtatgaggACTGATCAGttggaattaaaagaccgcggcttaaccgggggaacggtttcaatactgtcagtcttgagtgaggtagaggtaagcggaattcctggtgtagcggtgaaatgcgtagatatcaggaggaacatcaaaggcgaaggcagcttactgggcctatactgacgctgaggaacgaaagcgtggggagcaaac
SEQ_LEN: 425
SCORES: ?????B?BB!-5@BC9CFFB!+C!C!E?@B=!,!5!FE,!77!CE)!!:CD=@DEFHDDBCFFEE8:=@,BD=,3;BEEEE?;BCCECCCE?B;?B3:AA88?:**:CAAA88:::::??8*)AC).8?)*:???:0?CEAAE8**4;8;AA:??:8:??8?EE:AC?;42.)4?*::8).'''..'.8C8A)8AAE:A;?'0:?CAEE?8''4A8?*8'0)''8*0*/:2?.?32;816666..//((6/'(.'''6(6;64-2'8(6///-'96;/(;8@;*78*;@@@@9**0*88;@0*2*9@3*D199;99DDD99D@EDECCDDDDDEEDD9EDDDDEEEDDEC!DE@AC5A=CCAE!5C9+E=@DB@DFFC=C+EEEA9.A999EC7@7-C!A/ECAAEEEEEEC@@@@@-5-!!!!!
OVERLAP_LEN: 9
+HAMMING_DIST: 0
---
-SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:13906:2139 1:N:0:14:overlap=96
+SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:13906:2139 1:N:0:14:overlap=96:hamming=2
SEQ: tagggaatcttgcacaatggaggaaactctgatgcagcgatgccgcgtgagtgaagaaggcctttgggttgtaaagctctttcgtcggggaagaaaatgactgtacccgaataagaaggtccggctaacttcgtgccagcagccgcggtaatacgAAGGGACCTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGTTCGTAGGTGGTTAAAAAAGTTGGTGGTGAAATCCCAGAGCTTAACTCTGGAACTGccatcaaaactttttagctagagtatgatagaggaaagcagaatttctagtgtagaggtgaaattcgtagatattagaaagaataccgattgcgaaggcagctttctggatcattactgacgctgaggaacgaaagcatgggtagcgaaga
SEQ_LEN: 402
SCORES: !???9?BBBDBDDBDDFFFFFFHHHIFHFHHIHHFHHHHEDDEGHHHCEHE?EFHFGHFHFHIHIIIHCECEHHHIFHIIIHGHFHHHHHHEEFFFFFEEFFFEFFFEEEEEFFFFFFFCEFBEEDEFFFFFFEFEFFEEFFFFDDDDA?EEEFFB@C?8B=:7785;660@?@FEB?7B;?2BBA?CED@@@B?=5@DEFD??;8@E0@BEC788>@?95*4-:=7BEB8B7BB2@B?8&+98>2CDBB>A=AEECCEEEEDB=FFEFEEEDEFFFBFFFFF?.HFDBDD?FFHFHFFFHFFFHHGGGGCBFAHHIHFHHHGCGGIIIHHHGE5!HFBDE@E@DGHHHFFFHIHHFFGFDC0?E?FHEHEHIIHHFHIHFHFFFFFFDDBDBADD?BB??!
OVERLAP_LEN: 96
+HAMMING_DIST: 2
---
-SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:14865:2158 1:N:0:14:overlap=96
+SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:14865:2158 1:N:0:14:overlap=96:hamming=0
SEQ: tagggaatcttgcacaatggaggaaactctgatgcagcgatgccgcgtgagtgaagaaggcctttgggttgtaaagctctttcgtcggggaagaaaatgactgtacccgaataagaaggtccggctaacttcgtgccagcagccgcggtaatacGAAGGGACCTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGTTCGTAGGTGGTTAAAAAAGTTGGTGGTGAAATCCCAGAGCTTAACTCTGGAACTgccatcaaaactttttagctagagtatgatagaggaaagcagaatttctagtgtagaggtgaaattcgtagatattagaaagaataccgattgcgaaggcagctttctggatcattactgacactgaggaacgaaagcatgggtagcgaag
SEQ_LEN: 401
SCORES: ?????BBBBBDDBDDBFFFFFFECHFHFHFHGHHFGD?!CFHHHHHH!DEEFFFFDFFHF@FHHHIFHEEHHHIIHHHIHIHHHDEHHHHFEEFFF?FEEEEFEEFFEE:!CEEFEFFFBEEEEEDEFE::AE:?AECEFEF?A!;D88;CEEEC@@668CCBC??C;8?+02>CEA>CB@;;?8@B@CCDE@D@?5@B>ABB:2::1>CDB=/@>BB@<19=4?6:7@A6@=?36875:8?A=DBDC@ACC;EEEEEEDEEDD;=DFB?FFDFFFB?C=FFFF?HHFDD@.DCFHHFHHHGFHHFHHGECBFFFEF?CHFGEA??HHHIHHHFC!E@DDEDDFHHHDHGGFBE?C=FF?CC?=F?CHFA9C0GBHFFHF;HFFF?FFBBBBB?BB?B??!
OVERLAP_LEN: 96
+HAMMING_DIST: 0
---
-SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:17246:2253 1:N:0:14:overlap=96
+SEQ_NAME: M01168:16:000000000-A1R9L:1:1101:17246:2253 1:N:0:14:overlap=96:hamming=1
SEQ: agggaatcttgcacaatgggggaaaccctgatgcagcgatgccgcgtgagtgaagaaggcccttgggttgtaaagctctttcgtcggggaagaaaatgactgtacccgaataagaaggtccggctaacttcgtgccagcagccgcggtaatacGAAGGGACCTAGCGTAGTTCGGAATTACTGGGCTTAAAGAGTTCGTAGGTGGTTAAAAAAGTTGATGGTGAAATCCCAAGGCTCAACCTTGGAACTgccatcaaaactttttagctagagtatgatagaggaaagtggaatttctagtgtagaggtgaaattcgtagatattagaaagaacatcaaaagcgaaggcaactttctggatcattactgacactgaggaacgaaagcatgggtagcgaagagg
SEQ_LEN: 403
SCORES: !???B-!-?!BBBBBFFF;!CEEECC;EFFH=EGBE=CEFHD@CDCE!+!5C=FBAEADHFEHHHHHHECCG=CD=CCF=DC=DDACEE5:DEEEECBECEC3?3;?B:@CEEECEEC?CC????E?CAAAE?ACEEEEEEEE????A;)::ABCEB8CA669967)2955CA@=FEB=??<2:>?@6@7B7>449*<<9<957:9D;AD88;60?CEB2@8:;:B?>@749-663=<2-6<>46>A=64.EFEEEEDDEFBDDFFFFFFHHHHFFCC.?HFFCFHFDFDDFFGBBHHHHFGDHHGGBHIHHIIHHHDGHHHFECHHIIHFHGEA+HHFC@EHIIHHGFEGHHFGDHGDDC0?GGFGC90A0DBFFIIHFHFFFFFBBBBBDBB??BB?????
OVERLAP_LEN: 96
+HAMMING_DIST: 1
---
def initialize(entry1, entry2, options)
@entry1 = entry1
@entry2 = entry2
+ @overlap = 0
+ @offset1 = 0
+ @offset2 = 0
@options = options
@options[:mismatches_max] ||= 0
@options[:overlap_min] ||= 1
# Method to locate overlapping matches between two sequences.
def match
if @options[:overlap_max] # optional cap on how long an overlap may be
- overlap = [@options[:overlap_max], @entry1.length, @entry2.length].min
+ @overlap = [@options[:overlap_max], @entry1.length, @entry2.length].min # start from the longest overlap allowed by the cap and both lengths
else
- overlap = [@entry1.length, @entry2.length].min
+ @overlap = [@entry1.length, @entry2.length].min # no cap: longest overlap is the shorter sequence length
end
- while overlap >= @options[:overlap_min]
- mismatches_max = (overlap * @options[:mismatches_max] * 0.01).round
-
- offset1 = @entry2.length - overlap
- offset2 = 0
+ diff = @entry1.length - @entry2.length # how much longer @entry1 is than @entry2
+ diff = 0 if diff < 0 # clamp: a shorter @entry1 leaves no surplus
- if match_C(@entry1.seq, @entry2.seq, offset1, offset2, overlap, mismatches_max)
- entry_left = @entry1[0 ... @entry1.length - overlap]
- entry_right = @entry2[overlap .. -1]
+ @offset1 = @entry1.length - @overlap - diff # start of the first candidate window in @entry1
- if @entry1.qual and @entry2.qual
- entry_overlap1 = @entry1[-1 * overlap .. -1]
- entry_overlap2 = @entry2[0 ... overlap]
+ while @overlap >= @options[:overlap_min] # stop once the overlap would drop below the configured minimum
+ mismatches_max = (@overlap * @options[:mismatches_max] * 0.01).round # option is scaled by 0.01, i.e. applied as a percentage of the current overlap length
+
+ # puts "diff: #{diff} offset1: #{@offset1} offset2: #{@offset2} overlap: #{@overlap}"
- entry_overlap = merge_overlap(entry_overlap1, entry_overlap2)
- else
- entry_overlap = @entry1[-1 * overlap .. -1]
- end
-
- entry_left.seq.downcase!
- entry_overlap.seq.upcase!
- entry_right.seq.downcase!
+ if mismatches = match_C(@entry1.seq, @entry2.seq, @offset1, @offset2, @overlap, mismatches_max) and mismatches >= 0 # low-precedence `and`: assign first, then accept only a non-negative mismatch count
entry_merged = entry_left + entry_overlap + entry_right # pieces come from the entry_left / entry_overlap / entry_right helper methods
- entry_merged.seq_name = @entry1.seq_name + ":overlap=#{overlap}"
+ entry_merged.seq_name = @entry1.seq_name + ":overlap=#{@overlap}:hamming=#{mismatches}" # record overlap length and mismatch count in the name
return entry_merged # first accepted window wins
end
- overlap -= 1
+ if diff > 0 # surplus left: keep the overlap length and only slide the window
+ diff -= 1
+ else # surplus used up: each further step shortens the overlap by one
+ @overlap -= 1
+ end
+
+ @offset1 += 1 # move the window start in @entry1 one position to the right
+ end # falling out of the loop means no window was accepted; nothing is returned explicitly
+ end
+
+ # Method to extract and downcase the left part of an assembled pair.
+ def entry_left
+ entry = @entry1[0 ... @offset1] # slice of @entry1 before @offset1 (end-exclusive range)
+ entry.seq.downcase! # lowercase the slice's sequence via downcase!
+ entry # return the lowercased slice
+ end
+
+ # Method to extract and downcase the right part of an assembled pair.
+ def entry_right
+ if @entry1.length > @offset1 + @overlap # @entry1 continues past the end of the overlap window
+ entry = @entry1[@offset1 + @overlap .. -1] # right part is the remainder of @entry1
+ else # overlap reaches the end of @entry1
+ entry = @entry2[@offset2 + @overlap .. -1] # right part is whatever follows the overlap in @entry2
+ end
+
+ entry.seq.downcase! # lowercase the slice's sequence via downcase!
+ entry # return the lowercased slice
+ end
+
+ # Method to extract and upcase the overlapping part of an assembled pair.
+ def entry_overlap
+ if @entry1.qual and @entry2.qual # both entries have a qual value
+ entry_overlap1 = @entry1[@offset1 ... @offset1 + @overlap] # overlap window taken from @entry1
+ entry_overlap2 = @entry2[@offset2 ... @offset2 + @overlap] # overlap window taken from @entry2
+
+ entry = merge_overlap(entry_overlap1, entry_overlap2) # combine both windows (merge_overlap is defined elsewhere in the class)
+ else # at least one entry lacks qual
+ entry = @entry1[@offset1 ... @offset1 + @overlap] # use @entry1's window as-is
end
+
+ entry.seq.upcase! # uppercase the overlap's sequence via upcase!
+ entry # return the uppercased overlap
end
# Method to merge sequence and quality scores in an overlap.
# C method for determining if two strings of equal length match
# given a maximum allowed mismatches and allowing for IUPAC
- # ambiguity codes. Returns true if match, else false.
+ # ambiguity codes. Returns the number of mismatches if match, else -1.
builder.c %{
VALUE match_C(
VALUE _string1, // String 1
match++;
if (match >= max_match) {
- return Qtrue;
+ return UINT2NUM(mismatch);
}
}
else
mismatch++;
if (mismatch > max_mismatch) {
- return Qfalse;
+ return INT2NUM(-1);
}
}
}
- return Qfalse;
+ return INT2NUM(-1);
}
}
end
assert_equal("ATCG", Assemble.pair(Seq.new(seq_name: "t1", seq: "atcg"), Seq.new(seq_name: "t2", seq: "atcg")).seq)
end
+ test "Assemble.pair with first sequence longer than second and terminal overlap returns correctly" do
+ e1 = Seq.new(seq_name: "t1", seq: "AGGCGT") # longer first sequence
+ e2 = Seq.new(seq_name: "t2", seq: "GT") # equals the last two bases of e1
+ a = Assemble.pair(e1, e2)
+ assert_equal("t1:overlap=2:hamming=0", a.seq_name) # e1's name plus overlap length and mismatch count
+ assert_equal("aggcGT", a.seq) # overlap uppercased, left flank lowercased, no right flank
+ end
+
+ test "Assemble.pair with first sequence longer than second and internal overlap returns correctly" do
+ e1 = Seq.new(seq_name: "t1", seq: "AGGCGT") # longer first sequence
+ e2 = Seq.new(seq_name: "t2", seq: "GC") # occurs in the middle of e1
+ a = Assemble.pair(e1, e2)
+ assert_equal("t1:overlap=2:hamming=0", a.seq_name) # e1's name plus overlap length and mismatch count
+ assert_equal("agGCgt", a.seq) # overlap uppercased, both flanks (from e1) lowercased
+ end
+
+ test "Assemble.pair with first sequence longer than second and initial overlap returns correctly" do
+ e1 = Seq.new(seq_name: "t1", seq: "GTCAGA") # longer first sequence
+ e2 = Seq.new(seq_name: "t2", seq: "GT") # equals the first two bases of e1
+ a = Assemble.pair(e1, e2)
+ assert_equal("t1:overlap=2:hamming=0", a.seq_name) # e1's name plus overlap length and mismatch count
+ assert_equal("GTcaga", a.seq) # overlap uppercased, right flank (from e1) lowercased
+ end
+
+ test "Assemble.pair with first sequence shorter than second and initial overlap returns correctly" do
+ e1 = Seq.new(seq_name: "t1", seq: "AG") # shorter first sequence
+ e2 = Seq.new(seq_name: "t2", seq: "AGTCAG") # begins with the whole of e1
+ a = Assemble.pair(e1, e2)
+ assert_equal("t1:overlap=2:hamming=0", a.seq_name) # e1's name plus overlap length and mismatch count
+ assert_equal("AGtcag", a.seq) # overlap uppercased, right flank (from e2) lowercased
+ end
+
test "Assemble.pair with overlap and overlap_min returns correctly" do
assert_nil(Assemble.pair(Seq.new(seq: "atcg"), Seq.new(seq: "gatc"), :overlap_min => 2))
assert_equal("atCGat", Assemble.pair(Seq.new(seq_name: "t1", seq: "atcg"), Seq.new(seq_name: "t2", seq: "cgat"), :overlap_min => 2).seq)
test "Assemble.pair with overlap and overlap_max returns correctly" do
assert_equal("aTCGa", Assemble.pair(Seq.new(seq_name: "t1", seq: "atcg"), Seq.new(seq_name: "t2", seq: "tcga"), :overlap_max => 3).seq) # a 3-base overlap "TCG" fits within the cap
- assert_nil(Assemble.pair(Seq.new(seq: "atcg"), Seq.new(seq: "atcg"), :overlap_max => 3))
+ assert_nil(Assemble.pair(Seq.new(seq_name: "t1", seq: "atcg"), Seq.new(seq_name: "t2", seq: "atcg"), :overlap_max => 3)) # identical sequences need a 4-base overlap, which the cap of 3 rules out
end
test "Assemble.pair with overlap returns correct quality" do