From bb827f984d41390cbcf7a478ae1c6831dbf1dd99 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Mon, 9 Sep 2013 18:58:53 +0000 Subject: [PATCH] fixing usearch upgrade git-svn-id: http://biopieces.googlecode.com/svn/trunk@2190 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/uclust_seq | 7 +- bp_bin/usearch_seq | 23 +- bp_test/in/usearch_seq.in | 28 +++ bp_test/in/usearch_seq.in.db | 14 ++ bp_test/lib/test.sh | 1 + bp_test/out/uclust_seq.out.2 | 14 +- bp_test/out/uclust_seq.out.3 | 8 +- bp_test/out/uclust_seq.out.4 | 42 ++++ bp_test/out/usearch_seq.out.1 | 253 ++++++++++++++++++++ bp_test/out/usearch_seq.out.2 | 403 ++++++++++++++++++++++++++++++++ bp_test/out/usearch_seq.out.3 | 253 ++++++++++++++++++++ bp_test/out/usearch_seq.out.4 | 253 ++++++++++++++++++++ bp_test/out/usearch_seq.out.5 | 178 ++++++++++++++ bp_test/test/test_uclust_seq | 8 +- bp_test/test/test_usearch_seq | 23 ++ code_ruby/lib/maasha/usearch.rb | 85 +++++-- 16 files changed, 1551 insertions(+), 42 deletions(-) create mode 100644 bp_test/in/usearch_seq.in create mode 100644 bp_test/in/usearch_seq.in.db create mode 100644 bp_test/out/uclust_seq.out.4 create mode 100644 bp_test/out/usearch_seq.out.1 create mode 100644 bp_test/out/usearch_seq.out.2 create mode 100644 bp_test/out/usearch_seq.out.3 create mode 100644 bp_test/out/usearch_seq.out.4 create mode 100644 bp_test/out/usearch_seq.out.5 create mode 100755 bp_test/test/test_usearch_seq diff --git a/bp_bin/uclust_seq b/bp_bin/uclust_seq index f155bdf..5a6d0ef 100755 --- a/bp_bin/uclust_seq +++ b/bp_bin/uclust_seq @@ -33,9 +33,10 @@ require 'maasha/fasta' require 'maasha/usearch' casts = [] -casts << {:long=>'no_sort', :short=>'n', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -casts << {:long=>'comp', :short=>'c', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -casts << {:long=>'identity', :short=>'i', :type=>'float', :mandatory=>true, :default=>0.9, :allowed=>nil, :disallowed=>nil} +casts << {long: 'no_sort', short: 'n', type: 'flag', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'comp', short: 'c', type: 'flag', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'identity', short: 'i', type: 'float', mandatory: true, default: 0.9, allowed: nil, disallowed: nil} +casts << {long: 'cpus', short: 'C', type: 'uint', mandatory: false, default: 1, allowed: nil, disallowed: "0"} options = Biopieces.options_parse(ARGV, casts) diff --git a/bp_bin/usearch_seq b/bp_bin/usearch_seq index c390708..d139957 100755 --- a/bp_bin/usearch_seq +++ b/bp_bin/usearch_seq @@ -33,13 +33,22 @@ require 'maasha/fasta' require 'maasha/usearch' casts = [] -casts << {:long=>'database', :short=>'d', :type=>'file!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -casts << {:long=>'identity', :short=>'i', :type=>'float', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -casts << {:long=>'e_val', :short=>'e', :type=>'float', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {long: 'program', short: 'p', type: 'string', mandatory: false, default: 'global', allowed: "local,global", disallowed: nil} +casts << {long: 'database', short: 'd', type: 'file!', mandatory: true, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'identity', short: 'i', type: 'float', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'e_val', short: 'e', type: 'float', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'cpus', short: 'c', type: 'uint', mandatory: false, default: 1, allowed: nil, disallowed: "0"} +casts << {long: 'maxaccepts', short: 'm', type: 'uint', mandatory: false, default: 0, allowed: nil, disallowed: nil} options = Biopieces.options_parse(ARGV, casts) -raise ArgumentError, "--identity or --e_val must be specified" unless options[:identity] or options[:e_val] +if options[:program] == 'global' + raise ArgumentError, "--identity be specified for global search" unless options[:identity] + raise ArgumentError, "--e_val is invalid for global search" if options[:e_val] +else + raise ArgumentError, "--identity or --e_val must be specified" unless options[:identity] or options[:e_val] +end + tmpdir = Biopieces.mktmpdir infile = File.join(tmpdir, "in.fna") @@ -58,7 +67,11 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| us = Usearch.new(infile, outfile, options) - us.usearch + case options[:program] + when "local" then us.usearch_local + when "global" then us.usearch_global + else raise "no such program #{options[:program]}" + end us.each_hit do |record| output.puts record diff --git a/bp_test/in/usearch_seq.in b/bp_test/in/usearch_seq.in new file mode 100644 index 0000000..b53c0ae --- /dev/null +++ b/bp_test/in/usearch_seq.in @@ -0,0 +1,28 @@ +SEQ_NAME: test1 +SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test1_100 +SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test1_2 +SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test2 +SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test2_100 +SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test2_1 +SEQ: ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test1_rc +SEQ: cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca +SEQ_LEN: 59 +--- diff --git a/bp_test/in/usearch_seq.in.db b/bp_test/in/usearch_seq.in.db new file mode 100644 index 0000000..fc369b2 --- /dev/null +++ b/bp_test/in/usearch_seq.in.db @@ -0,0 +1,14 @@ +>test1 +tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg +>test1_100 +tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg +>test1_2 +tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg +>test2 +ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +>test2_100 +ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +>test2_1 +ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +>test1_rc +cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca diff --git a/bp_test/lib/test.sh b/bp_test/lib/test.sh index f560e20..7ce4190 100755 --- a/bp_test/lib/test.sh +++ b/bp_test/lib/test.sh @@ -14,6 +14,7 @@ function run msg="${command/$BP_DIR/\$BP_DIR}" msg="${msg//$BP_TMP/\$BP_TMP}" + msg="${msg/$BP_DIR/\$BP_DIR}" echo -n "Testing $msg ... " eval $command > /dev/null 2>&1 diff --git a/bp_test/out/uclust_seq.out.2 b/bp_test/out/uclust_seq.out.2 index 5c1b6e4..43691ea 100644 --- a/bp_test/out/uclust_seq.out.2 +++ b/bp_test/out/uclust_seq.out.2 @@ -1,20 +1,20 @@ SEQ_NAME: test1 SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg SEQ_LEN: 59 -CLUSTER: 3 +CLUSTER: 2 IDENT: * --- SEQ_NAME: test1_100 SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg SEQ_LEN: 59 -CLUSTER: 3 +CLUSTER: 2 IDENT: 100 --- SEQ_NAME: test1_2 SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg SEQ_LEN: 59 -CLUSTER: 4 -IDENT: * +CLUSTER: 2 +IDENT: 96 --- SEQ_NAME: test2 SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac @@ -31,12 +31,12 @@ IDENT: * SEQ_NAME: test2_1 SEQ: ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac SEQ_LEN: 59 -CLUSTER: 1 -IDENT: * +CLUSTER: 0 +IDENT: 96 --- SEQ_NAME: test1_rc SEQ: cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca SEQ_LEN: 59 -CLUSTER: 2 +CLUSTER: 1 IDENT: * --- diff --git a/bp_test/out/uclust_seq.out.3 b/bp_test/out/uclust_seq.out.3 index 8052dfc..5c1b6e4 100644 --- a/bp_test/out/uclust_seq.out.3 +++ b/bp_test/out/uclust_seq.out.3 @@ -1,19 +1,19 @@ SEQ_NAME: test1 SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg SEQ_LEN: 59 -CLUSTER: 2 -IDENT: 100 +CLUSTER: 3 +IDENT: * --- SEQ_NAME: test1_100 SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg SEQ_LEN: 59 -CLUSTER: 2 +CLUSTER: 3 IDENT: 100 --- SEQ_NAME: test1_2 SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg SEQ_LEN: 59 -CLUSTER: 3 +CLUSTER: 4 IDENT: * --- SEQ_NAME: test2 diff --git a/bp_test/out/uclust_seq.out.4 b/bp_test/out/uclust_seq.out.4 new file mode 100644 index 0000000..8052dfc --- /dev/null +++ b/bp_test/out/uclust_seq.out.4 @@ -0,0 +1,42 @@ +SEQ_NAME: test1 +SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg +SEQ_LEN: 59 +CLUSTER: 2 +IDENT: 100 +--- +SEQ_NAME: test1_100 +SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg +SEQ_LEN: 59 +CLUSTER: 2 +IDENT: 100 +--- +SEQ_NAME: test1_2 +SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg +SEQ_LEN: 59 +CLUSTER: 3 +IDENT: * +--- +SEQ_NAME: test2 +SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +CLUSTER: 0 +IDENT: 100 +--- +SEQ_NAME: test2_100 +SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +CLUSTER: 0 +IDENT: * +--- +SEQ_NAME: test2_1 +SEQ: ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +CLUSTER: 1 +IDENT: * +--- +SEQ_NAME: test1_rc +SEQ: cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca +SEQ_LEN: 59 +CLUSTER: 2 +IDENT: * +--- diff --git a/bp_test/out/usearch_seq.out.1 b/bp_test/out/usearch_seq.out.1 new file mode 100644 index 0000000..75e33c7 --- /dev/null +++ b/bp_test/out/usearch_seq.out.1 @@ -0,0 +1,253 @@ +SEQ_NAME: test1 +SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test1_100 +SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test1_2 +SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test2 +SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test2_100 +SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test2_1 +SEQ: ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test1_rc +SEQ: cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca +SEQ_LEN: 59 +--- +REC_TYPE: USEARCH +Q_ID: test1 +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1 +S_ID: test1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1 +S_ID: test1_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_100 +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1_100 +S_ID: test1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_100 +S_ID: test1_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_2 +S_ID: test1_2 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2 +S_ID: test2 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2 +S_ID: test2_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2_100 +S_ID: test2 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2_100 +S_ID: test2_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2_1 +S_ID: test2_1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_rc +S_ID: test1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1_rc +S_ID: test1_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1_rc +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- diff --git a/bp_test/out/usearch_seq.out.2 b/bp_test/out/usearch_seq.out.2 new file mode 100644 index 0000000..3739ccc --- /dev/null +++ b/bp_test/out/usearch_seq.out.2 @@ -0,0 +1,403 @@ +SEQ_NAME: test1 +SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test1_100 +SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test1_2 +SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test2 +SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test2_100 +SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test2_1 +SEQ: ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test1_rc +SEQ: cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca +SEQ_LEN: 59 +--- +REC_TYPE: USEARCH +Q_ID: test1 +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 36 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 7 +Q_END: 42 +S_BEG: 16 +S_END: 51 +E_VAL: 1.1e-16 +SCORE: 67.6 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1 +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 6 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 2 +Q_END: 7 +S_BEG: 51 +S_END: 56 +E_VAL: 5.2 +SCORE: 12.2 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1 +S_ID: test1_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1 +S_ID: test1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_100 +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 36 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 7 +Q_END: 42 +S_BEG: 16 +S_END: 51 +E_VAL: 1.1e-16 +SCORE: 67.6 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_100 +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 6 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 2 +Q_END: 7 +S_BEG: 51 +S_END: 56 +E_VAL: 5.2 +SCORE: 12.2 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_100 +S_ID: test1_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_100 +S_ID: test1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_2 +S_ID: test1 +IDENT: 100.0 +ALIGN_LEN: 6 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 2 +Q_END: 7 +S_BEG: 2 +S_END: 7 +E_VAL: 5.2 +SCORE: 12.2 +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1_2 +S_ID: test1_100 +IDENT: 100.0 +ALIGN_LEN: 6 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 2 +Q_END: 7 +S_BEG: 2 +S_END: 7 +E_VAL: 5.2 +SCORE: 12.2 +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1_2 +S_ID: test1_2 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2 +S_ID: test2 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-24 +SCORE: 93.5 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2 +S_ID: test2_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-24 +SCORE: 93.5 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2_100 +S_ID: test2 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-24 +SCORE: 93.5 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2_100 +S_ID: test2_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-24 +SCORE: 93.5 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2_1 +S_ID: test2 +IDENT: 100.0 +ALIGN_LEN: 6 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 14 +Q_END: 19 +S_BEG: 14 +S_END: 19 +E_VAL: 5.2 +SCORE: 12.2 +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test2_1 +S_ID: test2 +IDENT: 100.0 +ALIGN_LEN: 6 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 53 +Q_END: 58 +S_BEG: 20 +S_END: 25 +E_VAL: 5.2 +SCORE: 12.2 +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test2_1 +S_ID: test2 +IDENT: 100.0 +ALIGN_LEN: 6 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 20 +Q_END: 25 +S_BEG: 53 +S_END: 58 +E_VAL: 5.2 +SCORE: 12.2 +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test2_1 +S_ID: test2_100 +IDENT: 100.0 +ALIGN_LEN: 6 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 20 +Q_END: 25 +S_BEG: 53 +S_END: 58 +E_VAL: 5.2 +SCORE: 12.2 +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test2_1 +S_ID: test2_1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.1e-26 +SCORE: 100.8 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2_1 +S_ID: test2_100 +IDENT: 100.0 +ALIGN_LEN: 6 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 14 +Q_END: 19 +S_BEG: 14 +S_END: 19 +E_VAL: 5.2 +SCORE: 12.2 +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test2_1 +S_ID: test2_100 +IDENT: 100.0 +ALIGN_LEN: 6 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 53 +Q_END: 58 +S_BEG: 20 +S_END: 25 +E_VAL: 5.2 +SCORE: 12.2 +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1_rc +S_ID: test1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1_rc +S_ID: test1_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1_rc +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: + +--- diff --git a/bp_test/out/usearch_seq.out.3 b/bp_test/out/usearch_seq.out.3 new file mode 100644 index 0000000..dac569e --- /dev/null +++ b/bp_test/out/usearch_seq.out.3 @@ -0,0 +1,253 @@ +SEQ_NAME: test1 +SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test1_100 +SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test1_2 +SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test2 +SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test2_100 +SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test2_1 +SEQ: ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test1_rc +SEQ: cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca +SEQ_LEN: 59 +--- +REC_TYPE: USEARCH +Q_ID: test1 +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1 +S_ID: test1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1 +S_ID: test1_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_100 +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1_100 +S_ID: test1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_100 +S_ID: test1_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_2 +S_ID: test1_2 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2 +S_ID: test2 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-24 +SCORE: 93.5 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2 +S_ID: test2_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-24 +SCORE: 93.5 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2_100 +S_ID: test2 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-24 +SCORE: 93.5 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2_100 +S_ID: test2_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-24 +SCORE: 93.5 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2_1 +S_ID: test2_1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.1e-26 +SCORE: 100.8 +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_rc +S_ID: test1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1_rc +S_ID: test1_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1_rc +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: 1.8e-29 +SCORE: 110.1 +STRAND: + +--- diff --git a/bp_test/out/usearch_seq.out.4 b/bp_test/out/usearch_seq.out.4 new file mode 100644 index 0000000..75e33c7 --- /dev/null +++ b/bp_test/out/usearch_seq.out.4 @@ -0,0 +1,253 @@ +SEQ_NAME: test1 +SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test1_100 +SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test1_2 +SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test2 +SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test2_100 +SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test2_1 +SEQ: ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test1_rc +SEQ: cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca +SEQ_LEN: 59 +--- +REC_TYPE: USEARCH +Q_ID: test1 +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1 +S_ID: test1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1 +S_ID: test1_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_100 +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1_100 +S_ID: test1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_100 +S_ID: test1_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_2 +S_ID: test1_2 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2 +S_ID: test2 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2 +S_ID: test2_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2_100 +S_ID: test2 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2_100 +S_ID: test2_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2_1 +S_ID: test2_1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_rc +S_ID: test1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1_rc +S_ID: test1_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1_rc +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- diff --git a/bp_test/out/usearch_seq.out.5 b/bp_test/out/usearch_seq.out.5 new file mode 100644 index 0000000..ab230fa --- /dev/null +++ b/bp_test/out/usearch_seq.out.5 @@ -0,0 +1,178 @@ +SEQ_NAME: test1 +SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test1_100 +SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test1_2 +SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg +SEQ_LEN: 59 +--- +SEQ_NAME: test2 +SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test2_100 +SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test2_1 +SEQ: ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac +SEQ_LEN: 59 +--- +SEQ_NAME: test1_rc +SEQ: cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca +SEQ_LEN: 59 +--- +REC_TYPE: USEARCH +Q_ID: test1 +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1 +S_ID: test1_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_100 +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1_100 +S_ID: test1_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_2 +S_ID: test1_2 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2 +S_ID: test2_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2_100 +S_ID: test2_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test2_1 +S_ID: test2_1 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- +REC_TYPE: USEARCH +Q_ID: test1_rc +S_ID: test1_100 +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: - +--- +REC_TYPE: USEARCH +Q_ID: test1_rc +S_ID: test1_rc +IDENT: 100.0 +ALIGN_LEN: 59 +MISMATCHES: 0 +GAPS: 0 +Q_BEG: 0 +Q_END: 58 +S_BEG: 0 +S_END: 58 +E_VAL: * +SCORE: * +STRAND: + +--- diff --git a/bp_test/test/test_uclust_seq b/bp_test/test/test_uclust_seq index 583737b..d19ecfa 100755 --- a/bp_test/test/test_uclust_seq +++ b/bp_test/test/test_uclust_seq @@ -6,10 +6,14 @@ run "$bp -I $in -O $tmp" assert_no_diff $tmp $out.1 clean -run "$bp -I $in -i 1 -O $tmp" +run "$bp -I $in -C 2 -O $tmp" assert_no_diff $tmp $out.2 clean -run "$bp -I $in -i 1 -c -O $tmp" +run "$bp -I $in -i 1 -O $tmp" assert_no_diff $tmp $out.3 clean + +run "$bp -I $in -i 1 -c -O $tmp" +assert_no_diff $tmp $out.4 +clean diff --git a/bp_test/test/test_usearch_seq b/bp_test/test/test_usearch_seq new file mode 100755 index 0000000..394a602 --- /dev/null +++ b/bp_test/test/test_usearch_seq @@ -0,0 +1,23 @@ +#!/bin/bash + +source "$BP_DIR/bp_test/lib/test.sh" + +run "$bp -I $in -d $in.db -i 1 -O $tmp" +assert_no_diff $tmp $out.1 +clean + +run "$bp -I $in -d $in.db -i 1 -p local -O $tmp" +assert_no_diff $tmp $out.2 +clean + +run "$bp -I $in -d $in.db -i 0.99 -e 0.1 -p local -O $tmp" +assert_no_diff $tmp $out.3 +clean + +run "$bp -I $in -d $in.db -i 0.99 -O $tmp" +assert_no_diff $tmp $out.4 +clean + +run "$bp -I $in -d $in.db -i 1 -m 1 -O $tmp" +assert_no_diff $tmp $out.5 +clean diff --git a/code_ruby/lib/maasha/usearch.rb b/code_ruby/lib/maasha/usearch.rb index 0bc7235..6375791 100644 --- a/code_ruby/lib/maasha/usearch.rb +++ b/code_ruby/lib/maasha/usearch.rb @@ -43,7 +43,9 @@ class Usearch # according to decending sequence length. def sortbylength # usearch -sortbylength seqs.fasta -output seqs_sorted.fasta -minseqlength 64 - @command << "usearch -sortbylength #{@infile} -output #{@infile}.sort" + @command << "usearch" + @command << "-sortbylength #{@infile}" + @command << "-output #{@infile}.sort" execute @@ -54,7 +56,9 @@ class Usearch # according to cluster size. def sortbysize # usearch -sortbysize seqs.fasta -output seqs_sorted.fasta -minsize 4 - @command << "usearch -sortbysize #{@infile} -output #{@infile}.sort" + @command << "usearch" + @command << "-sortbysize #{@infile}" + @command << "-output #{@infile}.sort" execute @@ -64,7 +68,11 @@ class Usearch # Method to execute cluster_fast. def cluster_fast # usearch -cluster_fast query.fasta -id 0.9 -centroids nr.fasta -uc clusters.uc - @command << "usearch -cluster_fast #{@infile} -id #{@options[:identity]} -uc #{@outfile}" + @command << "usearch" + @command << "-cluster_fast #{@infile}" + @command << "-id #{@options[:identity]}" + @command << "-uc #{@outfile}" + @command << "-threads #{@options[:cpus]}" execute end @@ -73,21 +81,49 @@ class Usearch # NB sequences must be sorted with sortbylength or sortbysize. def cluster_smallmem # usearch -cluster_smallmem query.fasta -id 0.9 -centroids nr.fasta -uc clusters.uc - @command << "usearch -cluster_smallmem #{@infile} -id #{@options[:identity]} -uc #{@outfile}" + @command << "usearch" + @command << "-cluster_smallmem #{@infile}" + @command << "-id #{@options[:identity]}" + @command << "-uc #{@outfile}" @command << "-strand both" if @options[:comp] execute end - # Method to execute database search. - def usearch - @command << "usearch --query #{@infile} --db #{@options[:database]} --userout #{@outfile}" - @command << "--userfields target+tloz+thiz+query+bits+strand" - @command << "--id #{@options[:identity]}" if @options[:identity] - @command << "--evalue #{@options[:e_val]}" if @options[:e_val] - @command << "--rev" + # Method to execute database local search. + def usearch_local + # usearch -usearch_local query.fasta -db proteins.udb -id 0.8 -alnout results.aln + # usearch -usearch_local query.fasta -db ESTs.fasta -id 0.9 -evalue 1e-6 -blast6out results.m8 -strand plus -maxaccepts 8 -maxrejects 256 + + @command << "usearch" + @command << "-usearch_local #{@infile}" + @command << "-db #{@options[:database]}" + @command << "-blast6out #{@outfile}" + @command << "-strand both" + @command << "-id #{@options[:identity]}" if @options[:identity] + @command << "-evalue #{@options[:e_val]}" if @options[:e_val] + @command << "-maxaccepts #{@options[:maxaccepts]}" + @command << "-threads #{@options[:cpus]}" - execute + execute + end + + # Method to execute database global search. + def usearch_global + # usearch -usearch_global query.fasta -db proteins.udb -id 0.8 -alnout results.aln + # usearch -usearch_global query.fasta -db ESTs.fasta -id 0.9 -blast6out results.m8 -strand plus -maxaccepts 8 -maxrejects 256 + + @command << "usearch" + @command << "-usearch_global #{@infile}" + @command << "-db #{@options[:database]}" + @command << "-blast6out #{@outfile}" + @command << "-strand both" + @command << "-id #{@options[:identity]}" + @command << "-evalue #{@options[:e_val]}" if @options[:e_val] + @command << "-maxaccepts #{@options[:maxaccepts]}" + @command << "-threads #{@options[:cpus]}" + + execute end # Method to execute uchime chimera detection. @@ -147,17 +183,24 @@ class Usearch record = {} File.open(@outfile, "r") do |ios| - ios.gets # skip comment line ios.each_line do |line| fields = line.chomp.split("\t") - - record[:REC_TYPE] = "USEARCH" - record[:S_ID] = fields[0] - record[:S_BEG] = fields[1].to_i - record[:S_END] = fields[2].to_i - record[:Q_ID] = fields[3] - record[:SCORE] = fields[4].to_f - record[:STRAND] = fields[5] + record[:REC_TYPE] = "USEARCH" + record[:Q_ID] = fields[0] + record[:S_ID] = fields[1] + record[:IDENT] = fields[2].to_f + record[:ALIGN_LEN] = fields[3].to_i + record[:MISMATCHES] = fields[4].to_i + record[:GAPS] = fields[5].to_i + record[:Q_BEG] = fields[6].to_i - 1 + record[:Q_END] = fields[7].to_i - 1 + record[:S_BEG] = fields[8].to_i - 1 + record[:S_END] = fields[9].to_i - 1 + record[:E_VAL] = fields[10] == '*' ? '*' : fields[10].to_f + record[:SCORE] = fields[11] == '*' ? '*' : fields[11].to_f + record[:STRAND] = record[:S_BEG].to_i < record[:S_END].to_i ? '+' : '-' + + record[:S_BEG], record[:S_END] = record[:S_END], record[:S_BEG] if record[:STRAND] == '-' yield record end -- 2.39.2