require 'maasha/usearch'
casts = []
-casts << {:long=>'no_sort', :short=>'n', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'comp', :short=>'c', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'identity', :short=>'i', :type=>'float', :mandatory=>true, :default=>0.9, :allowed=>nil, :disallowed=>nil}
+casts << {long: 'no_sort', short: 'n', type: 'flag', mandatory: false, default: nil, allowed: nil, disallowed: nil}
+casts << {long: 'comp', short: 'c', type: 'flag', mandatory: false, default: nil, allowed: nil, disallowed: nil}
+casts << {long: 'identity', short: 'i', type: 'float', mandatory: true, default: 0.9, allowed: nil, disallowed: nil}
+casts << {long: 'cpus', short: 'C', type: 'uint', mandatory: false, default: 1, allowed: nil, disallowed: "0"}
options = Biopieces.options_parse(ARGV, casts)
require 'maasha/usearch'
casts = []
-casts << {:long=>'database', :short=>'d', :type=>'file!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'identity', :short=>'i', :type=>'float', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'e_val', :short=>'e', :type=>'float', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {long: 'program', short: 'p', type: 'string', mandatory: false, default: 'global', allowed: "local,global", disallowed: nil}
+casts << {long: 'database', short: 'd', type: 'file!', mandatory: true, default: nil, allowed: nil, disallowed: nil}
+casts << {long: 'identity', short: 'i', type: 'float', mandatory: false, default: nil, allowed: nil, disallowed: nil}
+casts << {long: 'e_val', short: 'e', type: 'float', mandatory: false, default: nil, allowed: nil, disallowed: nil}
+casts << {long: 'cpus', short: 'c', type: 'uint', mandatory: false, default: 1, allowed: nil, disallowed: "0"}
+casts << {long: 'maxaccepts', short: 'm', type: 'uint', mandatory: false, default: 0, allowed: nil, disallowed: nil}
options = Biopieces.options_parse(ARGV, casts)
-raise ArgumentError, "--identity or --e_val must be specified" unless options[:identity] or options[:e_val]
+if options[:program] == 'global'
+ raise ArgumentError, "--identity be specified for global search" unless options[:identity]
+ raise ArgumentError, "--e_val is invalid for global search" if options[:e_val]
+else
+ raise ArgumentError, "--identity or --e_val must be specified" unless options[:identity] or options[:e_val]
+end
+
tmpdir = Biopieces.mktmpdir
infile = File.join(tmpdir, "in.fna")
us = Usearch.new(infile, outfile, options)
- us.usearch
+ case options[:program]
+ when "local" then us.usearch_local
+ when "global" then us.usearch_global
+ else raise "no such program #{options[:program]}"
+ end
us.each_hit do |record|
output.puts record
--- /dev/null
+SEQ_NAME: test1
+SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_100
+SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_2
+SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test2
+SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test2_100
+SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test2_1
+SEQ: ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_rc
+SEQ: cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca
+SEQ_LEN: 59
+---
--- /dev/null
+>test1
+tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
+>test1_100
+tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
+>test1_2
+tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg
+>test2
+ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+>test2_100
+ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+>test2_1
+ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+>test1_rc
+cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca
msg="${command/$BP_DIR/\$BP_DIR}"
msg="${msg//$BP_TMP/\$BP_TMP}"
+ msg="${msg/$BP_DIR/\$BP_DIR}"
echo -n "Testing $msg ... "
eval $command > /dev/null 2>&1
SEQ_NAME: test1
SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
SEQ_LEN: 59
-CLUSTER: 3
+CLUSTER: 2
IDENT: *
---
SEQ_NAME: test1_100
SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
SEQ_LEN: 59
-CLUSTER: 3
+CLUSTER: 2
IDENT: 100
---
SEQ_NAME: test1_2
SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg
SEQ_LEN: 59
-CLUSTER: 4
-IDENT: *
+CLUSTER: 2
+IDENT: 96
---
SEQ_NAME: test2
SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
SEQ_NAME: test2_1
SEQ: ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
SEQ_LEN: 59
-CLUSTER: 1
-IDENT: *
+CLUSTER: 0
+IDENT: 96
---
SEQ_NAME: test1_rc
SEQ: cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca
SEQ_LEN: 59
-CLUSTER: 2
+CLUSTER: 1
IDENT: *
---
SEQ_NAME: test1
SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
SEQ_LEN: 59
-CLUSTER: 2
-IDENT: 100
+CLUSTER: 3
+IDENT: *
---
SEQ_NAME: test1_100
SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
SEQ_LEN: 59
-CLUSTER: 2
+CLUSTER: 3
IDENT: 100
---
SEQ_NAME: test1_2
SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg
SEQ_LEN: 59
-CLUSTER: 3
+CLUSTER: 4
IDENT: *
---
SEQ_NAME: test2
--- /dev/null
+SEQ_NAME: test1
+SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
+SEQ_LEN: 59
+CLUSTER: 2
+IDENT: 100
+---
+SEQ_NAME: test1_100
+SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
+SEQ_LEN: 59
+CLUSTER: 2
+IDENT: 100
+---
+SEQ_NAME: test1_2
+SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg
+SEQ_LEN: 59
+CLUSTER: 3
+IDENT: *
+---
+SEQ_NAME: test2
+SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+CLUSTER: 0
+IDENT: 100
+---
+SEQ_NAME: test2_100
+SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+CLUSTER: 0
+IDENT: *
+---
+SEQ_NAME: test2_1
+SEQ: ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+CLUSTER: 1
+IDENT: *
+---
+SEQ_NAME: test1_rc
+SEQ: cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca
+SEQ_LEN: 59
+CLUSTER: 2
+IDENT: *
+---
--- /dev/null
+SEQ_NAME: test1
+SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_100
+SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_2
+SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test2
+SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test2_100
+SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test2_1
+SEQ: ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_rc
+SEQ: cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca
+SEQ_LEN: 59
+---
+REC_TYPE: USEARCH
+Q_ID: test1
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1
+S_ID: test1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1
+S_ID: test1_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_100
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1_100
+S_ID: test1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_100
+S_ID: test1_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_2
+S_ID: test1_2
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2
+S_ID: test2
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2
+S_ID: test2_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2_100
+S_ID: test2
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2_100
+S_ID: test2_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2_1
+S_ID: test2_1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_rc
+S_ID: test1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1_rc
+S_ID: test1_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1_rc
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
--- /dev/null
+SEQ_NAME: test1
+SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_100
+SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_2
+SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test2
+SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test2_100
+SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test2_1
+SEQ: ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_rc
+SEQ: cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca
+SEQ_LEN: 59
+---
+REC_TYPE: USEARCH
+Q_ID: test1
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 36
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 7
+Q_END: 42
+S_BEG: 16
+S_END: 51
+E_VAL: 1.1e-16
+SCORE: 67.6
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 6
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 2
+Q_END: 7
+S_BEG: 51
+S_END: 56
+E_VAL: 5.2
+SCORE: 12.2
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1
+S_ID: test1_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1
+S_ID: test1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_100
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 36
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 7
+Q_END: 42
+S_BEG: 16
+S_END: 51
+E_VAL: 1.1e-16
+SCORE: 67.6
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_100
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 6
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 2
+Q_END: 7
+S_BEG: 51
+S_END: 56
+E_VAL: 5.2
+SCORE: 12.2
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_100
+S_ID: test1_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_100
+S_ID: test1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_2
+S_ID: test1
+IDENT: 100.0
+ALIGN_LEN: 6
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 2
+Q_END: 7
+S_BEG: 2
+S_END: 7
+E_VAL: 5.2
+SCORE: 12.2
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1_2
+S_ID: test1_100
+IDENT: 100.0
+ALIGN_LEN: 6
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 2
+Q_END: 7
+S_BEG: 2
+S_END: 7
+E_VAL: 5.2
+SCORE: 12.2
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1_2
+S_ID: test1_2
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2
+S_ID: test2
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-24
+SCORE: 93.5
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2
+S_ID: test2_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-24
+SCORE: 93.5
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2_100
+S_ID: test2
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-24
+SCORE: 93.5
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2_100
+S_ID: test2_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-24
+SCORE: 93.5
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2_1
+S_ID: test2
+IDENT: 100.0
+ALIGN_LEN: 6
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 14
+Q_END: 19
+S_BEG: 14
+S_END: 19
+E_VAL: 5.2
+SCORE: 12.2
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test2_1
+S_ID: test2
+IDENT: 100.0
+ALIGN_LEN: 6
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 53
+Q_END: 58
+S_BEG: 20
+S_END: 25
+E_VAL: 5.2
+SCORE: 12.2
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test2_1
+S_ID: test2
+IDENT: 100.0
+ALIGN_LEN: 6
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 20
+Q_END: 25
+S_BEG: 53
+S_END: 58
+E_VAL: 5.2
+SCORE: 12.2
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test2_1
+S_ID: test2_100
+IDENT: 100.0
+ALIGN_LEN: 6
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 20
+Q_END: 25
+S_BEG: 53
+S_END: 58
+E_VAL: 5.2
+SCORE: 12.2
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test2_1
+S_ID: test2_1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.1e-26
+SCORE: 100.8
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2_1
+S_ID: test2_100
+IDENT: 100.0
+ALIGN_LEN: 6
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 14
+Q_END: 19
+S_BEG: 14
+S_END: 19
+E_VAL: 5.2
+SCORE: 12.2
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test2_1
+S_ID: test2_100
+IDENT: 100.0
+ALIGN_LEN: 6
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 53
+Q_END: 58
+S_BEG: 20
+S_END: 25
+E_VAL: 5.2
+SCORE: 12.2
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1_rc
+S_ID: test1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1_rc
+S_ID: test1_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1_rc
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: +
+---
--- /dev/null
+SEQ_NAME: test1
+SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_100
+SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_2
+SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test2
+SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test2_100
+SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test2_1
+SEQ: ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_rc
+SEQ: cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca
+SEQ_LEN: 59
+---
+REC_TYPE: USEARCH
+Q_ID: test1
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1
+S_ID: test1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1
+S_ID: test1_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_100
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1_100
+S_ID: test1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_100
+S_ID: test1_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_2
+S_ID: test1_2
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2
+S_ID: test2
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-24
+SCORE: 93.5
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2
+S_ID: test2_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-24
+SCORE: 93.5
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2_100
+S_ID: test2
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-24
+SCORE: 93.5
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2_100
+S_ID: test2_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-24
+SCORE: 93.5
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2_1
+S_ID: test2_1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.1e-26
+SCORE: 100.8
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_rc
+S_ID: test1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1_rc
+S_ID: test1_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1_rc
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: 1.8e-29
+SCORE: 110.1
+STRAND: +
+---
--- /dev/null
+SEQ_NAME: test1
+SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_100
+SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_2
+SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test2
+SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test2_100
+SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test2_1
+SEQ: ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_rc
+SEQ: cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca
+SEQ_LEN: 59
+---
+REC_TYPE: USEARCH
+Q_ID: test1
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1
+S_ID: test1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1
+S_ID: test1_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_100
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1_100
+S_ID: test1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_100
+S_ID: test1_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_2
+S_ID: test1_2
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2
+S_ID: test2
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2
+S_ID: test2_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2_100
+S_ID: test2
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2_100
+S_ID: test2_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2_1
+S_ID: test2_1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_rc
+S_ID: test1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1_rc
+S_ID: test1_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1_rc
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
--- /dev/null
+SEQ_NAME: test1
+SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_100
+SEQ: tgtacgtagctagctagctagctagctagctagctagctagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_2
+SEQ: tgtacgtagctagctagctagctagcGagctagctagcAagctgactatcgtgatcgtg
+SEQ_LEN: 59
+---
+SEQ_NAME: test2
+SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test2_100
+SEQ: ggttgtgtgtgtgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test2_1
+SEQ: ggtAgtgtgtgAgtatcgatgtagtctacatcgtctatctgtactgacttactgactac
+SEQ_LEN: 59
+---
+SEQ_NAME: test1_rc
+SEQ: cacgatcacgatagtcagctagctagctagctagctagctagctagctagctacgtaca
+SEQ_LEN: 59
+---
+REC_TYPE: USEARCH
+Q_ID: test1
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1
+S_ID: test1_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_100
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1_100
+S_ID: test1_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_2
+S_ID: test1_2
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2
+S_ID: test2_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2_100
+S_ID: test2_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test2_1
+S_ID: test2_1
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
+REC_TYPE: USEARCH
+Q_ID: test1_rc
+S_ID: test1_100
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: -
+---
+REC_TYPE: USEARCH
+Q_ID: test1_rc
+S_ID: test1_rc
+IDENT: 100.0
+ALIGN_LEN: 59
+MISMATCHES: 0
+GAPS: 0
+Q_BEG: 0
+Q_END: 58
+S_BEG: 0
+S_END: 58
+E_VAL: *
+SCORE: *
+STRAND: +
+---
assert_no_diff $tmp $out.1
clean
-run "$bp -I $in -i 1 -O $tmp"
+run "$bp -I $in -C 2 -O $tmp"
assert_no_diff $tmp $out.2
clean
-run "$bp -I $in -i 1 -c -O $tmp"
+run "$bp -I $in -i 1 -O $tmp"
assert_no_diff $tmp $out.3
clean
+
+run "$bp -I $in -i 1 -c -O $tmp"
+assert_no_diff $tmp $out.4
+clean
--- /dev/null
+#!/bin/bash
+
+source "$BP_DIR/bp_test/lib/test.sh"
+
# Every case below follows the same pattern: run $bp on $in with $in.db passed
# via -d, compare $tmp against the expected file $out.N, then call clean.
# Case 1: -i 1 with no -p given.
+run "$bp -I $in -d $in.db -i 1 -O $tmp"
+assert_no_diff $tmp $out.1
+clean
+
# Case 2: -i 1 with -p local.
+run "$bp -I $in -d $in.db -i 1 -p local -O $tmp"
+assert_no_diff $tmp $out.2
+clean
+
# Case 3: -p local with both -i 0.99 and -e 0.1.
+run "$bp -I $in -d $in.db -i 0.99 -e 0.1 -p local -O $tmp"
+assert_no_diff $tmp $out.3
+clean
+
# Case 4: -i 0.99 with no -p given.
+run "$bp -I $in -d $in.db -i 0.99 -O $tmp"
+assert_no_diff $tmp $out.4
+clean
+
# Case 5: -i 1 with -m 1 (presumably the maxaccepts option — confirm against the script under test).
+run "$bp -I $in -d $in.db -i 1 -m 1 -O $tmp"
+assert_no_diff $tmp $out.5
+clean
# according to descending sequence length.
def sortbylength
# usearch -sortbylength seqs.fasta -output seqs_sorted.fasta -minseqlength 64
- @command << "usearch -sortbylength #{@infile} -output #{@infile}.sort"
+ @command << "usearch"
+ @command << "-sortbylength #{@infile}"
+ @command << "-output #{@infile}.sort"
execute
# according to cluster size.
def sortbysize
# usearch -sortbysize seqs.fasta -output seqs_sorted.fasta -minsize 4
- @command << "usearch -sortbysize #{@infile} -output #{@infile}.sort"
+ @command << "usearch"
+ @command << "-sortbysize #{@infile}"
+ @command << "-output #{@infile}.sort"
execute
# Method to execute cluster_fast.
# Appends the usearch -cluster_fast invocation to @command and calls execute.
# The added (+) lines push one argument per entry: the input file, -id from
# @options[:identity], -uc pointing at @outfile, and -threads from
# @options[:cpus].
def cluster_fast
# usearch -cluster_fast query.fasta -id 0.9 -centroids nr.fasta -uc clusters.uc
- @command << "usearch -cluster_fast #{@infile} -id #{@options[:identity]} -uc #{@outfile}"
+ @command << "usearch"
+ @command << "-cluster_fast #{@infile}"
+ @command << "-id #{@options[:identity]}"
+ @command << "-uc #{@outfile}"
# NOTE(review): -threads is emitted unconditionally, so @options[:cpus] must be set by the caller — confirm.
+ @command << "-threads #{@options[:cpus]}"
execute
end
# NB sequences must be sorted with sortbylength or sortbysize.
# Appends the usearch -cluster_smallmem invocation to @command (input file,
# -id from @options[:identity], -uc pointing at @outfile) and calls execute.
# NOTE(review): unlike cluster_fast, no -threads argument is added here —
# confirm whether that is intentional.
def cluster_smallmem
# usearch -cluster_smallmem query.fasta -id 0.9 -centroids nr.fasta -uc clusters.uc
- @command << "usearch -cluster_smallmem #{@infile} -id #{@options[:identity]} -uc #{@outfile}"
+ @command << "usearch"
+ @command << "-cluster_smallmem #{@infile}"
+ @command << "-id #{@options[:identity]}"
+ @command << "-uc #{@outfile}"
# "-strand both" is only requested when the comp option is set.
@command << "-strand both" if @options[:comp]
execute
end
# This hunk removes the single usearch method (which wrote --userout with
# --userfields) and adds usearch_local and usearch_global, both of which
# write -blast6out to @outfile and always pass "-strand both".
- # Method to execute database search.
- def usearch
- @command << "usearch --query #{@infile} --db #{@options[:database]} --userout #{@outfile}"
- @command << "--userfields target+tloz+thiz+query+bits+strand"
- @command << "--id #{@options[:identity]}" if @options[:identity]
- @command << "--evalue #{@options[:e_val]}" if @options[:e_val]
- @command << "--rev"
+ # Method to execute database local search.
+ def usearch_local
+ # usearch -usearch_local query.fasta -db proteins.udb -id 0.8 -alnout results.aln
+ # usearch -usearch_local query.fasta -db ESTs.fasta -id 0.9 -evalue 1e-6 -blast6out results.m8 -strand plus -maxaccepts 8 -maxrejects 256
+
+ @command << "usearch"
+ @command << "-usearch_local #{@infile}"
+ @command << "-db #{@options[:database]}"
+ @command << "-blast6out #{@outfile}"
+ @command << "-strand both"
# -id and -evalue are each optional for local search.
+ @command << "-id #{@options[:identity]}" if @options[:identity]
+ @command << "-evalue #{@options[:e_val]}" if @options[:e_val]
# NOTE(review): -maxaccepts and -threads are emitted unconditionally, so
# @options[:maxaccepts] and @options[:cpus] must be set by every caller — confirm.
+ @command << "-maxaccepts #{@options[:maxaccepts]}"
+ @command << "-threads #{@options[:cpus]}"
- execute
+ execute
+ end
+
+ # Method to execute database global search.
+ def usearch_global
+ # usearch -usearch_global query.fasta -db proteins.udb -id 0.8 -alnout results.aln
+ # usearch -usearch_global query.fasta -db ESTs.fasta -id 0.9 -blast6out results.m8 -strand plus -maxaccepts 8 -maxrejects 256
+
+ @command << "usearch"
+ @command << "-usearch_global #{@infile}"
+ @command << "-db #{@options[:database]}"
+ @command << "-blast6out #{@outfile}"
+ @command << "-strand both"
# -id is emitted unconditionally here, whereas usearch_local guards it with "if @options[:identity]".
+ @command << "-id #{@options[:identity]}"
# NOTE(review): the usage examples above show no -evalue for -usearch_global —
# confirm the option is valid for global search.
+ @command << "-evalue #{@options[:e_val]}" if @options[:e_val]
+ @command << "-maxaccepts #{@options[:maxaccepts]}"
+ @command << "-threads #{@options[:cpus]}"
+
+ execute
end
# Method to execute uchime chimera detection.
record = {}
File.open(@outfile, "r") do |ios|
- ios.gets # skip comment line
ios.each_line do |line|
fields = line.chomp.split("\t")
-
- record[:REC_TYPE] = "USEARCH"
- record[:S_ID] = fields[0]
- record[:S_BEG] = fields[1].to_i
- record[:S_END] = fields[2].to_i
- record[:Q_ID] = fields[3]
- record[:SCORE] = fields[4].to_f
- record[:STRAND] = fields[5]
+ record[:REC_TYPE] = "USEARCH"
+ record[:Q_ID] = fields[0]
+ record[:S_ID] = fields[1]
+ record[:IDENT] = fields[2].to_f
+ record[:ALIGN_LEN] = fields[3].to_i
+ record[:MISMATCHES] = fields[4].to_i
+ record[:GAPS] = fields[5].to_i
+ record[:Q_BEG] = fields[6].to_i - 1
+ record[:Q_END] = fields[7].to_i - 1
+ record[:S_BEG] = fields[8].to_i - 1
+ record[:S_END] = fields[9].to_i - 1
+ record[:E_VAL] = fields[10] == '*' ? '*' : fields[10].to_f
+ record[:SCORE] = fields[11] == '*' ? '*' : fields[11].to_f
+ record[:STRAND] = record[:S_BEG].to_i < record[:S_END].to_i ? '+' : '-'
+
+ record[:S_BEG], record[:S_END] = record[:S_END], record[:S_BEG] if record[:STRAND] == '-'
yield record
end