X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_ruby%2Flib%2Fmaasha%2Fusearch.rb;h=63757911deebea8e62de71ea688e29311d5427b8;hb=bb827f984d41390cbcf7a478ae1c6831dbf1dd99;hp=0bc72352d648c92d958a82aed390b3118518d4d2;hpb=2949257922cc90a28af23386a72dbad8157e4fa8;p=biopieces.git diff --git a/code_ruby/lib/maasha/usearch.rb b/code_ruby/lib/maasha/usearch.rb index 0bc7235..6375791 100644 --- a/code_ruby/lib/maasha/usearch.rb +++ b/code_ruby/lib/maasha/usearch.rb @@ -43,7 +43,9 @@ class Usearch # according to decending sequence length. def sortbylength # usearch -sortbylength seqs.fasta -output seqs_sorted.fasta -minseqlength 64 - @command << "usearch -sortbylength #{@infile} -output #{@infile}.sort" + @command << "usearch" + @command << "-sortbylength #{@infile}" + @command << "-output #{@infile}.sort" execute @@ -54,7 +56,9 @@ class Usearch # according to cluster size. def sortbysize # usearch -sortbysize seqs.fasta -output seqs_sorted.fasta -minsize 4 - @command << "usearch -sortbysize #{@infile} -output #{@infile}.sort" + @command << "usearch" + @command << "-sortbysize #{@infile}" + @command << "-output #{@infile}.sort" execute @@ -64,7 +68,11 @@ class Usearch # Method to execute cluster_fast. def cluster_fast # usearch -cluster_fast query.fasta -id 0.9 -centroids nr.fasta -uc clusters.uc - @command << "usearch -cluster_fast #{@infile} -id #{@options[:identity]} -uc #{@outfile}" + @command << "usearch" + @command << "-cluster_fast #{@infile}" + @command << "-id #{@options[:identity]}" + @command << "-uc #{@outfile}" + @command << "-threads #{@options[:cpus]}" execute end @@ -73,21 +81,49 @@ class Usearch # NB sequences must be sorted with sortbylength or sortbysize. def cluster_smallmem # usearch -cluster_smallmem query.fasta -id 0.9 -centroids nr.fasta -uc clusters.uc - @command << "usearch -cluster_smallmem #{@infile} -id #{@options[:identity]} -uc #{@outfile}" + @command << "usearch" + @command << "-cluster_smallmem #{@infile}" + @command << "-id #{@options[:identity]}" + @command << "-uc #{@outfile}" @command << "-strand both" if @options[:comp] execute end - # Method to execute database search. - def usearch - @command << "usearch --query #{@infile} --db #{@options[:database]} --userout #{@outfile}" - @command << "--userfields target+tloz+thiz+query+bits+strand" - @command << "--id #{@options[:identity]}" if @options[:identity] - @command << "--evalue #{@options[:e_val]}" if @options[:e_val] - @command << "--rev" + # Method to execute database local search. + def usearch_local + # usearch -usearch_local query.fasta -db proteins.udb -id 0.8 -alnout results.aln + # usearch -usearch_local query.fasta -db ESTs.fasta -id 0.9 -evalue 1e-6 -blast6out results.m8 -strand plus -maxaccepts 8 -maxrejects 256 + + @command << "usearch" + @command << "-usearch_local #{@infile}" + @command << "-db #{@options[:database]}" + @command << "-blast6out #{@outfile}" + @command << "-strand both" + @command << "-id #{@options[:identity]}" if @options[:identity] + @command << "-evalue #{@options[:e_val]}" if @options[:e_val] + @command << "-maxaccepts #{@options[:maxaccepts]}" + @command << "-threads #{@options[:cpus]}" - execute + execute + end + + # Method to execute database global search. + def usearch_global + # usearch -usearch_global query.fasta -db proteins.udb -id 0.8 -alnout results.aln + # usearch -usearch_global query.fasta -db ESTs.fasta -id 0.9 -blast6out results.m8 -strand plus -maxaccepts 8 -maxrejects 256 + + @command << "usearch" + @command << "-usearch_global #{@infile}" + @command << "-db #{@options[:database]}" + @command << "-blast6out #{@outfile}" + @command << "-strand both" + @command << "-id #{@options[:identity]}" + @command << "-evalue #{@options[:e_val]}" if @options[:e_val] + @command << "-maxaccepts #{@options[:maxaccepts]}" + @command << "-threads #{@options[:cpus]}" + + execute end # Method to execute uchime chimera detection. @@ -147,17 +183,24 @@ class Usearch record = {} File.open(@outfile, "r") do |ios| - ios.gets # skip comment line ios.each_line do |line| fields = line.chomp.split("\t") - - record[:REC_TYPE] = "USEARCH" - record[:S_ID] = fields[0] - record[:S_BEG] = fields[1].to_i - record[:S_END] = fields[2].to_i - record[:Q_ID] = fields[3] - record[:SCORE] = fields[4].to_f - record[:STRAND] = fields[5] + record[:REC_TYPE] = "USEARCH" + record[:Q_ID] = fields[0] + record[:S_ID] = fields[1] + record[:IDENT] = fields[2].to_f + record[:ALIGN_LEN] = fields[3].to_i + record[:MISMATCHES] = fields[4].to_i + record[:GAPS] = fields[5].to_i + record[:Q_BEG] = fields[6].to_i - 1 + record[:Q_END] = fields[7].to_i - 1 + record[:S_BEG] = fields[8].to_i - 1 + record[:S_END] = fields[9].to_i - 1 + record[:E_VAL] = fields[10] == '*' ? '*' : fields[10].to_f + record[:SCORE] = fields[11] == '*' ? '*' : fields[11].to_f + record[:STRAND] = record[:S_BEG].to_i < record[:S_END].to_i ? '+' : '-' + + record[:S_BEG], record[:S_END] = record[:S_END], record[:S_BEG] if record[:STRAND] == '-' yield record end