require 'maasha/usearch'
casts = []
-casts << {:long=>'no_sort', :short=>'n', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'comp', :short=>'c', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'identity', :short=>'i', :type=>'float', :mandatory=>true, :default=>0.9, :allowed=>nil, :disallowed=>nil}
+casts << {long: 'no_sort', short: 'n', type: 'flag', mandatory: false, default: nil, allowed: nil, disallowed: nil}
+casts << {long: 'comp', short: 'c', type: 'flag', mandatory: false, default: nil, allowed: nil, disallowed: nil}
+casts << {long: 'identity', short: 'i', type: 'float', mandatory: true, default: 0.9, allowed: nil, disallowed: nil}
+casts << {long: 'cpus', short: 'C', type: 'uint', mandatory: false, default: 1, allowed: nil, disallowed: "0"}
options = Biopieces.options_parse(ARGV, casts)
file_uclust = File.join(tmpdir, "out.uc")
Biopieces.open(options[:stream_in], file_records) do |input, output|
- Fasta.open(file_fasta, mode="w") do |fasta_io|
+ Fasta.open(file_fasta, 'w') do |fasta_io|
input.each_record do |record|
output.puts record
- if record.has_key? :SEQ_NAME and record.has_key? :SEQ
+ if record[:SEQ_NAME] and record[:SEQ]
fasta_io.puts Seq.new_bp(record).to_fasta
end
end
end
us = Usearch.new(file_fasta, file_uclust, options)
-us.sort_length unless options[:no_sort]
-us.cluster
+
+us.sortbylength unless options[:no_sort]
+us.cluster_smallmem
hash = {}
-us.each_cluster do |record|
- hash[record[:Q_ID].to_sym] = record.dup
+us.each_cluster do |cluster|
+ hash[cluster[:Q_ID].to_sym] = cluster.dup
end
Biopieces.open(file_records, options[:stream_out]) do |input, output|
input.each_record do |record|
- if record.has_key? :SEQ_NAME and record.has_key? :SEQ
- if hash.has_key? record[:SEQ_NAME].to_sym
+ if record[:SEQ_NAME] and record[:SEQ]
+ if hash[record[:SEQ_NAME].to_sym]
us = hash[record[:SEQ_NAME].to_sym]
record[:CLUSTER] = us[:CLUSTER].to_i
record[:IDENT] = us[:IDENT].to_i