X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fdenoise_seq;h=baa62ad7eb6102e724f7ad7b02d842ceb9d2d2d3;hb=e605ac14c9e2a30aa316db707674ac1d110724ab;hp=ff557b431a6d6d57d152c5e7f69dd8202fbd60a6;hpb=844087144441278cb50b8d88246417648c02e239;p=biopieces.git diff --git a/bp_bin/denoise_seq b/bp_bin/denoise_seq index ff557b4..baa62ad 100755 --- a/bp_bin/denoise_seq +++ b/bp_bin/denoise_seq @@ -37,8 +37,13 @@ require 'maasha/align' require 'maasha/usearch' casts = [] -casts << {:long=>'identity', :short=>'i', :type=>'float', :mandatory=>true, :default=>0.97, :allowed=>nil, :disallowed=>nil} -casts << {:long=>'cluster_min', :short=>'c', :type=>'uint', :mandatory=>true, :default=>2, :allowed=>nil, :disallowed=>"0"} +casts << {:long=>'cluster_ident', :short=>'i', :type=>'float', :mandatory=>true, :default=>0.97, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'cluster_min', :short=>'c', :type=>'uint', :mandatory=>true, :default=>1, :allowed=>nil, :disallowed=>"0"} +casts << {:long=>'sequence_min', :short=>'s', :type=>'uint', :mandatory=>true, :default=>1, :allowed=>nil, :disallowed=>"0"} +casts << {:long=>'residue_min', :short=>'r', :type=>'float', :mandatory=>true, :default=>0.3, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'gap_max', :short=>'g', :type=>'float', :mandatory=>true, :default=>0.4, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'quality_min', :short=>'q', :type=>'uint', :mandatory=>true, :default=>10, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'quality_mean', :short=>'Q', :type=>'uint', :mandatory=>true, :default=>15, :allowed=>nil, :disallowed=>nil} options = Biopieces.options_parse(ARGV, casts) tmpdir = Biopieces.mktmpdir @@ -46,6 +51,8 @@ fastq_file = File.join(tmpdir, "test.fq") fasta_file = File.join(tmpdir, "test.fna") fasta_file_align = File.join(tmpdir, "test.aln.fna") +options[:identity] = options[:cluster_ident] + def alignment_to_fastq(entries, index) entries.each do |entry| cluster, ident, name = entry.seq_name.split('|') @@ -65,7 +72,7 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| Fasta.open(fasta_file, "w") do |fasta_io| Fastq.open(fastq_file, "w") do |fastq_io| input.each_record do |record| - if record.has_key? :SEQ and record.has_key? :SCORES + if record[:SEQ] and record[:SCORES] entry = Seq.new_bp(record) entry.seq_name = seq_count.to_s @@ -95,6 +102,8 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| uc.each_alignment do |align| if align.members >= options[:cluster_min] + align.options = options + alignment_to_fastq(align.entries, index) cons = align.consensus