git.donarmstrong.com Git - biopieces.git/blobdiff - bp_bin/find_adaptor
fixed raw input bug in read_fasta and read_fastq
[biopieces.git] / bp_bin / find_adaptor
index 78e633a34ca0365694022bdf310ed1ef354871f3..290af5a19dfa5da0614f0ea911ec1c6c636bf63c 100755 (executable)
@@ -37,67 +37,36 @@ require 'maasha/fasta'
 class PatScanError < StandardError; end;
 
 class PatScan
-  def initialize(options, tmpdir, file_pattern, file_patscan, cpus)
+  def initialize(options, tmpdir, file_pattern, cpus)
     @options      = options
+               @tmpdir       = tmpdir
     @file_pattern = file_pattern
-    @file_patscan = file_patscan
     @cpus         = cpus
-    @files_fasta  = Dir.glob(File.join(tmpdir, "*.fna"))
+    @files_fasta  = Dir.glob(File.join(@tmpdir, "*.fna"))
 
     pat = Pattern.new(@options)
     pat.write(@file_pattern)
   end
 
-#  def run
-#    child_count = 0
-#
-#    @files_fasta.each do |file|
-#      if fork
-#        Process.wait if ( child_count += 1 ) >= @cpus
-#      else
-#        command = command_compile(file)
-#        system(command)
-#        raise PatScanError, "Command failed: #{command}" unless $?.success?
-#        exit
-#      end
-#    end
-#  end
-
   def run
     child_count = 0
+    ch_mutex = Mutex.new
+    threads = []
 
     @files_fasta.each do |file|
       Thread.pass while child_count >= @cpus
-      child_count += 1
+      ch_mutex.synchronize { child_count += 1 }
 
-      Thread.new do
+      threads << Thread.new do
         command = command_compile(file)
         system(command)
         raise PatScanError, "Command failed: #{command}" unless $?.success?
-        child_count -= 1
+        ch_mutex.synchronize { child_count -= 1 }
       end
     end
-  end
 
-#  def run
-#    child_count = 0
-#    ch_mutex = Mutex.new
-#    threads = []
-#
-#    @files_fasta.each do |file|
-#      Thread.pass while child_count >= @cpus
-#      ch_mutex.synchronize { child_count += 1 }
-#
-#      threads << Thread.new do
-#        command = command_compile(file)
-#        system(command)
-#        raise PatScanError, "Command failed: #{command}" unless $?.success?
-#        ch_mutex.synchronize { child_count -= 1 }
-#      end
-#    end
-#
-#    threads.each { |t| t.join }
-#  end
+    threads.each { |t| t.join }
+  end
 
   def command_compile(file)
     commands = []
@@ -110,17 +79,21 @@ class PatScan
   end
 
   def parse_results
+    files_result = Dir.glob(File.join(@tmpdir, "*.out"))
+
     matches = {}
 
-    Fasta.open(@file_patscan, mode='r') do |ios|
-      ios.each do |entry|
-        if entry.seq_name =~ /^(\d+):\[(\d+),(\d+)\]$/
-          name  = $1.to_i
-          start = $2.to_i - 1
-          stop  = $3.to_i - 1
-          matches[name] = [start, stop - start + 1] unless matches.has_key? name
-        else
-          raise "Failed to parse sequence name: #{entry.seq_name}"
+    files_result.each do |file|
+      Fasta.open(file, 'r') do |ios|
+        ios.each do |entry|
+          if entry.seq_name =~ /^(\d+):\[(\d+),(\d+)\]$/
+            name  = $1.to_i
+            start = $2.to_i - 1
+            stop  = $3.to_i - 1
+            matches[name] = [start, stop - start + 1] unless matches.has_key? name
+          else
+            raise "Failed to parse sequence name: #{entry.seq_name}"
+          end
         end
       end
     end
@@ -154,7 +127,7 @@ class Pattern
   end
 
   def write(file)
-    File.open(file, mode='w') do |ios|
+    File.open(file, 'w') do |ios|
       ios.puts self.to_i
     end
   end
@@ -213,11 +186,9 @@ BASE_PER_FILE = 10_000_000
 
 options = Biopieces.options_parse(ARGV, casts)
 
-#tmpdir       = Biopieces.mktmpdir
-tmpdir       = "Tyt" # DEBUG TODO
+tmpdir       = Biopieces.mktmpdir
 file_records = File.join(tmpdir, "data.stream")
 file_pattern = File.join(tmpdir, "pattern.txt")
-file_patscan = File.join(tmpdir, "patscan.out")
 
 number_file = 0
 number_seq  = 0
@@ -225,14 +196,17 @@ bases       = 0
 
 Biopieces.open(options[:stream_in], file_records) do |input, output|
   file_fasta = File.join(tmpdir, "#{number_file}.fna")
-  out_fa     = Fasta.open(file_fasta, mode='w')
+  out_fa     = Fasta.open(file_fasta, 'w')
 
   input.each do |record|
     output.puts record
 
     if record.has_key? :SEQ
       record[:SEQ_NAME] = number_seq
-      out_fa.puts record
+
+      seq = Seq.new_bp(record)
+
+      out_fa.puts seq.to_fasta
 
       number_seq += 1;
       bases      += record[:SEQ].length
@@ -242,7 +216,7 @@ Biopieces.open(options[:stream_in], file_records) do |input, output|
         bases = 0
         number_file += 1
         file_fasta = File.join(tmpdir, "#{number_file}.fna")
-        out_fa     = Fasta.open(file_fasta, mode='w')
+        out_fa     = Fasta.open(file_fasta, 'w')
       end
     end
   end
@@ -250,9 +224,8 @@ Biopieces.open(options[:stream_in], file_records) do |input, output|
   out_fa.close if out_fa.respond_to? :close
 end
 
-patscan = PatScan.new(options, tmpdir, file_pattern, file_patscan, options[:cpus])
+patscan = PatScan.new(options, tmpdir, file_pattern, options[:cpus])
 patscan.run
-exit
 matches = patscan.parse_results
 
 number_seq = 0