git.donarmstrong.com Git - biopieces.git/commitdiff
ported write_fastq to ruby and allowed different encoding support
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 11 Apr 2012 08:15:25 +0000 (08:15 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 11 Apr 2012 08:15:25 +0000 (08:15 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1792 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/write_fastq
bp_test/out/write_fastq.out.2 [new file with mode: 0644]
bp_test/test/test_write_fastq
code_ruby/test/maasha/test_locator.rb
code_ruby/test/maasha/test_seq.rb

index 5f81de231d3a80ff203e905facbaf9286303218c..f6050f38555071932cab4ad91eaba821c8d84a83 100755 (executable)
@@ -1,6 +1,6 @@
-#!/usr/bin/env perl
+#!/usr/bin/env ruby
 
-# Copyright (C) 2007-2009 Martin A. Hansen.
+# Copyright (C) 2007-2012 Martin A. Hansen.
 
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 
 # http://www.gnu.org/copyleft/gpl.html
 
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-# Write sequences from stream in FASTQ format.
-
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
+# This program is part of the Biopieces framework (www.biopieces.org).
 
-use warnings;
-use strict;
-use Maasha::Fastq;
-use Maasha::Biopieces;
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
+# Write sequences from stream in FASTQ format.
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
+require 'maasha/biopieces'
+require 'maasha/fastq'
 
-my ( $options, $in, $out, $record, $data_out, $entry );
+allowed_enc = 'sanger,solexa,illumina1.3,illumina1.5,illumina1.8'
 
-$options = Maasha::Biopieces::parse_options(
-    [
-        { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
-        { long => 'data_out',  short => 'o', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
-        { long => 'compress',  short => 'Z', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
-    ]   
-);
+casts = []
+casts << {:long=>'no_stream', :short=>'x', :type=>'flag',   :mandatory=>false, :default=>nil,           :allowed=>nil,         :disallowed=>nil}
+casts << {:long=>'data_out',  :short=>'o', :type=>'file',   :mandatory=>false, :default=>nil,           :allowed=>nil,         :disallowed=>nil}
+casts << {:long=>'encoding',  :short=>'e', :type=>'string', :mandatory=>false, :default=>'illumina1.3', :allowed=>allowed_enc, :disallowed=>nil}
+casts << {:long=>'compress',  :short=>'Z', :type=>'flag',   :mandatory=>false, :default=>nil,           :allowed=>nil,         :disallowed=>nil}
 
-$in  = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
-$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
+options = Biopieces.options_parse(ARGV, casts)
 
-$data_out = Maasha::Biopieces::write_stream( $options->{ "data_out" }, $options->{ "compress" } );
+encoding = options[:encoding].downcase.delete('.')
 
-while ( $record = Maasha::Biopieces::get_record( $in ) ) 
-{
-    if ( $entry = Maasha::Fastq::biopiece2fastq( $record ) ) {
-        Maasha::Fastq::put_entry( $entry, $data_out );
-    }
-
-    Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" };
-}
-
-close $data_out;
-
-Maasha::Biopieces::close_stream( $in );
-Maasha::Biopieces::close_stream( $out );
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
+  if options[:data_out]
+    if options[:compress]
+      io_out = Zlib::GzipWriter.open(options[:data_out])
+    else
+      io_out = Fastq.open(options[:data_out], 'w')
+    end
+  else
+    io_out = $stdout
+  end
 
+  input.each do |record|
+    if record[:SEQ_NAME] and record[:SEQ] and record[:SCORES]
+      entry = Seq.new_bp(record)
+      entry.convert_scores!('illumina13', encoding)
 
-BEGIN
-{
-    Maasha::Biopieces::status_set();
-}
+      io_out.puts entry.to_fastq
+    end
 
+    output.puts record unless options.has_key? :no_stream
+  end
 
-END
-{
-    Maasha::Biopieces::status_log();
-}
+  io_out.close
+end
 
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
diff --git a/bp_test/out/write_fastq.out.2 b/bp_test/out/write_fastq.out.2
new file mode 100644 (file)
index 0000000..3772a06
--- /dev/null
@@ -0,0 +1,12 @@
+@sanger
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
++
+!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI
+@solexa
+bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
++
+\1c\1d\1e\1f !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI
+@illumina1.3
+ccccccccccccccccccccccccccccccccccccccccc
++
+!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI
index db9bbc23a8eb7df911a59adea0b4458b21cc19c6..39401409c848a580b12a0a0942eb70b3a2c0066e 100755 (executable)
@@ -6,7 +6,11 @@ run "$bp -I $in -o $tmp -x"
 assert_no_diff $tmp $out.1
 clean
 
-run "$bp -I $in -Z -o $tmp.gz -x"
+run "$bp -I $in -e sanger -o $tmp -x"
+assert_no_diff $tmp $out.2
+clean
+
+run "$bp -I $in -Z -e sanger -o $tmp.gz -x"
 gunzip $tmp.gz
-assert_no_diff $tmp $out.1
+assert_no_diff $tmp $out.2
 clean
index 2340578472e65999f4606297dd6d2e17a63f6bd7..45047f2770d17a3d2ed10a19833d1a6b4838b800 100755 (executable)
@@ -42,22 +42,22 @@ class TestLocator < Test::Unit::TestCase
 
   def test_Locator_with_single_interval_returns_correctly
     loc = Locator.new("5..10", @seq)
-    assert_equal("gatca", loc.subseq.seq)
+    assert_equal("gatcaa", loc.subseq.seq)
   end
 
   def test_Locator_with_multiple_intervals_return_correctly
     loc = Locator.new("5..10,15..20", @seq)
-    assert_equal("gatcataaca", loc.subseq.seq)
+    assert_equal("gatcaataacag", loc.subseq.seq)
   end
 
   def test_Locator_with_join_multiple_intervals_return_correctly
     loc = Locator.new("join(5..10,15..20)", @seq)
-    assert_equal("gatcataaca", loc.subseq.seq)
+    assert_equal("gatcaataacag", loc.subseq.seq)
   end
 
   def test_Locator_with_complement_and_single_interval_return_correctly
     loc = Locator.new("complement(5..10)", @seq)
-    assert_equal("tgatc", loc.subseq.seq)
+    assert_equal("ttgatc", loc.subseq.seq)
   end
 end
 
index 9a8deef34c8a443cad8c9f5817e744cda8cd0d05..a90c565072503641160ea2c7c2e24fded0e0563b 100755 (executable)
@@ -577,7 +577,141 @@ class TestSeq < Test::Unit::TestCase
 
     assert_equal("-atCG", @entry.mask_seq_soft!(20).seq)
   end
-end
 
+  # convert sanger to ...
+
+  def test_Seq_convert_scores_bang_from_sanger_to_sanger_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('sanger', 'sanger').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_sanger_to_solexa_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('abcdefgh', @entry.convert_scores!('sanger', 'solexa').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_sanger_to_illumina13_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('abcdefgh', @entry.convert_scores!('sanger', 'illumina13').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_sanger_to_illumina15_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('abcdefgh', @entry.convert_scores!('sanger', 'illumina15').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_sanger_to_illumina18_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('sanger', 'illumina18').qual)
+  end
+
+  # convert solexa to ...
+
+  def test_Seq_convert_scores_bang_from_solexa_to_sanger_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal(%q[#$%&'()*], @entry.convert_scores!('solexa', 'sanger').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_solexa_to_solexa_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('solexa', 'solexa').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_solexa_to_illumina13_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('solexa', 'illumina13').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_solexa_to_illumina15_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('solexa', 'illumina15').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_solexa_to_illumina18_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal(%q[#$%&'()*], @entry.convert_scores!('solexa', 'illumina18').qual)
+  end
+
+  # convert illumina13 to ...
+
+  def test_Seq_convert_scores_bang_from_illumina13_to_sanger_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina13', 'sanger').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina13_to_solexa_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('illumina13', 'solexa').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina13_to_illumina13_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('illumina13', 'illumina13').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina13_to_illumina15_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('illumina13', 'illumina15').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina13_to_illumina18_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina13', 'illumina18').qual)
+  end
+
+  # convert illumina15 to ...
+
+  def test_Seq_convert_scores_bang_from_illumina15_to_sanger_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina15', 'sanger').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina15_to_solexa_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('illumina15', 'solexa').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina15_to_illumina13_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('illumina15', 'illumina13').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina15_to_illumina15_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('illumina15', 'illumina15').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina15_to_illumina18_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina15', 'illumina18').qual)
+  end
+
+  # convert illumina18 to ...
+
+  def test_Seq_convert_scores_bang_from_illumina18_to_sanger_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('illumina18', 'sanger').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina18_to_solexa_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('abcdefgh', @entry.convert_scores!('illumina18', 'solexa').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina18_to_illumina13_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('abcdefgh', @entry.convert_scores!('illumina18', 'illumina13').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina18_to_illumina15_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('abcdefgh', @entry.convert_scores!('illumina18', 'illumina15').qual)
+  end
+
+  def test_Seq_convert_scores_bang_from_illumina18_to_illumina18_returns_OK
+    @entry.qual = 'BCDEFGHI'
+    assert_equal('BCDEFGHI', @entry.convert_scores!('illumina18', 'illumina18').qual)
+  end
+end
 
 __END__