From 2dddbfc4c5563421b83cce0e81f2e559ed982bab Mon Sep 17 00:00:00 2001 From: martinahansen Date: Wed, 11 Apr 2012 08:15:25 +0000 Subject: [PATCH] ported write_fastq to ruby and allowed different encoding support git-svn-id: http://biopieces.googlecode.com/svn/trunk@1792 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/write_fastq | 81 +++++++-------- bp_test/out/write_fastq.out.2 | 12 +++ bp_test/test/test_write_fastq | 8 +- code_ruby/test/maasha/test_locator.rb | 8 +- code_ruby/test/maasha/test_seq.rb | 136 +++++++++++++++++++++++++- 5 files changed, 192 insertions(+), 53 deletions(-) create mode 100644 bp_test/out/write_fastq.out.2 diff --git a/bp_bin/write_fastq b/bp_bin/write_fastq index 5f81de2..f6050f3 100755 --- a/bp_bin/write_fastq +++ b/bp_bin/write_fastq @@ -1,6 +1,6 @@ -#!/usr/bin/env perl +#!/usr/bin/env ruby -# Copyright (C) 2007-2009 Martin A. Hansen. +# Copyright (C) 2007-2012 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -18,66 +18,55 @@ # http://www.gnu.org/copyleft/gpl.html - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# Write sequences from stream in FASTQ format. - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# This program is part of the Biopieces framework (www.biopieces.org). -use warnings; -use strict; -use Maasha::Fastq; -use Maasha::Biopieces; +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# Write sequences from stream in FASTQ format. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +require 'maasha/biopieces' +require 'maasha/fastq' -my ( $options, $in, $out, $record, $data_out, $entry ); +allowed_enc = 'sanger,solexa,illumina1.3,illumina1.5,illumina1.8' -$options = Maasha::Biopieces::parse_options( - [ - { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'data_out', short => 'o', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'compress', short => 'Z', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - ] -); +casts = [] +casts << {:long=>'no_stream', :short=>'x', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'data_out', :short=>'o', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'encoding', :short=>'e', :type=>'string', :mandatory=>false, :default=>'illumina1.3', :allowed=>allowed_enc, :disallowed=>nil} +casts << {:long=>'compress', :short=>'Z', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} -$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); -$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); +options = Biopieces.options_parse(ARGV, casts) -$data_out = Maasha::Biopieces::write_stream( $options->{ "data_out" }, $options->{ "compress" } ); +encoding = options[:encoding].downcase.delete('.') -while ( $record = Maasha::Biopieces::get_record( $in ) ) -{ - if ( $entry = Maasha::Fastq::biopiece2fastq( $record ) ) { - Maasha::Fastq::put_entry( $entry, $data_out ); - } - - Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" }; -} - -close $data_out; - -Maasha::Biopieces::close_stream( $in ); -Maasha::Biopieces::close_stream( $out ); - - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + if options[:data_out] + if options[:compress] + io_out = Zlib::GzipWriter.open(options[:data_out]) + else + io_out = Fastq.open(options[:data_out], 'w') + end + else + io_out = $stdout + end + input.each do |record| + if record[:SEQ_NAME] and record[:SEQ] and record[:SCORES] + entry = Seq.new_bp(record) + entry.convert_scores!('illumina13', encoding) -BEGIN -{ - Maasha::Biopieces::status_set(); -} + io_out.puts entry.to_fastq + end + output.puts record unless options.has_key? :no_stream + end -END -{ - Maasha::Biopieces::status_log(); -} + io_out.close +end # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/bp_test/out/write_fastq.out.2 b/bp_test/out/write_fastq.out.2 new file mode 100644 index 0000000..3772a06 --- /dev/null +++ b/bp_test/out/write_fastq.out.2 @@ -0,0 +1,12 @@ +@sanger +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI +@solexa +bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb ++ + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI +@illumina1.3 +ccccccccccccccccccccccccccccccccccccccccc ++ +!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI diff --git a/bp_test/test/test_write_fastq b/bp_test/test/test_write_fastq index db9bbc2..3940140 100755 --- a/bp_test/test/test_write_fastq +++ b/bp_test/test/test_write_fastq @@ -6,7 +6,11 @@ run "$bp -I $in -o $tmp -x" assert_no_diff $tmp $out.1 clean -run "$bp -I $in -Z -o $tmp.gz -x" +run "$bp -I $in -e sanger -o $tmp -x" +assert_no_diff $tmp $out.2 +clean + +run "$bp -I $in -Z -e sanger -o $tmp.gz -x" gunzip $tmp.gz -assert_no_diff $tmp $out.1 +assert_no_diff $tmp $out.2 clean diff --git a/code_ruby/test/maasha/test_locator.rb b/code_ruby/test/maasha/test_locator.rb index 2340578..45047f2 100755 --- a/code_ruby/test/maasha/test_locator.rb +++ b/code_ruby/test/maasha/test_locator.rb @@ -42,22 +42,22 @@ class TestLocator < Test::Unit::TestCase def test_Locator_with_single_interval_returns_correctly loc = Locator.new("5..10", @seq) - assert_equal("gatca", loc.subseq.seq) + assert_equal("gatcaa", loc.subseq.seq) end def test_Locator_with_multiple_intervals_return_correctly loc = Locator.new("5..10,15..20", @seq) - assert_equal("gatcataaca", loc.subseq.seq) + assert_equal("gatcaataacag", loc.subseq.seq) end def test_Locator_with_join_multiple_intervals_return_correctly loc = Locator.new("join(5..10,15..20)", @seq) - assert_equal("gatcataaca", loc.subseq.seq) + assert_equal("gatcaataacag", loc.subseq.seq) end def test_Locator_with_complement_and_single_interval_return_correctly loc = Locator.new("complement(5..10)", @seq) - assert_equal("tgatc", loc.subseq.seq) + assert_equal("ttgatc", loc.subseq.seq) end end diff --git a/code_ruby/test/maasha/test_seq.rb b/code_ruby/test/maasha/test_seq.rb index 9a8deef..a90c565 100755 --- a/code_ruby/test/maasha/test_seq.rb +++ b/code_ruby/test/maasha/test_seq.rb @@ -577,7 +577,141 @@ class TestSeq < Test::Unit::TestCase assert_equal("-atCG", @entry.mask_seq_soft!(20).seq) end -end + # convert sanger to ... + + def test_Seq_convert_scores_bang_from_sanger_to_sanger_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('BCDEFGHI', @entry.convert_scores!('sanger', 'sanger').qual) + end + + def test_Seq_convert_scores_bang_from_sanger_to_solexa_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('abcdefgh', @entry.convert_scores!('sanger', 'solexa').qual) + end + + def test_Seq_convert_scores_bang_from_sanger_to_illumina13_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('abcdefgh', @entry.convert_scores!('sanger', 'illumina13').qual) + end + + def test_Seq_convert_scores_bang_from_sanger_to_illumina15_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('abcdefgh', @entry.convert_scores!('sanger', 'illumina15').qual) + end + + def test_Seq_convert_scores_bang_from_sanger_to_illumina18_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('BCDEFGHI', @entry.convert_scores!('sanger', 'illumina18').qual) + end + + # convert solexa to ... + + def test_Seq_convert_scores_bang_from_solexa_to_sanger_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal(%q[#$%&'()*], @entry.convert_scores!('solexa', 'sanger').qual) + end + + def test_Seq_convert_scores_bang_from_solexa_to_solexa_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('BCDEFGHI', @entry.convert_scores!('solexa', 'solexa').qual) + end + + def test_Seq_convert_scores_bang_from_solexa_to_illumina13_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('BCDEFGHI', @entry.convert_scores!('solexa', 'illumina13').qual) + end + + def test_Seq_convert_scores_bang_from_solexa_to_illumina15_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('BCDEFGHI', @entry.convert_scores!('solexa', 'illumina15').qual) + end + + def test_Seq_convert_scores_bang_from_solexa_to_illumina18_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal(%q[#$%&'()*], @entry.convert_scores!('solexa', 'illumina18').qual) + end + + # convert illumina13 to ... + + def test_Seq_convert_scores_bang_from_illumina13_to_sanger_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina13', 'sanger').qual) + end + + def test_Seq_convert_scores_bang_from_illumina13_to_solexa_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('BCDEFGHI', @entry.convert_scores!('illumina13', 'solexa').qual) + end + + def test_Seq_convert_scores_bang_from_illumina13_to_illumina13_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('BCDEFGHI', @entry.convert_scores!('illumina13', 'illumina13').qual) + end + + def test_Seq_convert_scores_bang_from_illumina13_to_illumina15_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('BCDEFGHI', @entry.convert_scores!('illumina13', 'illumina15').qual) + end + + def test_Seq_convert_scores_bang_from_illumina13_to_illumina18_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina13', 'illumina18').qual) + end + + # convert illumina15 to ... + + def test_Seq_convert_scores_bang_from_illumina15_to_sanger_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina15', 'sanger').qual) + end + + def test_Seq_convert_scores_bang_from_illumina15_to_solexa_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('BCDEFGHI', @entry.convert_scores!('illumina15', 'solexa').qual) + end + + def test_Seq_convert_scores_bang_from_illumina15_to_illumina13_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('BCDEFGHI', @entry.convert_scores!('illumina15', 'illumina13').qual) + end + + def test_Seq_convert_scores_bang_from_illumina15_to_illumina15_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('BCDEFGHI', @entry.convert_scores!('illumina15', 'illumina15').qual) + end + + def test_Seq_convert_scores_bang_from_illumina15_to_illumina18_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal(%q[#$%&'()*], @entry.convert_scores!('illumina15', 'illumina18').qual) + end + + # convert illumina18 to ... + + def test_Seq_convert_scores_bang_from_illumina18_to_sanger_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('BCDEFGHI', @entry.convert_scores!('illumina18', 'sanger').qual) + end + + def test_Seq_convert_scores_bang_from_illumina18_to_solexa_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('abcdefgh', @entry.convert_scores!('illumina18', 'solexa').qual) + end + + def test_Seq_convert_scores_bang_from_illumina18_to_illumina13_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('abcdefgh', @entry.convert_scores!('illumina18', 'illumina13').qual) + end + + def test_Seq_convert_scores_bang_from_illumina18_to_illumina15_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('abcdefgh', @entry.convert_scores!('illumina18', 'illumina15').qual) + end + + def test_Seq_convert_scores_bang_from_illumina18_to_illumina18_returns_OK + @entry.qual = 'BCDEFGHI' + assert_equal('BCDEFGHI', @entry.convert_scores!('illumina18', 'illumina18').qual) + end +end __END__ -- 2.39.5