From 5a4454dbd14c55fca0c2c350975972b14949ab5b Mon Sep 17 00:00:00 2001 From: martinahansen Date: Wed, 25 Sep 2013 14:34:00 +0000 Subject: [PATCH] ported write_fasta to ruby enabling bzip2 support git-svn-id: http://biopieces.googlecode.com/svn/trunk@2210 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/write_fasta | 73 +++++++++++++---------------------- bp_test/test/test_write_fasta | 7 +++- 2 files changed, 32 insertions(+), 48 deletions(-) diff --git a/bp_bin/write_fasta b/bp_bin/write_fasta index 52f8a70..f44d6f4 100755 --- a/bp_bin/write_fasta +++ b/bp_bin/write_fasta @@ -1,6 +1,6 @@ -#!/usr/bin/env perl +#!/usr/bin/env ruby -# Copyright (C) 2007-2009 Martin A. Hansen. +# Copyright (C) 2007-2012 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -18,67 +18,46 @@ # http://www.gnu.org/copyleft/gpl.html - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# Write sequences from stream in FASTA format. - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# This program is part of the Biopieces framework (www.biopieces.org). -use warnings; -use strict; -use Maasha::Fasta; -use Maasha::Biopieces; +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# Write sequences from stream in FASTA format. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +require 'maasha/biopieces' +require 'maasha/fasta' -my ( $options, $in, $out, $record, $data_out, $entry ); +casts = [] +casts << {long: 'no_stream', short: 'x', type: 'flag', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'data_out', short: 'o', type: 'file', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'wrap', short: 'w', type: 'uint', mandatory: false, default: nil, allowed: nil, disallowed: "0"} +casts << {long: 'compress', short: 'Z', type: 'string', mandatory: false, default: nil, allowed: "gzip,bzip,bzip2", disallowed: nil} -$options = Maasha::Biopieces::parse_options( - [ - { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'data_out', short => 'o', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'wrap', short => 'w', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => '0' }, - { long => 'compress', short => 'Z', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - ] -); +options = Biopieces.options_parse(ARGV, casts) -$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); -$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); +compress = options[:compress] ? options[:compress].to_sym : nil -$data_out = Maasha::Biopieces::write_stream( $options->{ "data_out" }, $options->{ "compress" } ); +raise "--data_out is mandatory for compressed output" if compress and not options[:data_out] -while ( $record = Maasha::Biopieces::get_record( $in ) ) -{ - if ( $entry = Maasha::Fasta::biopiece2fasta( $record ) ) { - Maasha::Fasta::put_entry( $entry, $data_out, $options->{ "wrap" } ); - } - - Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" }; -} - -close $data_out; - -Maasha::Biopieces::close_stream( $in ); -Maasha::Biopieces::close_stream( $out ); - - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + fasta_out = options[:data_out] ? Fasta.open(options[:data_out], 'w', compress: compress) : STDOUT + input.each do |record| + if record[:SEQ_NAME] and record[:SEQ] + entry = Seq.new_bp(record) -BEGIN -{ - Maasha::Biopieces::status_set(); -} + fasta_out.puts entry.to_fasta(options[:wrap]) + end + output.puts record unless options[:no_stream] + end -END -{ - Maasha::Biopieces::status_log(); -} + fasta_out.close +end # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/bp_test/test/test_write_fasta b/bp_test/test/test_write_fasta index 68f37ce..4307e25 100755 --- a/bp_test/test/test_write_fasta +++ b/bp_test/test/test_write_fasta @@ -10,7 +10,12 @@ run "$bp -I $in -w 4 -o $tmp -x" assert_no_diff $tmp $out.2 clean -run "$bp -I $in -w 4 -Z -o $tmp.gz -x" +run "$bp -I $in -w 4 -Z gzip -o $tmp.gz -x" gunzip $tmp.gz assert_no_diff $tmp $out.3 clean + +run "$bp -I $in -w 4 -Z bzip2 -o $tmp.bz2 -x" +bunzip2 $tmp.bz2 +assert_no_diff $tmp $out.3 +clean -- 2.39.2