From 3ef3037a8bdfca6bb16fdd4455e1c9218352ad3a Mon Sep 17 00:00:00 2001 From: martinahansen Date: Wed, 25 Sep 2013 19:29:53 +0000 Subject: [PATCH] added support for gzip/bzip2 in write_fasta_files (and wrap) git-svn-id: http://biopieces.googlecode.com/svn/trunk@2212 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/write_fasta_files | 20 +++++++++++-------- bp_test/out/write_fasta_files.out.3/11.fasta | 8 ++++++++ bp_test/out/write_fasta_files.out.3/9.fasta | 4 ++++ bp_test/test/test_write_fasta_files | 21 ++++++++++++++++++-- 4 files changed, 43 insertions(+), 10 deletions(-) create mode 100644 bp_test/out/write_fasta_files.out.3/11.fasta create mode 100644 bp_test/out/write_fasta_files.out.3/9.fasta diff --git a/bp_bin/write_fasta_files b/bp_bin/write_fasta_files index 36011ed..a0e20a5 100755 --- a/bp_bin/write_fasta_files +++ b/bp_bin/write_fasta_files @@ -33,15 +33,17 @@ require 'maasha/fasta' require 'pp' casts = [] -casts << {:long=>'key', :short=>'k', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil} -casts << {:long=>'dir', :short=>'d', :type=>'dir!', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil} -casts << {:long=>'no_stream', :short=>'x', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {long: 'key', short: 'k', type: 'string', mandatory: true, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'dir', short: 'd', type: 'dir!', mandatory: true, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'wrap', short: 'w', type: 'uint', mandatory: false, default: nil, allowed: nil, disallowed: "0"} +casts << {long: 'compress', short: 'Z', type: 'string', mandatory: false, default: nil, allowed: "gzip,bzip,bzip2", disallowed: nil} +casts << {long: 'no_stream', short: 'x', type: 'flag', mandatory: false, default: nil, allowed: nil, disallowed: nil} options = Biopieces.options_parse(ARGV, casts) -key = options[:key].to_sym - -fh_hash = {} +key = options[:key].to_sym +compress = options[:compress] ? options[:compress].to_sym : nil +fh_hash = {} Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| input.each_record do |record| @@ -52,11 +54,13 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| fasta_io = fh_hash[record[key].to_sym] else fasta_file = File.join(options[:dir], record[key] + ".fasta") - fasta_io = Fasta.open(fasta_file, "w") + fasta_file << ".gz" if compress == :gzip + fasta_file << ".bz2" if compress == :bzip or compress == :bzip2 + fasta_io = Fasta.open(fasta_file, "w", compress: compress) fh_hash[record[key].to_sym] = fasta_io end - fasta_io.puts seq.to_fasta + fasta_io.puts seq.to_fasta(options[:wrap]) end output.puts record unless options[:no_stream] diff --git a/bp_test/out/write_fasta_files.out.3/11.fasta b/bp_test/out/write_fasta_files.out.3/11.fasta new file mode 100644 index 0000000..85f3184 --- /dev/null +++ b/bp_test/out/write_fasta_files.out.3/11.fasta @@ -0,0 +1,8 @@ +>test2 +ACGA +CTAC +AGT +>test3 +GCAC +ACAG +AGC diff --git a/bp_test/out/write_fasta_files.out.3/9.fasta b/bp_test/out/write_fasta_files.out.3/9.fasta new file mode 100644 index 0000000..4e39e70 --- /dev/null +++ b/bp_test/out/write_fasta_files.out.3/9.fasta @@ -0,0 +1,4 @@ +>test1 +GACA +TCGA +C diff --git a/bp_test/test/test_write_fasta_files b/bp_test/test/test_write_fasta_files index d87fb8a..9332864 100755 --- a/bp_test/test/test_write_fasta_files +++ b/bp_test/test/test_write_fasta_files @@ -3,16 +3,33 @@ source "$BP_DIR/bp_test/lib/test.sh" mkdir $tmp_dir - run "$bp -I $in -d $tmp_dir -k SEQ_NAME -x" assert_no_diff_dir $tmp_dir $out.1 clean +rm -rf $tmp_dir +mkdir $tmp_dir +run "$bp -I $in -d $tmp_dir -k SEQ_LEN -x" +assert_no_diff_dir $tmp_dir $out.2 +clean rm -rf $tmp_dir + mkdir $tmp_dir +run "$bp -I $in -d $tmp_dir -k SEQ_LEN -Z gzip -x" +gunzip $tmp_dir/*.gz +assert_no_diff_dir $tmp_dir $out.2 +clean +rm -rf $tmp_dir -run "$bp -I $in -d $tmp_dir -k SEQ_LEN -x" +mkdir $tmp_dir +run "$bp -I $in -d $tmp_dir -k SEQ_LEN -Z bzip2 -x" +bunzip2 $tmp_dir/*.bz2 assert_no_diff_dir $tmp_dir $out.2 clean +rm -rf $tmp_dir +mkdir $tmp_dir +run "$bp -I $in -d $tmp_dir -k SEQ_LEN -w 4 -x" +assert_no_diff_dir $tmp_dir $out.3 +clean rm -rf $tmp_dir -- 2.39.2