From 78a12cbc29114e8e17f485ad26b3d4a2c135e114 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Tue, 21 Jul 2009 11:36:31 +0000 Subject: [PATCH] fixed bowtie_seq git-svn-id: http://biopieces.googlecode.com/svn/trunk@574 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/bowtie_seq | 23 +++++- bp_bin/print_wiki | 1 + bp_bin/write_fastq | 86 +++++++++++++++++++ code_perl/Maasha/Fastq.pm | 156 +++++++++++++++++++++++++++++++++++ code_perl/Maasha/Gwiki.pm | 2 +- code_perl/Maasha/UCSC/BED.pm | 2 + 6 files changed, 265 insertions(+), 5 deletions(-) create mode 100755 bp_bin/write_fastq create mode 100644 code_perl/Maasha/Fastq.pm diff --git a/bp_bin/bowtie_seq b/bp_bin/bowtie_seq index 92a5118..3dd3ddc 100755 --- a/bp_bin/bowtie_seq +++ b/bp_bin/bowtie_seq @@ -30,6 +30,7 @@ use warnings; use strict; use Maasha::Biopieces; use Maasha::Common; +use Maasha::Fastq; use Maasha::Fasta; use Maasha::Calc; @@ -37,7 +38,7 @@ use Maasha::Calc; # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -my ( $options, $in, $out, $index, $tmp_dir, $tmp_in, $tmp_out, $fh_in, $fh_out, $record, $entry, $line, @fields ); +my ( $options, $in, $out, $index, $tmp_dir, $tmp_in, $tmp_out, $fh_in, $fh_out, $record, $entry, $line, @fields, $type, $args ); $options = Maasha::Biopieces::parse_options( [ @@ -58,8 +59,19 @@ $fh_out = Maasha::Filesys::file_write_open( $tmp_in ); while ( $record = Maasha::Biopieces::get_record( $in ) ) { - if ( $entry = Maasha::Fasta::biopiece2fasta( $record ) ) { + if ( $entry = Maasha::Fastq::biopiece2fastq( $record ) ) + { + Maasha::Common::error( "Mixed FASTA and FASTQ entries in stream" ) if defined $type and $type ne "FASTQ"; + Maasha::Fastq::put_entry( $entry, $fh_out ); + + $type = "FASTQ"; + } + elsif ( $entry = Maasha::Fasta::biopiece2fasta( $record ) ) + { + Maasha::Common::error( "Mixed FASTA and FASTQ entries in stream" ) if defined $type and $type ne "FASTA"; Maasha::Fasta::put_entry( $entry, $fh_out ); + + $type = "FASTA"; } Maasha::Biopieces::put_record( $record, $out ); @@ -67,10 +79,13 @@ while ( $record = Maasha::Biopieces::get_record( $in ) ) close $fh_out; +$args = ""; +$args = "-f" if $type eq "FASTA"; + if ( $options->{ 'verbose' } ) { - Maasha::Common::run( "bowtie", "-f $index $tmp_in $tmp_out" ); + Maasha::Common::run( "bowtie", "$args $index $tmp_in $tmp_out" ); } else { - Maasha::Common::run( "bowtie", "-f $index $tmp_in $tmp_out > /dev/null 2>&1" ); + Maasha::Common::run( "bowtie", "$args $index $tmp_in $tmp_out > /dev/null 2>&1" ); } unlink $tmp_in; diff --git a/bp_bin/print_wiki b/bp_bin/print_wiki index f766c8b..79ee80a 100755 --- a/bp_bin/print_wiki +++ b/bp_bin/print_wiki @@ -28,6 +28,7 @@ use warnings; use strict; +use Data::Dumper; use Maasha::Biopieces; use Maasha::Gwiki; diff --git a/bp_bin/write_fastq b/bp_bin/write_fastq new file mode 100755 index 0000000..d242f37 --- /dev/null +++ b/bp_bin/write_fastq @@ -0,0 +1,86 @@ +#!/usr/bin/env perl + +# Copyright (C) 2007-2009 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Write sequences from stream in FASTQ format. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +use warnings; +use strict; +use Maasha::Fastq; +use Maasha::Biopieces; + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +my ( $options, $in, $out, $record, $data_out, $entry ); + +$options = Maasha::Biopieces::parse_options( + [ + { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'data_out', short => 'o', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'compress', short => 'Z', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + ] +); + +$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); +$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); + +$data_out = Maasha::Biopieces::write_stream( $options->{ "data_out" }, $options->{ "compress" } ); + +while ( $record = Maasha::Biopieces::get_record( $in ) ) +{ + if ( $entry = Maasha::Fastq::biopiece2fastq( $record ) ) { + Maasha::Fastq::put_entry( $entry, $data_out, $options->{ "wrap" } ); + } + + Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" }; +} + +close $data_out; + +Maasha::Biopieces::close_stream( $in ); +Maasha::Biopieces::close_stream( $out ); + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +BEGIN +{ + Maasha::Biopieces::status_set(); +} + + +END +{ + Maasha::Biopieces::status_log(); +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__ diff --git a/code_perl/Maasha/Fastq.pm b/code_perl/Maasha/Fastq.pm new file mode 100644 index 0000000..5149166 --- /dev/null +++ b/code_perl/Maasha/Fastq.pm @@ -0,0 +1,156 @@ +package Maasha::Fastq; + +# Copyright (C) 2006-2009 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +# Routines for manipulation of FASTQ files and FASTQ entries. + +# http://maq.sourceforge.net/fastq.shtml + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +use warnings; +use strict; +use Data::Dumper; +use Maasha::Calc; +use vars qw( @ISA @EXPORT ); + +@ISA = qw( Exporter ); + +use constant { + SEQ_NAME => 0, + SEQ => 1, + SCORES => 2, +}; + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +sub get_entry +{ + # Martin A. Hansen, July 2009. + + # Gets the next FASTQ entry from a given filehandle. + + my ( $fh, # filehandle + ) = @_; + + # Returns a list + + my ( $seq, $seq_name, $qual, $qual_name ); + + $seq_name = <$fh>; + $seq = <$fh>; + $qual_name = <$fh>; + $qual = <$fh>; + + return unless $seq; + + chomp $seq; + chomp $seq_name; + chomp $qual; + chomp $qual_name; + + $seq_name =~ s/^@//; + + return wantarray ? ( $seq_name, $seq, $qual ) : [ $seq_name, $seq, $qual ]; +} + + +sub put_entry +{ + # Martin A. Hansen, July 2009. + + # Output a FASTQ entry to STDOUT or a filehandle. + + my ( $entry, # FASTQ entry + $fh, # filehandle - OPTIONAL + ) = @_; + + # Returns nothing. + + $fh ||= \*STDOUT; + + print $fh "@" . $entry->[ SEQ_NAME ] . "\n"; + print $fh $entry->[ SEQ ] . "\n"; + print $fh "+\n"; + print $fh $entry->[ SCORES ] . "\n"; +} + + +sub fastq2biopiece +{ + # Martin A. Hansen, July 2009. + + # Converts a FASTQ entry to a Biopiece record, where + # the FASTQ quality scores are converted to numerics. + + my ( $entry, # FASTQ entry, + ) = @_; + + # Returns a hash. + + my ( $record ); + + $record->{ 'SEQ' } = $entry->[ SEQ ]; + $record->{ 'SEQ_NAME' } = $entry->[ SEQ_NAME ]; + $record->{ 'SCORES' } = $entry->[ SCORES ]; + + $record->{ 'SCORES' } =~ s/(.)/ord( $1 ) - 33 . ";"/ge; # http://maq.sourceforge.net/fastq.shtml + $record->{ 'SCORE_MEAN' } = sprintf( "%.2f", Maasha::Calc::mean( [ split /;/, $record->{ 'SCORES' } ] ) ); + + return wantarray ? %{ $record } : $record; +} + + +sub biopiece2fastq +{ + # Martin A. Hansen, July 2009. + + # Converts a Biopiece record to a FASTQ entry. + + my ( $record, # Biopiece record + ) = @_; + + # Returns a list. + + my ( $list ); + + if ( exists $record->{ 'SEQ' } and exists $record->{ 'SEQ_NAME' } and exists $record->{ 'SCORES' } ) + { + $list->[ SEQ_NAME ] = $record->{ 'SEQ_NAME' }; + $list->[ SEQ ] = $record->{ 'SEQ' }; + $list->[ SCORES ] = $record->{ 'SCORES' }; + + $list->[ SCORES ] =~ s/(\d+);/chr( ( $1 <= 93 ? $1 : 93 ) + 33 )/ge; + + return wantarray ? @{ $list } : $list; + } + + return; +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< diff --git a/code_perl/Maasha/Gwiki.pm b/code_perl/Maasha/Gwiki.pm index 60987d7..055df09 100644 --- a/code_perl/Maasha/Gwiki.pm +++ b/code_perl/Maasha/Gwiki.pm @@ -152,7 +152,7 @@ sub gwiki_read { undef @block; - if ( $lines[ $i ] =~ /^(#summary.+)/ ) + if ( $lines[ $i ] =~ /(#summary.+)/ ) # TODO: unsolved problem with anchor! { $section = $1; diff --git a/code_perl/Maasha/UCSC/BED.pm b/code_perl/Maasha/UCSC/BED.pm index 6bfa590..3f4784a 100644 --- a/code_perl/Maasha/UCSC/BED.pm +++ b/code_perl/Maasha/UCSC/BED.pm @@ -506,6 +506,8 @@ sub biopiece2bed if ( exists $bp_record->{ "SCORE" } ) { $bed_entry[ score ] = $bp_record->{ "SCORE" }; + } elsif ( exists $bp_record->{ "SCORE_MEAN" } ) { + $bed_entry[ score ] = sprintf( "%.0f", $bp_record->{ "SCORE_MEAN" } ); } else { return wantarray ? @bed_entry : \@bed_entry; } -- 2.39.2