From 1f203b5788a335b1e17f540fa52709ac63c35520 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Mon, 20 Jul 2009 21:44:23 +0000 Subject: [PATCH] added bowtie_seq biopiece git-svn-id: http://biopieces.googlecode.com/svn/trunk@570 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/bowtie_seq | 152 +++++++++++++++++++++++++++++++++++++ bp_bin/format_genome | 2 +- code_perl/Maasha/Solexa.pm | 9 +-- 3 files changed, 155 insertions(+), 8 deletions(-) create mode 100755 bp_bin/bowtie_seq diff --git a/bp_bin/bowtie_seq b/bp_bin/bowtie_seq new file mode 100755 index 0000000..560b1c4 --- /dev/null +++ b/bp_bin/bowtie_seq @@ -0,0 +1,152 @@ +#!/usr/bin/env perl + +# Copyright (C) 2007-2009 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Use bowtie to map sequences in the stream against a specified genome. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +use warnings; +use strict; +use Maasha::Biopieces; +use Maasha::Common; +use Maasha::Fasta; +use Maasha::Calc; + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +my ( $options, $in, $out, $index, $tmp_dir, $tmp_in, $tmp_out, $fh_in, $fh_out, $record, $entry, $line, @fields ); + +$options = Maasha::Biopieces::parse_options( + [ + { long => 'genome', short => 'g', type => 'genome', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef }, + ] +); + +$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); +$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); + +$index = "$ENV{ 'BP_DATA' }/genomes/$options->{ 'genome' }/bowtie/$options->{ 'genome' }"; + +$tmp_dir = Maasha::Biopieces::get_tmpdir(); +$tmp_in = "$tmp_dir/bowtie.seq"; +$tmp_out = "$tmp_dir/bowtie.out"; + +$fh_out = Maasha::Filesys::file_write_open( $tmp_in ); + +while ( $record = Maasha::Biopieces::get_record( $in ) ) +{ + if ( $entry = Maasha::Fasta::biopiece2fasta( $record ) ) { + Maasha::Fasta::put_entry( $entry, $fh_out ); + } + + Maasha::Biopieces::put_record( $record, $out ); +} + +close $fh_out; + +if ( $options->{ 'verbose' } ) { + Maasha::Common::run( "bowtie", "-f $index $tmp_in $tmp_out" ); +} else { + Maasha::Common::run( "bowtie", "-f $index $tmp_in $tmp_out > /dev/null 2>&1" ); +} + +unlink $tmp_in; + +$fh_in = Maasha::Filesys::file_read_open( $tmp_out ); + +while ( $line = <$fh_in> ) +{ + chomp $line; + + @fields = split /\t/, $line; + + $record = bowtie2biopiece( \@fields ); + + Maasha::Biopieces::put_record( $record, $out ); +} + +close $fh_out; + +unlink $tmp_out; + +Maasha::Biopieces::close_stream( $in ); +Maasha::Biopieces::close_stream( $out ); + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +sub bowtie2biopiece +{ + # Martin A. Hansen, July 2009 + + # Convert a bowtie entry to a Biopiece record. + + my ( $entry, # bowtie entry + ) = @_; + + # Returns a hash. + + my ( $record, @scores ); + + $record->{ 'Q_ID' } = $entry->[ 0 ]; + $record->{ 'STRAND' } = $entry->[ 1 ]; + $record->{ 'S_ID' } = $entry->[ 2 ]; + $record->{ 'S_BEG' } = $entry->[ 3 ]; + $record->{ 'SEQ' } = $entry->[ 4 ]; + $record->{ 'SCORES' } = $entry->[ 5 ]; + $record->{ 'MISMATCH' } = $entry->[ 6 ]; + + $record->{ 'SEQ_LEN' } = length $entry->[ 4 ]; + $record->{ 'S_END' } = $record->{ 'S_BEG' } + $record->{ 'SEQ_LEN' } - 1; + $record->{ 'SCORES' } =~ s/(.)/ord( $1 ) - 33 . ";"/ge; # http://maq.sourceforge.net/fastq.shtml + $record->{ 'SCORE' } = Maasha::Calc::mean( [ split /;/, $record->{ 'SCORES' } ] ); + + $record->{ 'REC_TYPE' } = "BOWTIE"; + + return wantarray ? %{ $record } : $record; +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +BEGIN +{ + Maasha::Biopieces::status_set(); +} + + +END +{ + Maasha::Biopieces::status_log(); +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__ diff --git a/bp_bin/format_genome b/bp_bin/format_genome index 71e1d21..9fbdcb8 100755 --- a/bp_bin/format_genome +++ b/bp_bin/format_genome @@ -113,7 +113,7 @@ foreach $format ( @{ $options->{ 'formats' } } ) elsif ( $format =~ /^blast$/i ) { Maasha::NCBI::blast_index( "$genome.fna", $fasta_dir, "$dir/genomes/$genome/blast", "dna", $genome ) } elsif ( $format =~ /^blat$/i ) { warn "BLAT FORMAT NOT IMPLEMENTED" } elsif ( $format =~ /^vmatch$/i ) { Maasha::Match::vmatch_index( "$genome.fna", $fasta_dir, "$dir/genomes/$genome/vmatch", $tmp_dir ) } - elsif ( $format =~ /^bowtie$/i ) { bowtie_index( "$fasta_dir/$genome.fna", "$dir/genomes/$genome/bowtie", $genome, $option->{ 'verbose' } ) } + elsif ( $format =~ /^bowtie$/i ) { bowtie_index( "$fasta_dir/$genome.fna", "$dir/genomes/$genome/bowtie", $genome, $options->{ 'verbose' } ) } elsif ( $format =~ /^phastcons$/i ) { Maasha::UCSC::phastcons_index( "$genome.pp", $phastcons_dir ) } } diff --git a/code_perl/Maasha/Solexa.pm b/code_perl/Maasha/Solexa.pm index f539658..a26b0ce 100644 --- a/code_perl/Maasha/Solexa.pm +++ b/code_perl/Maasha/Solexa.pm @@ -65,7 +65,7 @@ sub solexa_get_entry_octal # Returns a list. - my ( $seq_header, $seq, $score_head, $score, @scores ); + my ( $seq_header, $seq, $score_head, $score ); $seq_header = <$fh>; $seq = <$fh>; @@ -79,13 +79,8 @@ sub solexa_get_entry_octal chomp $score_header; chomp $score; - @scores = split //, $score; - - map { $_ = int( score_oct2dec( $_ ) ) } @scores; - $seq_header =~ s/^@//; - - $score = join( ";", @scores ); + $score =~ s/(.)/int( score_oct2dec( $1 ) ) . ";"/ge; return wantarray ? ( $seq_header, $seq, $score ) : [ $seq_header, $seq, $score ]; } -- 2.39.5