X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fget_genome_align;h=566ea6e365072e40f388b27a6b9784e503547439;hb=5de6112b70b59420b245ce636a8b2e3c90acbe00;hp=fdf5bd287f78613d2aea83bb9b15f855cad57e77;hpb=e8e32d43d674573ff4a66fe77780cc18cb02d430;p=biopieces.git diff --git a/bp_bin/get_genome_align b/bp_bin/get_genome_align index fdf5bd2..566ea6e 100755 --- a/bp_bin/get_genome_align +++ b/bp_bin/get_genome_align @@ -1,6 +1,181 @@ #!/usr/bin/env perl +# Copyright (C) 2007-2009 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Extract alignments from a multiple genome alignment. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + use warnings; use strict; +use Maasha::Biopieces; +use Maasha::Common; +use Maasha::UCSC; + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +my ( $options, $in, $out, $record, $tmp_dir, $maf_track, $align, $align_num, $beg, $end, $len, $entry ); + +$options = Maasha::Biopieces::parse_options( + [ + { long => 'genome', short => 'g', type => 'genome', mandatory => 'yes', default => undef, allowed => undef, disallowed => undef }, + { long => 'chr', short => 'c', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'beg', short => 'b', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => 0 }, + { long => 'end', short => 'e', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => 0 }, + { long => 'len', short => 'l', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => 0 }, + { long => 'strand', short => 's', type => 'string', mandatory => 'no', default => '+', allowed => '+,-', disallowed => undef }, + ] +); + +$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); +$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); + +$tmp_dir = Maasha::Biopieces::get_tmpdir(); + +$align_num = 1; + +$maf_track = maf_track( $options->{ "genome" } ); + +if ( $options->{ "chr" } and $options->{ "beg" } and ( $options->{ "end" } or $options->{ "len" } ) ) +{ + $beg = $options->{ "beg" } - 1; + + if ( $options->{ "end" } ) { + $end = $options->{ "end" }; + } elsif ( $options->{ "len" } ) { + $end = $beg + $options->{ "len" }; + } + + $align = Maasha::UCSC::maf_extract( $tmp_dir, $options->{ "genome" }, $maf_track, $options->{ "chr" }, $beg, $end, $options->{ "strand" } ); + + foreach $entry ( @{ $align } ) + { + $entry->{ "CHR" } = $record->{ "CHR" }; + $entry->{ "CHR_BEG" } = $record->{ "CHR_BEG" }; + $entry->{ "CHR_END" } = $record->{ "CHR_END" }; + $entry->{ "STRAND" } = $record->{ "STRAND" } || '+'; + $entry->{ "Q_ID" } = $record->{ "Q_ID" }; + $entry->{ "SCORE" } = $record->{ "SCORE" }; + + Maasha::Biopieces::put_record( $entry, $out ); + } +} + +while ( $record = Maasha::Biopieces::get_record( $in ) ) +{ + if ( $record->{ "REC_TYPE" } eq "BED" ) + { + $align = Maasha::UCSC::maf_extract( $tmp_dir, $options->{ "genome" }, $maf_track, $record->{ "CHR" }, $record->{ "CHR_BEG" }, $record->{ "CHR_END" }, $record->{ "STRAND" } ); + } + elsif ( $record->{ "REC_TYPE" } eq "VMATCH" ) + { + $align = Maasha::UCSC::maf_extract( $tmp_dir, $options->{ "genome" }, $maf_track, $record->{ "S_ID" }, $record->{ "S_BEG" }, $record->{ "S_END" } + 1, $record->{ "STRAND" } ); + } + elsif ( $record->{ "REC_TYPE" } eq "PSL" ) + { + $align = Maasha::UCSC::maf_extract( $tmp_dir, $options->{ "genome" }, $maf_track, $record->{ "S_ID" }, $record->{ "S_BEG" }, $record->{ "S_END" }, $record->{ "STRAND" } ); + } + elsif ( $record->{ "REC_TYPE" } eq "BLAST" ) + { + $align = Maasha::UCSC::maf_extract( $tmp_dir, $options->{ "genome" }, $maf_track, $record->{ "S_ID" }, $record->{ "S_BEG" }, $record->{ "S_END" }, $record->{ "STRAND" } ); + } + + foreach $entry ( @{ $align } ) + { + $entry->{ "CHR" } = $record->{ "CHR" }; + $entry->{ "CHR_BEG" } = $record->{ "CHR_BEG" }; + $entry->{ "CHR_END" } = $record->{ "CHR_END" }; + $entry->{ "STRAND" } = $record->{ "STRAND" }; + $entry->{ "Q_ID" } = $record->{ "Q_ID" }; + $entry->{ "SCORE" } = $record->{ "SCORE" }; + + Maasha::Biopieces::put_record( $entry, $out ); + } + + $align_num++; +} + +Maasha::Biopieces::close_stream( $in ); +Maasha::Biopieces::close_stream( $out ); + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +sub maf_track +{ + # Martin A. Hansen, April 2008. + + # Given a genome returns the corresponding mafTrack database table name. + + my ( $genome, # genome to lookup. + ) = @_; + + # Returns a string. + + my ( %hash ); + + # The below has should be in a config file - fix later. + + %hash = ( + danRer4 => 'multiz7way', + dm2 => 'multiz15way', + dm3 => 'multiz15way', + fr2 => 'multiz7way', + galGal3 => 'multiz7way', + gasAcu1 => 'multiz7way', + hg18 => 'multiz17way', + mm8 => 'multiz17way', + mm9 => 'multiz17way', + oryLat1 => 'multiz7way', + panTro2 => 'multiz17way', + tetNig1 => 'multiz7way', + ); + + Maasha::Common::error( qw(multiz track not found) ) if not exists $hash{ $genome }; + + return $hash{ $genome }; +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +BEGIN +{ + Maasha::Biopieces::status_set(); +} + + +END +{ + Maasha::Biopieces::status_log(); +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + -use Maasha::BioRun; +__END__