From ca0e3f34fa91f527961dcbbf2a0827385af9a6ce Mon Sep 17 00:00:00 2001 From: martinahansen Date: Fri, 4 Sep 2009 14:33:25 +0000 Subject: [PATCH] added mate_pair_dist git-svn-id: http://biopieces.googlecode.com/svn/trunk@652 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/mate_pair_dist | 165 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100755 bp_bin/mate_pair_dist diff --git a/bp_bin/mate_pair_dist b/bp_bin/mate_pair_dist new file mode 100755 index 0000000..d652679 --- /dev/null +++ b/bp_bin/mate_pair_dist @@ -0,0 +1,165 @@ +#!/usr/bin/env perl + +# Copyright (C) 2007-2009 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# http://www.gnu.org/copyleft/gpl.html + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Determines the distance between mapped mate pair sequence matches. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +use warnings; +use strict; +use Data::Dumper; +use Maasha::Biopieces; + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +my ( $options, $in, $out, $record, %data, $q_id, $mate, $contig, $strand, @mates1, @mates2, $mate1, $mate2, $new_record ); + +$options = Maasha::Biopieces::parse_options(); + +$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); +$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); + +while ( $record = Maasha::Biopieces::get_record( $in ) ) +{ + if ( exists $record->{ 'S_ID' } and + exists $record->{ 'Q_ID' } and + exists $record->{ 'STRAND' } and + exists $record->{ 'S_BEG' } + ) + { + ( $q_id, $mate ) = split "/", $record->{ 'Q_ID' }, 2; + + if ( $mate == 2 ) + { + if ( $record->{ 'STRAND' } eq '+' ) { + $record->{ 'STRAND' } = '-'; + } else { + $record->{ 'STRAND' } = '+'; + } + } + + push @{ $data{ $record->{ 'S_ID' } }{ $record->{ 'STRAND' } }{ $q_id }{ $mate } }, $record; + } + + Maasha::Biopieces::put_record( $record, $out ); +} + +# print Dumper( \%data ); # DEBUG + +foreach $contig ( keys %data ) +{ + foreach $strand ( keys %{ $data{ $contig } } ) + { + foreach $q_id ( keys %{ $data{ $contig }{ $strand } } ) + { + next if not exists $data{ $contig }{ $strand }{ $q_id }{ 1 }; + next if not exists $data{ $contig }{ $strand }{ $q_id }{ 2 }; + + @mates1 = @{ $data{ $contig }{ $strand }{ $q_id }{ 1 } }; + @mates2 = @{ $data{ $contig }{ $strand }{ $q_id }{ 2 } }; + + @mates1 = sort { $a->{ 'S_BEG' } <=> $b->{ 'S_BEG' } } @mates1; + @mates2 = sort { $a->{ 'S_BEG' } <=> $b->{ 'S_BEG' } } @mates2; + + foreach $mate1 ( @mates1 ) + { + foreach $mate2 ( @mates2 ) + { + $new_record = { + S_ID => $contig, + Q_ID1 => $mate1->{ 'Q_ID' }, + Q_ID2 => $mate2->{ 'Q_ID' }, + DIST => abs( $mate2->{ 'S_BEG' } - $mate1->{ 'S_BEG' } ), + }; + + Maasha::Biopieces::put_record( $new_record, $out ); + } + } + } + } +} + +Maasha::Biopieces::close_stream( $in ); +Maasha::Biopieces::close_stream( $out ); + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +BEGIN +{ + Maasha::Biopieces::status_set(); +} + + +END +{ + Maasha::Biopieces::status_log(); +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__ + + +@records = sort { $a->{ 'S_ID' } cmp $b->{ 'S_ID' } or + $a->{ 'STRAND' } cmp $b->{ 'STRAND' } or + $a->{ 'Q_ID' } cmp $b->{ 'Q_ID' } or + $a->{ 'S_BEG' } <=> $b->{ 'S_BEG' } } @records; + +print STDERR "done.\n" if $options->{ 'verbose' }; + +# map { print join( "\t", $_->{ 'S_ID' }, $_->{ 'STRAND' }, $_->{ 'Q_ID' }, $_->{ 'S_BEG' } ), "\n" } @records; # DEBUG +print Dumper( \@records ); + +$i = 0; + +while ( $i < @records - 1 ) +{ + $j = $i + 1; + + while ( $j < @records and + $records[ $i ]->{ 'S_ID' } eq $records[ $j ]->{ 'S_ID' } and + $records[ $i ]->{ 'STRAND' } eq $records[ $j ]->{ 'STRAND' } + ) + { + ( $id1, $mate1 ) = split "/", $records[ $i ]->{ 'Q_ID' }, 2; + ( $id2, $mate2 ) = split "/", $records[ $j ]->{ 'Q_ID' }, 2; + + print Dumper( $i, $j, $id1, $mate1, $id1, $mate2 ); + + if ( $id1 eq $id2 and $mate1 == 1 and $mate2 == 2 ) + { + print "Dist: " . ( $records[ $j ]->{ 'S_BEG' } - $records[ $i ]->{ 'S_BEG' } ) . "\n"; + } + + $j++; + } + + $i = $j; +} -- 2.39.5