code_perl/Maasha/ACE.pm

   1 package Maasha::ACE;
   2
   3 # Copyright (C) 2009 Martin A. Hansen.
   4
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18
  19 # http://www.gnu.org/copyleft/gpl.html
  20
  21
  22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
  23
  24
  25 # Routines to parse ACE format.
  26
  27
  28 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
  29
  30
  31 use warnings;
  32 use strict;
  33 use Data::Dumper;
  34 use Storable;
  35 use Maasha::Common;
  36 use Maasha::Filesys;
  37 use Maasha::KISS;
  38 use Maasha::Seq;
  39 use vars qw ( @ISA @EXPORT );
  40
  41
  42 @ISA = qw( Exporter );
  43
  44
  45 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
  46
  47
  48 sub ace_entries_get
  49 {
  50     # Martin A. Hansen, December 2009.
  51     #
  52     # Fetches the next contig from an ACE file.
  53
  54     my ( $fh,
  55        ) = @_;
  56
  57     my ( $block, $line, @lines, $records );
  58
  59     local $/ = "\nCO ";
  60
  61     while ( $block = <$fh> )
  62     {
  63         chomp $block;
  64
  65         next if $block =~ /^AS/;
  66
  67         if ( defined $block )
  68         {
  69             @lines = split "\n", "CO $block";
  70
  71             $records = ace_entry_parse( \@lines );
  72
  73             return wantarray ? @{ $records } : $records;
  74         }
  75     }
  76 }
  77
  78
  79 sub ace_entry_parse
  80 {
  81     # Martin A. Hansen, December 2009.
  82
  83     # Parses an ACE entry given as a list of lines and
  84     # returns a list of hashref.
  85
  86     my ( $entry,  # list of lines
  87        ) = @_;
  88
  89     # Returns a list.
  90
  91     my ( $i, $contig, $reads, $read_name, $read_seq, $record, @records );
  92
  93     $i = 0;
  94
  95     while ( $i < scalar @{ $entry } )
  96     {
  97         if ( $entry->[ $i ] =~ /^CO ([^ ]+) (\d+) (\d+) (\d+) (U|C)/ )
  98         {
  99             $contig->{ 'CONTIG_NAME' }       = $1;
 100             $contig->{ 'CONTIG_BASES' }      = $2;
 101             $contig->{ 'CONTIG_READS' }      = $3;
 102             $contig->{ 'CONTIG_SEGMENT' }    = $4;
 103             $contig->{ 'CONTIG_COMPLEMENT' } = $5;
 104             $contig->{ 'CONTIG_SEQ' }        = "";
 105
 106             $i++;
 107
 108             while ( $entry->[ $i ] ne "" )
 109             {
 110                 $contig->{ 'CONTIG_SEQ' } .= $entry->[ $i ];
 111
 112                 $i++;
 113             }
 114         }
 115         elsif ( $entry->[ $i ] =~ /^AF ([^ ]+) (U|C) (-?\d+)/ )
 116         {
 117             $reads->{ $1 }->{ 'AF_READ_COMPLEMENT' } = $2;
 118             $reads->{ $1 }->{ 'AF_READ_BEG' }        = ( $3 - 1 );
 119         }
 120         elsif ( $entry->[ $i ] =~ /^BS (\d+) (\d+) (.+)/ )
 121         {
 122             # skip
 123         }
 124         elsif ( $entry->[ $i ] =~ /^RD ([^ ]+) (\d+) (\d+) (\d+)/ )
 125         {
 126             $read_name = $1;
 127
 128             $reads->{ $read_name }->{ 'RD_READ_LEN' }   = $2;
 129             $reads->{ $read_name }->{ 'RD_READ_ITEMS' } = $3;
 130             $reads->{ $read_name }->{ 'RD_READ_TAGS' }  = $4;
 131
 132             $read_seq   = "";
 133
 134             $i++;
 135
 136             while ( $entry->[ $i ] ne "" )
 137             {
 138                 $read_seq .= $entry->[ $i ];
 139
 140                 $i++;
 141             }
 142
 143             $reads->{ $read_name }->{ 'RD_READ_SEQ' } = $read_seq;
 144         }
 145         elsif ( $entry->[ $i ] =~ /^QA (\d+) (\d+) (\d+) (\d+)/ )
 146         {
 147             $reads->{ $read_name }->{ 'QA_CLIP_BEG' }  = ( $1 - 1 );
 148             $reads->{ $read_name }->{ 'QA_CLIP_END' }  = ( $2 - 1 );
 149             $reads->{ $read_name }->{ 'QA_ALIGN_BEG' } = ( $3 - 1 );
 150             $reads->{ $read_name }->{ 'QA_ALIGN_END' } = ( $4 - 1 );
 151         }
 152
 153         $i++;
 154     }
 155
 156     my ( $read, $s_seq, $q_seq, $len, $align );
 157
 158     foreach $read ( keys %{ $reads } )
 159     {
 160         $len   = $reads->{ $read }->{ 'QA_ALIGN_END' } - $reads->{ $read }->{ 'QA_ALIGN_BEG' } + 1;
 161         $s_seq = substr $contig->{ 'CONTIG_SEQ' }, $reads->{ $read }->{ 'AF_READ_BEG' } + $reads->{ $read }->{ 'QA_ALIGN_BEG' }, $len;
 162         $q_seq = substr $reads->{ $read }->{ 'RD_READ_SEQ' }, $reads->{ $read }->{ 'QA_ALIGN_BEG' }, $len;
 163
 164         $s_seq =~ tr/*/-/;
 165         $q_seq =~ tr/*/-/;
 166
 167         if ( $reads->{ $read }->{ 'AF_READ_COMPLEMENT' } eq 'C' )
 168         {
 169             $record->{ 'STRAND' } = '-';
 170
 171             Maasha::Seq::dna_comp( \$s_seq );
 172             Maasha::Seq::dna_comp( \$q_seq );
 173         }
 174         else
 175         {
 176             $record->{ 'STRAND' } = '+';
 177         }
 178
 179        # print Dumper( $read, $reads->{ $read } );
 180
 181         $align = Maasha::KISS::kiss_align_enc( \$s_seq, \$q_seq, $reads->{ $read }->{ 'QA_ALIGN_BEG' } );
 182
 183         $record->{ 'S_ID' }  = $contig->{ 'CONTIG_NAME' };
 184         $record->{ 'S_BEG' } = $reads->{ $read }->{ 'AF_READ_BEG' };
 185         $record->{ 'S_END' } = $record->{ 'S_BEG' } + $len - 1;   # FIXME
 186         $record->{ 'Q_ID' }  = $read;
 187         $record->{ 'Q_ID' }  =~ s/\..+//;
 188         $record->{ 'ALIGN' } = join ",", @{ $align };
 189
 190         if ( 0 )   # DEBUG
 191         {
 192             print "LEN: $len\n";
 193             print "S_SEQ: $s_seq\n";
 194             print "Q_SEQ: $q_seq\n\n";
 195             print "READ: $read\n";
 196             print Dumper( $reads->{ $read } );
 197             print Dumper( $record );
 198         }
 199
 200         if ( $record->{ 'S_BEG' } >= 0 ) # FIXME
 201         {
 202             push @records, Storable::dclone $record;
 203         }
 204     }
 205
 206     return wantarray ? @records : \@records;
 207 }
 208
 209
 210 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 211
 212
 213 1;