# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-# Routines for manipulation 'Generic Feature Format' - GFF.
+# Routines for manipulating 'Generic Feature Format' (GFF) version 3 files.
+# Read more here:
+# http://www.sequenceontology.org/resources/gff3.html
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+use warnings;
use strict;
use Data::Dumper;
use Maasha::Common;
@ISA = qw( Exporter );
+# Zero-based positions of the nine tab-separated columns of a GFF entry.
+# The routines in this file use them to index the list that represents an
+# entry, e.g. $entry->[ seqid ].
+use constant {
+ seqid => 0,
+ source => 1,
+ type => 2,
+ start => 3,
+ end => 4,
+ score => 5,
+ strand => 6,
+ phase => 7,
+ attributes => 8,
+};
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-sub get_entry
-{
- # Martin A. Hansen, February 2008.
+# Maps a lower-cased attribute tag to the spelling used when the tag is written
+# to a GFF attributes column. biopiece2gff lower-cases the part of a record key
+# following 'ATT_' and looks it up here; tags not listed are left lower-cased.
+my %ATTRIBUTES = (
+ id => 'ID',
+ name => 'Name',
+ alias => 'Alias',
+ parent => 'Parent',
+ target => 'Target',
+ gap => 'Gap',
+ derives_from => 'Derives_from',
+ note => 'Note',
+ dbxref => 'Dbxref',
+ ontology_term => 'Ontology_term',
+);
+
- # Reads a single entry from a filehandle to a GFF file.
+sub gff_entry_get
+{
+    # Martin A. Hansen, October 2009
+
+    # Reads the next GFF entry from a filehandle. Empty lines, lines
+    # starting with '#' and lines with fewer than 9 tab-separated fields
+    # are skipped.
+
- my ( $fh, # file handle
+    my ( $fh,   # file handle
) = @_;
- # Returns hashref.
+    # Returns the fields of the entry as a list in list context and as an
+    # arrayref in scalar context. Returns an empty list (undef in scalar
+    # context) when the filehandle is exhausted.
+
- my ( $line, @fields, %entry, $q_beg, $q_end, @atts, $att, $key, $val );
+    my ( $line, @fields );
- while ( $line = <$fh> )
+    while ( $line = <$fh> )
{
chomp $line;
- @fields = split "\t", $line;
-
- if ( @fields == 9 )
- {
- $q_beg = $fields[ 3 ] - 1;
- $q_end = $fields[ 4 ] - 1;
-
- ( $q_beg, $q_end ) = ( $q_end, $q_beg ) if $q_beg > $q_end;
-
- %entry = (
- Q_ID => $fields[ 0 ],
- SOURCE => $fields[ 1 ],
- TYPE => $fields[ 2 ],
- Q_BEG => $q_beg,
- Q_END => $q_end,
- SCORE => $fields[ 5 ],
- STRAND => $fields[ 6 ],
- PHASE => $fields[ 7 ],
- ATT => $fields[ 8 ],
- );
-
- @atts = split ";", $fields[ 8 ];
-
- foreach $att ( @atts )
- {
- ( $key, $val ) = split "=", $att;
-
- $entry{ "ATT_" . uc $key } = $val;
- }
+        next if $line =~ /^$|^#/;   # skip empty lines and lines starting with #
- return wantarray ? %entry : \%entry;
- }
+        # split drops trailing empty fields, so a line whose last columns
+        # are empty yields fewer than 9 fields and is skipped below.
+        @fields = split /\t/, $line;
+
+        next if scalar @fields < 9;
+
+        return wantarray ? @fields : \@fields;
}
+
+    # Explicit return: the value of a sub whose last statement is a loop
+    # is unspecified, so do not rely on falling off the end at EOF.
+    return;
}
-sub get_entries
+sub gff_entry_put
{
- # Martin A. Hansen, February 2008.
-
- # Reads GFF file and returns a list of entries.
+    # Martin A. Hansen, October 2009
+
+    # Writes one GFF entry as a single tab-separated line to the given
+    # filehandle, or to STDOUT when no filehandle is supplied.
+
- my ( $path, # full path to GFF file.
+    my ( $entry,   # GFF entry (arrayref of fields)
+         $fh,      # file handle (optional)
) = @_;
- # Returns a list.
+    my ( $out );
+
+    $out = $fh ? $fh : \*STDOUT;
+
- my ( $fh, $entry, @entries );
+    print { $out } join( "\t", @{ $entry } ), "\n";
+}
- $fh = Maasha::Common::read_open( $path );
- while ( $entry = get_entry( $fh ) ) {
- push @entries, $entry;
- }
+sub gff_pragma_put
+{
+    # Martin A. Hansen, October 2009
+
+    # Writes every GFF pragma line, each followed by a newline, to the
+    # given filehandle, or to STDOUT when no filehandle is supplied.
+
+    my ( $pragmas,   # list of GFF pragma lines (arrayref)
+         $fh,        # file handle (optional)
+       ) = @_;
- close $fh;
+    my ( $out );
- return wantarray ? @entries : \@entries;
+    $out = $fh ? $fh : \*STDOUT;
+
+    print { $out } "$_\n" for @{ $pragmas };
}
-sub put_entry
+sub gff2biopiece
{
+    # Martin A. Hansen, October 2009
+
+    # Converts a GFF entry (a list of fields) to a Biopiece record.
+    # Each 'tag=value' pair in the attributes column becomes an
+    # 'ATT_<TAG>' key with the tag upper-cased.
+
+    my ( $entry,   # GFF entry
+       ) = @_;
+
+    # Returns a hashref (a hash in list context).
+
+    my ( %record, @atts, $att, $key, $val );
+
+    # start and end are shifted down by one for the record; the length is
+    # computed from the unshifted values.
+    %record = (
+        'S_ID'   => $entry->[ seqid ],
+        'SOURCE' => $entry->[ source ],
+        'TYPE'   => $entry->[ type ],
+        'S_BEG'  => $entry->[ start ] - 1,
+        'S_END'  => $entry->[ end ] - 1,
+        'S_LEN'  => $entry->[ end ] - $entry->[ start ] + 1,
+        'SCORE'  => $entry->[ score ],
+        'STRAND' => $entry->[ strand ],
+        'PHASE'  => $entry->[ phase ],
+    );
+
+    @atts = split /;/, $entry->[ attributes ] if defined $entry->[ attributes ];
+
+    foreach $att ( @atts )
+    {
+        # A limit of 2 keeps any '=' inside the value intact.
+        ( $key, $val ) = split /=/, $att, 2;
+
+        # Skip the '.' placeholder of an undefined attributes column and
+        # fragments lacking a tag or a value, instead of storing bogus
+        # 'ATT_.' / 'ATT_' keys with undefined values.
+        next if not defined $key or $key eq '' or not defined $val;
+
+        $record{ 'ATT_' . uc $key } = $val;
+    }
+
+    return wantarray ? %record : \%record;
}
-sub put_entries
+sub biopiece2gff
{
+    # Martin A. Hansen, October 2009.
+
+    # Converts a Biopiece record to a GFF entry (a list).
+    # Columns without a value are written as '.', so every entry has
+    # nine non-empty fields.
+
+    my ( $record,   # Biopiece record
+       ) = @_;
+
+    # Returns a list (an arrayref in scalar context).
+
+    my ( @entry, $key, $tag, @atts, $val );
+
+    $entry[ seqid ]  = $record->{ 'S_ID' };
+    $entry[ source ] = $record->{ 'SOURCE' } || $record->{ 'REC_TYPE' } || '.';
+    $entry[ type ]   = $record->{ 'TYPE' } || '.';
+
+    # gff2biopiece subtracts 1 from start and end; add it back here so a
+    # round trip preserves the coordinates.
+    $entry[ start ]  = $record->{ 'S_BEG' } + 1 if defined $record->{ 'S_BEG' };
+    $entry[ end ]    = $record->{ 'S_END' } + 1 if defined $record->{ 'S_END' };
+
+    # Copied verbatim: a score or phase of 0 is a real value and must not
+    # be turned into '.' by a truth test.
+    $entry[ score ]  = $record->{ 'SCORE' };
+    $entry[ strand ] = $record->{ 'STRAND' };
+    $entry[ phase ]  = $record->{ 'PHASE' };
+
+    if ( not exists $record->{ 'ATT_ID' } )
+    {
+        push @atts, "ID=$record->{ 'Q_ID' }" if exists $record->{ 'Q_ID' };
+    }
+
+    # Iterate over the keys only (not keys and values), sorted so that the
+    # order of the attributes is reproducible.
+    foreach $key ( sort keys %{ $record } )
+    {
+        if ( $key =~ /^ATT_(.+)$/ )
+        {
+            next if not defined $record->{ $key };
+
+            $tag = lc $1;
+
+            if ( exists $ATTRIBUTES{ $tag } ) {
+                $tag = $ATTRIBUTES{ $tag };
+            }
+
+            push @atts, "$tag=" . $record->{ $key };
+        }
+    }
+
+    $entry[ attributes ] = join ";", @atts;
+
+    # Fill every undefined or empty column with the '.' placeholder. An
+    # empty last column would be dropped by split when the line is read
+    # back, leaving fewer than nine fields.
+    foreach $val ( @entry )
+    {
+        $val = '.' if not defined $val or $val eq '';
+    }
+
+    return wantarray ? @entry : \@entry;
}