use Maasha::NCBI;
use Maasha::Match;
use Maasha::UCSC;
+use Maasha::Jbrowse;
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$tmp_dir = Maasha::Biopieces::get_tmpdir();
$default = $ENV{ 'BP_DATA' };
-$formats = 'fasta,blast,vmatch,bowtie,bwa,phastcons';
+$formats = 'fasta,blast,vmatch,bowtie,bwa,phastcons,jbrowse';
$options = Maasha::Biopieces::parse_options(
[
Maasha::Filesys::dir_create_if_not_exists( "$dir/genomes" );
Maasha::Filesys::dir_create_if_not_exists( "$dir/genomes/$genome" );
-if ( grep { $_ =~ /fasta|blast|vmatch|bowtie|bwa/i } @{ $options->{ "formats" } } )
+if ( grep { $_ =~ /fasta|blast|vmatch|bowtie|bwa|jbrowse/i } @{ $options->{ "formats" } } )
{
if ( -f "$dir/genomes/$genome/fasta/$genome.fna" )
{
{
Maasha::Fasta::put_entry( $entry, $fh_out );
}
- elsif ( $fh_out and $record->{ "CHR" } and $record->{ "CHR_BEG" } and $record->{ "STEP" } and $record->{ "VALS" } )
+ elsif ( $fh_out and $record->{ "CHR" } and $record->{ "CHR_BEG" } and $record->{ "STEP" } and $record->{ "VALS" } ) # TODO: clean this!
{
print $fh_out "fixedStep chrom=$record->{ 'CHR' } start=$record->{ 'CHR_BEG' } step=$record->{ 'STEP' }\n";
if ( $format =~ /^fasta$/i ) { Maasha::Fasta::fasta_index( "$fasta_dir/$genome.fna", $fasta_dir, "$genome.index" ) }
elsif ( $format =~ /^blast$/i ) { Maasha::NCBI::blast_index( "$genome.fna", $fasta_dir, "$dir/genomes/$genome/blast", "dna", $genome ) }
- elsif ( $format =~ /^blat$/i ) { warn "BLAT FORMAT NOT IMPLEMENTED" }
+ elsif ( $format =~ /^blat$/i ) { warn "BLAT FORMAT NOT IMPLEMENTED\n" }
elsif ( $format =~ /^vmatch$/i ) { Maasha::Match::vmatch_index( "$genome.fna", $fasta_dir, "$dir/genomes/$genome/vmatch", $tmp_dir ) }
elsif ( $format =~ /^bowtie$/i ) { Maasha::Bowtie::bowtie_index( "$fasta_dir/$genome.fna", "$dir/genomes/$genome/bowtie", $genome, $options->{ 'verbose' } ) }
elsif ( $format =~ /^bwa$/i ) { Maasha::BWA::bwa_index( "$fasta_dir/$genome.fna", "$dir/genomes/$genome/bwa", $genome, $options->{ 'verbose' } ) }
elsif ( $format =~ /^phastcons$/i ) { Maasha::UCSC::phastcons_index( "$genome.pp", $phastcons_dir ) }
+ elsif ( $format =~ /^jbrowse$/i ) { Maasha::Jbrowse::install_seq( "$fasta_dir/$genome.fna", "$dir/genomes/$genome/jbrowse", $genome ) }
print STDERR qq(done.\n) if $options->{ 'verbose' };
}
$num = 1;
- while ( $entry = Maasha::GFF::get_entry( $data_in ) )
+ while ( $entry = Maasha::GFF::gff_entry_get( $data_in ) )
{
- Maasha::Biopieces::put_record( $entry, $out );
+ if ( $record = Maasha::GFF::gff2biopiece( $entry ) ) {
+ Maasha::Biopieces::put_record( $record, $out );
+ }
last if $options->{ "num" } and $num == $options->{ "num" };
--- /dev/null
+#!/usr/bin/env perl
+
+# Copyright (C) 2007-2009 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Write features from the stream in GFF format.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+use warnings;
+use strict;
+use Maasha::GFF;
+use Maasha::Biopieces;
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+my ( $options, $in, $out, $record, $data_out, $entry, $pragmas );
+
+$options = Maasha::Biopieces::parse_options(
+ [
+ { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
+ { long => 'data_out', short => 'o', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
+ { long => 'compress', short => 'Z', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
+ ]
+);
+
+$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
+$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
+
+$data_out = Maasha::Biopieces::write_stream( $options->{ "data_out" }, $options->{ "compress" } );
+
+$pragmas = [ '##gff-version 3' ];
+
+while ( $record = Maasha::Biopieces::get_record( $in ) )
+{
+ if ( $entry = Maasha::GFF::biopiece2gff( $record ) )
+ {
+ Maasha::GFF::gff_pragma_put( $pragmas, $data_out ) if $pragmas;
+
+ undef $pragmas;
+
+ Maasha::GFF::gff_entry_put( $entry, $data_out );
+ }
+
+ Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" };
+}
+
+close $data_out;
+
+Maasha::Biopieces::close_stream( $in );
+Maasha::Biopieces::close_stream( $out );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+BEGIN
+{
+ Maasha::Biopieces::status_set();
+}
+
+
+END
+{
+ Maasha::Biopieces::status_log();
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-# Routines for manipulation 'Generic Feature Format' - GFF.
+# Routines for manipulation 'Generic Feature Format' - GFF version 3.
+# Read more here:
+# http://www.sequenceontology.org/resources/gff3.html
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
@ISA = qw( Exporter );
+use constant {
+ seqid => 0,
+ source => 1,
+ type => 2,
+ start => 3,
+ end => 4,
+ score => 5,
+ strand => 6,
+ phase => 7,
+ attributes => 8,
+};
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-sub get_entry
-{
- # Martin A. Hansen, February 2008.
+my %ATTRIBUTES = (
+ id => 'ID',
+ name => 'Name',
+ alias => 'Alias',
+ parent => 'Parent',
+ target => 'Target',
+ gap => 'Gap',
+ derives_from => 'Derives_from',
+ note => 'Note',
+ dbxref => 'Dbxref',
+ ontology_term => 'Ontology_term',
+);
- # Reads a single entry from a filehandle to a GFF file.
- my ( $fh, # file handle
+sub gff_entry_get
+{
+ # Martin A. Hansen, October 2009
+
+ my ( $fh, # file handle
) = @_;
- # Returns hashref.
+ # Returns a
- my ( $line, @fields, %entry, $q_beg, $q_end, @atts, $att, $key, $val );
+ my ( $line, @fields );
- while ( $line = <$fh> )
+ while ( $line = <$fh>)
{
chomp $line;
- @fields = split "\t", $line;
-
- if ( @fields == 9 )
- {
- $q_beg = $fields[ 3 ] - 1;
- $q_end = $fields[ 4 ] - 1;
-
- ( $q_beg, $q_end ) = ( $q_end, $q_beg ) if $q_beg > $q_end;
-
- %entry = (
- Q_ID => $fields[ 0 ],
- SOURCE => $fields[ 1 ],
- TYPE => $fields[ 2 ],
- Q_BEG => $q_beg,
- Q_END => $q_end,
- SCORE => $fields[ 5 ],
- STRAND => $fields[ 6 ],
- PHASE => $fields[ 7 ],
- ATT => $fields[ 8 ],
- );
-
- @atts = split ";", $fields[ 8 ];
-
- foreach $att ( @atts )
- {
- ( $key, $val ) = split "=", $att;
-
- $entry{ "ATT_" . uc $key } = $val;
- }
+ next if $line =~ /^$|^#/; # skip empty lines and lines starting with #
- return wantarray ? %entry : \%entry;
- }
+ @fields = split /\t/, $line;
+
+ return wantarray ? @fields : \@fields;
}
}
-sub get_entries
+sub gff_entry_put
{
- # Martin A. Hansen, February 2008.
-
- # Reads GFF file and returns a list of entries.
+ # Martin A. Hansen, October 2009
- my ( $path, # full path to GFF file.
+ my ( $entry, # GFF entry
+ $fh, # file handle
) = @_;
- # Returns a list.
+ $fh ||= \*STDOUT;
- my ( $fh, $entry, @entries );
+ print $fh join( "\t", @{ $entry } ), "\n";
+}
- $fh = Maasha::Common::read_open( $path );
- while ( $entry = get_entry( $fh ) ) {
- push @entries, $entry;
- }
+sub gff_pragma_put
+{
+ # Martin A. Hansen, October 2009
+
+ my ( $pragmas, # list of GFF pragma lines
+ $fh, # file handle
+ ) = @_;
+
+ my ( $pragma );
- close $fh;
+ $fh ||= \*STDOUT;
- return wantarray ? @entries : \@entries;
+ foreach $pragma ( @{ $pragmas } ) {
+ print $fh "$pragma\n";
+ }
}
-sub put_entry
+sub gff2biopiece
{
+ # Martin A. Hansen, October 2009
+ my ( $entry, # GFF entry
+ ) = @_;
+ # Returns a hashref.
+
+ my ( %record, @atts, $att, $key, $val );
+
+ %record = (
+ 'Q_ID' => $entry->[ seqid ],
+ 'SOURCE' => $entry->[ source ],
+ 'TYPE' => $entry->[ type ],
+ 'Q_BEG' => $entry->[ start ],
+ 'Q_END' => $entry->[ end ],
+ 'SCORE' => $entry->[ score ],
+ 'STRAND' => $entry->[ strand ],
+ 'PHASE' => $entry->[ phase ],
+ );
+
+ @atts = split /;/, $entry->[ attributes ];
+
+ foreach $att ( @atts )
+ {
+ ( $key, $val ) = split /=/, $att;
+
+ $record{ 'ATT_' . uc $key } = $val;
+ }
+
+ return wantarray ? %record : \%record;
}
-sub put_entries
+sub biopiece2gff
{
+ # Martin A. Hansen, October 2009.
+
+ # Converts a Biopiece record to a GFF entry (a list).
+
+ my ( $record, # Biopiece record
+ ) = @_;
+
+ # Returns a list.
+
+ my ( @entry, $key, $tag, @atts );
+
+ $entry[ seqid ] = $record->{ 'Q_ID' };
+ $entry[ source ] = $record->{ 'SOURCE' };
+ $entry[ type ] = $record->{ 'TYPE' };
+ $entry[ start ] = $record->{ 'Q_BEG' };
+ $entry[ end ] = $record->{ 'Q_END' };
+ $entry[ score ] = $record->{ 'SCORE' };
+ $entry[ strand ] = $record->{ 'STRAND' };
+ $entry[ phase ] = $record->{ 'PHASE' };
+
+ foreach $key ( %{ $record } )
+ {
+ if ( $key =~ /ATT_(.+)/ )
+ {
+ $tag = lc $1;
+
+ if ( exists $ATTRIBUTES{ $tag } ) {
+ $tag = $ATTRIBUTES{ $tag };
+ }
+
+ push @atts, "$tag=" . $record->{ $key };
+ }
+ }
+ $entry[ attributes ] = join ";", @atts;
+ return wantarray ? @entry : \@entry;
}