-#!/usr/bin/env perl
+#!/usr/bin/env perl -w
+
+# Copyright (C) 2007-2009 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Assemble tag contigs from overlapping BED type records in the stream.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
-use warnings;
use strict;
+use Maasha::Biopieces;
+use Maasha::Filesys;
+use Maasha::UCSC::BED;
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+# File-scoped lexicals. They are deliberately declared without initialisers:
+# the BEGIN block further down assigns $run_time_beg at compile time, and the
+# END block reads $run_time_beg, $options, $in and $out at exit.
+my ( $run_time_beg, $run_time_end, $options, $in, $out, $tmp_dir, $bed_file, $tag_file, $fh_in, $fh_out,
+     $cols, $record, $bed_entry, $file_hash, $chr );
+
+$options = Maasha::Biopieces::parse_options(
+    [
+        { long => 'check', short => 'C', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
+    ]
+);
+
+$in  = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
+$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
+
+$tmp_dir = Maasha::Biopieces::get_tmpdir();
+
+# Step 1: dump every record that converts to a BED entry into one temporary BED file.
+$bed_file = "$tmp_dir/assemble_tag_contigs.bed";
+$fh_out   = Maasha::Filesys::file_write_open( $bed_file );
+$cols     = 6;   # we only need the first 6 BED columns
+
+while ( $record = Maasha::Biopieces::get_record( $in ) )
+{
+    # Records for which biopiece2bed returns a false value are skipped; they
+    # are not written to the output stream either.
+    if ( $bed_entry = Maasha::UCSC::BED::biopiece2bed( $record, $cols ) ) {
+        Maasha::UCSC::BED::bed_entry_put( $bed_entry, $fh_out, $cols, $options->{ 'check' } );
+    }
+}
+
+close $fh_out;
+
+# Step 2: split the combined file. The returned hash reference is iterated
+# below with its keys as $chr and its values as file paths.
+$file_hash = Maasha::UCSC::BED::bed_file_split_on_chr( $bed_file, $tmp_dir, $cols );
+
+unlink $bed_file;   # the combined file is no longer needed once it has been split
+
+# Step 3: run bed2tag_contigs on each split file and emit its output as records.
+foreach $chr ( sort keys %{ $file_hash } )
+{
+    $bed_file = $file_hash->{ $chr };
+    $tag_file = "$bed_file.tc";
+
+    # NOTE(review): the redirections are passed as one string, so this
+    # presumably goes through a shell - confirm that the split file names
+    # cannot contain shell metacharacters.
+    Maasha::Common::run( "bed2tag_contigs", "< $bed_file > $tag_file" );
+
+    $fh_in = Maasha::Filesys::file_read_open( $tag_file );
+
+    while ( $bed_entry = Maasha::UCSC::BED::bed_entry_get( $fh_in, $cols, $options->{ 'check' } ) )
+    {
+        if ( $record = Maasha::UCSC::BED::bed2biopiece( $bed_entry ) ) {
+            Maasha::Biopieces::put_record( $record, $out );
+        }
+    }
+
+    close $fh_in;
+
+    # Remove both per-key temporary files before moving on to the next key.
+    unlink $bed_file;
+    unlink $tag_file;
+}
+
+Maasha::Filesys::dir_remove( $tmp_dir );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+BEGIN    # executed during compilation, i.e. before the main script body runs
+{
+ $run_time_beg = Maasha::Biopieces::run_time();    # start value, handed to run_time_print in the END block
+
+ Maasha::Biopieces::log_biopiece();    # NOTE(review): presumably logs this invocation - confirm against Maasha::Biopieces
+}
+
+END    # executed when the interpreter exits
+{
+ Maasha::Biopieces::close_stream( $in );    # NOTE(review): $in and $out are still undef if the script died before the streams were opened - confirm close_stream tolerates that
+ Maasha::Biopieces::close_stream( $out );
+
+ $run_time_end = Maasha::Biopieces::run_time();    # end value, taken after both streams are closed
+
+ Maasha::Biopieces::run_time_print( $run_time_beg, $run_time_end, $options );    # NOTE(review): presumably reports the elapsed time, controlled by $options - confirm
+}
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
-use Maasha::BioRun;
#include <errno.h>
#include <getopt.h>
+typedef unsigned int uint;
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef long long llong;
/* Function that prints "pong" to stderr. */
void maasha_ping();
-/* Return a binary number as a string of 1's and 0's. */
-char *bits2string( uint bin );
+// /* Return a binary number as a string of 1's and 0's. */
+// char *bits2string( uint bin );
/* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/
elsif ( $script eq "read_solid" ) { script_read_solid( $in, $out, $options ) }
elsif ( $script eq "read_mysql" ) { script_read_mysql( $in, $out, $options ) }
elsif ( $script eq "read_ucsc_config" ) { script_read_ucsc_config( $in, $out, $options ) }
- elsif ( $script eq "assemble_tag_contigs" ) { script_assemble_tag_contigs( $in, $out, $options ) }
elsif ( $script eq "length_seq" ) { script_length_seq( $in, $out, $options ) }
elsif ( $script eq "uppercase_seq" ) { script_uppercase_seq( $in, $out, $options ) }
elsif ( $script eq "complexity_seq" ) { script_complexity_seq( $in, $out, $options ) }
num|n=s
);
}
- elsif ( $script eq "assemble_tag_contigs" )
- {
- @options = qw(
- check|C
- );
- }
elsif ( $script eq "calc_fixedstep" )
{
@options = qw(
}
-sub script_assemble_tag_contigs
-{
- # Martin A. Hansen, November 2008.
-
- # Assemble tags from the stream into
- # tag contigs.
-
- # The current implementation is quite
- # slow because of heavy use of temporary
- # files.
-
- my ( $in, # handle to in stream
- $out, # handle to out stream
- $options, # options hash
- ) = @_;
-
- # Returns nothing.
-
- my ( $bed_file, $tag_file, $fh_in, $fh_out, $cols, $record, $bed_entry, $file_hash, $chr, $strand );
-
- $bed_file = "$BP_TMP/assemble_tag_contigs.bed";
- $fh_out = Maasha::Filesys::file_write_open( $bed_file );
- $cols = 6; # we only need the first 6 BED columns
-
- while ( $record = Maasha::Biopieces::get_record( $in ) )
- {
- if ( $bed_entry = Maasha::UCSC::BED::biopiece2bed( $record, $cols ) )
- {
- $strand = $record->{ 'STRAND' } || '+';
-
- Maasha::UCSC::BED::bed_entry_put( $bed_entry, $fh_out, $cols, $options->{ 'check' } );
- }
- }
-
- close $fh_out;
-
- $file_hash = Maasha::UCSC::BED::bed_file_split_on_chr( $bed_file, $BP_TMP, $cols );
-
- unlink $bed_file;
-
- foreach $chr ( sort keys %{ $file_hash } )
- {
- $bed_file = $file_hash->{ $chr };
- $tag_file = "$bed_file.tc";
-
- Maasha::Common::run( "bed2tag_contigs", "< $bed_file > $tag_file" );
-
- $fh_in = Maasha::Filesys::file_read_open( $tag_file );
-
- while ( $bed_entry = Maasha::UCSC::BED::bed_entry_get( $fh_in, $cols, $options->{ 'check' } ) )
- {
- if ( $record = Maasha::UCSC::BED::bed2biopiece( $bed_entry ) ) {
- Maasha::Biopieces::put_record( $record, $out );
- }
- }
-
- close $fh_in;
-
- unlink $bed_file;
- unlink $tag_file;
- }
-}
-
-
sub script_length_seq
{
# Martin A. Hansen, August 2007.
$line = <$fh>;
- $line =~ tr/\n\r//d; # some people have carriage returns in their BED files -> Grrrr
-
return if not defined $line;
+ $line =~ tr/\n\r//d; # some people have carriage returns in their BED files -> Grrrr
+
if ( not defined $cols ) {
$cols = 1 + $line =~ tr/\t//;
}
$bed_entry[ blockCount ] = $bp_record->{ "BLOCK_COUNT" };
$bed_entry[ blockSizes ] = $bp_record->{ "BLOCK_LENS" };
$bed_entry[ blockStarts ] = join ",", @begs;
- $bed_entry[ thickEnd ];
+ $bed_entry[ thickEnd ] = $bp_record->{ "THICK_END" } + 1;
}
elsif ( defined $bp_record->{ "BLOCK_COUNT" } and
defined $bp_record->{ "BLOCK_LENS" } and
$bed_entry[ blockCount ] = $bp_record->{ "BLOCK_COUNT" };
$bed_entry[ blockSizes ] = $bp_record->{ "BLOCK_LENS" };
$bed_entry[ blockStarts ] = $bp_record->{ "Q_BEGS" };
- $bed_entry[ thickEnd ];
+ $bed_entry[ thickEnd ] = $bp_record->{ "THICK_END" } + 1;
}
return wantarray ? @bed_entry : \@bed_entry;