elsif ( $script eq "read_solexa" ) { script_read_solexa( $in, $out, $options ) }
elsif ( $script eq "read_solid" ) { script_read_solid( $in, $out, $options ) }
elsif ( $script eq "read_mysql" ) { script_read_mysql( $in, $out, $options ) }
+ elsif ( $script eq "assemble_tag_contigs" ) { script_assemble_tag_contigs( $in, $out, $options ) }
elsif ( $script eq "format_genome" ) { script_format_genome( $in, $out, $options ) }
elsif ( $script eq "length_seq" ) { script_length_seq( $in, $out, $options ) }
elsif ( $script eq "uppercase_seq" ) { script_uppercase_seq( $in, $out, $options ) }
}
+sub script_assemble_tag_contigs
+{
+ # Martin A. Hansen, November 2008.
+
+ # Assemble tags from the stream into
+ # tag contigs.
+
+ my ( $in, # handle to in stream
+ $out, # handle to out stream
+ $options, # options hash
+ ) = @_;
+
+ # Returns nothing.
+
+ my ( $record, $new_record, %fh_hash, $fh_out, $chr, $array, $pos, $beg, $end, $score, $file, $id );
+
+ while ( $record = get_record( $in ) )
+ {
+ $record->{ "CHR" } = $record->{ "S_ID" } if not defined $record->{ "CHR" };
+ $record->{ "CHR_BEG" } = $record->{ "S_BEG" } if not defined $record->{ "CHR_BEG" };
+ $record->{ "CHR_END" } = $record->{ "S_END" } if not defined $record->{ "CHR_END" };
+
+ if ( $record->{ "CHR" } and defined $record->{ "CHR_BEG" } and $record->{ "CHR_END" } )
+ {
+ $fh_hash{ $record->{ "CHR" } } = Maasha::Common::write_open( "$BP_TMP/$record->{ 'CHR' }" ) if not exists $fh_hash{ $record->{ "CHR" } };
+
+ $fh_out = $fh_hash{ $record->{ "CHR" } };
+
+ Maasha::UCSC::bed_put_entry( $record, $fh_out, 5 );
+ }
+ }
+
+ map { close $_ } keys %fh_hash;
+
+ foreach $chr ( sort keys %fh_hash )
+ {
+ $array = tag_contigs_assemble( "$BP_TMP/$chr" );
+
+ $pos = 0;
+ $id = 0;
+
+ while ( ( $beg, $end, $score ) = tag_contigs_scan( $array, $pos ) and $beg )
+ {
+ $new_record = {
+ CHR => $chr,
+ CHR_BEG => $beg,
+ CHR_END => $end - 1,
+ Q_ID => sprintf( "TC%06d", $id ),
+ SCORE => $score,
+ STRAND => $record->{ 'STRAND' } || '+',
+ };
+
+ put_record( $new_record, $out );
+
+ $pos = $end;
+ $id++;
+ }
+
+ unlink "$BP_TMP/$chr";
+ }
+}
+
+
+sub tag_contigs_assemble
+{
+ # Martin A. Hansen, November 2008.
+
+ # Given a BED file with entries from only one
+ # chromosome assembles tag contigs from these
+ # ignoring strand information. Only tags with
+ # a score higher than the clone count over
+ # genomic loci (the SCORE field) is included
+ # in the tag contigs.
+
+ # ----------- tags
+ # -------------
+ # ----------
+ # ----------
+ # ======================== tag contig
+
+
+ my ( $path, # full path to BED file
+ ) = @_;
+
+ # Returns an arrayref.
+
+ my ( $fh, $entry, $clones, $score, @array );
+
+ $fh = Maasha::Common::read_open( $path );
+
+ while ( $entry = Maasha::UCSC::bed_get_entry( $fh ) )
+ {
+ if ( $entry->{ 'Q_ID' } =~ /(\d+)$/ )
+ {
+ $clones = $1;
+
+ $score = int( $clones / $entry->{ 'SCORE' } );
+
+ map { $array[ $_ ] += $score } $entry->{ 'CHR_BEG' } .. $entry->{ 'CHR_END' } if $score >= 1;
+ }
+ }
+
+ close $fh;
+
+ return wantarray ? @array : \@array;
+}
+
+
+sub tag_contigs_scan
+{
+ # Martin A. Hansen, November 2008.
+
+ # Scans an array with tag contigs and locates
+ # the next contig from a given position. The
+ # score of the tag contig is determined as the
+ # maximum value of the tag contig. If a tag contig
+ # is found a triple is returned with beg, end and score
+ # otherwise an empty triple is returned.
+
+ my ( $array, # array to scan
+ $beg, # position to start scanning from
+ ) = @_;
+
+ # Returns an arrayref.
+
+ my ( $end, $score );
+
+ $score = 0;
+
+ while ( $beg < scalar @{ $array } and not $array->[ $beg ] ) { $beg++ }
+
+ $end = $beg;
+
+ while ( $array->[ $end ] )
+ {
+ $score = Maasha::Calc::max( $score, $array->[ $end ] );
+
+ $end++
+ }
+
+ if ( $score > 0 ) {
+ return wantarray ? ( $beg, $end, $score ) : [ $beg, $end, $score ];
+ } else {
+ return wantarray ? () : [];
+ }
+}
+
+
sub script_format_genome
{
# Martin A. Hansen, Juli 2008.