elsif ( $script eq "blat_seq" ) { script_blat_seq( $in, $out, $options ) }
elsif ( $script eq "match_seq" ) { script_match_seq( $in, $out, $options ) }
elsif ( $script eq "create_vmatch_index" ) { script_create_vmatch_index( $in, $out, $options ) }
+ elsif ( $script eq "create_fixedstep_index" ) { script_create_fixedstep_index( $in, $out, $options ) }
elsif ( $script eq "vmatch_seq" ) { script_vmatch_seq( $in, $out, $options ) }
elsif ( $script eq "write_fasta" ) { script_write_fasta( $in, $out, $options ) }
elsif ( $script eq "write_align" ) { script_write_align( $in, $out, $options ) }
no_stream|x
);
}
+ elsif ( $script eq "create_fixedstep_index" )
+ {
+ @options = qw(
+ index_name|i=s
+ no_stream|x
+ );
+ }
elsif ( $script eq "vmatch_seq" )
{
@options = qw(
# print STDERR Dumper( \%options );
- if ( scalar( keys %options ) == 1 or $options{ "help" } ) {
+ if ( -t STDIN && scalar( keys %options ) == 1 or $options{ "help" } ) {
return wantarray ? %options : \%options;
}
}
Maasha::Common::error( qq(no --database specified) ) if $script eq "create_blast_db" and not $options{ "database" };
- Maasha::Common::error( qq(no --index_name specified) ) if $script eq "create_vmatch_index" and not $options{ "index_name" };
+ Maasha::Common::error( qq(no --index_name specified) ) if $script =~ /create_vmatch_index|create_fixedstep_index/ and not $options{ "index_name" };
Maasha::Common::error( qq(no --database or --genome specified) ) if $script eq "blast_seq" and not $options{ "genome" } and not $options{ "database" };
Maasha::Common::error( qq(both --database and --genome specified) ) if $script eq "blast_seq" and $options{ "genome" } and $options{ "database" };
Maasha::Common::error( qq(no --index_name or --genome specified) ) if $script eq "vmatch_seq" and not $options{ "genome" } and not $options{ "index_name" };
}
+sub script_create_fixedstep_index
+{
+ # Martin A. Hansen, January 2008.
+
+ # Create a fixedStep index from records in the stream.
+
+ my ( $in, # handle to in stream
+ $out, # handle to out stream
+ $options, # options hash
+ ) = @_;
+
+ # Returns nothing.
+
+ my ( $record, $file_tmp, $fh_tmp, $vals, $index );
+
+ $file_tmp = "$BP_TMP/fixedstep.tmp";
+
+ $fh_tmp = Maasha::Common::write_open( $file_tmp );
+
+ while ( $record = get_record( $in ) )
+ {
+ if ( $record->{ "CHR" } and $record->{ "CHR_BEG" } and $record->{ "STEP" } and $record->{ "VALS" } )
+ {
+ print $fh_tmp "fixedStep chrom=$record->{ 'CHR' } start=$record->{ 'CHR_BEG' } step=$record->{ 'STEP' }\n";
+
+ $vals = $record->{ 'VALS' };
+
+ $vals =~ tr/,/\n/;
+
+ print $fh_tmp "$vals\n";
+ }
+ }
+
+ close $fh_tmp;
+
+ $index = Maasha::UCSC::fixedstep_index_create( $file_tmp );
+
+ unlink $file_tmp;
+
+ Maasha::UCSC::fixedstep_index_store( $options->{ 'index_name' }, $index );
+}
+
+
sub script_vmatch_seq
{
# Martin A. Hansen, August 2007.
}
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PhastCons format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-sub phastcons_parse_entry
-{
- # Martin A. Hansen, December 2007.
-
- # Given a PhastCons entry converts this to a
- # list of super blocks.
-
- my ( $lines, # list of lines
- $args, # argument hash
- ) = @_;
-
- # Returns
-
- my ( $info, $chr, $beg, $step, $i, $c, $j, @blocks, @super_blocks, @entries, $super_block, $block, @lens, @begs );
-
- $info = shift @{ $lines };
-
- if ( $info =~ /^chrom=([^ ]+) start=(\d+) step=(\d+)$/ )
- {
- $chr = $1;
- $beg = $2;
- $step = $3;
-
- die qq(ERROR: step size $step != 1 -> problem!\n) if $step != 1; # in an ideal world should would be fixed ...
- }
-
- $i = 0;
-
- while ( $i < @{ $lines } )
- {
- if ( $lines->[ $i ] >= $args->{ "threshold" } )
- {
- $c = $i + 1;
-
- while ( $c < @{ $lines } )
- {
- if ( $lines->[ $c ] < $args->{ "threshold" } )
- {
- $j = $c + 1;
-
- while ( $j < @{ $lines } and $lines->[ $j ] < $args->{ "threshold" } ) {
- $j++;
- }
-
- if ( $j - $c > $args->{ "gap" } )
- {
- if ( $c - $i >= $args->{ "min" } )
- {
- push @blocks, {
- CHR => $chr,
- CHR_BEG => $beg + $i - 1,
- CHR_END => $beg + $c - 2,
- CHR_LEN => $c - $i,
- };
- }
-
- $i = $j;
-
- last;
- }
-
- $c = $j
- }
- else
- {
- $c++;
- }
- }
-
- if ( $c - $i >= $args->{ "min" } )
- {
- push @blocks, {
- CHR => $chr,
- CHR_BEG => $beg + $i - 1,
- CHR_END => $beg + $c - 2,
- CHR_LEN => $c - $i,
- };
- }
-
- $i = $c;
- }
- else
- {
- $i++;
- }
- }
-
- $i = 0;
-
- while ( $i < @blocks )
- {
- $c = $i + 1;
-
- while ( $c < @blocks and $blocks[ $c ]->{ "CHR_BEG" } - $blocks[ $c - 1 ]->{ "CHR_END" } <= $args->{ "dist" } )
- {
- $c++;
- }
-
- push @super_blocks, [ @blocks[ $i .. $c - 1 ] ];
-
- $i = $c;
- }
-
- foreach $super_block ( @super_blocks )
- {
- foreach $block ( @{ $super_block } )
- {
- push @begs, $block->{ "CHR_BEG" } - $super_block->[ 0 ]->{ "CHR_BEG" };
- push @lens, $block->{ "CHR_LEN" } - 1;
- }
-
- $lens[ -1 ]++;
-
- push @entries, {
- CHR => $super_block->[ 0 ]->{ "CHR" },
- CHR_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
- CHR_END => $super_block->[ -1 ]->{ "CHR_END" },
- Q_ID => "Q_ID",
- SCORE => 100,
- STRAND => "+",
- THICK_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
- THICK_END => $super_block->[ -1 ]->{ "CHR_END" } + 1,
- ITEMRGB => "0,200,100",
- BLOCKCOUNT => scalar @{ $super_block },
- BLOCKSIZES => join( ",", @lens ),
- Q_BEGS => join( ",", @begs ),
- };
-
- undef @begs;
- undef @lens;
- }
-
- return wantarray ? @entries : \@entries;
-}
-
-
-sub phastcons_index_create
+sub fixedstep_index_create
{
# Martin A. Hansen, January 2008.
- # Indexes a concatenated PhastCons file.
+ # Indexes a concatenated fixedStep file.
# The index consists of a hash with chromosomes as keys,
# and a list of [ chr_beg, next_chr_beg, chr_end, index_beg, index_len ] as values.
- my ( $path, # path to PhastCons file
+ my ( $path, # path to fixedStep file
) = @_;
# Returns a hashref
if ( $locator =~ /chrom=([^ ]+) start=(\d+) step=(\d+)/ )
{
$chr = $1;
- $beg = $2 - 1; # phastcons files are 1-based
+ $beg = $2 - 1; # fixedStep files are 1-based
$step = $3;
}
else
{
- Maasha::Common::error( qq(Could not parse PhastCons locator: $locator) );
+ Maasha::Common::error( qq(Could not parse locator: $locator) );
}
$pos += length( $locator ) + 11;
}
-sub phastcons_index_store
+sub fixedstep_index_store
{
# Martin A. Hansen, January 2008.
- # Writes a PhastCons index to binary file.
+ # Writes a fixedStep index to binary file.
my ( $path, # full path to file
$index, # list with index
}
-sub phastcons_index_retrieve
+sub fixedstep_index_retrieve
{
# Martin A. Hansen, January 2008.
- # Retrieves a PhastCons index from binary file.
+ # Retrieves a fixedStep index from binary file.
my ( $path, # full path to file
) = @_;
}
-sub phastcons_index_lookup
+sub fixedStep_index_lookup
{
# Martin A. Hansen, January 2008.
- # Retrieve PhastCons scores from a indexed
- # Phastcons file given a chromosome and
+ # Retrieve fixedStep scores from a indexed
+ # fixedStep file given a chromosome and
# begin and end positions.
my ( $index, # data structure
}
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PhastCons format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub phastcons_parse_entry
+{
+ # Martin A. Hansen, December 2007.
+
+ # Given a PhastCons entry converts this to a
+ # list of super blocks.
+
+ my ( $lines, # list of lines
+ $args, # argument hash
+ ) = @_;
+
+ # Returns
+
+ my ( $info, $chr, $beg, $step, $i, $c, $j, @blocks, @super_blocks, @entries, $super_block, $block, @lens, @begs );
+
+ $info = shift @{ $lines };
+
+ if ( $info =~ /^chrom=([^ ]+) start=(\d+) step=(\d+)$/ )
+ {
+ $chr = $1;
+ $beg = $2;
+ $step = $3;
+
+ die qq(ERROR: step size $step != 1 -> problem!\n) if $step != 1; # in an ideal world should would be fixed ...
+ }
+
+ $i = 0;
+
+ while ( $i < @{ $lines } )
+ {
+ if ( $lines->[ $i ] >= $args->{ "threshold" } )
+ {
+ $c = $i + 1;
+
+ while ( $c < @{ $lines } )
+ {
+ if ( $lines->[ $c ] < $args->{ "threshold" } )
+ {
+ $j = $c + 1;
+
+ while ( $j < @{ $lines } and $lines->[ $j ] < $args->{ "threshold" } ) {
+ $j++;
+ }
+
+ if ( $j - $c > $args->{ "gap" } )
+ {
+ if ( $c - $i >= $args->{ "min" } )
+ {
+ push @blocks, {
+ CHR => $chr,
+ CHR_BEG => $beg + $i - 1,
+ CHR_END => $beg + $c - 2,
+ CHR_LEN => $c - $i,
+ };
+ }
+
+ $i = $j;
+
+ last;
+ }
+
+ $c = $j
+ }
+ else
+ {
+ $c++;
+ }
+ }
+
+ if ( $c - $i >= $args->{ "min" } )
+ {
+ push @blocks, {
+ CHR => $chr,
+ CHR_BEG => $beg + $i - 1,
+ CHR_END => $beg + $c - 2,
+ CHR_LEN => $c - $i,
+ };
+ }
+
+ $i = $c;
+ }
+ else
+ {
+ $i++;
+ }
+ }
+
+ $i = 0;
+
+ while ( $i < @blocks )
+ {
+ $c = $i + 1;
+
+ while ( $c < @blocks and $blocks[ $c ]->{ "CHR_BEG" } - $blocks[ $c - 1 ]->{ "CHR_END" } <= $args->{ "dist" } )
+ {
+ $c++;
+ }
+
+ push @super_blocks, [ @blocks[ $i .. $c - 1 ] ];
+
+ $i = $c;
+ }
+
+ foreach $super_block ( @super_blocks )
+ {
+ foreach $block ( @{ $super_block } )
+ {
+ push @begs, $block->{ "CHR_BEG" } - $super_block->[ 0 ]->{ "CHR_BEG" };
+ push @lens, $block->{ "CHR_LEN" } - 1;
+ }
+
+ $lens[ -1 ]++;
+
+ push @entries, {
+ CHR => $super_block->[ 0 ]->{ "CHR" },
+ CHR_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
+ CHR_END => $super_block->[ -1 ]->{ "CHR_END" },
+ Q_ID => "Q_ID",
+ SCORE => 100,
+ STRAND => "+",
+ THICK_BEG => $super_block->[ 0 ]->{ "CHR_BEG" },
+ THICK_END => $super_block->[ -1 ]->{ "CHR_END" } + 1,
+ ITEMRGB => "0,200,100",
+ BLOCKCOUNT => scalar @{ $super_block },
+ BLOCKSIZES => join( ",", @lens ),
+ Q_BEGS => join( ",", @begs ),
+ };
+
+ undef @begs;
+ undef @lens;
+ }
+
+ return wantarray ? @entries : \@entries;
+}
+
+
sub phastcons_normalize
{
# Martin A. Hansen, January 2008.