git.donarmstrong.com Git - biopieces.git/commitdiff
added python code
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 4 Dec 2008 10:18:04 +0000 (10:18 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 4 Dec 2008 10:18:04 +0000 (10:18 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@328 74ccb610-7750-0410-82ae-013aeee3265d

12 files changed:
bp_bin/lowercase_seq [new symlink]
bp_doc/logo.svg [new file with mode: 0644]
code_c/Maasha/src/bed2fixedstep.c
code_c/Maasha/src/lib/barray.c
code_perl/Maasha/Biopieces.pm
code_perl/Maasha/C_bitarray.pm
code_perl/Maasha/Match.pm
code_perl/Maasha/UCSC/BED.pm
code_perl/Maasha/UCSC/Wiggle.pm
code_python/Cjung/Args.py [new file with mode: 0644]
code_python/Cjung/Args.pyc [new file with mode: 0644]
code_python/Cjung/lowercase_seq [new file with mode: 0755]

diff --git a/bp_bin/lowercase_seq b/bp_bin/lowercase_seq
new file mode 120000 (symlink)
index 0000000..16158e9
--- /dev/null
@@ -0,0 +1 @@
+../code_python/Cjung/lowercase_seq
\ No newline at end of file
diff --git a/bp_doc/logo.svg b/bp_doc/logo.svg
new file mode 100644 (file)
index 0000000..1759293
--- /dev/null
@@ -0,0 +1,334 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
+<svg height="100%" style="font-weight: normal; font-family: Courier New; font-size: 10" width="100%" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+       <g transform="translate(0,30)">
+               <text style="fill: #000000; font-weight: bold" transform="scale(1,0.0114731156622045)" x="0" y="0">N
+               </text>
+       </g>
+       <g transform="translate(0,29.9196881903646)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.0344193469866135)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(0,29.6787527614583)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.0917849252976359)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(0,29.0362582843748)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.120467714453147)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(0,28.1929842832028)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.309774122879521)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(7,30)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.0373099847194091)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(7,29.7388301069641)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.0422846493486637)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(7,29.4428375615235)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.0671579724949364)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(7,28.9727317540589)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.099493292585091)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(14,30)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.0425960928203338)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(14,29.7018273502577)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.120688929657612)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(14,28.8570048426544)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.177483720084724)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(14,27.6146188020613)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.369166137776226)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(21,30)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.0356885975228204)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(21,29.7501798173403)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.196287286375512)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(21,28.3761688127117)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.231975883898333)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(21,26.7523376254233)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,1.32047810834436)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(28,30)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.0202276662029062)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(28,29.8584063365797)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.202276662029062)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(28,28.4424697023762)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.242731994434875)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(28,26.7433457413321)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,1.55753029762378)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(35,30)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.0434014842969806)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(35,29.6961896099211)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.607620780157729)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(35,25.442844148817)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,1.51905195039432)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(42,30)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.0381012700102883)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(42,29.733291109928)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.0762025400205767)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(42,29.1998733297839)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,3.69582319099797)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(49,30)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.0325336485601305)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(49,29.7722644600791)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.260269188481044)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(49,27.9503801407118)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,2.96056201897187)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(56,30)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.0808766946944788)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(63,30)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.020365052341986)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(63,29.8574446336061)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.834967146021427)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(63,24.0126746114561)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,1.18117303583519)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(70,30)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.0659739694574225)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(70,29.538182213798)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.131947938914845)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(70,28.6145466413941)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.219913231524742)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(70,27.0751540207209)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,1.78129717535041)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(77,30)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.0352511895677424)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(77,29.7532416730258)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.0352511895677424)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(77,29.5064833460516)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.105753568703227)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(77,28.766208365129)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,3.34886300893552)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(84,30)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.0505809647437153)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(84,29.645933246794)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.126452411859288)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(84,28.760766363779)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.202323858974861)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(84,27.3444993509549)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,2.1496910016079)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(91,30)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.0264045526931584)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(91,29.8151681311479)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.0792136580794752)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(91,29.2606725245916)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.290450079624743)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(91,27.2275219672184)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,2.24438697891846)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(98,30)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.0329477465261945)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(98,29.7693657743166)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.0658954930523891)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(98,29.3080973229499)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.131790986104778)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(98,28.3855604202165)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,3.06414042693609)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(105,30)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.185808306209435)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(105,28.699341856534)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.278712459314152)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(105,26.7483546413349)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,1.85808306209435)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(112,30)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.0610147492445561)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(112,29.5728967552881)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.0813529989927415)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(112,29.0034257623389)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.325411995970966)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(112,26.7255417905422)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,1.56604523061027)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(119,30)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.0722133203867144)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(119,29.494506757293)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.173311968928114)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(119,28.2813229747962)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.187754633005457)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(119,26.967040543758)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,1.010986485414)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(126,30)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.018790868167855)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(126,29.868463922825)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.018790868167855)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(126,29.73692784565)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.0276918057210494)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(126,29.5430852056027)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.0336257640898458)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(133,30)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.0494297814297627)" x="0" y="0">U
+               </text>
+       </g>
+       <g transform="translate(133,29.6539915299917)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.108745519145478)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(133,28.8927728959733)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.266920819720719)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(133,27.0243271579283)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.563499508299295)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(140,30)">
+               <text style="fill: #0000ff; font-weight: bold" transform="scale(1,0.0312560144689586)" x="0" y="0">C
+               </text>
+       </g>
+       <g transform="translate(140,29.7812078987173)">
+               <text style="fill: #cc0000; font-weight: bold" transform="scale(1,0.281304130220627)" x="0" y="0">A
+               </text>
+       </g>
+       <g transform="translate(140,27.8120789871729)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,2.75052927326835)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(147,30)">
+               <text style="fill: #ffc800; font-weight: bold" transform="scale(1,0.0414334593879054)" x="0" y="0">G
+               </text>
+       </g>
+       <g transform="translate(154,30)">
+               <text style="fill: #00ff00; font-weight: bold" transform="scale(1,0.0414334593879054)" x="0" y="0">U
+               </text>
+       </g>
+       <g style="stroke: black; font-size: 8px" transform="translate(-10)">
+               <line x1="0" x2="0" y1="0" y2="30" />
+               <line x1="-5" x2="0" y1="0" y2="0" />
+               <text style="stroke: none" x="-13" y="2">2
+               </text>
+               <line x1="-5" x2="0" y1="15" y2="15" />
+               <text style="stroke: none" x="-13" y="17">1
+               </text>
+               <line x1="-5" x2="0" y1="30" y2="30" />
+               <text style="stroke: none" x="-13" y="32">0
+               </text>
+       </g>
+       <text style="stroke: none" transform="rotate(-90)" x="-26" y="-30">bits
+       </text><!-- 
+       Generated using the Perl SVG Module V2.33
+       by Ronan Oger
+       Info: http://www.roasp.com/
+ -->
+</svg>
\ No newline at end of file
diff --git a/code_c/Maasha/src/bed2fixedstep.c b/code_c/Maasha/src/bed2fixedstep.c
index a06043767191960e98b0149f5e4145f2364652f6..04636412527e7a68a15f013c0f5000525cbda085 100644 (file)
@@ -1,6 +1,5 @@
 #include "common.h"
 #include "mem.h"
-#include "filesys.h"
 #include "list.h"
 #include "ucsc.h"
 #include "hash.h"
@@ -10,6 +9,19 @@
 #define HASH_SIZE   8
 #define BARRAY_SIZE ( 1 << 16 )
 
+static void usage()
+{
+    fprintf( stderr,
+    "\n"
+    "bed2fixedstep - collapse overlapping BED entries using a score\n"
+    "based on the last number following a _ in the 'name' column.\n"
+    "if no such number is found 1 is used.\n"
+    "\n"
+    "Usage: bed2fixedstep < <BED file(s)> > <fixedstep file>\n\n"
+    );
+
+    exit( EXIT_FAILURE );
+}
 
 long get_score( char *str )
 {
@@ -32,8 +44,6 @@ long get_score( char *str )
 
 int main( int argc, char *argv[] )
 {
-    char      *file     = NULL;
-    FILE      *fp       = NULL;
     bed_entry *entry    = NULL;
     hash      *chr_hash = NULL;
     hash_elem *bucket   = NULL;
@@ -49,10 +59,11 @@ int main( int argc, char *argv[] )
     entry    = bed_entry_new( BED_COLS );
     chr_hash = hash_new( HASH_SIZE );
 
-    file = argv[ argc - 1 ];
-    fp   = read_open( file );
-
-    while ( ( bed_entry_get( fp, &entry ) ) )
+    if ( ! stdin ) {
+        usage();
+    }
+    
+    while ( ( bed_entry_get( stdin, &entry ) ) )
     {
 //        bed_entry_put( entry, entry->cols );
 
@@ -70,8 +81,6 @@ int main( int argc, char *argv[] )
         barray_interval_inc( ba, entry->chr_beg, entry->chr_end - 1, score );
     }
 
-    close_stream( fp );
-
 //    barray_print( ba );
 
     for ( i = 0; i < chr_hash->table_size; i++ )
@@ -87,7 +96,7 @@ int main( int argc, char *argv[] )
             {
 //                printf( "chr: %s   pos: %zu   beg: %zu   end: %zu\n", chr, pos, beg, end );
 
-                printf( "fixedStep chrom=%s start=%zu step=1\n", chr, beg );
+                printf( "fixedStep chrom=%s start=%zu step=1\n", chr, beg + 1 );
 
                 for ( j = beg; j <= end; j++ ) {
                     printf( "%hd\n", ba->array[ j ] );
diff --git a/code_c/Maasha/src/lib/barray.c b/code_c/Maasha/src/lib/barray.c
index 68ee72e145895d2da9693d94314dd43215e1e5af..08fdc02a356754363ab69e5e65fc00b02a372826 100644 (file)
@@ -3,7 +3,8 @@
 #include "common.h"
 #include "mem.h"
 #include "barray.h"
-
+#define SIZE_BLOCK 10000000
 
 barray *barray_new( size_t nmemb )
 {
@@ -32,9 +33,11 @@ size_t barray_new_size( size_t nmemb_old )
     size_t nmemb_new = 1;
 
     while ( nmemb_new < nmemb_old ) {
-        nmemb_new <<= 1;
+        nmemb_new += SIZE_BLOCK;
     }
 
+//    fprintf( stderr, "New size: %zu\n", nmemb_new );
+
     return nmemb_new;
 }
 
diff --git a/code_perl/Maasha/Biopieces.pm b/code_perl/Maasha/Biopieces.pm
index 53a0a24e33080e60b0908cdc368357eb0f87c2c4..5f6dd076395ab401a85fb3e62ce8e206812162df 100644 (file)
@@ -3153,6 +3153,10 @@ sub script_get_genome_align
         {
             $align = Maasha::UCSC::maf_extract( $BP_TMP, $options->{ "genome" }, $maf_track, $record->{ "CHR" }, $record->{ "CHR_BEG" }, $record->{ "CHR_END" }, $record->{ "STRAND" } );
         }
+        elsif ( $record->{ "REC_TYPE" } eq "VMATCH" )
+        {
+            $align = Maasha::UCSC::maf_extract( $BP_TMP, $options->{ "genome" }, $maf_track, $record->{ "S_ID" }, $record->{ "S_BEG" }, $record->{ "S_END" } + 1, $record->{ "STRAND" } );
+        }
         elsif ( $record->{ "REC_TYPE" } eq "PSL" )
         {
             $align = Maasha::UCSC::maf_extract( $BP_TMP, $options->{ "genome" }, $maf_track, $record->{ "S_ID" }, $record->{ "S_BEG" }, $record->{ "S_END" }, $record->{ "STRAND" } );
@@ -3675,7 +3679,7 @@ sub script_blast_seq
     $tmp_in  = "$BP_TMP/blast_query.seq";
     $tmp_out = "$BP_TMP/blast.result";
 
-    $fh_out = Maasha::Common::write_open( $tmp_in );
+    $fh_out = Maasha::Filesys::file_write_open( $tmp_in );
 
     while ( $record = get_record( $in ) ) 
     {
@@ -3748,7 +3752,7 @@ sub script_blast_seq
 
     unlink $tmp_in;
 
-    $fh_out = Maasha::Common::read_open( $tmp_out );
+    $fh_out = Maasha::Filesys::file_read_open( $tmp_out );
 
     undef $record;
 
diff --git a/code_perl/Maasha/C_bitarray.pm b/code_perl/Maasha/C_bitarray.pm
index 523cbae1c62fde45298f6584149547b13567466a..50ffcabf202bd20456cec336d143d7b9c8285e1b 100644 (file)
@@ -57,10 +57,12 @@ void c_array_interval_fill( SV *array, int beg, int end, int score )
 
     size = len / sizeof( int );
 
+ printf( "C: len: %d    size: %d   sizeof( int ): %ld\n", len, size, sizeof(int) );
+
     assert( beg >= 0 );
     assert( end >= 0 );
     assert( beg <= size );
-    assert( end < size );
+    assert( end <= size );
     assert( score > 0 );
 
     for ( i = beg; i < end + 1; i++ ) {
@@ -168,8 +170,7 @@ sub c_array_init
     # Martin A. Hansen, November 2008.
 
     # Initializes a zeroed C integer array using
-    # Perls vec function to create a bit
-    # vector.
+    # Perls vec function to create a bit array.
 
     my ( $size,   # number of elements in array
          $bits,   # bit size
@@ -179,9 +180,12 @@ sub c_array_init
 
     my ( $vec );
 
-    $vec  = '';
+    $vec = '';
+
+    #vec( $vec, $size - 1, $bits ) = 0;
+    vec( $vec, 4 * $size - 1, $bits / 4 ) = 0;
 
-    vec( $vec, $size - 1, $bits ) = 0;
+    printf STDERR "P: size: %d   bits: %d   len: %d   size: %d\n", $size, $bits, length( $vec ), length( $vec ) / 4;
 
     return $vec;
 }
diff --git a/code_perl/Maasha/Match.pm b/code_perl/Maasha/Match.pm
index 9ea1b4380a2dccd72c63a7c5e534c44cb8a503f1..240dd8de28eb654862e385fbf950df9166b882cb 100644 (file)
@@ -156,7 +156,7 @@ sub match_vmatch
 
     foreach $record ( @{ $records } ) 
     {
-        if ( $entry = Maasha::Biopieces::record2fasta( $record ) )
+        if ( $entry = Maasha::Fasta::biopiece2fasta( $record ) )
         {
             next if length $entry->[ SEQ ] < 12; # assuming that the index is created for 12 as minimum length
 
diff --git a/code_perl/Maasha/UCSC/BED.pm b/code_perl/Maasha/UCSC/BED.pm
index 6a80b13ec4cee623bd12b6cd6e22e28bb19e6cfc..3dcf45f8217c45d6911733889bb201ed3345db83 100644 (file)
@@ -228,7 +228,8 @@ sub bed_entry_check
         Maasha::Common::error( qq(Bad BED entry - score must be a whole number - not "$bed->[ score ]") );
     }
 
-    if ( $bed->[ score ] < 0 or $bed->[ score ] > 1000 ) {
+    # if ( $bed->[ score ] < 0 or $bed->[ score ] > 1000 ) { # disabled - too restrictive !
+    if ( $bed->[ score ] < 0 ) {
         Maasha::Common::error( qq(Bad BED entry - score must be between 0 and 1000 - not "$bed->[ score ]") );
     }
 
diff --git a/code_perl/Maasha/UCSC/Wiggle.pm b/code_perl/Maasha/UCSC/Wiggle.pm
index 40bdc40dec2142c872dc90228d421d7c05fc1927..8718b315a1bfcc95eeb6c960889f5f6b61f7c5f8 100644 (file)
@@ -50,7 +50,7 @@ require Exporter;
 
 use constant {
     BITS        => 32,            # Number of bits in an integer
-    SEQ_MAX     => 250_000_000,   # Maximum sequence size
+    SEQ_MAX     => 200_000_000,   # Maximum sequence size
     chrom       => 0,             # BED field names
     chromStart  => 1,
     chromEnd    => 2,
diff --git a/code_python/Cjung/Args.py b/code_python/Cjung/Args.py
new file mode 100644 (file)
index 0000000..52027cc
--- /dev/null
@@ -0,0 +1,157 @@
+"""
+Handling of arguments: options, arguments, file(s) content iterator
+
+For small scripts that:
+- read some command line options
+- read some command line positional arguments
+- iterate over all lines of some files given on the command line, or stdin if none given
+- give usage message if positional arguments are missing
+- give usage message if input files are missing and stdin is not redirected
+"""
+
+__author__ = 'Peter Kleiweg'
+__version__ = '0.2'
+__date__ = '2004/08/28'
+
+import os, sys, getopt
+
+class Args:
+       """
+       Perform common tasks on command line arguments
+       
+       Instance data:
+       progname (string) -- name of program
+       opt (dictionary) -- options with values
+       infile (string) -- name of current file being processed
+       lineno (int) -- line number of last line read in current file
+       linesum (int) -- total of lines read
+       """
+
+       def __init__(self, usage='Usage: %(progname)s [opt...] [file...]'):
+               "init, usage string: embed program name as %(progname)s"
+               self.progname = os.path.basename(sys.argv[0])
+               self.opt = {}
+               self.infile = None
+               self.lineno = 0
+               self.linesum = 0
+               self._argv = sys.argv[1:]
+               self._usage = usage
+
+       def __iter__(self):
+               "iterator: set-up"
+               if self._argv:
+                       self.infile = self._argv.pop(0)
+                       self._in = open(self.infile, 'r')
+                       self._stdin = False
+               else:
+                       if sys.stdin.isatty():
+                               print "### USAGE in __iter__"
+                               #self.usage()  # Doesn't return
+                               return None
+                       self.infile = '<stdin>'
+                       self._in = sys.stdin
+                       self._stdin = True
+               return self
+
+       def next(self):
+               "iterator: get next line, possibly from next file"
+               while True:
+                       line = self._in.readline()
+                       if line:
+                               self.lineno += 1
+                               self.linesum += 1
+                               return line
+
+                       if self._stdin:
+                               break
+
+                       self._in.close()
+                       try:
+                               self.infile = self._argv.pop(0)
+                       except IndexError:
+                               break
+                       self.lineno = 0
+                       self._in = open(self.infile, 'r')
+
+               self.lineno = -1
+               self.infile = None
+               raise StopIteration
+
+       def getopt(self, shortopts, longopts=[]):
+               "get options and merge into dict 'opt'"
+               try:
+                       options, self._argv = getopt.getopt(self._argv, shortopts, longopts)
+               except getopt.GetoptError:
+                       print "### USAGE in getopt"
+                       #self.usage()
+                       return None
+               self.opt.update(dict(options))
+
+       def shift(self):
+               "pop first of remaining arguments (shift)"
+               try:
+                       return self._argv.pop(0)
+               except IndexError:
+                       #print "### USAGE in shift"
+                       #self.usage()
+                       return None
+
+
+       def pop(self):
+               "pop last of remaining arguments"
+               try:
+                       return self._argv.pop()
+               except IndexError:
+                       print "### USAGE in pop"
+                       #self.usage()
+                       return None
+
+       def warning(self, text):
+               "print warning message to stderr, possibly with filename and lineno"
+               if self.lineno > 0:
+                       print >> sys.stderr, '%s:%i: warning: %s' % (self.infile, self.lineno, text)
+               else:
+                       print >> sys.stderr, '\nWarning %s: %s\n' % (self.progname, text)
+
+       def error(self, text):
+               "print error message to stderr, possibly with filename and lineno, and exit"
+               if self.lineno > 0:
+                       print >> sys.stderr, '%s:%i: %s' % (self.infile, self.lineno, text)
+               else:
+                       print >> sys.stderr, '\nError %s: %s\n' % (self.progname, text)
+               sys.exit(1)
+
+       def usage(self):
+               "print usage message, and exit"
+               print >> sys.stderr
+               print >> sys.stderr, self._usage % {'progname': self.progname}
+               print >> sys.stderr        
+               #sys.exit(1)
+
+
+if __name__ == '__main__':
+
+       a = Args('Usage: %(progname)s [-a value] [-b value] [-c] word [file...]')
+
+       a.opt['-a'] = 'option a'    # set some default option values
+       a.opt['-b'] = 'option b'    #
+       a.getopt('a:b:c')           # get user supplied option values
+
+       word = a.shift()            # get the first of the remaining arguments
+                                                               # use a.pop() to get the last instead
+
+       for line in a:              # iterate over the contents of all remaining arguments (file names)
+               if a.lineno == 1:
+                       print 'starting new file:', a.infile
+               a.warning(line.rstrip())
+
+       print 'Options:', a.opt
+       print 'Word:', word
+       print 'Total number of lines:', a.linesum
+
+       print 'Command line:', sys.argv     # unchanged
+
+       a.warning('warn 1')         # print a warning
+       a.error('error')            # print an error message and exit
+       a.warning('warn 2')         # this won't show
+
diff --git a/code_python/Cjung/Args.pyc b/code_python/Cjung/Args.pyc
new file mode 100644 (file)
index 0000000..8b12178
Binary files /dev/null and b/code_python/Cjung/Args.pyc differ
diff --git a/code_python/Cjung/lowercase_seq b/code_python/Cjung/lowercase_seq
new file mode 100755 (executable)
index 0000000..02d3514
--- /dev/null
@@ -0,0 +1,179 @@
+#!/usr/bin/python
+
+import os, string, sys, getopt, Args
+
+record_delimiter = "\n---\n"
+
+class Lowercase_seq:
+       in_stream = None
+       out_stream = None
+       eo_buffer = False
+       buffer = ''
+       rec_dic = {}
+       rec_num = 0
+
+       ###########################################
+       def __init__(self):
+               pass
+       ###########################################
+
+       ###########################################
+       def open_streams(self, input_file, output_file):
+               #print input_file, output_file
+               if input_file == '':
+                       self.in_stream = sys.stdin
+                       #print "in_stream = <STDIN>"
+               else:
+                       try:
+                               self.in_stream = open(input_file, 'r')
+                               #print "in_stream = %s" % (input_file)
+                       except:
+                               raise IOError
+
+               if output_file == '':
+                       self.out_stream = sys.stdout
+                       #print "out_stream = <STDOUT>"
+               else:
+                       try:
+                               self.out_stream = open(output_file, 'w')
+                               #print "out_stream = %s" % (output_file)
+                       except:
+                               raise IOError
+       ###########################################
+
+       ###########################################
+       def close_streams(self):
+               if self.in_stream:
+                       self.in_stream.close()
+               if self.out_stream:
+                       self.out_stream.close()
+       ###########################################
+
+       ###########################################
+       def get_record(self):
+               rec = ''
+               eof_flag = False
+               while not self.eo_buffer:
+                       if eof_flag:
+                               if self.buffer == '':
+                                       self.eo_buffer = True
+                                       break
+                       else:
+                               tmp = self.in_stream.read(1000)
+                       if not tmp:
+                               eof_flag = True
+
+                       self.buffer = self.buffer + tmp
+                       delim_index = self.buffer.find(record_delimiter)
+                       if delim_index >= 0:
+                               rec = self.buffer[:delim_index]
+                               self.buffer = self.buffer[delim_index + len(record_delimiter):]
+                               break
+               return rec
+       ###########################################
+
+       ###########################################
+       def process_record(self, rec):
+               #print "PARSE_RECORD"
+               #print rec
+               #print "==="
+               lines = rec.split("\n")
+               self.rec_num += 1
+               self.rec_dic[self.rec_num] = {}
+               for l in lines:
+                       toks = l.split(": ")
+                       if toks[0]=="SEQ":
+                               self.rec_dic[self.rec_num][toks[0]] = toks[1].lower()
+                       else:
+                               self.rec_dic[self.rec_num][toks[0]] = toks[1]
+                       #self.rec_dic[self.rec_num][toks[0]] = toks[1]
+               #print self.rec_dic[self.rec_num]
+               return self.rec_num
+       ###########################################
+
+       ###########################################
+       def put_record(self, r_num):
+               rec = self.rec_dic[r_num]
+               for k in rec.keys():
+                       #print "%s: %s" % (k, rec[k])
+                       self.out_stream.write("%s: %s\n" % (k, rec[k]))
+               #print "---"
+               self.out_stream.write("---\n")
+       ###########################################
+
+       ###########################################
+       def print_usage(self, opt):
+               bp_dir = os.environ['BP_DIR']
+               usage_path = bp_dir + os.path.sep + "bp_usage" + os.path.sep + "lowercase_seq.wiki"
+               os.system("print_usage -i %s %s" % (usage_path, opt))
+       ###########################################
+
+
+# main
+
+"""
+print "############"
+print len(sys.argv)
+print sys.argv
+print "############"
+"""
+
+
+lc_seq = Lowercase_seq()
+
+a = Args.Args('Usage: %(progname)s [-a value] [-b value] [-c] word [file...]')
+
+a.opt['-I'] = ''    # input file
+a.opt['-O'] = ''    # output file
+a.getopt('I:O:?:v')                    # get user supplied option values
+
+print >> sys.stderr, a.opt
+
+word = a.shift()                       # get the first of the remaining arguments
+                                                       # use a.pop() to get the last instead
+if not word == None:
+       sys.stderr.write("Unknown argument %s\n" % (word))
+       sys.exit(1)
+
+if sys.stdin.isatty():
+       lc_seq.print_usage('')
+       sys.exit(1)
+
+#for line in a:              # iterate over the contents of all remaining arguments (file names)
+#      if a.lineno == 1:
+#              print 'starting new file:', a.infile
+#      a.warning(line.rstrip())
+
+#print 'Options:', a.opt
+#print 'Word:', word
+#print 'Total number of lines:', a.linesum
+
+
+if a.opt.has_key('-?'):
+       lc_seq.print_usage('-?')
+       sys.exit(1)
+else:
+       try:
+               lc_seq.open_streams(a.opt['-I'], a.opt['-O'])
+       except:
+               sys.stderr.write("%s\n" % ("IOError"))
+               sys.exit(1)
+
+
+while True:
+       rec = lc_seq.get_record()
+       if rec=='':
+               break
+       rec_num = lc_seq.process_record(rec)
+       lc_seq.put_record(rec_num)
+
+lc_seq.close_streams()
+
+#source = "Dmel_tRNAs_key_record_tuples.txt"
+#lc_seq.open_stream(source)
+
+#lc_seq.get_record()
+#lc_seq.put_record()
+
+
+