]> git.donarmstrong.com Git - biopieces.git/commitdiff
worked on match code for ruby Seq class
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Sat, 19 Mar 2011 20:38:52 +0000 (20:38 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Sat, 19 Mar 2011 20:38:52 +0000 (20:38 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1301 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/Maasha/lib/doc/created.rid
code_ruby/Maasha/lib/doc/index.html
code_ruby/Maasha/lib/seq.rb
code_ruby/Maasha/test/test_seq.rb

index 7d21b41e72809aeb994d170f7aae16be20edeabe..36dfd1158ea1879f5568d09a0ecc9dbf4d0e7188 100644 (file)
@@ -1,4 +1,13 @@
-Mon, 27 Sep 2010 12:56:47 +0200
-biopieces.rb   Mon, 13 Sep 2010 09:27:58 +0200
-fasta.rb       Mon, 06 Sep 2010 16:36:25 +0200
-seq.rb Mon, 20 Sep 2010 10:38:53 +0200
+Sat, 19 Mar 2011 16:29:12 +0100
+./base36.rb    Fri, 11 Feb 2011 19:42:32 +0100
+./biopieces.rb Sat, 19 Mar 2011 09:14:44 +0100
+./bitarray.rb  Thu, 17 Mar 2011 15:02:03 +0100
+./bits.rb      Tue, 08 Mar 2011 11:37:57 +0100
+./boulder.rb   Sat, 19 Mar 2011 09:17:13 +0100
+./digest.rb    Sat, 19 Mar 2011 15:41:06 +0100
+./fasta.rb     Sat, 19 Mar 2011 09:11:06 +0100
+./fastq.rb     Sat, 19 Mar 2011 09:09:52 +0100
+./filesys.rb   Fri, 18 Mar 2011 22:13:22 +0100
+./genbank.rb   Sat, 19 Mar 2011 09:19:25 +0100
+./seq.rb       Sat, 19 Mar 2011 16:29:08 +0100
+./sff.rb       Fri, 11 Feb 2011 19:55:38 +0100
index 7622b70052f12d8c63d0a9c4f7192e645957c916..4979e8a29c202ef4760371f57f32c965dfe9f32d 100644 (file)
        <h2 id="classes">Classes/Modules</h2>
        <ul>
                
+               <li class="class"><a href="Base36.html">Base36</a></li>
+               
+               <li class="class"><a href="Base36Error.html">Base36Error</a></li>
+               
                <li class="class"><a href="Biopieces.html">Biopieces</a></li>
                
+               <li class="class"><a href="BitArray.html">BitArray</a></li>
+               
+               <li class="class"><a href="BitArrayError.html">BitArrayError</a></li>
+               
+               <li class="class"><a href="Boulder.html">Boulder</a></li>
+               
+               <li class="class"><a href="BoulderError.html">BoulderError</a></li>
+               
                <li class="class"><a href="CastError.html">CastError</a></li>
                
                <li class="class"><a href="Casts.html">Casts</a></li>
                
                <li class="class"><a href="FastaError.html">FastaError</a></li>
                
+               <li class="class"><a href="Fastq.html">Fastq</a></li>
+               
+               <li class="class"><a href="FastqError.html">FastqError</a></li>
+               
+               <li class="class"><a href="Filesys.html">Filesys</a></li>
+               
+               <li class="class"><a href="FilesysError.html">FilesysError</a></li>
+               
+               <li class="class"><a href="Genbank.html">Genbank</a></li>
+               
+               <li class="class"><a href="GenbankError.html">GenbankError</a></li>
+               
+               <li class="class"><a href="GenbankFeatures.html">GenbankFeatures</a></li>
+               
+               <li class="class"><a href="Locator.html">Locator</a></li>
+               
+               <li class="class"><a href="LocatorError.html">LocatorError</a></li>
+               
                <li class="class"><a href="OptionHandler.html">OptionHandler</a></li>
                
+               <li class="class"><a href="Read.html">Read</a></li>
+               
+               <li class="class"><a href="SFF.html">SFF</a></li>
+               
+               <li class="class"><a href="SFFError.html">SFFError</a></li>
+               
                <li class="class"><a href="Seq.html">Seq</a></li>
                
                <li class="class"><a href="SeqError.html">SeqError</a></li>
                
                <li class="class"><a href="Stream.html">Stream</a></li>
                
+               <li class="class"><a href="String.html">String</a></li>
+               
+               <li class="class"><a href="StringError.html">StringError</a></li>
+               
        </ul>
 
        <h2 id="methods">Methods</h2>
        <ul>
                
-                       <li><a href="Biopieces.html#method-c-new">::new &mdash; Biopieces</a></li>
+                       <li><a href="Base36.html#method-c-decode">::decode &mdash; Base36</a></li>
                
-                       <li><a href="Digest.html#method-c-new">::new &mdash; Digest</a></li>
+                       <li><a href="Base36.html#method-c-encode">::encode &mdash; Base36</a></li>
+               
+                       <li><a href="Seq.html#method-c-generate_oligos">::generate_oligos &mdash; Seq</a></li>
+               
+                       <li><a href="String.html#method-c-hamming_dist">::hamming_dist &mdash; String</a></li>
+               
+                       <li><a href="GenbankFeatures.html#method-c-new">::new &mdash; GenbankFeatures</a></li>
+               
+                       <li><a href="Locator.html#method-c-new">::new &mdash; Locator</a></li>
                
                        <li><a href="OptionHandler.html#method-c-new">::new &mdash; OptionHandler</a></li>
                
-                       <li><a href="Seq.html#method-c-new">::new &mdash; Seq</a></li>
+                       <li><a href="BitArray.html#method-c-new">::new &mdash; BitArray</a></li>
+               
+                       <li><a href="SFF.html#method-c-new">::new &mdash; SFF</a></li>
                
                        <li><a href="Casts.html#method-c-new">::new &mdash; Casts</a></li>
                
-                       <li><a href="Fasta.html#method-c-new">::new &mdash; Fasta</a></li>
+                       <li><a href="Filesys.html#method-c-new">::new &mdash; Filesys</a></li>
+               
+                       <li><a href="Biopieces.html#method-c-new">::new &mdash; Biopieces</a></li>
+               
+                       <li><a href="Boulder.html#method-c-new">::new &mdash; Boulder</a></li>
+               
+                       <li><a href="Digest.html#method-c-new">::new &mdash; Digest</a></li>
+               
+                       <li><a href="Genbank.html#method-c-new">::new &mdash; Genbank</a></li>
+               
+                       <li><a href="Seq.html#method-c-new">::new &mdash; Seq</a></li>
                
                        <li><a href="Stream.html#method-c-nread">::nread &mdash; Stream</a></li>
                
                        <li><a href="Stream.html#method-c-nwrite">::nwrite &mdash; Stream</a></li>
                
-                       <li><a href="Fasta.html#method-c-open">::open &mdash; Fasta</a></li>
+                       <li><a href="Filesys.html#method-c-open">::open &mdash; Filesys</a></li>
                
                        <li><a href="Stream.html#method-c-open">::open &mdash; Stream</a></li>
                
+                       <li><a href="SFF.html#method-c-open">::open &mdash; SFF</a></li>
+               
                        <li><a href="Stream.html#method-c-read">::read &mdash; Stream</a></li>
                
                        <li><a href="Stream.html#method-c-write">::write &mdash; Stream</a></li>
                
                        <li><a href="Stream.html#method-c-zipped%3F">::zipped? &mdash; Stream</a></li>
                
-                       <li><a href="Fasta.html#method-c-zopen">::zopen &mdash; Fasta</a></li>
+                       <li><a href="Filesys.html#method-c-zopen">::zopen &mdash; Filesys</a></li>
                
                        <li><a href="Stream.html#method-c-zread">::zread &mdash; Stream</a></li>
                
                        <li><a href="Stream.html#method-c-zwrite">::zwrite &mdash; Stream</a></li>
                
+                       <li><a href="BitArray.html#method-i-%26">#& &mdash; BitArray</a></li>
+               
+                       <li><a href="String.html#method-i-%26">#& &mdash; String</a></li>
+               
+                       <li><a href="String.html#method-i-%5E">#^ &mdash; String</a></li>
+               
+                       <li><a href="BitArray.html#method-i-%5E">#^ &mdash; BitArray</a></li>
+               
+                       <li><a href="Seq.html#method-i-adaptor_clip_left">#adaptor_clip_left &mdash; Seq</a></li>
+               
+                       <li><a href="Seq.html#method-i-adaptor_clip_right">#adaptor_clip_right &mdash; Seq</a></li>
+               
+                       <li><a href="Seq.html#method-i-adaptor_locate_left">#adaptor_locate_left &mdash; Seq</a></li>
+               
+                       <li><a href="Seq.html#method-i-adaptor_locate_right">#adaptor_locate_right &mdash; Seq</a></li>
+               
+                       <li><a href="Locator.html#method-i-balance_parens%3F">#balance_parens? &mdash; Locator</a></li>
+               
+                       <li><a href="BitArray.html#method-i-bit_pos">#bit_pos &mdash; BitArray</a></li>
+               
+                       <li><a href="BitArray.html#method-i-bit_set">#bit_set &mdash; BitArray</a></li>
+               
+                       <li><a href="BitArray.html#method-i-bit_set%3F">#bit_set? &mdash; BitArray</a></li>
+               
+                       <li><a href="BitArray.html#method-i-bits_in_char">#bits_in_char &mdash; BitArray</a></li>
+               
+                       <li><a href="BitArray.html#method-i-bits_off">#bits_off &mdash; BitArray</a></li>
+               
+                       <li><a href="BitArray.html#method-i-bits_on">#bits_on &mdash; BitArray</a></li>
+               
+                       <li><a href="BitArray.html#method-i-byte_pos">#byte_pos &mdash; BitArray</a></li>
+               
                        <li><a href="Casts.html#method-i-check">#check &mdash; Casts</a></li>
                
                        <li><a href="Casts.html#method-i-check_duplicates">#check_duplicates &mdash; Casts</a></li>
                
+                       <li><a href="SFF.html#method-i-check_header_length">#check_header_length &mdash; SFF</a></li>
+               
                        <li><a href="Casts.html#method-i-check_keys">#check_keys &mdash; Casts</a></li>
                
+                       <li><a href="SFF.html#method-i-check_magic_number">#check_magic_number &mdash; SFF</a></li>
+               
                        <li><a href="Casts.html#method-i-check_val_allowed">#check_val_allowed &mdash; Casts</a></li>
                
                        <li><a href="Casts.html#method-i-check_val_default">#check_val_default &mdash; Casts</a></li>
                
                        <li><a href="Casts.html#method-i-check_values">#check_values &mdash; Casts</a></li>
                
-                       <li><a href="Fasta.html#method-i-close">#close &mdash; Fasta</a></li>
+                       <li><a href="SFF.html#method-i-check_version">#check_version &mdash; SFF</a></li>
+               
+                       <li><a href="Read.html#method-i-clip">#clip &mdash; Read</a></li>
+               
+                       <li><a href="SFF.html#method-i-close">#close &mdash; SFF</a></li>
+               
+                       <li><a href="Filesys.html#method-i-close">#close &mdash; Filesys</a></li>
                
                        <li><a href="Seq.html#method-i-complement">#complement &mdash; Seq</a></li>
                
+                       <li><a href="Seq.html#method-i-composition">#composition &mdash; Seq</a></li>
+               
+                       <li><a href="Seq.html#method-i-convert_phred2illumina%21">#convert_phred2illumina! &mdash; Seq</a></li>
+               
+                       <li><a href="Seq.html#method-i-convert_solexa2illumina%21">#convert_solexa2illumina! &mdash; Seq</a></li>
+               
+                       <li><a href="Read.html#method-i-coordinates_get">#coordinates_get &mdash; Read</a></li>
+               
                        <li><a href="Status.html#method-i-delete">#delete &mdash; Status</a></li>
                
                        <li><a href="Digest.html#method-i-disambiguate">#disambiguate &mdash; Digest</a></li>
                
                        <li><a href="Biopieces.html#method-i-each">#each &mdash; Biopieces</a></li>
                
-                       <li><a href="Fasta.html#method-i-each">#each &mdash; Fasta</a></li>
+                       <li><a href="Boulder.html#method-i-each">#each &mdash; Boulder</a></li>
+               
+                       <li><a href="Filesys.html#method-i-each">#each &mdash; Filesys</a></li>
+               
+                       <li><a href="GenbankFeatures.html#method-i-each">#each &mdash; GenbankFeatures</a></li>
+               
+                       <li><a href="Genbank.html#method-i-each">#each &mdash; Genbank</a></li>
                
                        <li><a href="Digest.html#method-i-each">#each &mdash; Digest</a></li>
                
+                       <li><a href="SFF.html#method-i-each">#each &mdash; SFF</a></li>
+               
                        <li><a href="Biopieces.html#method-i-each_record">#each_record &mdash; Biopieces</a></li>
                
+                       <li><a href="SFF.html#method-i-fast_forward">#fast_forward &mdash; SFF</a></li>
+               
                        <li><a href="Seq.html#method-i-generate">#generate &mdash; Seq</a></li>
                
                        <li><a href="Fasta.html#method-i-get_entry">#get_entry &mdash; Fasta</a></li>
                
+                       <li><a href="Fastq.html#method-i-get_entry">#get_entry &mdash; Fastq</a></li>
+               
+                       <li><a href="Genbank.html#method-i-get_entry">#get_entry &mdash; Genbank</a></li>
+               
+                       <li><a href="Genbank.html#method-i-get_keys">#get_keys &mdash; Genbank</a></li>
+               
+                       <li><a href="GenbankFeatures.html#method-i-get_quals">#get_quals &mdash; GenbankFeatures</a></li>
+               
+                       <li><a href="Genbank.html#method-i-get_seq">#get_seq &mdash; Genbank</a></li>
+               
                        <li><a href="Status.html#method-i-get_tmpdir">#get_tmpdir &mdash; Status</a></li>
                
+                       <li><a href="Seq.html#method-i-hard_mask">#hard_mask &mdash; Seq</a></li>
+               
+                       <li><a href="SFF.html#method-i-header_parse">#header_parse &mdash; SFF</a></li>
+               
+                       <li><a href="Seq.html#method-i-homopol_max">#homopol_max &mdash; Seq</a></li>
+               
+                       <li><a href="Seq.html#method-i-indels">#indels &mdash; Seq</a></li>
+               
+                       <li><a href="BitArray.html#method-i-init_byte_array">#init_byte_array &mdash; BitArray</a></li>
+               
+                       <li><a href="BitArray.html#method-i-init_count_array">#init_count_array &mdash; BitArray</a></li>
+               
                        <li><a href="Seq.html#method-i-is_dna%3F">#is_dna? &mdash; Seq</a></li>
                
                        <li><a href="Seq.html#method-i-is_protein%3F">#is_protein? &mdash; Seq</a></li>
                
                        <li><a href="Casts.html#method-i-long_to_sym">#long_to_sym &mdash; Casts</a></li>
                
+                       <li><a href="Read.html#method-i-mask">#mask &mdash; Read</a></li>
+               
+                       <li><a href="Seq.html#method-i-match">#match &mdash; Seq</a></li>
+               
                        <li><a href="Biopieces.html#method-i-mktmpdir">#mktmpdir &mdash; Biopieces</a></li>
                
                        <li><a href="OptionHandler.html#method-i-options_check">#options_check &mdash; OptionHandler</a></li>
                
                        <li><a href="Biopieces.html#method-i-parse">#parse &mdash; Biopieces</a></li>
                
+                       <li><a href="Locator.html#method-i-parse_locator">#parse_locator &mdash; Locator</a></li>
+               
                        <li><a href="Status.html#method-i-path">#path &mdash; Status</a></li>
                
                        <li><a href="OptionHandler.html#method-i-print_usage_and_exit">#print_usage_and_exit &mdash; OptionHandler</a></li>
                
                        <li><a href="OptionHandler.html#method-i-print_usage_short%3F">#print_usage_short? &mdash; OptionHandler</a></li>
                
+                       <li><a href="Fasta.html#method-i-puts">#puts &mdash; Fasta</a></li>
+               
+                       <li><a href="Fastq.html#method-i-puts">#puts &mdash; Fastq</a></li>
+               
                        <li><a href="Biopieces.html#method-i-puts">#puts &mdash; Biopieces</a></li>
                
-                       <li><a href="Fasta.html#method-i-puts">#puts &mdash; Fasta</a></li>
+                       <li><a href="SFF.html#method-i-read_parse">#read_parse &mdash; SFF</a></li>
                
                        <li><a href="Seq.html#method-i-revcomp">#revcomp &mdash; Seq</a></li>
                
                
                        <li><a href="Seq.html#method-i-reverse_complement">#reverse_complement &mdash; Seq</a></li>
                
+                       <li><a href="Locator.html#method-i-s_beg">#s_beg &mdash; Locator</a></li>
+               
+                       <li><a href="Locator.html#method-i-s_end">#s_end &mdash; Locator</a></li>
+               
                        <li><a href="Status.html#method-i-set">#set &mdash; Status</a></li>
                
                        <li><a href="Status.html#method-i-set_tmpdir">#set_tmpdir &mdash; Status</a></li>
                
+                       <li><a href="Seq.html#method-i-soft_mask">#soft_mask &mdash; Seq</a></li>
+               
+                       <li><a href="Seq.html#method-i-solexa2phred">#solexa2phred &mdash; Seq</a></li>
+               
+                       <li><a href="Seq.html#method-i-solexa_char2illumina_char">#solexa_char2illumina_char &mdash; Seq</a></li>
+               
+                       <li><a href="Locator.html#method-i-strand">#strand &mdash; Locator</a></li>
+               
+                       <li><a href="Seq.html#method-i-subseq">#subseq &mdash; Seq</a></li>
+               
+                       <li><a href="Seq.html#method-i-subseq%21">#subseq! &mdash; Seq</a></li>
+               
+                       <li><a href="Seq.html#method-i-subseq_rand">#subseq_rand &mdash; Seq</a></li>
+               
                        <li><a href="Status.html#method-i-time_diff">#time_diff &mdash; Status</a></li>
                
+                       <li><a href="Boulder.html#method-i-to_boulder">#to_boulder &mdash; Boulder</a></li>
+               
                        <li><a href="Seq.html#method-i-to_bp">#to_bp &mdash; Seq</a></li>
                
+                       <li><a href="Read.html#method-i-to_bp">#to_bp &mdash; Read</a></li>
+               
                        <li><a href="Seq.html#method-i-to_dna">#to_dna &mdash; Seq</a></li>
                
                        <li><a href="Seq.html#method-i-to_fasta">#to_fasta &mdash; Seq</a></li>
                
+                       <li><a href="Seq.html#method-i-to_key">#to_key &mdash; Seq</a></li>
+               
                        <li><a href="Seq.html#method-i-to_rna">#to_rna &mdash; Seq</a></li>
                
+                       <li><a href="BitArray.html#method-i-to_s">#to_s &mdash; BitArray</a></li>
+               
+                       <li><a href="Biopieces.html#method-i-to_s">#to_s &mdash; Biopieces</a></li>
+               
                        <li><a href="Casts.html#method-i-ubiquitous">#ubiquitous &mdash; Casts</a></li>
                
+                       <li><a href="GenbankFeatures.html#method-i-want_feat%3F">#want_feat? &mdash; GenbankFeatures</a></li>
+               
+                       <li><a href="Genbank.html#method-i-want_key%3F">#want_key? &mdash; Genbank</a></li>
+               
+                       <li><a href="GenbankFeatures.html#method-i-want_qual%3F">#want_qual? &mdash; GenbankFeatures</a></li>
+               
                        <li><a href="OptionHandler.html#method-i-wiki_path">#wiki_path &mdash; OptionHandler</a></li>
                
+                       <li><a href="BitArray.html#method-i-%7C">#| &mdash; BitArray</a></li>
+               
+                       <li><a href="String.html#method-i-%7C">#| &mdash; String</a></li>
+               
        </ul>
 
        <div id="validator-badges">
index bf07ab3e7cf93c738ef2d1a2919592fe49bb6d31..60f91bdf1b9a68581ae60cc427a33bf1f1070c6d 100644 (file)
@@ -24,6 +24,7 @@
 
 require 'amatch'
 require 'digest'
+require 'narray'
 
 # Residue alphabets
 DNA     = %w[a t c g]
@@ -35,6 +36,29 @@ INDELS  = %w[. - _ ~]
 SCORE_PHRED    = 33
 SCORE_ILLUMINA = 64
 
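+# Nucleotide equivalents: each key is a pair of upper case residues that is
+# considered a match. The table holds identical bases plus ambiguity codes
+# paired (in both orders) with the bases they stand for. Apart from :NN no
+# ambiguity code is paired with itself or with another ambiguity code.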
+EQUAL = {
+  :AA => true, :BU => true, :TH => true, :UY => true,
+  :TT => true, :CB => true, :UH => true, :SC => true,
+  :CC => true, :GB => true, :VA => true, :SG => true,
+  :GG => true, :TB => true, :VC => true, :CS => true,
+  :UU => true, :UB => true, :VG => true, :GS => true,
+  :NA => true, :DA => true, :AV => true, :WA => true,
+  :NT => true, :DG => true, :CV => true, :WT => true,
+  :NC => true, :DT => true, :GV => true, :WU => true,
+  :NG => true, :DU => true, :KG => true, :AW => true,
+  :NU => true, :AD => true, :KT => true, :TW => true,
+  :AN => true, :GD => true, :KU => true, :UW => true,
+  :TN => true, :TD => true, :GK => true, :RA => true,
+  :CN => true, :UD => true, :TK => true, :RG => true,
+  :GN => true, :HA => true, :UK => true, :AR => true,
+  :UN => true, :HC => true, :YC => true, :GR => true,
+  :NN => true, :HT => true, :YT => true, :MA => true,
+  :BC => true, :HU => true, :YU => true, :MC => true,
+  :BG => true, :AH => true, :CY => true, :AM => true,
+  :BT => true, :CH => true, :TY => true, :CM => true,
+}
+
 # Error class for all exceptions to do with Seq.
 class SeqError < StandardError; end
 
@@ -124,6 +148,7 @@ class Seq
   def to_dna
     raise SeqError, "Cannot reverse-transcribe 0 length sequence" if self.length == 0
     raise SeqError, "Cannot reverse-transcribe sequence type: #{self.type}" unless self.is_rna?
+
     self.type = 'dna'
     self.seq.tr!('Uu','Tt')
   end
@@ -132,6 +157,7 @@ class Seq
   def to_bp
     raise SeqError, "Missing seq_name" if self.seq_name.nil?
     raise SeqError, "Missing seq"      if self.seq.nil?
+
     record             = {}
     record[:SEQ_NAME] = self.seq_name
     record[:SEQ]      = self.seq
@@ -402,6 +428,65 @@ class Seq
     end
   end
 
+  # ------------------------------------------------------------------------------
+  #   seq.match(pattern[, pos]) -> position or nil
+  # ------------------------------------------------------------------------------
+  # Method to locate a pattern in a sequence, starting the search at pos, and
+  # return the position of the first match or nil if no match was found.
+  # Hamming and edit distance thresholds cannot be specified yet.
+  def match(pattern, pos = 0)
+    while pos < self.length - pattern.length + 1
+      str1 = self.seq[pos ... pos + pattern.length]
+      str2 = pattern
+
+      puts "pos: #{pos} str1: #{str1} str2: #{str2}"
+
+      rows = str1.length + 1
+      cols = str2.length + 1
+
+      matches    = 0
+      mismatches = 0
+      insertions = 0
+      deletions  = 0
+
+      matrix = NArray.int(rows, cols)
+
+      for i in 0 ... rows do matrix[i, 0] = i end
+      for j in 0 ... cols do matrix[0, j] = j end
+
+      for j in 1 ... cols do
+        for i in 1 ... rows do
+          puts "pos: #{pos}   i: #{i}   j: #{j}   str1: #{str1}   str2: #{str2}   str1[i-1]: #{str1[i-1]}   str2[j-1]: #{str2[j-1]}"
+
+          if EQUAL[(str1[i - 1].upcase + str2[j - 1].upcase).to_sym]
+            matrix[i, j] = matrix[i - 1, j - 1]
+            matches += 1
+          else
+            del = matrix[i - 1, j] + 1
+            ins = matrix[i, j - 1] + 1
+            mis = matrix[i - 1, j - 1] + 1
+
+            if del < ins and del < mis
+              deletions += 1
+              matrix[i, j] = del
+            elsif ins < del and ins < mis
+              insertions += 1
+              matrix[i, j] = ins
+            else
+              mismatches += 1
+              matrix[i, j] = mis
+            end
+          end
+        end
+      end
+      pp matrix
+      puts "match: #{matches}  mis: #{mismatches}   del: #{deletions}   ins: #{insertions}"
+
+      return pos if matrix[rows - 1, cols - 1] == 0
+
+      pos += 1
+    end
+  end
+
   private
 
   # Method to convert a Solexa score (odd ratio) to
@@ -418,68 +503,3 @@ class Seq
     (score_phred + 64).chr
   end
 end
-
-__END__
-
-
-# Class containing generic sequence methods and nucleic acid and amino acid subclasses.
-class Seq < String
-  # Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
-  def guess_type
-    raise ArgumentError, "No sequence." if self.empty?
-
-    seq_beg = self[0, 100].upcase
-
-    if seq_beg.count( "FLPQIE" ) > 0
-      Seq::AA.new(self)
-    elsif seq_beg.count("U") > 0
-      Seq::NA::RNA.new(self)
-    else
-      Seq::NA::DNA.new(self)
-    end
-  end
-
-  # Class containing methods specific for amino acid (AA) sequences.
-  class AA < Seq
-    # Method that returns an array of amino acid residues.
-    def residues
-      %w{ F L S Y C W P H Q R I M T N K V A D E G }
-    end
-
-    # Calculate the molecular weight of an amino acid seuqunce.
-    # The caluculation is only approximate since there is no correction
-    # for amino bond formation and the MW used are somewhat imprecise:
-    # http://www.expasy.ch/tools/pscale/Molecularweight.html
-    def mol_weight
-      raise ArgumentError, "invalid residues found: #{self.delete("#{residues.join( "" )}")}" if self.upcase =~ /[^#{residues.join( "" )}]/
-
-      mol_weight_aa = {
-        "A" => 89.000,    # Ala
-        "R" => 174.000,   # Arg
-        "N" => 132.000,   # Asn
-        "D" => 133.000,   # Asp
-        "C" => 121.000,   # Cys
-        "Q" => 146.000,   # Gln
-        "E" => 147.000,   # Glu
-        "G" => 75.000,    # Gly
-        "H" => 155.000,   # His
-        "I" => 131.000,   # Ile
-        "L" => 131.000,   # Leu
-        "K" => 146.000,   # Lys
-        "M" => 149.000,   # Met
-        "F" => 165.000,   # Phe
-        "P" => 115.000,   # Pro
-        "S" => 105.000,   # Ser
-        "T" => 119.000,   # Thr
-        "W" => 204.000,   # Trp
-        "Y" => 181.000,   # Tyr
-        "V" => 117.000,   # Val
-      }
-
-      mw = 0.0
-
-      self.upcase.each_char { |c| mw += mol_weight_aa[ c ] }
-
-      mw
-    end
-  end
index 25280c5eeca5a0304f4bc2e892337e22dde072d0..d8317d370ed95d864fe644f94d56b8fcc2f3a884 100755 (executable)
@@ -441,19 +441,21 @@ class TestSeq < Test::Unit::TestCase
     @entry.adaptor_clip_left("cgax", 25)
     assert_equal( "efghi", @entry.qual)
   end
-
-  def test_Digest_new_raises_on_bad_pattern_residue
-    assert_raise(DigestError) { Digest.new(@entry, "X", 4) }
+  
+  def test_Seq_match_with_no_match_returns_nil
+    @entry.seq = "atcg"
+    assert_equal(nil, @entry.match("ttt"))
   end
 
-  def test_Digest_new_dont_raise_on_ok_pattern_residue
-    assert_nothing_raised { Digest.new(@entry, "AGCUTRYWSMKHDVBNagcutrywsmkhdvbn", 4) }
+  def test_Seq_match_returns_correctly
+    @entry.seq = "atcg"
+    assert_equal(0, @entry.match("aTc"))
+    assert_equal(1, @entry.match("Ncg"))
   end
 
-  def test_Digest_each
-    @entry.seq = "aaaaTTTTbbbbTTTT"
-    digest = Digest.new(@entry, "TTNT", 1)
-    assert_equal("aaaaT", digest.first.seq)
+  def test_Seq_match_with_pos_returns_correctly
+    @entry.seq = "atcatc"
+    assert_equal(3, @entry.match("aTc", 2))
   end
 end