<h2 id="classes">Classes/Modules</h2>
<ul>
+ <li class="class"><a href="Base36.html">Base36</a></li>
+
+ <li class="class"><a href="Base36Error.html">Base36Error</a></li>
+
<li class="class"><a href="Biopieces.html">Biopieces</a></li>
+ <li class="class"><a href="BitArray.html">BitArray</a></li>
+
+ <li class="class"><a href="BitArrayError.html">BitArrayError</a></li>
+
+ <li class="class"><a href="Boulder.html">Boulder</a></li>
+
+ <li class="class"><a href="BoulderError.html">BoulderError</a></li>
+
<li class="class"><a href="CastError.html">CastError</a></li>
<li class="class"><a href="Casts.html">Casts</a></li>
<li class="class"><a href="FastaError.html">FastaError</a></li>
+ <li class="class"><a href="Fastq.html">Fastq</a></li>
+
+ <li class="class"><a href="FastqError.html">FastqError</a></li>
+
+ <li class="class"><a href="Filesys.html">Filesys</a></li>
+
+ <li class="class"><a href="FilesysError.html">FilesysError</a></li>
+
+ <li class="class"><a href="Genbank.html">Genbank</a></li>
+
+ <li class="class"><a href="GenbankError.html">GenbankError</a></li>
+
+ <li class="class"><a href="GenbankFeatures.html">GenbankFeatures</a></li>
+
+ <li class="class"><a href="Locator.html">Locator</a></li>
+
+ <li class="class"><a href="LocatorError.html">LocatorError</a></li>
+
<li class="class"><a href="OptionHandler.html">OptionHandler</a></li>
+ <li class="class"><a href="Read.html">Read</a></li>
+
+ <li class="class"><a href="SFF.html">SFF</a></li>
+
+ <li class="class"><a href="SFFError.html">SFFError</a></li>
+
<li class="class"><a href="Seq.html">Seq</a></li>
<li class="class"><a href="SeqError.html">SeqError</a></li>
<li class="class"><a href="Stream.html">Stream</a></li>
+ <li class="class"><a href="String.html">String</a></li>
+
+ <li class="class"><a href="StringError.html">StringError</a></li>
+
</ul>
<h2 id="methods">Methods</h2>
<ul>
- <li><a href="Biopieces.html#method-c-new">::new — Biopieces</a></li>
+ <li><a href="Base36.html#method-c-decode">::decode — Base36</a></li>
- <li><a href="Digest.html#method-c-new">::new — Digest</a></li>
+ <li><a href="Base36.html#method-c-encode">::encode — Base36</a></li>
+
+ <li><a href="Seq.html#method-c-generate_oligos">::generate_oligos — Seq</a></li>
+
+ <li><a href="String.html#method-c-hamming_dist">::hamming_dist — String</a></li>
+
+ <li><a href="GenbankFeatures.html#method-c-new">::new — GenbankFeatures</a></li>
+
+ <li><a href="Locator.html#method-c-new">::new — Locator</a></li>
<li><a href="OptionHandler.html#method-c-new">::new — OptionHandler</a></li>
- <li><a href="Seq.html#method-c-new">::new — Seq</a></li>
+ <li><a href="BitArray.html#method-c-new">::new — BitArray</a></li>
+
+ <li><a href="SFF.html#method-c-new">::new — SFF</a></li>
<li><a href="Casts.html#method-c-new">::new — Casts</a></li>
- <li><a href="Fasta.html#method-c-new">::new — Fasta</a></li>
+ <li><a href="Filesys.html#method-c-new">::new — Filesys</a></li>
+
+ <li><a href="Biopieces.html#method-c-new">::new — Biopieces</a></li>
+
+ <li><a href="Boulder.html#method-c-new">::new — Boulder</a></li>
+
+ <li><a href="Digest.html#method-c-new">::new — Digest</a></li>
+
+ <li><a href="Genbank.html#method-c-new">::new — Genbank</a></li>
+
+ <li><a href="Seq.html#method-c-new">::new — Seq</a></li>
<li><a href="Stream.html#method-c-nread">::nread — Stream</a></li>
<li><a href="Stream.html#method-c-nwrite">::nwrite — Stream</a></li>
- <li><a href="Fasta.html#method-c-open">::open — Fasta</a></li>
+ <li><a href="Filesys.html#method-c-open">::open — Filesys</a></li>
<li><a href="Stream.html#method-c-open">::open — Stream</a></li>
+ <li><a href="SFF.html#method-c-open">::open — SFF</a></li>
+
<li><a href="Stream.html#method-c-read">::read — Stream</a></li>
<li><a href="Stream.html#method-c-write">::write — Stream</a></li>
<li><a href="Stream.html#method-c-zipped%3F">::zipped? — Stream</a></li>
- <li><a href="Fasta.html#method-c-zopen">::zopen — Fasta</a></li>
+ <li><a href="Filesys.html#method-c-zopen">::zopen — Filesys</a></li>
<li><a href="Stream.html#method-c-zread">::zread — Stream</a></li>
<li><a href="Stream.html#method-c-zwrite">::zwrite — Stream</a></li>
+ <li><a href="BitArray.html#method-i-%26">#& — BitArray</a></li>
+
+ <li><a href="String.html#method-i-%26">#& — String</a></li>
+
+ <li><a href="String.html#method-i-%5E">#^ — String</a></li>
+
+ <li><a href="BitArray.html#method-i-%5E">#^ — BitArray</a></li>
+
+ <li><a href="Seq.html#method-i-adaptor_clip_left">#adaptor_clip_left — Seq</a></li>
+
+ <li><a href="Seq.html#method-i-adaptor_clip_right">#adaptor_clip_right — Seq</a></li>
+
+ <li><a href="Seq.html#method-i-adaptor_locate_left">#adaptor_locate_left — Seq</a></li>
+
+ <li><a href="Seq.html#method-i-adaptor_locate_right">#adaptor_locate_right — Seq</a></li>
+
+ <li><a href="Locator.html#method-i-balance_parens%3F">#balance_parens? — Locator</a></li>
+
+ <li><a href="BitArray.html#method-i-bit_pos">#bit_pos — BitArray</a></li>
+
+ <li><a href="BitArray.html#method-i-bit_set">#bit_set — BitArray</a></li>
+
+ <li><a href="BitArray.html#method-i-bit_set%3F">#bit_set? — BitArray</a></li>
+
+ <li><a href="BitArray.html#method-i-bits_in_char">#bits_in_char — BitArray</a></li>
+
+ <li><a href="BitArray.html#method-i-bits_off">#bits_off — BitArray</a></li>
+
+ <li><a href="BitArray.html#method-i-bits_on">#bits_on — BitArray</a></li>
+
+ <li><a href="BitArray.html#method-i-byte_pos">#byte_pos — BitArray</a></li>
+
<li><a href="Casts.html#method-i-check">#check — Casts</a></li>
<li><a href="Casts.html#method-i-check_duplicates">#check_duplicates — Casts</a></li>
+ <li><a href="SFF.html#method-i-check_header_length">#check_header_length — SFF</a></li>
+
<li><a href="Casts.html#method-i-check_keys">#check_keys — Casts</a></li>
+ <li><a href="SFF.html#method-i-check_magic_number">#check_magic_number — SFF</a></li>
+
<li><a href="Casts.html#method-i-check_val_allowed">#check_val_allowed — Casts</a></li>
<li><a href="Casts.html#method-i-check_val_default">#check_val_default — Casts</a></li>
<li><a href="Casts.html#method-i-check_values">#check_values — Casts</a></li>
- <li><a href="Fasta.html#method-i-close">#close — Fasta</a></li>
+ <li><a href="SFF.html#method-i-check_version">#check_version — SFF</a></li>
+
+ <li><a href="Read.html#method-i-clip">#clip — Read</a></li>
+
+ <li><a href="SFF.html#method-i-close">#close — SFF</a></li>
+
+ <li><a href="Filesys.html#method-i-close">#close — Filesys</a></li>
<li><a href="Seq.html#method-i-complement">#complement — Seq</a></li>
+ <li><a href="Seq.html#method-i-composition">#composition — Seq</a></li>
+
+ <li><a href="Seq.html#method-i-convert_phred2illumina%21">#convert_phred2illumina! — Seq</a></li>
+
+ <li><a href="Seq.html#method-i-convert_solexa2illumina%21">#convert_solexa2illumina! — Seq</a></li>
+
+ <li><a href="Read.html#method-i-coordinates_get">#coordinates_get — Read</a></li>
+
<li><a href="Status.html#method-i-delete">#delete — Status</a></li>
<li><a href="Digest.html#method-i-disambiguate">#disambiguate — Digest</a></li>
<li><a href="Biopieces.html#method-i-each">#each — Biopieces</a></li>
- <li><a href="Fasta.html#method-i-each">#each — Fasta</a></li>
+ <li><a href="Boulder.html#method-i-each">#each — Boulder</a></li>
+
+ <li><a href="Filesys.html#method-i-each">#each — Filesys</a></li>
+
+ <li><a href="GenbankFeatures.html#method-i-each">#each — GenbankFeatures</a></li>
+
+ <li><a href="Genbank.html#method-i-each">#each — Genbank</a></li>
<li><a href="Digest.html#method-i-each">#each — Digest</a></li>
+ <li><a href="SFF.html#method-i-each">#each — SFF</a></li>
+
<li><a href="Biopieces.html#method-i-each_record">#each_record — Biopieces</a></li>
+ <li><a href="SFF.html#method-i-fast_forward">#fast_forward — SFF</a></li>
+
<li><a href="Seq.html#method-i-generate">#generate — Seq</a></li>
<li><a href="Fasta.html#method-i-get_entry">#get_entry — Fasta</a></li>
+ <li><a href="Fastq.html#method-i-get_entry">#get_entry — Fastq</a></li>
+
+ <li><a href="Genbank.html#method-i-get_entry">#get_entry — Genbank</a></li>
+
+ <li><a href="Genbank.html#method-i-get_keys">#get_keys — Genbank</a></li>
+
+ <li><a href="GenbankFeatures.html#method-i-get_quals">#get_quals — GenbankFeatures</a></li>
+
+ <li><a href="Genbank.html#method-i-get_seq">#get_seq — Genbank</a></li>
+
<li><a href="Status.html#method-i-get_tmpdir">#get_tmpdir — Status</a></li>
+ <li><a href="Seq.html#method-i-hard_mask">#hard_mask — Seq</a></li>
+
+ <li><a href="SFF.html#method-i-header_parse">#header_parse — SFF</a></li>
+
+ <li><a href="Seq.html#method-i-homopol_max">#homopol_max — Seq</a></li>
+
+ <li><a href="Seq.html#method-i-indels">#indels — Seq</a></li>
+
+ <li><a href="BitArray.html#method-i-init_byte_array">#init_byte_array — BitArray</a></li>
+
+ <li><a href="BitArray.html#method-i-init_count_array">#init_count_array — BitArray</a></li>
+
<li><a href="Seq.html#method-i-is_dna%3F">#is_dna? — Seq</a></li>
<li><a href="Seq.html#method-i-is_protein%3F">#is_protein? — Seq</a></li>
<li><a href="Casts.html#method-i-long_to_sym">#long_to_sym — Casts</a></li>
+ <li><a href="Read.html#method-i-mask">#mask — Read</a></li>
+
+ <li><a href="Seq.html#method-i-match">#match — Seq</a></li>
+
<li><a href="Biopieces.html#method-i-mktmpdir">#mktmpdir — Biopieces</a></li>
<li><a href="OptionHandler.html#method-i-options_check">#options_check — OptionHandler</a></li>
<li><a href="Biopieces.html#method-i-parse">#parse — Biopieces</a></li>
+ <li><a href="Locator.html#method-i-parse_locator">#parse_locator — Locator</a></li>
+
<li><a href="Status.html#method-i-path">#path — Status</a></li>
<li><a href="OptionHandler.html#method-i-print_usage_and_exit">#print_usage_and_exit — OptionHandler</a></li>
<li><a href="OptionHandler.html#method-i-print_usage_short%3F">#print_usage_short? — OptionHandler</a></li>
+ <li><a href="Fasta.html#method-i-puts">#puts — Fasta</a></li>
+
+ <li><a href="Fastq.html#method-i-puts">#puts — Fastq</a></li>
+
<li><a href="Biopieces.html#method-i-puts">#puts — Biopieces</a></li>
- <li><a href="Fasta.html#method-i-puts">#puts — Fasta</a></li>
+ <li><a href="SFF.html#method-i-read_parse">#read_parse — SFF</a></li>
<li><a href="Seq.html#method-i-revcomp">#revcomp — Seq</a></li>
<li><a href="Seq.html#method-i-reverse_complement">#reverse_complement — Seq</a></li>
+ <li><a href="Locator.html#method-i-s_beg">#s_beg — Locator</a></li>
+
+ <li><a href="Locator.html#method-i-s_end">#s_end — Locator</a></li>
+
<li><a href="Status.html#method-i-set">#set — Status</a></li>
<li><a href="Status.html#method-i-set_tmpdir">#set_tmpdir — Status</a></li>
+ <li><a href="Seq.html#method-i-soft_mask">#soft_mask — Seq</a></li>
+
+ <li><a href="Seq.html#method-i-solexa2phred">#solexa2phred — Seq</a></li>
+
+ <li><a href="Seq.html#method-i-solexa_char2illumina_char">#solexa_char2illumina_char — Seq</a></li>
+
+ <li><a href="Locator.html#method-i-strand">#strand — Locator</a></li>
+
+ <li><a href="Seq.html#method-i-subseq">#subseq — Seq</a></li>
+
+ <li><a href="Seq.html#method-i-subseq%21">#subseq! — Seq</a></li>
+
+ <li><a href="Seq.html#method-i-subseq_rand">#subseq_rand — Seq</a></li>
+
<li><a href="Status.html#method-i-time_diff">#time_diff — Status</a></li>
+ <li><a href="Boulder.html#method-i-to_boulder">#to_boulder — Boulder</a></li>
+
<li><a href="Seq.html#method-i-to_bp">#to_bp — Seq</a></li>
+ <li><a href="Read.html#method-i-to_bp">#to_bp — Read</a></li>
+
<li><a href="Seq.html#method-i-to_dna">#to_dna — Seq</a></li>
<li><a href="Seq.html#method-i-to_fasta">#to_fasta — Seq</a></li>
+ <li><a href="Seq.html#method-i-to_key">#to_key — Seq</a></li>
+
<li><a href="Seq.html#method-i-to_rna">#to_rna — Seq</a></li>
+ <li><a href="BitArray.html#method-i-to_s">#to_s — BitArray</a></li>
+
+ <li><a href="Biopieces.html#method-i-to_s">#to_s — Biopieces</a></li>
+
<li><a href="Casts.html#method-i-ubiquitous">#ubiquitous — Casts</a></li>
+ <li><a href="GenbankFeatures.html#method-i-want_feat%3F">#want_feat? — GenbankFeatures</a></li>
+
+ <li><a href="Genbank.html#method-i-want_key%3F">#want_key? — Genbank</a></li>
+
+ <li><a href="GenbankFeatures.html#method-i-want_qual%3F">#want_qual? — GenbankFeatures</a></li>
+
<li><a href="OptionHandler.html#method-i-wiki_path">#wiki_path — OptionHandler</a></li>
+ <li><a href="BitArray.html#method-i-%7C">#| — BitArray</a></li>
+
+ <li><a href="String.html#method-i-%7C">#| — String</a></li>
+
</ul>
<div id="validator-badges">
require 'amatch'
require 'digest'
+require 'narray'
# Residue alphabets
DNA = %w[a t c g]
SCORE_PHRED = 33
SCORE_ILLUMINA = 64
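+# Nucleotide equivalents.
+# Lookup table of residue pairs that count as a match. Each key is a
+# two-letter uppercase symbol built from the sequence residue followed by the
+# pattern residue (see Seq#match): identical bases such as :AA, and an
+# ambiguity code paired with a base it covers, in either order (:NA, :AN).
+# Pairs that are not listed look up as nil, i.e. they do not match.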
+EQUAL = {
+ :AA => true, :BU => true, :TH => true, :UY => true,
+ :TT => true, :CB => true, :UH => true, :SC => true,
+ :CC => true, :GB => true, :VA => true, :SG => true,
+ :GG => true, :TB => true, :VC => true, :CS => true,
+ :UU => true, :UB => true, :VG => true, :GS => true,
+ :NA => true, :DA => true, :AV => true, :WA => true,
+ :NT => true, :DG => true, :CV => true, :WT => true,
+ :NC => true, :DT => true, :GV => true, :WU => true,
+ :NG => true, :DU => true, :KG => true, :AW => true,
+ :NU => true, :AD => true, :KT => true, :TW => true,
+ :AN => true, :GD => true, :KU => true, :UW => true,
+ :TN => true, :TD => true, :GK => true, :RA => true,
+ :CN => true, :UD => true, :TK => true, :RG => true,
+ :GN => true, :HA => true, :UK => true, :AR => true,
+ :UN => true, :HC => true, :YC => true, :GR => true,
+ :NN => true, :HT => true, :YT => true, :MA => true,
+ :BC => true, :HU => true, :YU => true, :MC => true,
+ :BG => true, :AH => true, :CY => true, :AM => true,
+ :BT => true, :CH => true, :TY => true, :CM => true,
+}
+
# Error class for all exceptions to do with Seq.
class SeqError < StandardError; end
def to_dna
raise SeqError, "Cannot reverse-transcribe 0 length sequence" if self.length == 0
raise SeqError, "Cannot reverse-transcribe sequence type: #{self.type}" unless self.is_rna?
+
self.type = 'dna'
self.seq.tr!('Uu','Tt')
end
def to_bp
raise SeqError, "Missing seq_name" if self.seq_name.nil?
raise SeqError, "Missing seq" if self.seq.nil?
+
record = {}
record[:SEQ_NAME] = self.seq_name
record[:SEQ] = self.seq
end
end
+ # ------------------------------------------------------------------------------
+ #   seq.match(pattern[, pos])  ->  Integer or nil
+ # ------------------------------------------------------------------------------
+ # Locate +pattern+ in the sequence. Windows of pattern.length residues are
+ # examined from offset +pos+ (default 0), moving right one residue at a time.
+ # Residues are compared case-insensitively through the EQUAL table, keyed by
+ # the sequence residue followed by the pattern residue.
+ #
+ # Returns the start position of the first window whose distance to the
+ # pattern is 0, or nil when no such window exists (including when the
+ # pattern is longer than the remaining sequence).
+ #
+ # NOTE(review): Hamming/edit distance thresholds (hd/ed) are not implemented
+ # yet; only windows with distance 0 are reported. The distance matrix is
+ # kept as the basis for that extension.
+ def match(pattern, pos = 0)
+   while pos < self.length - pattern.length + 1
+     str1 = self.seq[pos ... pos + pattern.length]
+     str2 = pattern
+
+     rows = str1.length + 1
+     cols = str2.length + 1
+
+     matrix = NArray.int(rows, cols)
+
+     # First row and column hold the cost of aligning against an empty prefix.
+     (0 ... rows).each { |i| matrix[i, 0] = i }
+     (0 ... cols).each { |j| matrix[0, j] = j }
+
+     (1 ... cols).each do |j|
+       (1 ... rows).each do |i|
+         if EQUAL[(str1[i - 1].upcase + str2[j - 1].upcase).to_sym]
+           matrix[i, j] = matrix[i - 1, j - 1]
+         else
+           del = matrix[i - 1, j] + 1
+           ins = matrix[i, j - 1] + 1
+           mis = matrix[i - 1, j - 1] + 1
+
+           # Always take the cheapest edit; ties between deletion and
+           # insertion must not fall through to a more expensive mismatch.
+           matrix[i, j] = [del, ins, mis].min
+         end
+       end
+     end
+
+     return pos if matrix[rows - 1, cols - 1] == 0
+
+     pos += 1
+   end
+
+   nil
+ end
+
private
# Method to convert a Solexa score (odd ratio) to
(score_phred + 64).chr
end
end
-
-__END__
-
-
-# Class containing generic sequence methods and nucleic acid and amino acid subclasses.
-class Seq < String
- # Guess the sequence type by analyzing the first 100 residues allowing for ambiguity codes.
- def guess_type
- raise ArgumentError, "No sequence." if self.empty?
-
- seq_beg = self[0, 100].upcase
-
- if seq_beg.count( "FLPQIE" ) > 0
- Seq::AA.new(self)
- elsif seq_beg.count("U") > 0
- Seq::NA::RNA.new(self)
- else
- Seq::NA::DNA.new(self)
- end
- end
-
- # Class containing methods specific for amino acid (AA) sequences.
- class AA < Seq
- # Method that returns an array of amino acid residues.
- def residues
- %w{ F L S Y C W P H Q R I M T N K V A D E G }
- end
-
- # Calculate the molecular weight of an amino acid seuqunce.
- # The caluculation is only approximate since there is no correction
- # for amino bond formation and the MW used are somewhat imprecise:
- # http://www.expasy.ch/tools/pscale/Molecularweight.html
- def mol_weight
- raise ArgumentError, "invalid residues found: #{self.delete("#{residues.join( "" )}")}" if self.upcase =~ /[^#{residues.join( "" )}]/
-
- mol_weight_aa = {
- "A" => 89.000, # Ala
- "R" => 174.000, # Arg
- "N" => 132.000, # Asn
- "D" => 133.000, # Asp
- "C" => 121.000, # Cys
- "Q" => 146.000, # Gln
- "E" => 147.000, # Glu
- "G" => 75.000, # Gly
- "H" => 155.000, # His
- "I" => 131.000, # Ile
- "L" => 131.000, # Leu
- "K" => 146.000, # Lys
- "M" => 149.000, # Met
- "F" => 165.000, # Phe
- "P" => 115.000, # Pro
- "S" => 105.000, # Ser
- "T" => 119.000, # Thr
- "W" => 204.000, # Trp
- "Y" => 181.000, # Tyr
- "V" => 117.000, # Val
- }
-
- mw = 0.0
-
- self.upcase.each_char { |c| mw += mol_weight_aa[ c ] }
-
- mw
- end
- end