From: martinahansen
Date: Fri, 3 Feb 2012 07:10:13 +0000 (+0000)
Subject: added ustar and each_alignment methods to usearch.rb
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=447af95a78261af733caae63976c466e7c396d0c;p=biopieces.git

added ustar and each_alignment methods to usearch.rb

git-svn-id: http://biopieces.googlecode.com/svn/trunk@1740 74ccb610-7750-0410-82ae-013aeee3265d
---

diff --git a/code_ruby/lib/maasha/usearch.rb b/code_ruby/lib/maasha/usearch.rb
index 9f09751..1618f50 100644
--- a/code_ruby/lib/maasha/usearch.rb
+++ b/code_ruby/lib/maasha/usearch.rb
@@ -107,7 +107,7 @@ class Usearch
   def each_cluster
     record = {}
 
-    File.open(@outfile, mode="r") do |ios|
+    File.open(@outfile, "r") do |ios|
       ios.each_line do |line|
         if line !~ /^#/
           fields = line.chomp.split("\t")
@@ -132,7 +132,7 @@ class Usearch
   def each_hit
     record = {}
 
-    File.open(@outfile, mode="r") do |ios|
+    File.open(@outfile, "r") do |ios|
       ios.gets # skip comment line
       ios.each_line do |line|
         fields = line.chomp.split("\t")
@@ -152,8 +152,63 @@ class Usearch
     self # conventionally
   end
 
+  # Method to parse a FASTA file with Ustar alignments and for each cluster
+  # yield an Align object holding that cluster's entries.
+  #
+  # Sequence names are split on '|' and the first field is read as the cluster
+  # number; consecutive entries with the same number form one alignment.
+  # Any '.' in a sequence is replaced with '-', and fix_alignment pads uneven
+  # sequences before the Align object is built.
+  #
+  # Returns self.
+  def each_alignment
+    old_cluster = nil # No cluster seen yet; ids need not start at 0.
+    entries     = []
+
+    Fasta.open(@outfile, "r") do |ios|
+      ios.each do |entry|
+        entry.seq.tr!('.', '-') # Replace . with - in Ustar alignments.
+        cluster = entry.seq_name.split('|').first.to_i
+
+        # A new cluster number closes the alignment collected so far.
+        if cluster != old_cluster && !entries.empty?
+          fix_alignment(entries)
+
+          yield Align.new(entries)
+
+          entries = []
+        end
+
+        old_cluster = cluster
+        entries << entry
+      end
+    end
+
+    # The last cluster has no following entry to trigger it, so flush it here.
+    unless entries.empty?
+      fix_alignment(entries)
+
+      yield Align.new(entries)
+    end
+
+    self # conventionally
+  end
+
   private
 
+  # Method that fixes Ustar bug resulting in alignments with uneven
+  # sequence length by right-padding shorter sequences with '-'.
+  def fix_alignment(entries)
+    return if entries.size < 2
+
+    longest = entries.map(&:length).max
+
+    entries.each do |entry|
+      missing = longest - entry.length
+      entry.seq << '-' * missing if missing > 0
+    end
+  end
+
   # Method to execute a command using a system() call.
   # The command is composed of bits from the @command variable.
   def execute