# Error class for all exceptions to do with FASTQ.
class FastqError < StandardError; end
+# Class for parsing FASTQ entries from an ios and returning them as Seq objects.
class Fastq < Filesys
- # Method to get the next FASTQ entry form an ios and return this
+ # Method to get the next FASTQ entry from an ios and return this
# as a Seq object. If no entry is found or eof then nil is returned.
def get_entry
- begin
- seq_name = @io.gets.chomp!
- seq = @io.gets.chomp!
- qual_name = @io.gets.chomp!
- qual = @io.gets.chomp!
-
- entry = Seq.new
- entry.type = @type.nil? ? nil : @type.downcase
- entry.seq = seq
- entry.seq_name = seq_name[1 .. seq_name.length]
- entry.qual = qual
-
- entry
- rescue
- nil
- end
+ seq_name = @io.gets.chomp!
+ seq = @io.gets.chomp!
+ @io.gets
+ qual = @io.gets.chomp!
+
+ entry = Seq.new
+ entry.seq = seq
+ entry.seq_name = seq_name[1 .. seq_name.length]
+ entry.qual = qual
+ entry.type = nil
+
+ entry
+ rescue
+ nil
end
+end
+
+# Class for indexing FASTQ entries. The index will be
+# a hash with the FASTQ sequence name as key and a
+# FastqElem as value. The latter contains info on
+# byte offset and length for each entry.
+class FastqIndex
+ HEADCHAR = 1
+ NEWLINE = 1
+
+ attr_accessor :ios
+
+ # Method to initialize a FastqIndex object. For reading
+ # entries from file an _ios_ object must be supplied.
+ def initialize(ios = nil)
+ @ios = ios
+ @index = {}
+ @offset = 0
+ end
+
+ # Method to add a Fastq entry to a FastqIndex.
+ def add(entry)
+ offset_seq = @offset + HEADCHAR + entry.seq_name.length + NEWLINE
+ offset_qual = @offset + HEADCHAR + entry.seq_name.length + NEWLINE + entry.length + NEWLINE + HEADCHAR + NEWLINE
+
+ @index[entry.seq_name] = FastqElem.new(offset_seq, offset_qual, entry.length)
+
+ @offset += HEADCHAR + entry.seq_name.length + NEWLINE + entry.length + NEWLINE + HEADCHAR + NEWLINE + entry.length + NEWLINE
+ end
+
+ # Method to read from file a Fastq entry from an indexed position,
+ # and return the entry as a Seq object.
+ def get(seq_name)
+ raise FastqError, "Sequence name: #{seq_name} not found in index." unless @index[seq_name]
+
+ elem = @index[seq_name]
+ @ios.sysseek(elem.offset_seq)
+ seq = @ios.sysread(elem.length)
+ @ios.sysseek(elem.offset_qual)
+ qual = @ios.sysread(elem.length)
+
+ Seq.new(seq_name, seq, nil, qual)
+ end
+
+ private
+
+ # Class for storing index information to be used
+ # with disk based index.
+ class FastqElem
+ attr_reader :offset_seq, :offset_qual, :length
- # TODO - this should be some custom to_s method instead.
- def puts(record)
- if record.has_key? :SEQ_NAME and record.has_key? :SEQ
- @io.print ">#{record[:SEQ_NAME]}\n"
- @io.print "#{record[:SEQ]}\n"
+ def initialize(offset_seq, offset_qual, length)
+ @offset_seq = offset_seq
+ @offset_qual = offset_qual
+ @length = length
end
end
end