# Reconstructed from the whitespace-collapsed patch 1cde8c9
# ("added FastqIndex to fastq.rb", git-svn trunk@1741).

# Error class for all exceptions to do with FASTQ.
class FastqError < StandardError; end

# Class for indexing FASTQ entries. The index is a hash with the
# FASTQ sequence name as key and a FastqElem as value. The latter
# holds the byte offsets of the sequence and quality strings plus
# their shared length for each entry.
#
# The offset arithmetic assumes every record is laid out as
#
#   <1 byte><seq_name>\n<seq>\n<1 byte>\n<qual>\n
#
# i.e. four lines, one-byte line terminators, nothing but a single
# marker byte on the separator line, and +qual+ exactly as long as
# +seq+. Records that deviate from this layout will yield offsets
# that do not match the file.
class FastqIndex
  HEADCHAR = 1 # Bytes taken by the marker that starts lines 1 and 3.
  NEWLINE  = 1 # Bytes taken by a line terminator.

  # Class for storing index information to be used
  # with disk based index.
  class FastqElem
    attr_reader :offset_seq, :offset_qual, :length

    # _offset_seq_ and _offset_qual_ are absolute byte positions in
    # the indexed stream; _length_ is the size of both strings.
    def initialize(offset_seq, offset_qual, length)
      @offset_seq  = offset_seq
      @offset_qual = offset_qual
      @length      = length
    end
  end

  attr_accessor :ios

  # Method to initialize a FastqIndex object. For reading
  # entries from file an _ios_ object must be supplied; it
  # has to respond to +sysseek+ and +sysread+.
  def initialize(ios)
    @ios    = ios
    @index  = {}
    @offset = 0
  end

  # Method to add a Fastq entry to a FastqIndex.
  #
  # _entry_ must respond to +seq_name+ (a String) and +length+.
  # Entries have to be added in the order they occur in the file,
  # starting with the first one, because each entry's position is
  # derived from the running total of the entries added before it.
  # Adding a name that is already indexed replaces the old element.
  #
  # Returns the byte offset just past the added entry.
  def add(entry)
    # These offsets are handed to IO#sysseek, which counts bytes, so
    # a name with multibyte characters must be measured in bytes.
    name_size = entry.seq_name.bytesize
    seq_size  = entry.length

    offset_seq  = @offset + HEADCHAR + name_size + NEWLINE
    offset_qual = offset_seq + seq_size + NEWLINE + HEADCHAR + NEWLINE

    @index[entry.seq_name] = FastqElem.new(offset_seq, offset_qual, seq_size)

    @offset = offset_qual + seq_size + NEWLINE
  end

  # Method to read from file a Fastq entry from an indexed position,
  # and return the entry as a Seq object.
  #
  # Raises FastqError if _seq_name_ has not been added to the index.
  def get(seq_name)
    elem = @index.fetch(seq_name) do
      raise FastqError, "Sequence name: #{seq_name} not found in index."
    end

    seq  = read_at(elem.offset_seq, elem.length)
    qual = read_at(elem.offset_qual, elem.length)

    Seq.new(seq_name, seq, nil, qual)
  end

  private

  # Method to read _length_ bytes from the ios starting at the
  # absolute byte position _offset_.
  def read_at(offset, length)
    @ios.sysseek(offset)
    @ios.sysread(length)
  end
end