Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
if record[:SEQ]
- seq = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SEQ_TYPE], record[:SCORE])
+ seq = Seq.new_bp(record)
comp = seq.composition
comp.each_pair do |key,val|
end
got1 = true
- type1 = Seq.new(nil, record[:SEQ][0 ... 100]).type_guess
+ type1 = Seq.new(seq: record[:SEQ][0 ... 100]).type_guess
elsif got2.nil?
Fasta.open(infile2, "w") do |fasta_io|
fasta_io.puts seq.to_fasta
end
got2 = true
- type2 = Seq.new(nil, record[:SEQ][0 ... 100]).type_guess
+ type2 = Seq.new(seq: record[:SEQ][0 ... 100]).type_guess
end
if got1 and got2
options = Biopieces.options_parse(ARGV, casts)
if options[:forward_rc]
- options[:forward] = Seq.new("test", options[:forward_rc], :dna).reverse.complement.seq
+ options[:forward] = Seq.new(seq: options[:forward_rc], type: :dna).reverse.complement.seq
end
if options[:reverse_rc]
- options[:reverse] = Seq.new("test", options[:reverse_rc], :dna).reverse.complement.seq
+ options[:reverse] = Seq.new(seq: options[:reverse_rc], type: :dna).reverse.complement.seq
end
raise ArgumentError, "no adaptor specified" unless options[:forward] or options[:reverse]
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
if record[:SEQ]
- seq = Seq.new(nil, record[:SEQ])
+ seq = Seq.new(seq: record[:SEQ])
longest = Seq::Homopolymer.new("", 0, 0)
got_one = false
options = Biopieces.options_parse(ARGV, casts)
sequences = []
-seq = Seq.new(nil, "")
+seq = Seq.new(seq: "")
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
if record[:SEQ] and record[:SEQ].length >= options[:size]
- entry = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SCORES])
+ entry = Seq.new_bp(record)
entry.type = :dna
entry.shred(options[:size], options[:coverage]) do |subentry|
seq = @entry[j + 1 .. i].join.delete(" 0123456789")
- Seq.new(nil, seq, :dna) if seq
+ Seq.new(seq: seq, type: :dna) if seq
end
# Method to get the base keys from EMBL entry and return these
seq = @entry[j + 1 .. i].join.delete(" 0123456789")
- Seq.new(nil, seq, :dna) if seq
+ Seq.new(seq: seq, type: :dna) if seq
end
# Method to get the base keys from Genbank entry and return these
# Stores the given locator and seq, starts @subseq as an empty DNA Seq, and
# then passes the locator to parse_locator.
# NOTE(review): seq is presumably a Seq object (later code slices @seq.seq) —
# confirm against callers.
def initialize(locator, seq)
@locator = locator
@seq = seq
- @subseq = Seq.new(nil, "", :dna)
+ @subseq = Seq.new(seq: "", type: :dna)
parse_locator(locator)
end
int_beg = $1.to_i - 1
int_end = $2.to_i - 1
- newseq = Seq.new(nil, @seq.seq[int_beg..int_end], :dna)
+ newseq = Seq.new(seq: @seq.seq[int_beg..int_end], type: :dna)
unless newseq.seq.nil?
newseq.reverse!.complement! if comp
when /^(\d+)$/
pos = $1.to_i - 1
- newseq = Seq.new(nil, @seq.seq[pos], :dna)
+ newseq = Seq.new(seq: @seq.seq[pos], type: :dna)
unless newseq.seq.nil?
newseq.reverse!.complement! if comp
entry[:RNEXT] = rnext
entry[:PNEXT] = pnext
entry[:TLEN] = tlen
- entry[:SEQ] = (qual == '*') ? Seq.new(qname, seq) : Seq.new(qname, seq, qual)
+ entry[:SEQ] = (qual == '*') ? Seq.new(seq_name: qname, seq: seq) : Seq.new(seq_name: qname, seq: seq, qual: qual)
entry[:QUAL] = qual
# Optional fields - where some are really important! HATE HATE HATE SAM!!!
type = record[:SEQ_TYPE].to_sym if record[:SEQ_TYPE]
qual = record[:SCORES]
- self.new(seq_name, seq, type, qual)
+ self.new(seq_name: seq_name, seq: seq, type: type, qual: qual)
end
# Class method that generates all possible oligos of a specified length and type.
oligos
end
- # Initialize a sequence object with the following arguments:
- # - seq_name: Name of the sequence.
- # - seq: The sequence.
- # - type: The sequence type - DNA, RNA, or protein
- # - qual: An Illumina type quality scores string.
- def initialize(seq_name = nil, seq = nil, type = nil, qual = nil)
- @seq_name = seq_name
- @seq = seq
- @type = type
- @qual = qual
+ # Initialize a sequence object with the following options:
+ # - :seq_name Name of the sequence.
+ # - :seq The sequence.
+ # - :type The sequence type - DNA, RNA, or protein
+ # - :qual An Illumina type quality scores string.
+ def initialize(options = {})
+ @seq_name = options[:seq_name]
+ @seq = options[:seq]
+ @type = options[:type]
+ @qual = options[:qual]
- if @qual
- raise SeqError, "Sequence length and score length mismatch: #{@seq.length} != #{@qual.length}" if @seq.length != @qual.length
+ if @qual and @seq.length != @qual.length
+ raise SeqError, "Sequence length and score length mismatch: #{@seq.length} != #{@qual.length}"
end
end
# Method that returns a new Seq object with the sequence reversed. The name
# and type are copied over, and the quality string, when present, is reversed
# along with the sequence (a nil qual stays nil).
def reverse
- Seq.new(self.seq_name, self.seq.reverse, self.type, self.qual ? self.qual.reverse : self.qual)
+ Seq.new(
+ seq_name: self.seq_name,
+ seq: self.seq.reverse,
+ type: self.type,
+ qual: (self.qual ? self.qual.reverse : self.qual)
+ )
end
# Method to complement the sequence.
def complement
raise SeqError, "Cannot complement 0 length sequence" if self.length == 0
- entry = Seq.new
- entry.seq_name = self.seq_name
- entry.type = self.type
- entry.qual = self.qual
+ entry = Seq.new(
+ seq_name: self.seq_name,
+ type: self.type,
+ qual: self.qual
+ )
if self.is_dna?
entry.seq = self.seq.tr('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn')
# Method to return a new Seq object with shuffled sequence.
# The name and type are copied over unchanged.
# NOTE(review): qual is passed through as-is, so after shuffling the scores no
# longer line up with the residues they were assigned to — confirm this is
# intended.
def shuffle
- Seq.new(self.seq_name, self.seq.split('').shuffle!.join, self.type, self.qual)
+ Seq.new(
+ seq_name: self.seq_name,
+ seq: self.seq.split('').shuffle!.join,
+ type: self.type,
+ qual: self.qual
+ )
end
# Method to shuffle a sequence randomly inline.
seq_name = self.seq_name.nil? ? nil : self.seq_name.dup
- Seq.new(seq_name, seq, self.type, qual)
+ Seq.new(seq_name: seq_name, seq: seq, type: self.type, qual: qual)
end
# Method that replaces a sequence with a subsequence from a given start position
class TestAssemble < Test::Unit::TestCase
test "Assemble.pair without overlap returns nil" do
- assert_nil(Assemble.pair(Seq.new("test1", "aaaa"), Seq.new("test2", "tttt")))
+ assert_nil(Assemble.pair(Seq.new(seq: "aaaa"), Seq.new(seq: "tttt")))
end
- test "Assemble.pair with overlap returns the correctly" do
- assert_equal("atcGatc", Assemble.pair(Seq.new("test1", "atcg"), Seq.new("test2", "gatc")).seq)
- assert_equal("atCGat", Assemble.pair(Seq.new("test1", "atcg"), Seq.new("test2", "cgat")).seq)
- assert_equal("aTCGa", Assemble.pair(Seq.new("test1", "atcg"), Seq.new("test2", "tcga")).seq)
- assert_equal("ATCG", Assemble.pair(Seq.new("test1", "atcg"), Seq.new("test2", "atcg")).seq)
+ test "Assemble.pair with overlap returns correctly" do
+ assert_equal("atcGatc", Assemble.pair(Seq.new(seq_name: "t1", seq: "atcg"), Seq.new(seq_name: "t2", seq: "gatc")).seq)
+ assert_equal("atCGat", Assemble.pair(Seq.new(seq_name: "t1", seq: "atcg"), Seq.new(seq_name: "t2", seq: "cgat")).seq)
+ assert_equal("aTCGa", Assemble.pair(Seq.new(seq_name: "t1", seq: "atcg"), Seq.new(seq_name: "t2", seq: "tcga")).seq)
+ assert_equal("ATCG", Assemble.pair(Seq.new(seq_name: "t1", seq: "atcg"), Seq.new(seq_name: "t2", seq: "atcg")).seq)
end
test "Assemble.pair with overlap and overlap_min returns correctly" do
- assert_nil(Assemble.pair(Seq.new("test1", "atcg"), Seq.new("test2", "gatc"), :overlap_min => 2))
- assert_equal("atCGat", Assemble.pair(Seq.new("test1", "atcg"), Seq.new("test2", "cgat"), :overlap_min => 2).seq)
+ assert_nil(Assemble.pair(Seq.new(seq: "atcg"), Seq.new(seq: "gatc"), :overlap_min => 2))
+ assert_equal("atCGat", Assemble.pair(Seq.new(seq_name: "t1", seq: "atcg"), Seq.new(seq_name: "t2", seq: "cgat"), :overlap_min => 2).seq)
end
test "Assemble.pair with overlap and overlap_max returns correctly" do
- assert_equal("aTCGa", Assemble.pair(Seq.new("test1", "atcg"), Seq.new("test2", "tcga"), :overlap_max => 3).seq)
- assert_nil(Assemble.pair(Seq.new("test1", "atcg"), Seq.new("test2", "atcg"), :overlap_max => 3))
+ assert_equal("aTCGa", Assemble.pair(Seq.new(seq_name: "t1", seq: "atcg"), Seq.new(seq_name: "t2", seq: "tcga"), :overlap_max => 3).seq)
+ assert_nil(Assemble.pair(Seq.new(seq: "atcg"), Seq.new(seq: "atcg"), :overlap_max => 3))
end
test "Assemble.pair with overlap returns correct quality" do
- assert_equal("?", Assemble.pair(Seq.new("test1", "a", :dna, "I"), Seq.new("test2", "a", :dna, "5")).qual)
- assert_equal("GH??43", Assemble.pair(Seq.new("test1", "atcg", :dna, "GHII"), Seq.new("test2", "cgat", :dna, "5543")).qual)
- assert_equal("I???5", Assemble.pair(Seq.new("test1", "atcg", :dna, "IIII"), Seq.new("test2", "tcga", :dna, "5555")).qual)
+ assert_equal("?", Assemble.pair(Seq.new(seq_name: "t1", seq: "a", qual: "I"), Seq.new(seq_name: "t2", seq: "a", qual: "5")).qual)
+ assert_equal("GH??43", Assemble.pair(Seq.new(seq_name: "t1", seq: "atcg", qual: "GHII"), Seq.new(seq_name: "t2", seq: "cgat", qual: "5543")).qual)
+ assert_equal("I???5", Assemble.pair(Seq.new(seq_name: "t1", seq: "atcg", qual: "IIII"), Seq.new(seq_name: "t2", seq: "tcga", qual: "5555")).qual)
end
test "Assemble.pair with mismatch returns the highest scoring" do
- assert_equal("ATCGA", Assemble.pair(Seq.new("t1", "atcga", :dna, "IIIII"), Seq.new("t2", "attga", :dna, "55555"), :mismatches_max => 20).seq)
- assert_equal("ATTGA", Assemble.pair(Seq.new("t1", "atcga", :dna, "55555"), Seq.new("t2", "attga", :dna, "IIIII"), :mismatches_max => 20).seq)
+ assert_equal("ATCGA", Assemble.pair(Seq.new(seq_name: "t1", seq: "atcga", qual: "IIIII"), Seq.new(seq_name: "t2", seq: "attga", qual: "55555"), :mismatches_max => 20).seq)
+ assert_equal("ATTGA", Assemble.pair(Seq.new(seq_name: "t1", seq: "atcga", qual: "55555"), Seq.new(seq_name: "t2", seq: "attga", qual: "IIIII"), :mismatches_max => 20).seq)
end
end
class BackTrackTest < Test::Unit::TestCase
def setup
- # 0 1
- # 01234567890123456789
- @seq = Seq.new("test", "tacgatgctagcatgcacgg")
+ # 0 1
+ # 01234567890123456789
+ @seq = Seq.new(seq: "tacgatgctagcatgcacgg")
@seq.extend(BackTrack)
end
end
test "#each_digest dont raise on ok pattern residue" do
- entry = Seq.new("test", "atcg")
+ entry = Seq.new(seq: "atcg")
assert_nothing_raised { entry.each_digest("AGCUTRYWSMKHDVBNagcutrywsmkhdvbn", 4) }
end
test "#each_digest with no match returns correctly" do
- entry = Seq.new("test", "aaaaaaaaaaa")
+ entry = Seq.new(seq: "aaaaaaaaaaa")
assert_equal(entry.seq, entry.each_digest("CGCG", 1).first.seq)
end
test "#each_digest with matches returns correctly" do
- entry = Seq.new("test", "TTTTaaaaTTTTbbbbTTTT")
+ entry = Seq.new(seq: "TTTTaaaaTTTTbbbbTTTT")
seqs = entry.each_digest("TTNT", 1)
assert_equal("T", seqs[0].seq)
assert_equal("TTTaaaaT", seqs[1].seq)
class TestDynamic < Test::Unit::TestCase
def setup
- @p = Seq.new("test", "atcg")
+ @p = Seq.new(seq_name: "test", seq: "atcg")
@p.extend(Dynamic)
end
end
test "#patscan locates three patterns ok" do
- p = Seq.new("test", "ataacgagctagctagctagctgactac")
+ p = Seq.new(seq_name: "test", seq: "ataacgagctagctagctagctgactac")
p.extend(Dynamic)
assert_equal(3, p.patscan("tag").count)
end
test "#patscan with pos locates two patterns ok" do
- p = Seq.new("test", "ataacgagctagctagctagctgactac")
+ p = Seq.new(seq_name: "test", seq: "ataacgagctagctagctagctgactac")
p.extend(Dynamic)
assert_equal(2, p.patscan("tag", 10).count)
end
class HomopolymerTest < Test::Unit::TestCase
def setup
- # 0 1
- # 01234567890123456789
- @entry = Seq.new("test", "tacgatgctagcatgcacgg")
+ # 0 1
+ # 01234567890123456789
+ @entry = Seq.new(seq: "tacgatgctagcatgcacgg")
@entry.extend(Homopolymer)
end
class TestLocator < Test::Unit::TestCase
def setup
- @seq = Seq.new(nil, "tcatgatcaagatctaacagcagaagtacacttctattta", :dna)
+ @seq = Seq.new(seq: "tcatgatcaagatctaacagcagaagtacacttctattta", type: :dna)
end
test "#new with single position returns correctly" do
end
test "Seq.new with differnet length SEQ and SCORES raises" do
- assert_raise(SeqError) { Seq.new("test", "ATCG", "dna", "hhh") }
+ assert_raise(SeqError) { Seq.new(seq_name: "test", seq: "ATCG", type: :dna, qual: "hhh") }
end
test "Seq.new_bp returns correctly" do
end
test "#to_fasta wraps correctly" do
- entry = Seq.new("test", "ATCG")
+ entry = Seq.new(seq_name: "test", seq: "ATCG")
assert_equal(">test\nAT\nCG\n", entry.to_fasta(2))
end
end
test "#to_key with bad residue raises" do
- entry = Seq.new("test", "AUCG")
+ entry = Seq.new(seq_name: "test", seq: "AUCG")
assert_raise(SeqError) { entry.to_key }
end
test "#to_key returns correctly" do
- entry = Seq.new("test", "ATCG")
+ entry = Seq.new(seq_name: "test", seq: "ATCG")
assert_equal(54, entry.to_key)
end
end
test "#hamming_distance returns correctly" do
- seq1 = Seq.new("test1", "ATCG")
- seq2 = Seq.new("test2", "atgg")
+ seq1 = Seq.new(seq: "ATCG")
+ seq2 = Seq.new(seq: "atgg")
assert_equal(1, seq1.hamming_distance(seq2))
end
test "#hamming_distance with ambiguity codes return correctly" do
- seq1 = Seq.new("test1", "ATCG")
- seq2 = Seq.new("test2", "atng")
+ seq1 = Seq.new(seq: "ATCG")
+ seq2 = Seq.new(seq: "atng")
assert_equal(1, seq1.hamming_distance(seq2))
assert_equal(0, seq1.hamming_distance(seq2, ambiguity: true))
end
test "#edit_distance returns correctly" do
- seq1 = Seq.new("test1", "ATCG")
- seq2 = Seq.new("test2", "tgncg")
+ seq1 = Seq.new(seq: "ATCG")
+ seq2 = Seq.new(seq: "tgncg")
assert_equal(2, seq1.edit_distance(seq2))
end
end
test "#+ without qual returns correctly" do
- entry = Seq.new("test1", "at") + Seq.new("test2", "cg")
+ entry = Seq.new(seq_name: "test1", seq: "at") + Seq.new(seq_name: "test2", seq: "cg")
assert_nil(entry.seq_name)
assert_equal("atcg", entry.seq)
assert_nil(entry.type)
end
test "#+ with qual returns correctly" do
- entry = Seq.new("test1", "at", :dna, "II") + Seq.new("test2", "cg", :dna, "JJ")
+ entry = Seq.new(seq_name: "test1", seq: "at", type: :dna, qual: "II") + Seq.new(seq_name: "test2", seq: "cg", type: :dna, qual: "JJ")
assert_nil(entry.seq_name)
assert_equal("atcg", entry.seq)
assert_equal(:dna, entry.type)
test "#<< with different types raises" do
@entry.seq = "atcg"
- assert_raise(SeqError) { @entry << Seq.new("test", "atcg", :dna) }
+ assert_raise(SeqError) { @entry << Seq.new(seq_name: "test", seq: "atcg", type: :dna) }
end
test "#<< with missing qual in one entry raises" do
@entry.seq = "atcg"
@entry.type = :dna
- assert_raise(SeqError) { @entry << Seq.new("test", "atcg", :dna, "IIII") }
+ assert_raise(SeqError) { @entry << Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "IIII") }
@entry.qual = "IIII"
- assert_raise(SeqError) { @entry << Seq.new("test", "atcg", :dna) }
+ assert_raise(SeqError) { @entry << Seq.new(seq_name: "test", seq: "atcg", type: :dna) }
end
test "#<< with nil qual in both entries dont raise" do
@entry.seq = "atcg"
- assert_nothing_raised { @entry << Seq.new("test", "atcg") }
+ assert_nothing_raised { @entry << Seq.new(seq_name: "test", seq: "atcg") }
end
test "#<< with qual in both entries dont raise" do
@entry.seq = "atcg"
@entry.type = :dna
@entry.qual = "IIII"
- assert_nothing_raised { @entry << Seq.new("test", "atcg", :dna, "IIII") }
+ assert_nothing_raised { @entry << Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "IIII") }
end
test "#<< without qual returns correctly" do
@entry.seq = "atcg"
- @entry << Seq.new("test", "ATCG")
+ @entry << Seq.new(seq_name: "test", seq: "ATCG")
assert_equal("atcgATCG", @entry.seq)
end
@entry.seq = "atcg"
@entry.type = :dna
@entry.qual = "HHHH"
- @entry << Seq.new("test", "ATCG", :dna, "IIII")
+ @entry << Seq.new(seq_name: "test", seq: "ATCG", type: :dna, qual: "IIII")
assert_equal("atcgATCG", @entry.seq)
assert_equal("HHHHIIII", @entry.qual)
end
test "#[] with qual returns correctly" do
- entry = Seq.new("test", "atcg", :dna, "FGHI")
+ entry = Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "FGHI")
e = entry[2]
end
test "#[] without qual returns correctly" do
- entry = Seq.new("test", "atcg")
+ entry = Seq.new(seq_name: "test", seq: "atcg")
e = entry[2]
end
test "[]= with qual returns correctly" do
- entry = Seq.new("test", "atcg", :dna, "FGHI")
+ entry = Seq.new(seq_name: "test", seq: "atcg", type: :dna, qual: "FGHI")
- entry[0] = Seq.new("foo", "T", :dna, "I")
+ entry[0] = Seq.new(seq_name: "foo", seq: "T", type: :dna, qual: "I")
assert_equal("test", entry.seq_name)
assert_equal("Ttcg", entry.seq)
end
test "[]= without qual returns correctly" do
- entry = Seq.new("test", "atcg")
+ entry = Seq.new(seq_name: "test", seq: "atcg")
- entry[0] = Seq.new("foo", "T")
+ entry[0] = Seq.new(seq_name: "foo", seq: "T")
assert_equal("test", entry.seq_name)
assert_equal("Ttcg", entry.seq)