# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-#
+# Find and count MID tags in sequences in the stream.
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
CTATGTACAG CTCGATATAG CTCGCACGCG CTGCGTCACG CTGTGCGTCG
TAGCATACTG TATACATGTG TATCACTCAG TATCTGATAG TCGTGACATG
TCTGATCGAG TGACATCTCG TGAGCTAGAG TGATAGAGCG TGCGTGTGCG
- TGCTAGTCAG TGTATCACAG TGTGCGCGTG
+ TGCTAGTCAG TGTATCACAG TGTGCGCGTG ACACGACGAC ACACGTAGTA
+ ACACTACTCG ACGACACGTA ACGAGTAGAC ACGCGTCTAG ACGTACACAC
+ ACGTACTGTG ACGTAGATCG ACTACGTCTC ACTATACGAG ACTCGCGTCG
}
+
# Per-tag counter: looking up a tag that has not been seen yet stores 0
# under that tag and returns it, so `+= 1` works on first sight.
count_hash = Hash.new do |counts, mid|
  counts[mid] = 0
end

# Table of MID tags consulted via has_key? in the record loop; created empty here.
mid_hash = Hash.new
bp.each_record do |record|
if record.has_key? :SEQ
- if mid_hash.has_key? record[:SEQ][4 ... 14]
- count_hash[record[:SEQ][4 ... 14]] += 1
+ tag = record[:SEQ][4 ... 14].upcase
+
+ if mid_hash.has_key? tag
+ count_hash[tag] += 1
end
end
bp.puts record