3 # Copyright (C) 2007-2010 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
21 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
23 # This program is part of the Biopieces framework (www.biopieces.org).
25 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
29 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# Known MID tag sequences. Every entry is 10 bases long, which matches the
# 10-character window later sliced out of each record's :SEQ value.
# Order is significant: further down, a tag's 1-based position in this list is
# what gets reported as MID_NUM.
# NOTE(review): "MID" presumably refers to Roche/454 multiplex identifier
# barcodes - confirm against the Biopieces documentation.
# NOTE(review): the closing delimiter of this %w{ } literal is not visible in
# this excerpt.
35 mids = %w{ ACGAGTGCGT ACGCTCGACA AGACGCACTC AGCACTGTAG ATCAGACACG
36 ATATCGCGAG CGTGTCTCTA CTCGCGTGTC TAGTATCAGC TCTCTATGCG
37 TGATACGTCT TACTGAGCTA CATAGTAGTG CGAGAGATAC ATACGACGTA
38 TCACGTACTA CGTCTAGTAC TCTACGTAGC TGTACTACTC ACGACTACAG
39 CGTAGACTAG TACGAGTATG TACTCTCGTG TAGAGACGAG TCGTCGCTCG
40 ACATACGCGT ACGCGAGTAT ACTACTATGT ACTGTACAGT AGACTATACT
41 AGCGTCGTCT AGTACGCTAT ATAGAGTACT CACGCTACGT CAGTAGACGT
42 CGACGTGACT TACACACACT TACACGTGAT TACAGATCGT TACGCTGTCT
43 TAGTGTAGAT TCGATCACGT TCGCACTAGT TCTAGCGACT TCTATACTAT
44 TGACGTATGT TGTGAGTAGT ACAGTATATA ACGCGATCGA ACTAGCAGTA
45 AGCTCACGTA AGTATACATA AGTCGAGAGA AGTGCTACGA CGATCGTATA
46 CGCAGTACGA CGCGTATACA CGTACAGTCA CGTACTCAGA CTACGCTCTA
47 CTATAGCGTA TACGTCATCA TAGTCGCATA TATATATACA TATGCTAGTA
48 TCACGCGAGA TCGATAGTGA TCGCTGCGTA TCTGACGTCA TGAGTCAGTA
49 TGTAGTGTGA TGTCACACGA TGTCGTCGCA ACACATACGC ACAGTCGTGC
50 ACATGACGAC ACGACAGCTC ACGTCTCATC ACTCATCTAC ACTCGCGCAC
51 AGAGCGTCAC AGCGACTAGC AGTAGTGATC AGTGACACAC AGTGTATGTC
52 ATAGATAGAC ATATAGTCGC ATCTACTGAC CACGTAGATC CACGTGTCGC
53 CATACTCTAC CGACACTATC CGAGACGCGC CGTATGCGAC CGTCGATCTC
54 CTACGACTGC CTAGTCACTC CTCTACGCTC CTGTACATAC TAGACTGCAC
55 TAGCGCGCGC TAGCTCTATC TATAGACATC TATGATACGC TCACTCATAC
56 TCATCGAGTC TCGAGCTCTC TCGCAGACAC TCTGTCTCGC TGAGTGACGC
57 TGATGTGTAC TGCTATAGAC TGCTCGCTAC ACGTGCAGCG ACTCACAGAG
58 AGACTCAGCG AGAGAGTGTG AGCTATCGCG AGTCTGACTG AGTGAGCTCG
59 ATAGCTCTCG ATCACGTGCG ATCGTAGCAG ATCGTCTGTG ATGTACGATG
60 ATGTGTCTAG CACACGATAG CACTCGCACG CAGACGTCTG CAGTACTGCG
61 CGACAGCGAG CGATCTGTCG CGCGTGCTAG CGCTCGAGTG CGTGATGACG
62 CTATGTACAG CTCGATATAG CTCGCACGCG CTGCGTCACG CTGTGCGTCG
63 TAGCATACTG TATACATGTG TATCACTCAG TATCTGATAG TCGTGACATG
64 TCTGATCGAG TGACATCTCG TGAGCTAGAG TGATAGAGCG TGCGTGTGCG
65 TGCTAGTCAG TGTATCACAG TGTGCGCGTG
# Per-tag occurrence counter. The default block stores 0 under any key on its
# first access, so merely reading a missing key also inserts it.
68 count_hash = Hash.new { |hash, key| hash[key] = 0 }
# NOTE(review): the body of this loop is not visible in this excerpt;
# presumably it populates mid_hash (used for the membership test further down)
# from mids - confirm.
71 mids.each_with_index do |mid, i|
# Hands ARGV and casts to bp.parse and keeps the result in options. Both bp and
# casts are defined on lines that are not visible here.
79 options = bp.parse(ARGV, casts)
# Iterate over the records yielded by bp and tally the ones carrying a known tag.
81 bp.each_record do |record|
# Only records that have a :SEQ key are examined.
82 if record.has_key? :SEQ
# The candidate tag is the 10 characters at 0-based offsets 4..13 of :SEQ (the
# three-dot range excludes index 14); it is counted only if mid_hash knows it.
# NOTE(review): the 4 leading characters skipped here are presumably a fixed
# key/adaptor prefix - confirm.
83 if mid_hash.has_key? record[:SEQ][4 ... 14]
84 count_hash[record[:SEQ][4 ... 14]] += 1
# Report phase: walk the tag list in its original order and, for each tag that
# was counted at least once, fill in the fields of a record.
90 mids.each_with_index do |mid, i|
# Tags with a zero count are skipped. Because of count_hash's default block,
# this lookup stores a 0 entry for tags that were never seen.
91 if count_hash[mid] > 0
# NOTE(review): the line that creates `record` (and whatever emits it
# afterwards) is not visible in this excerpt.
93 record[:REC_TYPE] = "MID"
# 1-based position of the tag within mids.
94 record[:MID_NUM] = i + 1
# Number of input records whose :SEQ window matched this tag.
95 record[:MID_COUNT] = count_hash[mid]
96 record[:MID_SEQ] = mid
101 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<