3 # Copyright (C) 2007-2010 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
21 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
23 # This program is part of the Biopieces framework (www.biopieces.org).
25 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
27 # Find and count MID tags in sequences in the stream.
29 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# Ordered list of the MID tag sequences this script looks for.
# Order matters: the reporting loop further down numbers each tag by its
# position in this list (index + 1), so inserting or reordering entries
# changes the MID_NUM values that are emitted.
# NOTE(review): every entry here appears to be 10 bases long, presumably
# matching MID_LEN used when slicing the sequence - confirm before adding tags.
37 mids = %w{ ACGAGTGCGT ACGCTCGACA AGACGCACTC AGCACTGTAG ATCAGACACG
38 ATATCGCGAG CGTGTCTCTA CTCGCGTGTC TAGTATCAGC TCTCTATGCG
39 TGATACGTCT TACTGAGCTA CATAGTAGTG CGAGAGATAC ATACGACGTA
40 TCACGTACTA CGTCTAGTAC TCTACGTAGC TGTACTACTC ACGACTACAG
41 CGTAGACTAG TACGAGTATG TACTCTCGTG TAGAGACGAG TCGTCGCTCG
42 ACATACGCGT ACGCGAGTAT ACTACTATGT ACTGTACAGT AGACTATACT
43 AGCGTCGTCT AGTACGCTAT ATAGAGTACT CACGCTACGT CAGTAGACGT
44 CGACGTGACT TACACACACT TACACGTGAT TACAGATCGT TACGCTGTCT
45 TAGTGTAGAT TCGATCACGT TCGCACTAGT TCTAGCGACT TCTATACTAT
46 TGACGTATGT TGTGAGTAGT ACAGTATATA ACGCGATCGA ACTAGCAGTA
47 AGCTCACGTA AGTATACATA AGTCGAGAGA AGTGCTACGA CGATCGTATA
48 CGCAGTACGA CGCGTATACA CGTACAGTCA CGTACTCAGA CTACGCTCTA
49 CTATAGCGTA TACGTCATCA TAGTCGCATA TATATATACA TATGCTAGTA
50 TCACGCGAGA TCGATAGTGA TCGCTGCGTA TCTGACGTCA TGAGTCAGTA
51 TGTAGTGTGA TGTCACACGA TGTCGTCGCA ACACATACGC ACAGTCGTGC
52 ACATGACGAC ACGACAGCTC ACGTCTCATC ACTCATCTAC ACTCGCGCAC
53 AGAGCGTCAC AGCGACTAGC AGTAGTGATC AGTGACACAC AGTGTATGTC
54 ATAGATAGAC ATATAGTCGC ATCTACTGAC CACGTAGATC CACGTGTCGC
55 CATACTCTAC CGACACTATC CGAGACGCGC CGTATGCGAC CGTCGATCTC
56 CTACGACTGC CTAGTCACTC CTCTACGCTC CTGTACATAC TAGACTGCAC
57 TAGCGCGCGC TAGCTCTATC TATAGACATC TATGATACGC TCACTCATAC
58 TCATCGAGTC TCGAGCTCTC TCGCAGACAC TCTGTCTCGC TGAGTGACGC
59 TGATGTGTAC TGCTATAGAC TGCTCGCTAC ACGTGCAGCG ACTCACAGAG
60 AGACTCAGCG AGAGAGTGTG AGCTATCGCG AGTCTGACTG AGTGAGCTCG
61 ATAGCTCTCG ATCACGTGCG ATCGTAGCAG ATCGTCTGTG ATGTACGATG
62 ATGTGTCTAG CACACGATAG CACTCGCACG CAGACGTCTG CAGTACTGCG
63 CGACAGCGAG CGATCTGTCG CGCGTGCTAG CGCTCGAGTG CGTGATGACG
64 CTATGTACAG CTCGATATAG CTCGCACGCG CTGCGTCACG CTGTGCGTCG
65 TAGCATACTG TATACATGTG TATCACTCAG TATCTGATAG TCGTGACATG
66 TCTGATCGAG TGACATCTCG TGAGCTAGAG TGATAGAGCG TGCGTGTGCG
67 TGCTAGTCAG TGTATCACAG TGTGCGCGTG ACACGACGAC ACACGTAGTA
68 ACACTACTCG ACGACACGTA ACGAGTAGAC ACGCGTCTAG ACGTACACAC
69 ACGTACTGTG ACGTAGATCG ACTACGTCTC ACTATACGAG ACTCGCGTCG
# Per-tag hit counter. The default block stores 0 for a key on first access,
# so counts can be read or incremented without initialising them first.
# Note that merely reading an unseen key also inserts it with value 0.
73 count_hash = Hash.new { |hash, key| hash[key] = 0 }
# Walk the tag list together with each tag's zero-based index.
# NOTE(review): presumably this fills the mid_hash lookup that the record
# loop tests against - confirm against the loop body.
76 mids.each_with_index do |mid, i|
# Declare the optional command-line option --pos / -p: an unsigned integer
# defaulting to 0, with no allowed/disallowed value lists.
# NOTE(review): presumably the offset into SEQ where the MID tag starts,
# since the record loop slices the sequence from `pos` - confirm.
81 casts << {:long=>'pos', :short=>'p', :type=>'uint', :mandatory=>false, :default=>0, :allowed=>nil, :disallowed=>nil}
# Parse the command-line arguments against the declared option specs.
85 options = bp.parse(ARGV, casts)
# Scan every record in the stream; only records carrying a :SEQ key are examined.
89 bp.each_record do |record|
90 if record.has_key? :SEQ
# Take MID_LEN characters starting at pos (exclusive range) and upper-case
# them, so the comparison against the tag list is case-insensitive.
# NOTE(review): String#[] returns nil when pos is greater than the sequence
# length, in which case .upcase raises NoMethodError; a sequence shorter than
# pos + MID_LEN yields a truncated tag that simply will not match.
91 tag = record[:SEQ][pos ... pos + MID_LEN].upcase
# Only tags found in the mid_hash lookup are treated as hits.
93 if mid_hash.has_key? tag
# Report phase: visit the tags in list order and describe each tag that was
# seen at least once.
100 mids.each_with_index do |mid, i|
# Tags with a zero count are skipped. Reading count_hash here inserts a 0
# entry for unseen tags because of the hash's default block.
101 if count_hash[mid] > 0
# Populate the output record: fixed record type, 1-based tag number
# (list index + 1), the observed count, and the tag sequence itself.
103 record[:REC_TYPE] = "MID"
104 record[:MID_NUM] = i + 1
105 record[:MID_COUNT] = count_hash[mid]
106 record[:MID_SEQ] = mid
111 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<