]> git.donarmstrong.com Git - biopieces.git/blob - code_python/Cjung/calc_density
fixed seq qual length check
[biopieces.git] / code_python / Cjung / calc_density
1 #!/usr/bin/python
2
3 import os, string, sys, getopt, Args, optparse
4
5 record_delimiter = "\n---\n"
6
7 class Calc_density:
8         in_stream = None
9         out_stream = None
10         eo_buffer = False
11         buffer = ''
12         rec_dic = {}
13         rec_num = 0
14
15         density_dic = {}
16         win_size = 10000
17         step = win_size
18
19         ###########################################
20         def __init__(self):
21                 pass
22         ###########################################
23
24         ###########################################
25         def set_values(self, win_size, step):
26                 #if step > win_size:
27                 #       return False
28                 self.win_size = win_size
29                 self.step = step
30                 return True
31         ###########################################
32
33         ###########################################
34         def open_streams(self, input_file, output_file):
35                 if input_file == '':
36                         self.in_stream = sys.stdin
37                 else:
38                         try:
39                                 self.in_stream = open(input_file, 'r')
40                         except:
41                                 raise IOError
42
43                 if output_file == '':
44                         self.out_stream = sys.stdout
45                 else:
46                         try:
47                                 self.out_stream = open(output_file, 'w')
48                         except:
49                                 raise IOError
50         ###########################################
51
52         ###########################################
53         def close_streams(self):
54                 if self.in_stream:
55                         self.in_stream.close()
56                 if self.out_stream:
57                         self.out_stream.close()
58         ###########################################
59
60         ###########################################
61         def get_record(self):
62                 rec = ''
63                 eof_flag = False
64                 while not self.eo_buffer:
65                         if self.in_stream.isatty():
66                                 eof_flag = True
67
68                         if eof_flag:
69                                 if self.buffer == '':
70                                         self.eo_buffer = True
71                                         break
72                         else:
73                                 tmp = self.in_stream.read(1000)
74                         if not tmp:
75                                 eof_flag = True
76
77                         self.buffer = self.buffer + tmp
78                         delim_index = self.buffer.find(record_delimiter)
79                         if delim_index >= 0:
80                                 rec = self.buffer[:delim_index]
81                                 self.buffer = self.buffer[delim_index + len(record_delimiter):]
82                                 break
83                 return rec
84         ###########################################
85
86         ###########################################
87         def process_record(self, rec):
88                 lines = rec.split("\n")
89                 self.rec_num += 1
90                 self.rec_dic[self.rec_num] = {}
91
92                 chr = ''
93                 chr_beg = -1
94                 score = 1
95
96                 for l in lines:
97                         toks = l.split(": ")
98                         self.rec_dic[self.rec_num][toks[0]] = toks[1]
99                         if toks[0]=='SCORE':
100                                 score = string.atoi(toks[1])
101
102                 chr = self.rec_dic[self.rec_num]["CHR"]
103                 chr_beg = string.atoi(self.rec_dic[self.rec_num]["CHR_BEG"])
104
105                 if not self.density_dic.has_key(chr):
106                         self.density_dic[chr] = {}
107
108                 k_cur = (chr_beg / step) * step
109                 if chr_beg in range(k_cur, k_cur+win_size):
110                         if not self.density_dic[chr].has_key(k_cur):
111                                 self.density_dic[chr][k_cur] = 0
112                         self.density_dic[chr][k_cur] += score
113
114                 tmp_k_cur = k_cur
115                 while(1):
116                         k_prev = tmp_k_cur - step
117                         if not chr_beg in range(k_prev, k_prev+win_size):
118                                 break
119                         if not self.density_dic[chr].has_key(k_prev):
120                                 self.density_dic[chr][k_prev] = 0
121                         self.density_dic[chr][k_prev] += score
122                         tmp_k_cur = k_prev
123
124                 tmp_k_cur = k_cur
125                 while(1):
126                         k_next = tmp_k_cur + step
127                         if not chr_beg in range(k_next, k_next+win_size):
128                                 break
129
130                         if not self.density_dic[chr].has_key(k_next):
131                                 self.density_dic[chr][k_next] = 0
132                         self.density_dic[chr][k_next] += score
133                         tmp_k_cur = k_next
134
135                 return self.rec_num
136         ###########################################
137
138         ###########################################
139         def put_record(self, r_num):
140                 rec = self.rec_dic[r_num]
141                 for k in rec.keys():
142                         self.out_stream.write("%s: %s\n" % (k, rec[k]))
143                 print "---"
144         ###########################################
145
146         ###########################################
147         def put_new_record(self):
148                 chrs = self.density_dic.keys()
149                 chrs.sort()
150                 for chr in chrs:
151                         max_k = max(self.density_dic[chr].keys())
152                         for k in range(0, max_k+self.step, step):
153                                 chr_beg, chr_end = k, k+self.win_size
154                                 try:
155                                         count = self.density_dic[chr][k]
156                                 except:
157                                         count = 0
158                                 self.out_stream.write("WIN_CHR: %s\n" % (chr))
159                                 self.out_stream.write("WIN_CHR_BEG: %d\n" % (chr_beg))
160                                 self.out_stream.write("WIN_CHR_END: %d\n" % (chr_end))
161                                 self.out_stream.write("WIN_DENSITY: %d\n" % (count))
162                                 self.out_stream.write("---\n")
163         ###########################################
164
165         ###########################################
166         def print_usage(self, opt):
167                 #print opt
168                 bp_dir = os.environ['BP_DIR']
169                 usage_path = bp_dir + os.path.sep + "bp_usage" + os.path.sep + "calc_density.wiki"
170                 os.system("print_wiki -i %s %s" % (usage_path, opt))
171         ###########################################
172
173
174 # main
175
176 den = Calc_density()
177
178 try:
179         opts, args = getopt.getopt(sys.argv[1:], "I:O:?vxw:s:", ["stream_in=", "stream_out=", "help", "verbose", "no_stream", "window_size=", "step_size="])
180 except getopt.GetoptError, err:
181         # print help information and exit:
182         print str(err) # will print something like "option -a not recognized"
183         den.print_usage("")
184         sys.exit(2)
185
186 if len(opts)==0:
187         if os.isatty( sys.stdin.fileno() ):
188                 den.print_usage("")
189                 sys.exit(1)
190
191 stream_in = ""
192 stream_out = ""
193 verbose = False
194
195 win_size_flag = 0
196 step_flag = 0
197
198 for o, a in opts:
199         if o in ("-I", "--stream_in"):
200                 stream_in = a
201         elif o in ("-O", "--stream_out"):
202                 stream_out = a
203         elif o == "-?":
204                 den.print_usage("-?")
205                 sys.exit(1)
206         elif o == "--help":
207                 den.print_usage("-?")
208                 sys.exit(1)
209         elif o in ("-v", "--verbose"):
210                 verbose = True
211         elif o in ("-w", "--window_size"):
212                 win_size = string.atoi(a)
213                 win_size_flag = 1
214         elif o in ("-s", "--step_size"):
215                 step = string.atoi(a)
216                 step_flag = 1
217         
218 # default values
219 if not win_size_flag:
220         win_size = 10000
221 if not step_flag:
222         step = win_size         # default increment step is win_size
223
224 if not den.set_values(win_size, step):
225         sys.stderr.write("ERROR!\n")
226         sys.stderr.write("increment step must not be larger than window size\n")
227         sys.exit(1)
228
229 try:
230         den.open_streams(stream_in, stream_out)
231 except:
232         sys.stderr.write("%s\n" % ("IOError"))
233         sys.exit(1)
234
235 if len(opts)==0:
236         if not den.in_stream:
237                 den.print_usage("")
238                 sys.exit(1)
239
240
241 while True:
242         rec = den.get_record()
243         if rec=='':
244                 break
245         rec_num = den.process_record(rec)
246         den.put_record(rec_num)
247 den.put_new_record()    # print out newly generated records. i.e., WIN_CHR, WIN_CHR_BEG, WIN_CHR_END and WIN_DENSITY for this script.
248
249
250 den.close_streams()
251
252