]> git.donarmstrong.com Git - mothur.git/blob - decalc.cpp
removing mallard
[mothur.git] / decalc.cpp
1 /*
2  *  decalc.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "decalc.h"
11
12 //***************************************************************************************************************
13 void DeCalculator::setMask(string m) { 
14         try {
15                 seqMask = m; 
16                 
17                 if (seqMask.length() != 0) {
18                         //whereever there is a base in the mask, save that value is query and subject
19                         for (int i = 0; i < seqMask.length(); i++) {
20                                 if (isalpha(seqMask[i])) {
21                                         h.insert(i);
22                                 }
23                         }
24                 }else {
25                         for (int i = 0; i < alignLength; i++) {   h.insert(i);  }
26                 }
27         }
28         catch(exception& e) {
29                 errorOut(e, "DeCalculator", "setMask");
30                 exit(1);
31         } 
32 }
33 //***************************************************************************************************************
34 void DeCalculator::runMask(Sequence* seq) {
35         try{
36                 
37                 string q = seq->getAligned();
38                 string tempQuery = "";
39                 
40                 //whereever there is a base in the mask, save that value is query and subject
41                 set<int>::iterator setit;
42                 for ( setit=h.begin() ; setit != h.end(); setit++ )  {
43                         tempQuery += q[*setit];
44                 }
45                 
46                 //save masked values
47                 seq->setAligned(tempQuery);
48                 seq->setUnaligned(tempQuery);
49         }
50         catch(exception& e) {
51                 errorOut(e, "DeCalculator", "runMask");
52                 exit(1);
53         }
54 }
55 //***************************************************************************************************************
56 //num is query's spot in querySeqs
57 void DeCalculator::trimSeqs(Sequence* query, Sequence* subject, map<int, int>& trim) {
58         try {
59                 
60                 string q = query->getAligned();
61                 string s = subject->getAligned();
62                 
63                 int front = 0;
64                 for (int i = 0; i < q.length(); i++) {
65 //cout << "query = " << q[i] << " subject = " << s[i] << endl;
66                         if (isalpha(q[i]) && isalpha(s[i])) { front = i; break;  }
67                 }
68 //cout << endl << endl;         
69                 int back = 0;           
70                 for (int i = q.length(); i >= 0; i--) {
71 //cout << "query = " << q[i] << " subject = " << s[i] << endl;
72                         if (isalpha(q[i]) && isalpha(s[i])) { back = i; break;  }
73                 }
74                 
75                 trim[front] = back;
76                 
77         }
78         catch(exception& e) {
79                 errorOut(e, "DeCalculator", "trimSeqs");
80                 exit(1);
81         }
82 }
83 //***************************************************************************************************************
84 vector<int>  DeCalculator::findWindows(Sequence* query, int front, int back, int& size, int increment) {
85         try {
86                 
87                 vector<int> win; 
88                 
89                 int cutoff = back - front;  //back - front 
90                         
91                 //if window is set to default
92                 if (size == 0) {  if (cutoff > 1200) {  size = 300; }
93                                                         else{  size = (cutoff / 4); }  } 
94                 else if (size > (cutoff / 4)) { 
95                                 mothurOut("You have selected to large a window size for sequence " + query->getName() + ".  I will choose an appropriate window size."); mothurOutEndLine();
96                                 size = (cutoff / 4); 
97                 }
98         
99         /*      string seq = query->getAligned().substr(front, cutoff);
100                         
101                 //count bases
102                 int numBases = 0;
103                 for (int l = 0; l < seq.length(); l++) {  if (isalpha(seq[l])) { numBases++; }  }
104 //cout << "num Bases = " << numBases << endl;                   
105                 //save start of seq
106                 win.push_back(front);
107 //cout << front << '\t';                
108                 //move ahead increment bases at a time until all bases are in a window
109                 int countBases = 0;
110                 int totalBases = 0;  //used to eliminate window of blanks at end of sequence
111                         
112                 seq = query->getAligned();
113                 for (int m = front; m < (back - size) ; m++) {
114                                 
115                         //count number of bases you see
116                         if (isalpha(seq[m])) { countBases++;  }
117                                 
118                         //if you have seen enough bases to make a new window
119                         if (countBases >= increment) {
120                                 //total bases is the number of bases in a window already.
121                                 totalBases += countBases;
122 //cout << "total bases = " << totalBases << endl;
123                                 win.push_back(m);  //save spot in alignment
124 //cout << m << '\t';
125                                 countBases = 0;                         //reset bases you've seen in this window
126                         }
127                                 
128                         //no need to continue if all your bases are in a window
129                         if (totalBases == numBases) {   break;  }
130                 }
131         
132
133                 //get last window if needed
134                 if (totalBases < numBases) {   win.push_back(back-size);  }
135 //cout << endl << endl;
136 */      
137                 
138                 //this follows wigeon, but we may want to consider that it chops off the end values if the sequence cannot be evenly divided into steps
139                 for (int m = front; m < (back - size) ; m+=increment) {  win.push_back(m);  }
140
141
142         
143                 return win;
144         
145         }
146         catch(exception& e) {
147                 errorOut(e, "DeCalculator", "findWindows");
148                 exit(1);
149         }
150 }
151
152 //***************************************************************************************************************
153 vector<float> DeCalculator::calcObserved(Sequence* query, Sequence* subject, vector<int> window, int size) {
154         try {
155                 
156                 vector<float> temp;     
157                 //int gaps = 0;         
158                 for (int m = 0; m < window.size(); m++) {
159                                                 
160                         string seqFrag = query->getAligned().substr(window[m], size);
161                         string seqFragsub = subject->getAligned().substr(window[m], size);
162                                 
163                         int diff = 0;
164                         for (int b = 0; b < seqFrag.length(); b++) {
165                                 //if at least one is a base and they are not equal
166                                 if( (isalpha(seqFrag[b]) || isalpha(seqFragsub[b])) && (seqFrag[b] != seqFragsub[b]) ) { diff++; }
167                                 
168                                 //ignore gaps
169                                 //if((!isalpha(seqFrag[b])) && (!isalpha(seqFragsub[b]))) { gaps++; }
170                         }
171                
172                         //percentage of mismatched bases
173                         float dist;
174                         
175                         //if the whole fragment is 0 distance = 0
176                         //if ((seqFrag.length()-gaps) == 0)  { dist =  0.0; }
177                
178                         //percentage of mismatched bases
179                         //else {  dist = diff / (float) (seqFrag.length()-gaps) * 100;   } 
180                         
181                         dist = diff / (float) (seqFrag.length()) * 100; 
182                         
183                         temp.push_back(dist);
184                 }
185                         
186                 return temp;
187         }
188         catch(exception& e) {
189                 errorOut(e, "DeCalculator", "calcObserved");
190                 exit(1);
191         }
192 }
193 //***************************************************************************************************************
194 float DeCalculator::calcDist(Sequence* query, Sequence* subject, int front, int back) {
195         try {
196                 
197                 //so you only look at the trimmed part of the sequence
198                 int cutoff = back - front;  
199                 int gaps = 0;
200                         
201                 //from first startpoint with length back-front
202                 string seqFrag = query->getAligned().substr(front, cutoff);
203                 string seqFragsub = subject->getAligned().substr(front, cutoff);
204                                                                                                                 
205                 int diff = 0;
206                 for (int b = 0; b < seqFrag.length(); b++) {
207                         //ignore gaps
208                         if((!isalpha(seqFrag[b])) && (!isalpha(seqFragsub[b]))) { gaps++; }
209                         if (seqFrag[b] != seqFragsub[b]) { diff++; }
210                 }
211                 
212                 //if the whole fragment is 0 distance = 0
213                 if ((seqFrag.length()-gaps) == 0)  { return 0.0; }
214                
215                 //percentage of mismatched bases
216                 float dist = diff / (float) (seqFrag.length()-gaps) * 100;       
217                                 
218                 return dist;
219         }
220         catch(exception& e) {
221                 errorOut(e, "DeCalculator", "calcDist");
222                 exit(1);
223         }
224 }
225
226 //***************************************************************************************************************
227 vector<float> DeCalculator::calcExpected(vector<float> qav, float coef) {
228         try {
229                 
230                 //for each window
231                 vector<float> queryExpected;
232                         
233                 for (int m = 0; m < qav.size(); m++) {          
234                                 
235                         float expected = qav[m] * coef;
236                                 
237                         queryExpected.push_back(expected);      
238                 }
239                         
240                 return queryExpected;
241                                 
242         }
243         catch(exception& e) {
244                 errorOut(e, "DeCalculator", "calcExpected");
245                 exit(1);
246         }
247 }
248 //***************************************************************************************************************
249 float DeCalculator::calcDE(vector<float> obs, vector<float> exp) {
250         try {
251                 
252                 //for each window
253                 float sum = 0.0;  //sum = sum from 1 to m of (oi-ei)^2
254                 for (int m = 0; m < obs.size(); m++) {          sum += ((obs[m] - exp[m]) * (obs[m] - exp[m]));         }
255                         
256                 float de = sqrt((sum / (obs.size() - 1)));
257                         
258                 return de;
259         }
260         catch(exception& e) {
261                 errorOut(e, "DeCalculator", "calcDE");
262                 exit(1);
263         }
264 }
265
266 //***************************************************************************************************************
267
268 vector<float> DeCalculator::calcFreq(vector<Sequence*> seqs, string filename) {
269         try {
270
271                 vector<float> prob;
272                 string freqfile = getRootName(filename) + "freq";
273                 ofstream outFreq;
274                 
275                 openOutputFile(freqfile, outFreq);
276                 
277                 string length = toString(seqs.size());  //if there are 5000 seqs in the template then set precision to 3
278                 int precision = length.length() - 1;
279                 
280                 //format output
281                 outFreq.setf(ios::fixed, ios::floatfield); outFreq.setf(ios::showpoint);
282                 
283                 //at each position in the sequence
284                 for (int i = 0; i < seqs[0]->getAligned().length(); i++) {
285                         
286                         vector<int> freq;   freq.resize(4,0);
287                         int gaps = 0;
288                         
289                         //find the frequency of each nucleotide
290                         for (int j = 0; j < seqs.size(); j++) {
291                                 
292                                 char value = seqs[j]->getAligned()[i];
293                                 
294                                 if(toupper(value) == 'A')                                                                       {       freq[0]++;      }
295                                 else if(toupper(value) == 'T' || toupper(value) == 'U')         {       freq[1]++;      }
296                                 else if(toupper(value) == 'G')                                                          {       freq[2]++;      }
297                                 else if(toupper(value) == 'C')                                                          {       freq[3]++;      }
298                                 else { gaps++; }
299                         }
300                         
301                         //find base with highest frequency
302                         int highest = 0;
303                         for (int m = 0; m < freq.size(); m++) {   if (freq[m] > highest) {  highest = freq[m];  }               }
304                         
305                         float highFreq = highest / (float) (seqs.size());       
306                         
307                         float Pi;
308                         Pi =  (highFreq - 0.25) / 0.75; 
309                         
310                         //cannot have probability less than 0.
311                         if (Pi < 0) { Pi = 0.0; }
312                         
313                         //saves this for later
314                         outFreq << setprecision(precision) << i << '\t' << highFreq << endl;
315         
316                         if (h.count(i) > 0) {
317                                 prob.push_back(Pi); 
318                         }
319                 }
320                 
321                 outFreq.close();
322                 
323                 return prob;
324                                 
325         }
326         catch(exception& e) {
327                 errorOut(e, "DeCalculator", "calcFreq");
328                 exit(1);
329         }
330 }
331 //***************************************************************************************************************
332 vector<float>  DeCalculator::findQav(vector<int> window, int size, vector<float> probabilityProfile) {
333         try {
334                 vector<float>  averages; 
335                                 
336                 //for each window find average
337                 for (int m = 0; m < window.size(); m++) {
338                                 
339                         float average = 0.0;
340                                 
341                         //while you are in the window for this sequence
342                         int count = 0;
343                         for (int j = window[m]; j < (window[m]+size); j++) {   
344                                 average += probabilityProfile[j];
345                                 count++;
346                         }
347                                 
348                         average = average / count;
349         
350                         //save this windows average
351                         averages.push_back(average);
352                 }
353                                 
354                 return averages;
355         }
356         catch(exception& e) {
357                 errorOut(e, "DeCalculator", "findQav");
358                 exit(1);
359         }
360 }
361 //********************************************************************************************************************
362 //sorts lowest to highest
363 inline bool compareQuanMembers(quanMember left, quanMember right){
364         return (left.score < right.score);      
365
366 //***************************************************************************************************************
367 //seqs have already been masked
368 vector< vector<quanMember> > DeCalculator::getQuantiles(vector<Sequence*> seqs, vector<int> windowSizesTemplate, int window, vector<float> probProfile, int increment, int start, int end, vector<float>& highestDE) {
369         try {
370                 vector< vector<quanMember> > quan; 
371                 
372                 //percentage of mismatched pairs 1 to 100
373                 quan.resize(100);
374                 
375                 //for each sequence
376                 for(int i = start; i < end; i++){
377                 
378                         mothurOut("Processing sequence " + toString(i)); mothurOutEndLine();
379                         Sequence* query = new Sequence(seqs[i]->getName(), seqs[i]->getAligned());
380                         
381                         //compare to every other sequence in template
382                         for(int j = 0; j < i; j++){
383                                 
384                                 Sequence* subject = new Sequence(seqs[j]->getName(), seqs[j]->getAligned());
385                                 
386                                 map<int, int> trim;
387                                 map<int, int>::iterator it;
388                                 
389                                 trimSeqs(query, subject, trim);
390                                 
391                                 it = trim.begin();
392                                 int front = it->first; int back = it->second;
393                                 
394                                 //reset window for each new comparison
395                                 windowSizesTemplate[i] = window;
396                                 
397                                 vector<int> win = findWindows(query, front, back, windowSizesTemplate[i], increment);
398                                 
399                                 vector<float> obsi = calcObserved(query, subject, win, windowSizesTemplate[i]);
400                                 
401                                 vector<float> q = findQav(win, windowSizesTemplate[i], probProfile);
402                                                                         
403                                 float alpha = getCoef(obsi, q);
404                                                 
405                                 vector<float> exp = calcExpected(q, alpha);
406                                 
407                                 float de = calcDE(obsi, exp);
408                                                                 
409                                 float dist = calcDist(query, subject, front, back); 
410                                 
411                                 dist = ceil(dist);
412                                 
413                                 quanMember newScore(de, i, j);
414                                 
415                                 //dist-1 because vector indexes start at 0.
416                                 quan[dist-1].push_back(newScore);
417                                 
418                                 //save highestDE
419                                 if (de > highestDE[i])  { highestDE[i] = de;  }
420                                 if(de > highestDE[j])   { highestDE[j] = de;  }
421                                 
422                                 delete subject;
423                         }
424                         
425                         delete query;
426                 }
427
428                 return quan;
429                                                 
430         }
431         catch(exception& e) {
432                 errorOut(e, "DeCalculator", "getQuantiles");
433                 exit(1);
434         }
435 }
436
437 //***************************************************************************************************************
438 //this was going to be used by pintail to increase the sensitivity of the chimera detection, but it wasn't quite right.  may want to revisit in the future...
439 vector< vector<float> > DeCalculator::removeObviousOutliers(vector< vector<quanMember> >& quantiles, int num) {
440         try {
441                 vector< vector<float> > quan; 
442                 quan.resize(100);
443         
444                 /*vector<quanMember> contributions;  
445                 vector<int> seen;  //seen[0] is the number of outliers that template seqs[0] was part of.
446                 seen.resize(num,0);
447                                 
448                 //find contributions
449                 for (int i = 0; i < quantiles.size(); i++) {
450                 
451                         //find mean of this quantile score
452                         sort(quantiles[i].begin(), quantiles[i].end(), compareQuanMembers);
453                         
454                         float high = quantiles[i][int(quantiles[i].size() * 0.99)].score;
455                         float low =  quantiles[i][int(quantiles[i].size() * 0.01)].score;
456                 
457                         //look at each value in quantiles to see if it is an outlier
458                         for (int j = 0; j < quantiles[i].size(); j++) {
459                                 
460                                 //is this score between 1 and 99%
461                                 if ((quantiles[i][j].score > low) && (quantiles[i][j].score < high)) {
462                                         
463                                 }else {
464                                         //add to contributions
465                                         contributions.push_back(quantiles[i][j]);
466                                         seen[quantiles[i][j].member1]++;
467                                         seen[quantiles[i][j].member2]++;
468                                 }
469                         }
470                 }
471
472                 //find contributer with most offending score related to it
473                 int largestContrib = findLargestContrib(seen);
474         
475                 //while you still have guys to eliminate
476                 while (contributions.size() > 0) {
477                 
478                         mothurOut("Removing scores contributed by sequence " + toString(largestContrib) + " in your template file."); mothurOutEndLine();
479                         
480                         //remove from quantiles all scores that were made using this largestContrib
481                         for (int i = 0; i < quantiles.size(); i++) {
482 //cout << "before remove " << quantiles[i].size() << '\t';
483                                 removeContrib(largestContrib, quantiles[i]);
484 //cout << "after remove " << quantiles[i].size() << endl;
485                         }
486 //cout << count << " largest contrib = " << largestContrib << endl;  count++;
487                         //remove from contributions all scores that were made using this largestContrib
488                         removeContrib(largestContrib, contributions);
489                         
490                         //"erase" largestContrib
491                         seen[largestContrib] = -1;
492                         
493                         //get next largestContrib
494                         largestContrib = findLargestContrib(seen);
495                 }
496 ABOVE IS ATTEMPT #1             
497 **************************************************************************************************
498 BELOW IS ATTEMPT #2             
499                 vector<int> marked = returnObviousOutliers(quantiles, num);
500                 vector<int> copyMarked = marked;
501                 
502                 //find the 99% of marked
503                 sort(copyMarked.begin(), copyMarked.end());
504                 int high = copyMarked[int(copyMarked.size() * 0.99)];
505 cout << "high = " << high << endl;              
506                 
507                 for(int i = 0; i < marked.size(); i++) {
508                         if (marked[i] > high) { 
509                                 mothurOut("Removing scores contributed by sequence " + toString(marked[i]) + " in your template file."); mothurOutEndLine();
510                                 for (int i = 0; i < quantiles.size(); i++) {
511                                         removeContrib(marked[i], quantiles[i]);
512                                 }
513                         }
514
515                 }
516                 
517                 
518                 //adjust quantiles
519                 for (int i = 0; i < quantiles.size(); i++) {
520                         vector<float> temp;
521                         
522                         if (quantiles[i].size() == 0) {
523                                 //in case this is not a distance found in your template files
524                                 for (int g = 0; g < 6; g++) {
525                                         temp.push_back(0.0);
526                                 }
527                         }else{
528                                 
529                                 sort(quantiles[i].begin(), quantiles[i].end(), compareQuanMembers);
530                                 
531                                 //save 10%
532                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.10)].score);
533                                 //save 25%
534                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.25)].score);
535                                 //save 50%
536                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.5)].score);
537                                 //save 75%
538                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.75)].score);
539                                 //save 95%
540                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.95)].score);
541                                 //save 99%
542                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.99)].score);
543                                 
544                         }
545                         
546                         quan[i] = temp;
547                         
548                 }
549 */
550                 return quan;
551         }
552         catch(exception& e) {
553                 errorOut(e, "DeCalculator", "removeObviousOutliers");
554                 exit(1);
555         }
556 }
557 //***************************************************************************************************************
558 //follows Mallard algorythn in paper referenced from mallard class
559 vector<int> DeCalculator::returnObviousOutliers(vector< vector<quanMember> > quantiles, int num) {
560         try {
561                 vector< vector<float> > quan; 
562                 quan.resize(100);
563         
564                 map<quanMember*, float> contributions;  //map of quanMember to distance from high or low - how bad is it.
565                 vector<int> marked;  //marked[0] is the penalty of template seqs[0]. the higher the penalty the more likely the sequence is chimeric
566                 marked.resize(num,0);
567                                 
568                 //find contributions
569                 for (int i = 0; i < quantiles.size(); i++) {
570                 
571                         //find mean of this quantile score
572                         sort(quantiles[i].begin(), quantiles[i].end(), compareQuanMembers);
573                         
574                         float high = quantiles[i][int(quantiles[i].size() * 0.99)].score;
575                 
576                         //look at each value in quantiles to see if it is an outlier
577                         for (int j = 0; j < quantiles[i].size(); j++) {
578                                 
579                                 //is this score between above 99%
580                                 if (quantiles[i][j].score > high) {
581                                         //find out how "bad" of an outlier you are - so you can rank the outliers
582                                         float dist = quantiles[i][j].score - high;
583                                         contributions[&(quantiles[i][j])] = dist;
584                                         
585                                         //penalizing sequences for being in multiple outliers
586                                         marked[quantiles[i][j].member1]++;
587                                         marked[quantiles[i][j].member2]++;
588                                 }
589                         }
590                 }
591
592                 //find contributer with most offending score related to it
593                 vector<quanMember> outliers = sortContrib(contributions);
594                 
595                 //go through the outliers marking the potential chimeras
596                 for (int i = 0; i < outliers.size(); i++) {
597                         
598                         //who is responsible for this outlying score?  
599                         //if member1 has greater score mark him
600                         //if member2 has greater score mark her
601                         //if they are the same mark both
602                         if (marked[outliers[i].member1] > marked[outliers[i].member2])                          {       marked[outliers[i].member1]++;  }
603                         else if (marked[outliers[i].member2] > marked[outliers[i].member1])                     {       marked[outliers[i].member2]++;  }
604                         else if (marked[outliers[i].member2] == marked[outliers[i].member1])            {       marked[outliers[i].member2]++;  marked[outliers[i].member1]++;  }
605                 }
606                 
607                 return marked;
608         }
609         catch(exception& e) {
610                 errorOut(e, "DeCalculator", "removeObviousOutliers");
611                 exit(1);
612         }
613 }
614 //***************************************************************************************************************
615 //put quanMember in the vector based on how far they are from the 99% or 1%.  Biggest offenders in front.
616 vector<quanMember> DeCalculator::sortContrib(map<quanMember*, float> quan) {
617         try{
618                 
619                 vector<quanMember> newQuan;
620                 map<quanMember*, float>::iterator it;
621                 
622                 while (quan.size() > 0) {
623                         
624                          map<quanMember*, float>::iterator largest = quan.begin(); 
625                           
626                         //find biggest member
627                         for (it = quan.begin(); it != quan.end(); it++) {
628                                 if (it->second > largest->second) {  largest = it;  }
629                         }
630 cout << largest->second << '\t' << largest->first->score << '\t' << largest->first->member1 << '\t' << largest->first->member2 << endl;
631                         //save it 
632                         newQuan.push_back(*(largest->first));
633                 
634                         //erase from quan
635                         quan.erase(largest);
636                 }
637                 
638                 return newQuan;
639                 
640         }
641         catch(exception& e) {
642                 errorOut(e, "DeCalculator", "sortContrib");
643                 exit(1);
644         }
645 }
646
647 //***************************************************************************************************************
648 //used by removeObviousOutliers which was attempt to increase sensitivity of chimera detection...not currently used...
649 /*int DeCalculator::findLargestContrib(vector<int> seen) {
650         try{
651                 
652                 int largest = 0;
653                 
654                 int largestContribs;
655                 
656                 for (int i = 0; i < seen.size(); i++)  {  
657                         
658                         if (seen[i] > largest) {
659                                 largestContribs = i;
660                                 largest = seen[i];
661                         }
662                 }
663                 
664                 return largestContribs;
665                 
666         }
667         catch(exception& e) {
668                 errorOut(e, "DeCalculator", "findLargestContribs");
669                 exit(1);
670         }
671 }
672 //***************************************************************************************************************
673 void DeCalculator::removeContrib(int bad, vector<quanMember>& quan) {
674         try{
675         
676                 vector<quanMember> newQuan;
677                 for (int i = 0; i < quan.size(); i++)  {  
678                         if ((quan[i].member1 != bad) && (quan[i].member2 != bad) ) {  
679                                 newQuan.push_back(quan[i]);  
680                         }
681                 }
682                 
683                 quan = newQuan;
684                 
685         }
686         catch(exception& e) {
687                 errorOut(e, "DeCalculator", "removeContrib");
688                 exit(1);
689         }
690 }
691 */
692 //***************************************************************************************************************
693 float DeCalculator::findAverage(vector<float> myVector) {
694         try{
695                 
696                 float total = 0.0;
697                 for (int i = 0; i < myVector.size(); i++)  {  total += myVector[i];  }
698                 
699                 float average = total / (float) myVector.size();
700                 
701                 return average;
702                 
703         }
704         catch(exception& e) {
705                 errorOut(e, "DeCalculator", "findAverage");
706                 exit(1);
707         }
708 }
709
710 //***************************************************************************************************************
711 float DeCalculator::getCoef(vector<float> obs, vector<float> qav) {
712         try {
713         
714                 //find average prob for this seqs windows
715                 float probAverage = findAverage(qav);
716                                 
717                 //find observed average 
718                 float obsAverage = findAverage(obs);
719                         
720                 float coef = obsAverage / probAverage;
721                                                 
722                 return coef;
723         }
724         catch(exception& e) {
725                 errorOut(e, "DeCalculator", "getCoef");
726                 exit(1);
727         }
728 }
729 //***************************************************************************************************************
730
731