]> git.donarmstrong.com Git - mothur.git/blob - alignnode.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / alignnode.cpp
1 /*
2  *  alignNode.cpp
3  *  bayesian
4  *
5  *  Created by Pat Schloss on 10/11/11.
6  *  Copyright 2011 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "alignnode.h"
11 #include "taxonomynode.h"
12
13 #include "bayesian.h"
14
15 /**************************************************************************************************/
16
17 AlignNode::AlignNode(string n, int l): TaxonomyNode(n, l){
18
19         alignLength = 0;
20 }
21
22 /**************************************************************************************************/
23
24 void AlignNode::printTheta(){
25     try {
26         m->mothurOut("A:\t"); for(int i=0;i<alignLength;i++){   m->mothurOut(toString(theta[i].A)+ '\t');               }       m->mothurOutEndLine();
27         m->mothurOut("T:\t"); for(int i=0;i<alignLength;i++){   m->mothurOut(toString(theta[i].T)+ '\t');               }       m->mothurOutEndLine();
28         m->mothurOut("G:\t"); for(int i=0;i<alignLength;i++){   m->mothurOut(toString(theta[i].G)+ '\t');               }       m->mothurOutEndLine();
29         m->mothurOut("C:\t"); for(int i=0;i<alignLength;i++){   m->mothurOut(toString(theta[i].C)+ '\t');               }       m->mothurOutEndLine();
30         m->mothurOut("I:\t"); for(int i=0;i<alignLength;i++){   m->mothurOut(toString(theta[i].gap)+ '\t');             }       m->mothurOutEndLine();
31     }
32         catch(exception& e) {
33                 m->errorOut(e, "AlignNode", "printTheta");
34                 exit(1);
35         }
36 }
37
38 /**************************************************************************************************/
39
40 int AlignNode::loadSequence(string& sequence){
41         try {
42         alignLength = (int)sequence.length();   //      this function runs through the alignment and increments the frequency
43         //      of each base for a particular taxon.  we are building the thetas
44         
45         if(theta.size() == 0){
46             theta.resize(alignLength);
47             columnCounts.resize(alignLength, 0);
48         }
49         
50         for(int i=0;i<alignLength;i++){
51             
52             if (m->control_pressed) { return 0; }
53             
54             char base = sequence[i];
55             
56             if(base == 'A')             {       theta[i].A++;   columnCounts[i]++;              }       //      our thetas will be alignLength x 5  
57             else if(base == 'T'){       theta[i].T++;   columnCounts[i]++;              }       //      and we ignore any position that has  
58             else if(base == 'G'){       theta[i].G++;   columnCounts[i]++;              }       //      an ambiguous base call
59             else if(base == 'C'){       theta[i].C++;   columnCounts[i]++;              }
60             else if(base == '-'){       theta[i].gap++; columnCounts[i]++;              }
61             else if(base == 'U'){       theta[i].T++;   columnCounts[i]++;              }
62         }
63         
64         numSeqs++;
65         
66         return 0;
67     }
68         catch(exception& e) {
69                 m->errorOut(e, "AlignNode", "loadSequence");
70                 exit(1);
71         }
72 }       
73
74 /**************************************************************************************************/
75
76 int AlignNode::checkTheta(){
77     try {
78         for(int i=0;i<alignLength;i++){
79             
80             if (m->control_pressed) { return 0; }
81             
82             if(theta[i].gap == columnCounts[i]){
83                 columnCounts[i] = 0;
84             }
85             //        else{
86             //            int maxCount = theta[i].A;
87             //            
88             //            if(theta[i].T > maxCount)   {    maxCount = theta[i].T;  }
89             //            if(theta[i].G > maxCount)   {    maxCount = theta[i].T;  }
90             //            if(theta[i].C > maxCount)   {    maxCount = theta[i].T;  }
91             //            if(theta[i].gap > maxCount) {    maxCount = theta[i].T;  }
92             //        
93             //            if(maxCount < columnCounts[i] * 0.25){// || maxCount == columnCounts[i]){   //remove any column where the maximum frequency is <50%
94             //                columnCounts[i] = 0;
95             //            }
96             //        }
97             
98         }
99         
100         return 0;
101     }
102         catch(exception& e) {
103                 m->errorOut(e, "AlignNode", "checkTheta");
104                 exit(1);
105         }
106 }
107
108 /**************************************************************************************************/
109
110 int AlignNode::addThetas(vector<thetaAlign> newTheta, int newNumSeqs){
111         try {
112         if(alignLength == 0){
113             alignLength = (int)newTheta.size();
114             theta.resize(alignLength);
115             columnCounts.resize(alignLength);
116         }
117         
118         for(int i=0;i<alignLength;i++){ 
119             
120             if (m->control_pressed) { return 0; }
121             
122             theta[i].A += newTheta[i].A;                columnCounts[i] += newTheta[i].A;
123             theta[i].T += newTheta[i].T;                columnCounts[i] += newTheta[i].T;
124             theta[i].G += newTheta[i].G;                columnCounts[i] += newTheta[i].G;
125             theta[i].C += newTheta[i].C;                columnCounts[i] += newTheta[i].C;
126             theta[i].gap += newTheta[i].gap;    columnCounts[i] += newTheta[i].gap;
127         }
128         
129         numSeqs += newNumSeqs;
130         
131         return 0;
132     }
133         catch(exception& e) {
134                 m->errorOut(e, "AlignNode", "addThetas");
135                 exit(1);
136         }
137 }
138
139 /**************************************************************************************************/
140
141 double AlignNode::getSimToConsensus(string& query){
142         try {
143         double similarity = 0;
144         
145         int length = 0;
146         
147         for(int i=0;i<alignLength;i++){
148             
149             if (m->control_pressed) { return similarity; }
150             
151             char base = query[i];
152             
153             if(base != '.' && base != 'N' && columnCounts[i] != 0){
154                 
155                 double fraction = 0;
156                 
157                 if(base == 'A'){
158                     fraction = (int) theta[i].A / (double) columnCounts[i];
159                     similarity += fraction;
160                     length++;
161                 }
162                 else if(base == 'T'){
163                     fraction = (int) theta[i].T / (double) columnCounts[i];
164                     similarity += fraction;
165                     length++;
166                 }
167                 else if(base == 'G'){
168                     fraction = (int) theta[i].G / (double) columnCounts[i];
169                     similarity += fraction;
170                     length++;
171                 }
172                 else if(base == 'C'){
173                     fraction = (int) theta[i].C / (double) columnCounts[i];
174                     similarity += fraction;
175                     length++;
176                 }
177                 else if(base == '-'){
178                     fraction = (int) theta[i].gap / (double) columnCounts[i];
179                     similarity += fraction;
180                     length++;
181                 }
182             }
183         }
184         
185         if(length != 0){
186             similarity /= double(length);
187         }
188         else {
189             similarity = 0;
190         }
191         
192         return similarity;      
193         
194     }
195     catch(exception& e) {
196         m->errorOut(e, "AlignNode", "getSimToConsensus");
197         exit(1);
198     }
199 }
200
201 /**************************************************************************************************/
202
203 double AlignNode::getPxGivenkj_D_j(string& query){      //P(x | k_j, D, j)
204         try {
205         double PxGivenkj_D_j = 0;
206         
207         int count = 0;
208         double alpha = 1 / (double)totalSeqs;   //flat prior
209         
210         
211         for(int s=0;s<alignLength;s++){
212             
213             if (m->control_pressed) { return PxGivenkj_D_j; }
214             
215             char base = query[s];
216             thetaAlign thetaS = theta[s];
217             
218             if(base != '.' && base != 'N' && columnCounts[s] != 0){
219                 double Nkj_s = (double)columnCounts[s]; 
220                 double nkj_si = 0;
221                 
222                 
223                 if(base == 'A')         {       nkj_si = (double)thetaS.A;              }
224                 else if(base == 'T'){   nkj_si = (double)thetaS.T;              }       
225                 else if(base == 'G'){   nkj_si = (double)thetaS.G;              }       
226                 else if(base == 'C'){   nkj_si = (double)thetaS.C;              }       
227                 else if(base == '-'){   nkj_si = (double)thetaS.gap;    }
228                 else if(base == 'U'){   nkj_si = (double)thetaS.T;              }       
229                 
230                 //                      double alpha = pow(0.2, double(Nkj_s)) + 0.0001;        //need to make 1e-4 a variable in future; this is the non-flat prior
231                 
232                 //                      if(columnCounts[s] != nkj_si){                                          //deal only with segregating sites...
233                                 double numerator = nkj_si + alpha;
234                                 double denomenator = Nkj_s + 5.0 * alpha;
235                                 
236                                 PxGivenkj_D_j += log(numerator) - log(denomenator);             
237                                 count++;
238                 //                      }
239             }
240             if(base != '.' && columnCounts[s] == 0 && thetaS.gap == 0){
241                 count = 0;
242                 break;
243             }
244             
245         }
246         
247         if(count == 0){ PxGivenkj_D_j = -1e10;  }       
248         
249         return PxGivenkj_D_j;
250     }
251     catch(exception& e) {
252         m->errorOut(e, "AlignNode", "getPxGivenkj_D_j");
253         exit(1);
254     }
255 }
256
257 /**************************************************************************************************/