5 * Created by Pat Schloss on 10/11/11.
6 * Copyright 2011 Patrick D. Schloss. All rights reserved.
10 #include "alignnode.h"
11 #include "taxonomynode.h"
15 /**************************************************************************************************/
17 AlignNode::AlignNode(string n, int l): TaxonomyNode(n, l){
22 /**************************************************************************************************/
24 void AlignNode::printTheta(){
26 m->mothurOut("A:\t"); for(int i=0;i<alignLength;i++){ m->mothurOut(toString(theta[i].A)+ '\t'); } m->mothurOutEndLine();
27 m->mothurOut("T:\t"); for(int i=0;i<alignLength;i++){ m->mothurOut(toString(theta[i].T)+ '\t'); } m->mothurOutEndLine();
28 m->mothurOut("G:\t"); for(int i=0;i<alignLength;i++){ m->mothurOut(toString(theta[i].G)+ '\t'); } m->mothurOutEndLine();
29 m->mothurOut("C:\t"); for(int i=0;i<alignLength;i++){ m->mothurOut(toString(theta[i].C)+ '\t'); } m->mothurOutEndLine();
30 m->mothurOut("I:\t"); for(int i=0;i<alignLength;i++){ m->mothurOut(toString(theta[i].gap)+ '\t'); } m->mothurOutEndLine();
33 m->errorOut(e, "AlignNode", "printTheta");
38 /**************************************************************************************************/
40 int AlignNode::loadSequence(string& sequence){
42 alignLength = (int)sequence.length(); // this function runs through the alignment and increments the frequency
43 // of each base for a particular taxon. we are building the thetas
45 if(theta.size() == 0){
46 theta.resize(alignLength);
47 columnCounts.resize(alignLength, 0);
50 for(int i=0;i<alignLength;i++){
52 if (m->control_pressed) { return 0; }
54 char base = sequence[i];
56 if(base == 'A') { theta[i].A++; columnCounts[i]++; } // our thetas will be alignLength x 5
57 else if(base == 'T'){ theta[i].T++; columnCounts[i]++; } // and we ignore any position that has
58 else if(base == 'G'){ theta[i].G++; columnCounts[i]++; } // an ambiguous base call
59 else if(base == 'C'){ theta[i].C++; columnCounts[i]++; }
60 else if(base == '-'){ theta[i].gap++; columnCounts[i]++; }
61 else if(base == 'U'){ theta[i].T++; columnCounts[i]++; }
69 m->errorOut(e, "AlignNode", "loadSequence");
74 /**************************************************************************************************/
76 int AlignNode::checkTheta(){
78 for(int i=0;i<alignLength;i++){
80 if (m->control_pressed) { return 0; }
82 if(theta[i].gap == columnCounts[i]){
86 // int maxCount = theta[i].A;
88 // if(theta[i].T > maxCount) { maxCount = theta[i].T; }
89 // if(theta[i].G > maxCount) { maxCount = theta[i].T; }
90 // if(theta[i].C > maxCount) { maxCount = theta[i].T; }
91 // if(theta[i].gap > maxCount) { maxCount = theta[i].T; }
93 // if(maxCount < columnCounts[i] * 0.25){// || maxCount == columnCounts[i]){ //remove any column where the maximum frequency is <50%
94 // columnCounts[i] = 0;
102 catch(exception& e) {
103 m->errorOut(e, "AlignNode", "checkTheta");
108 /**************************************************************************************************/
110 int AlignNode::addThetas(vector<thetaAlign> newTheta, int newNumSeqs){
112 if(alignLength == 0){
113 alignLength = (int)newTheta.size();
114 theta.resize(alignLength);
115 columnCounts.resize(alignLength);
118 for(int i=0;i<alignLength;i++){
120 if (m->control_pressed) { return 0; }
122 theta[i].A += newTheta[i].A; columnCounts[i] += newTheta[i].A;
123 theta[i].T += newTheta[i].T; columnCounts[i] += newTheta[i].T;
124 theta[i].G += newTheta[i].G; columnCounts[i] += newTheta[i].G;
125 theta[i].C += newTheta[i].C; columnCounts[i] += newTheta[i].C;
126 theta[i].gap += newTheta[i].gap; columnCounts[i] += newTheta[i].gap;
129 numSeqs += newNumSeqs;
133 catch(exception& e) {
134 m->errorOut(e, "AlignNode", "addThetas");
139 /**************************************************************************************************/
141 double AlignNode::getSimToConsensus(string& query){
143 double similarity = 0;
147 for(int i=0;i<alignLength;i++){
149 if (m->control_pressed) { return similarity; }
151 char base = query[i];
153 if(base != '.' && base != 'N' && columnCounts[i] != 0){
158 fraction = (int) theta[i].A / (double) columnCounts[i];
159 similarity += fraction;
162 else if(base == 'T'){
163 fraction = (int) theta[i].T / (double) columnCounts[i];
164 similarity += fraction;
167 else if(base == 'G'){
168 fraction = (int) theta[i].G / (double) columnCounts[i];
169 similarity += fraction;
172 else if(base == 'C'){
173 fraction = (int) theta[i].C / (double) columnCounts[i];
174 similarity += fraction;
177 else if(base == '-'){
178 fraction = (int) theta[i].gap / (double) columnCounts[i];
179 similarity += fraction;
186 similarity /= double(length);
195 catch(exception& e) {
196 m->errorOut(e, "AlignNode", "getSimToConsensus");
201 /**************************************************************************************************/
203 double AlignNode::getPxGivenkj_D_j(string& query){ //P(x | k_j, D, j)
205 double PxGivenkj_D_j = 0;
208 double alpha = 1 / (double)totalSeqs; //flat prior
211 for(int s=0;s<alignLength;s++){
213 if (m->control_pressed) { return PxGivenkj_D_j; }
215 char base = query[s];
216 thetaAlign thetaS = theta[s];
218 if(base != '.' && base != 'N' && columnCounts[s] != 0){
219 double Nkj_s = (double)columnCounts[s];
223 if(base == 'A') { nkj_si = (double)thetaS.A; }
224 else if(base == 'T'){ nkj_si = (double)thetaS.T; }
225 else if(base == 'G'){ nkj_si = (double)thetaS.G; }
226 else if(base == 'C'){ nkj_si = (double)thetaS.C; }
227 else if(base == '-'){ nkj_si = (double)thetaS.gap; }
228 else if(base == 'U'){ nkj_si = (double)thetaS.T; }
230 // double alpha = pow(0.2, double(Nkj_s)) + 0.0001; //need to make 1e-4 a variable in future; this is the non-flat prior
232 // if(columnCounts[s] != nkj_si){ //deal only with segregating sites...
233 double numerator = nkj_si + alpha;
234 double denomenator = Nkj_s + 5.0 * alpha;
236 PxGivenkj_D_j += log(numerator) - log(denomenator);
240 if(base != '.' && columnCounts[s] == 0 && thetaS.gap == 0){
247 if(count == 0){ PxGivenkj_D_j = -1e10; }
249 return PxGivenkj_D_j;
251 catch(exception& e) {
252 m->errorOut(e, "AlignNode", "getPxGivenkj_D_j");
257 /**************************************************************************************************/