]> git.donarmstrong.com Git - mothur.git/blob - kmernode.cpp
changes while testing
[mothur.git] / kmernode.cpp
1 /*
2  *  kmerNode.cpp
3  *  bayesian
4  *
5  *  Created by Pat Schloss on 10/11/11.
6  *  Copyright 2011 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "kmernode.h"
11
12
13 /**********************************************************************************************************************/
14
15 KmerNode::KmerNode(string s, int l, int n) : TaxonomyNode(s, l), kmerSize(n) {
16         try {
17         int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
18         
19         numPossibleKmers = power4s[kmerSize];
20         numUniqueKmers = 0;
21         
22         kmerVector.assign(numPossibleKmers, 0);
23     }
24         catch(exception& e) {
25                 m->errorOut(e, "KmerNode", "KmerNode");
26                 exit(1);
27         }
28 }
29
30 /**********************************************************************************************************************/
31
32 void KmerNode::loadSequence(vector<int>& kmerProfile){
33         try {
34         for(int i=0;i<numPossibleKmers;i++){
35             if (m->control_pressed) { break; }
36             if(kmerVector[i] == 0 && kmerProfile[i] != 0)       {       numUniqueKmers++;       }
37             
38             kmerVector[i] += kmerProfile[i];
39         }
40         
41         numSeqs++;
42     }
43         catch(exception& e) {
44                 m->errorOut(e, "KmerNode", "loadSequence");
45                 exit(1);
46         }
47 }       
48
49 /**********************************************************************************************************************/
50
51 string KmerNode::getKmerBases(int kmerNumber){
52         try {
53         //      Here we convert the kmer number into the kmer in terms of bases.
54         //
55         //      Example:        Score = 915 (for a 6-mer)
56         //                              Base6 = (915 / 4^0) % 4 = 915 % 4 = 3 => T      [T]
57         //                              Base5 = (915 / 4^1) % 4 = 228 % 4 = 0 => A      [AT]
58         //                              Base4 = (915 / 4^2) % 4 = 57 % 4 = 1 => C       [CAT]
59         //                              Base3 = (915 / 4^3) % 4 = 14 % 4 = 2 => G       [GCAT]
60         //                              Base2 = (915 / 4^4) % 4 = 3 % 4 = 3 => T        [TGCAT]
61         //                              Base1 = (915 / 4^5) % 4 = 0 % 4 = 0 => A        [ATGCAT] -> this checks out with the previous method
62         
63         int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
64         
65         string kmer = "";
66         
67         if(kmerNumber == power4s[kmerSize]){//pow(4.,7)){       //      if the kmer number is the same as the maxKmer then it must
68             for(int i=0;i<kmerSize;i++){                                        //      have had an N in it and so we'll just call it N x kmerSize
69                 kmer += 'N';
70             }
71         }
72         else{
73             for(int i=0;i<kmerSize;i++){
74                 if (m->control_pressed) { return kmer; }
75                 int nt = (int)(kmerNumber / (float)power4s[i]) % 4;             //      the '%' operator returns the remainder 
76                 if(nt == 0)             {       kmer = 'A' + kmer;      }                               //      from int-based division ]
77                 else if(nt == 1){       kmer = 'C' + kmer;      }
78                 else if(nt == 2){       kmer = 'G' + kmer;      }
79                 else if(nt == 3){       kmer = 'T' + kmer;      }
80             }
81         }
82         return kmer;
83     }
84         catch(exception& e) {
85                 m->errorOut(e, "KmerNode", "getKmerBases");
86                 exit(1);
87         }
88 }
89
90 /**************************************************************************************************/
91
92 void KmerNode::addThetas(vector<int> newTheta, int newNumSeqs){
93         try {
94         for(int i=0;i<numPossibleKmers;i++){
95             if (m->control_pressed) { break; }
96             kmerVector[i] += newTheta[i];               
97         }
98         
99         //      if(alignLength == 0){
100         //              alignLength = (int)newTheta.size();
101         //              theta.resize(alignLength);
102         //              columnCounts.resize(alignLength);
103         //      }
104         //      
105         //      for(int i=0;i<alignLength;i++){ 
106         //              theta[i].A += newTheta[i].A;            columnCounts[i] += newTheta[i].A;
107         //              theta[i].T += newTheta[i].T;            columnCounts[i] += newTheta[i].T;
108         //              theta[i].G += newTheta[i].G;            columnCounts[i] += newTheta[i].G;
109         //              theta[i].C += newTheta[i].C;            columnCounts[i] += newTheta[i].C;
110         //              theta[i].gap += newTheta[i].gap;        columnCounts[i] += newTheta[i].gap;
111         //      }
112         
113         numSeqs += newNumSeqs;
114     }
115         catch(exception& e) {
116                 m->errorOut(e, "KmerNode", "addThetas");
117                 exit(1);
118         }
119 }
120
121 /**********************************************************************************************************************/
122
123 int KmerNode::getNumUniqueKmers(){
124     try {
125         if(numUniqueKmers == 0){
126             
127             for(int i=0;i<numPossibleKmers;i++){
128                 if (m->control_pressed) { return numUniqueKmers; }
129                 if(kmerVector[i] != 0){
130                     numUniqueKmers++;
131                 }
132                 
133             }
134             
135         }
136         
137         return numUniqueKmers;  
138         
139     }
140         catch(exception& e) {
141                 m->errorOut(e, "KmerNode", "getNumUniqueKmers");
142                 exit(1);
143         }
144 }
145
146 /**********************************************************************************************************************/
147
148 void KmerNode::printTheta(){
149         try {
150         m->mothurOut(name + "\n");
151         for(int i=0;i<numPossibleKmers;i++){
152             if(kmerVector[i] != 0){
153                 m->mothurOut(getKmerBases(i) + '\t' + toString(kmerVector[i]) + "\n");
154             }
155         }
156         m->mothurOutEndLine();  
157     }
158         catch(exception& e) {
159                 m->errorOut(e, "KmerNode", "printTheta");
160                 exit(1);
161         }
162         
163 }
164 /**************************************************************************************************/
165
166 double KmerNode::getSimToConsensus(vector<int>& queryKmerProfile){
167         try {
168         double present = 0;
169         
170         for(int i=0;i<numPossibleKmers;i++){
171             if (m->control_pressed) { return present; }
172             if(queryKmerProfile[i] != 0 && kmerVector[i] != 0){
173                 present++;
174             }
175         }       
176         
177         return present / double(queryKmerProfile.size() - kmerSize + 1);
178     }
179         catch(exception& e) {
180                 m->errorOut(e, "KmerNode", "getSimToConsensus");
181                 exit(1);
182         }
183 }
184
185 /**********************************************************************************************************************/
186
187 double KmerNode::getPxGivenkj_D_j(vector<int>& queryKmerProfile)        {       
188         try {
189         double sumLogProb = 0.0000;
190         double alpha = 1.0 / (double)totalSeqs; //flat prior
191         //      double alpha = pow((1.0 / (double)numUniqueKmers), numSeqs)+0.0001;     //non-flat prior
192         
193         for(int i=0;i<numPossibleKmers;i++){
194             if (m->control_pressed) { return sumLogProb; }
195             if(queryKmerProfile[i] != 0){               //numUniqueKmers needs to be the value from Root;
196                 sumLogProb += log((kmerVector[i] + alpha) / (numSeqs + numUniqueKmers * alpha));
197             }
198             
199         }
200         return sumLogProb;
201     }
202         catch(exception& e) {
203                 m->errorOut(e, "KmerNode", "getPxGivenkj_D_j");
204                 exit(1);
205         }
206
207 }
208
209 /**********************************************************************************************************************/