5 * Created by Pat Schloss on 10/11/11.
6 * Copyright 2011 Patrick D. Schloss. All rights reserved.
13 /**********************************************************************************************************************/
15 KmerNode::KmerNode(string s, int l, int n) : TaxonomyNode(s, l), kmerSize(n) {
17 int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
19 numPossibleKmers = power4s[kmerSize];
22 kmerVector.assign(numPossibleKmers, 0);
25 m->errorOut(e, "KmerNode", "KmerNode");
30 /**********************************************************************************************************************/
32 void KmerNode::loadSequence(vector<int>& kmerProfile){
34 for(int i=0;i<numPossibleKmers;i++){
35 if (m->control_pressed) { break; }
36 if(kmerVector[i] == 0 && kmerProfile[i] != 0) { numUniqueKmers++; }
38 kmerVector[i] += kmerProfile[i];
44 m->errorOut(e, "KmerNode", "loadSequence");
49 /**********************************************************************************************************************/
51 string KmerNode::getKmerBases(int kmerNumber){
53 // Here we convert the kmer number into the kmer in terms of bases.
55 // Example: Score = 915 (for a 6-mer)
56 // Base6 = (915 / 4^0) % 4 = 915 % 4 = 3 => T [T]
57 // Base5 = (915 / 4^1) % 4 = 228 % 4 = 0 => A [AT]
58 // Base4 = (915 / 4^2) % 4 = 57 % 4 = 1 => C [CAT]
59 // Base3 = (915 / 4^3) % 4 = 14 % 4 = 2 => G [GCAT]
60 // Base2 = (915 / 4^4) % 4 = 3 % 4 = 3 => T [TGCAT]
61 // Base1 = (915 / 4^5) % 4 = 0 % 4 = 0 => A [ATGCAT] -> this checks out with the previous method
63 int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
67 if(kmerNumber == power4s[kmerSize]){//pow(4.,7)){ // if the kmer number is the same as the maxKmer then it must
68 for(int i=0;i<kmerSize;i++){ // have had an N in it and so we'll just call it N x kmerSize
73 for(int i=0;i<kmerSize;i++){
74 if (m->control_pressed) { return kmer; }
75 int nt = (int)(kmerNumber / (float)power4s[i]) % 4; // the '%' operator returns the remainder
76 if(nt == 0) { kmer = 'A' + kmer; } // from int-based division ]
77 else if(nt == 1){ kmer = 'C' + kmer; }
78 else if(nt == 2){ kmer = 'G' + kmer; }
79 else if(nt == 3){ kmer = 'T' + kmer; }
85 m->errorOut(e, "KmerNode", "getKmerBases");
90 /**************************************************************************************************/
92 void KmerNode::addThetas(vector<int> newTheta, int newNumSeqs){
94 for(int i=0;i<numPossibleKmers;i++){
95 if (m->control_pressed) { break; }
96 kmerVector[i] += newTheta[i];
99 // if(alignLength == 0){
100 // alignLength = (int)newTheta.size();
101 // theta.resize(alignLength);
102 // columnCounts.resize(alignLength);
105 // for(int i=0;i<alignLength;i++){
106 // theta[i].A += newTheta[i].A; columnCounts[i] += newTheta[i].A;
107 // theta[i].T += newTheta[i].T; columnCounts[i] += newTheta[i].T;
108 // theta[i].G += newTheta[i].G; columnCounts[i] += newTheta[i].G;
109 // theta[i].C += newTheta[i].C; columnCounts[i] += newTheta[i].C;
110 // theta[i].gap += newTheta[i].gap; columnCounts[i] += newTheta[i].gap;
113 numSeqs += newNumSeqs;
115 catch(exception& e) {
116 m->errorOut(e, "KmerNode", "addThetas");
121 /**********************************************************************************************************************/
123 int KmerNode::getNumUniqueKmers(){
125 if(numUniqueKmers == 0){
127 for(int i=0;i<numPossibleKmers;i++){
128 if (m->control_pressed) { return numUniqueKmers; }
129 if(kmerVector[i] != 0){
137 return numUniqueKmers;
140 catch(exception& e) {
141 m->errorOut(e, "KmerNode", "getNumUniqueKmers");
146 /**********************************************************************************************************************/
148 void KmerNode::printTheta(){
150 m->mothurOut(name + "\n");
151 for(int i=0;i<numPossibleKmers;i++){
152 if(kmerVector[i] != 0){
153 m->mothurOut(getKmerBases(i) + '\t' + toString(kmerVector[i]) + "\n");
156 m->mothurOutEndLine();
158 catch(exception& e) {
159 m->errorOut(e, "KmerNode", "printTheta");
164 /**************************************************************************************************/
166 double KmerNode::getSimToConsensus(vector<int>& queryKmerProfile){
170 for(int i=0;i<numPossibleKmers;i++){
171 if (m->control_pressed) { return present; }
172 if(queryKmerProfile[i] != 0 && kmerVector[i] != 0){
177 return present / double(queryKmerProfile.size() - kmerSize + 1);
179 catch(exception& e) {
180 m->errorOut(e, "KmerNode", "getSimToConsensus");
185 /**********************************************************************************************************************/
187 double KmerNode::getPxGivenkj_D_j(vector<int>& queryKmerProfile) {
189 double sumLogProb = 0.0000;
190 double alpha = 1.0 / (double)totalSeqs; //flat prior
191 // double alpha = pow((1.0 / (double)numUniqueKmers), numSeqs)+0.0001; //non-flat prior
193 for(int i=0;i<numPossibleKmers;i++){
194 if (m->control_pressed) { return sumLogProb; }
195 if(queryKmerProfile[i] != 0){ //numUniqueKmers needs to be the value from Root;
196 sumLogProb += log((kmerVector[i] + alpha) / (numSeqs + numUniqueKmers * alpha));
202 catch(exception& e) {
203 m->errorOut(e, "KmerNode", "getPxGivenkj_D_j");
209 /**********************************************************************************************************************/