]> git.donarmstrong.com Git - mothur.git/blob - kmer.cpp
2c2783384c3849c2941509547d2a67b83dee6f9f
[mothur.git] / kmer.cpp
1 /*
2  *  kmer.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/15/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 using namespace std;
11
12 #include <string>
13 #include <vector>
14
15 #include "kmer.hpp"
16
17 /**************************************************************************************************/
18
19 Kmer::Kmer(int size) : kmerSize(size) {
20         
21         int power4s[9] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536 };
22         maxKmer = power4s[kmerSize]+1;// (int)pow(4.,k)+1;
23         
24 }
25
26 /**************************************************************************************************/
27
28 string Kmer::getKmerString(string sequence){
29         int length = sequence.length();
30         int nKmers = length - kmerSize + 1;
31         vector<int> counts(maxKmer, 0);
32         
33         for(int i=0;i<nKmers;i++){
34                 int kmerNumber = getKmerNumber(sequence, i);
35                 counts[kmerNumber]++;
36         }
37         
38         string kmerString = "";
39         for(int i=0;i<maxKmer;i++){
40                 kmerString += getASCII(counts[i]);
41         }               
42         
43         return kmerString;      
44 }
45         
46 /**************************************************************************************************/
47
48 int Kmer::getKmerNumber(string sequence, int index){
49         int power4s[9] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536 };
50         int kmer = 0;
51         for(int i=0;i<kmerSize;i++){
52                 if(toupper(sequence[i+index]) == 'A')           {       kmer += (0 * power4s[kmerSize-i-1]);    }
53                 else if(toupper(sequence[i+index]) == 'C')      {       kmer += (1 * power4s[kmerSize-i-1]);    }
54                 else if(toupper(sequence[i+index]) == 'G')      {       kmer += (2 * power4s[kmerSize-i-1]);    }
55                 else if(toupper(sequence[i+index]) == 'U')      {       kmer += (3 * power4s[kmerSize-i-1]);    }
56                 else if(toupper(sequence[i+index]) == 'T')      {       kmer += (3 * power4s[kmerSize-i-1]);    }
57                 else if(toupper(sequence[i+index]) == 'N')      {       return (int)power4s[kmerSize];                  }
58         }
59         return kmer;    
60 }
61         
62 /**************************************************************************************************/
63         
64 string Kmer::getKmerBases(int kmerNumber){
65         int power4s[9] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536 };
66         
67         string kmer = "";
68         
69         if(kmerNumber == power4s[kmerSize]){//pow(4.,7)){       
70                 for(int i=0;i<kmerSize;i++){
71                         kmer += 'N';
72                 }
73         }
74         else{
75                 for(int i=0;i<kmerSize;i++){
76                         int nt = (int)(kmerNumber / (float)power4s[i]) % 4;
77                         if(nt == 0)             {       kmer = 'A' + kmer;      }
78                         else if(nt == 1){       kmer = 'C' + kmer;      }
79                         else if(nt == 2){       kmer = 'G' + kmer;      }
80                         else if(nt == 3){       kmer = 'T' + kmer;      }
81                 }
82         }
83         return kmer;
84 }
85
86 /**************************************************************************************************/
87
88 char Kmer::getASCII(int number)         {       return (char)(33+number);                       }
89
90 /**************************************************************************************************/
91
92 int Kmer::getNumber(char character)     {       return ((int)(character-'!'));          }
93
94 /**************************************************************************************************/