]> git.donarmstrong.com Git - mothur.git/blob - kmer.cpp
added mothur.h and fixed includes in many files
[mothur.git] / kmer.cpp
1 /*
2  *  kmer.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/15/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 using namespace std;
11
12 #include "mothur.h"
13 #include "kmer.hpp"
14
15 /**************************************************************************************************/
16
17 Kmer::Kmer(int size) : kmerSize(size) {
18         
19         int power4s[9] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536 };
20         maxKmer = power4s[kmerSize]+1;// (int)pow(4.,k)+1;
21         
22 }
23
24 /**************************************************************************************************/
25
26 string Kmer::getKmerString(string sequence){
27         int length = sequence.length();
28         int nKmers = length - kmerSize + 1;
29         vector<int> counts(maxKmer, 0);
30         
31         for(int i=0;i<nKmers;i++){
32                 int kmerNumber = getKmerNumber(sequence, i);
33                 counts[kmerNumber]++;
34         }
35         
36         string kmerString = "";
37         for(int i=0;i<maxKmer;i++){
38                 kmerString += getASCII(counts[i]);
39         }               
40         
41         return kmerString;      
42 }
43         
44 /**************************************************************************************************/
45
46 int Kmer::getKmerNumber(string sequence, int index){
47         int power4s[9] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536 };
48         int kmer = 0;
49         for(int i=0;i<kmerSize;i++){
50                 if(toupper(sequence[i+index]) == 'A')           {       kmer += (0 * power4s[kmerSize-i-1]);    }
51                 else if(toupper(sequence[i+index]) == 'C')      {       kmer += (1 * power4s[kmerSize-i-1]);    }
52                 else if(toupper(sequence[i+index]) == 'G')      {       kmer += (2 * power4s[kmerSize-i-1]);    }
53                 else if(toupper(sequence[i+index]) == 'U')      {       kmer += (3 * power4s[kmerSize-i-1]);    }
54                 else if(toupper(sequence[i+index]) == 'T')      {       kmer += (3 * power4s[kmerSize-i-1]);    }
55                 else if(toupper(sequence[i+index]) == 'N')      {       return (int)power4s[kmerSize];                  }
56         }
57         return kmer;    
58 }
59         
60 /**************************************************************************************************/
61         
62 string Kmer::getKmerBases(int kmerNumber){
63         int power4s[9] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536 };
64         
65         string kmer = "";
66         
67         if(kmerNumber == power4s[kmerSize]){//pow(4.,7)){       
68                 for(int i=0;i<kmerSize;i++){
69                         kmer += 'N';
70                 }
71         }
72         else{
73                 for(int i=0;i<kmerSize;i++){
74                         int nt = (int)(kmerNumber / (float)power4s[i]) % 4;
75                         if(nt == 0)             {       kmer = 'A' + kmer;      }
76                         else if(nt == 1){       kmer = 'C' + kmer;      }
77                         else if(nt == 2){       kmer = 'G' + kmer;      }
78                         else if(nt == 3){       kmer = 'T' + kmer;      }
79                 }
80         }
81         return kmer;
82 }
83
84 /**************************************************************************************************/
85
86 char Kmer::getASCII(int number)         {       return (char)(33+number);                       }
87
88 /**************************************************************************************************/
89
90 int Kmer::getNumber(char character)     {       return ((int)(character-'!'));          }
91
92 /**************************************************************************************************/