X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=kmer.cpp;h=6ee84d3da3e8917c8c02d9c2d37deb68e08397b5;hb=c3f0a9c8f932b923f3a6fbbf143e8f4b85fd6f5f;hp=2c2783384c3849c2941509547d2a67b83dee6f9f;hpb=20a2d0350a737a434c89f303662d64a8eeea7b05;p=mothur.git diff --git a/kmer.cpp b/kmer.cpp index 2c27833..6ee84d3 100644 --- a/kmer.cpp +++ b/kmer.cpp @@ -7,47 +7,85 @@ * */ -using namespace std; - -#include -#include - #include "kmer.hpp" /**************************************************************************************************/ -Kmer::Kmer(int size) : kmerSize(size) { - - int power4s[9] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536 }; - maxKmer = power4s[kmerSize]+1;// (int)pow(4.,k)+1; +Kmer::Kmer(int size) : kmerSize(size) { // The constructor sets the size of the kmer + int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; + // No reason to waste the time of recalculating + maxKmer = power4s[kmerSize]+1;// (int)pow(4.,k)+1; // powers of 4 everytime through. We need an + // extra kmer if we get a non-ATGCU base } /**************************************************************************************************/ -string Kmer::getKmerString(string sequence){ - int length = sequence.length(); - int nKmers = length - kmerSize + 1; +string Kmer::getKmerString(string sequence){ // Calculate kmer for each position in the sequence, count the freq + int length = sequence.length(); // of each kmer, and convert it to an ascii character with base '!'. + int nKmers = length - kmerSize + 1; // Export the string of characters as a string vector counts(maxKmer, 0); - for(int i=0;i > Kmer::getKmerCounts(string sequence){ // Calculate kmer for each position in the sequence, save info in a map + int length = sequence.length(); // so you know at each spot in the sequence what kmers were found + int nKmers = length - kmerSize + 1; // + vector< map > counts; counts.resize(nKmers); // a map kmer counts for each spot + map::iterator it; + + for(int i=0;i T [T] +// Base5 = (915 / 4^1) % 4 = 228 % 4 = 0 => A [AT] +// Base4 = (915 / 4^2) % 4 = 57 % 4 = 1 => C [CAT] +// Base3 = (915 / 4^3) % 4 = 14 % 4 = 2 => G [GCAT] +// Base2 = (915 / 4^4) % 4 = 3 % 4 = 3 => T [TGCAT] +// Base1 = (915 / 4^5) % 4 = 0 % 4 = 0 => A [ATGCAT] -> this checks out with the previous method + + int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; string kmer = ""; - if(kmerNumber == power4s[kmerSize]){//pow(4.,7)){ - for(int i=0;i