X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=kmer.cpp;h=50574f4a5ee0709daf2ebb67b8f0e49971e20ef0;hp=2c2783384c3849c2941509547d2a67b83dee6f9f;hb=cf9987b67aa49777a4c91c2d21f96e58bf17aa82;hpb=20a2d0350a737a434c89f303662d64a8eeea7b05 diff --git a/kmer.cpp b/kmer.cpp index 2c27833..50574f4 100644 --- a/kmer.cpp +++ b/kmer.cpp @@ -7,47 +7,85 @@ * */ -using namespace std; - -#include -#include - #include "kmer.hpp" /**************************************************************************************************/ -Kmer::Kmer(int size) : kmerSize(size) { - - int power4s[9] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536 }; - maxKmer = power4s[kmerSize]+1;// (int)pow(4.,k)+1; +Kmer::Kmer(int size) : kmerSize(size) { // The constructor sets the size of the kmer + int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; + // No reason to waste the time of recalculating + maxKmer = power4s[kmerSize]+1;// (int)pow(4.,k)+1; // powers of 4 everytime through. We need an + // extra kmer if we get a non-ATGCU base } /**************************************************************************************************/ -string Kmer::getKmerString(string sequence){ - int length = sequence.length(); - int nKmers = length - kmerSize + 1; +string Kmer::getKmerString(string sequence){ // Calculate kmer for each position in the sequence, count the freq + int length = sequence.length(); // of each kmer, and convert it to an ascii character with base '!'. + int nKmers = length - kmerSize + 1; // Export the string of characters as a string vector counts(maxKmer, 0); - for(int i=0;i > Kmer::getKmerCounts(string sequence){ // Calculate kmer for each position in the sequence, save info in a map + int length = sequence.length(); // so you know at each spot in the sequence what kmers were found + int nKmers = length - kmerSize + 1; // + vector< map > counts; counts.resize(nKmers); // a map kmer counts for each spot + map::iterator it; + + for(int i=0;i T [T] +// Base5 = (915 / 4^1) % 4 = 228 % 4 = 0 => A [AT] +// Base4 = (915 / 4^2) % 4 = 57 % 4 = 1 => C [CAT] +// Base3 = (915 / 4^3) % 4 = 14 % 4 = 2 => G [GCAT] +// Base2 = (915 / 4^4) % 4 = 3 % 4 = 3 => T [TGCAT] +// Base1 = (915 / 4^5) % 4 = 0 % 4 = 0 => A [ATGCAT] -> this checks out with the previous method + + int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; string kmer = ""; - if(kmerNumber == power4s[kmerSize]){//pow(4.,7)){ - for(int i=0;i=0;i--){ + if(kmerString[i] == 'A') { reverse += 'T'; } + else if(kmerString[i] == 'T'){ reverse += 'A'; } + else if(kmerString[i] == 'G'){ reverse += 'C'; } + else if(kmerString[i] == 'C'){ reverse += 'G'; } + else { reverse += 'N'; } + } + + int reverseNumber = getKmerNumber(reverse, 0); + + return reverseNumber; + +} + +/**************************************************************************************************/ +char Kmer::getASCII(int number) { return (char)(33+number); } // '!' is the first printable char and + // has the int value of 33 /**************************************************************************************************/ -int Kmer::getNumber(char character) { return ((int)(character-'!')); } +int Kmer::getNumber(char character) { return ((int)(character-'!')); } // '!' has the value of 33 /**************************************************************************************************/