X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=suffixtree.cpp;h=9cd835185c31a61bd024590f5a3fc7396d31610f;hb=372fb21ea66ced432b109225851a1b80ef0491a3;hp=492460d4dd68f4f025504b58acbc80fad09b09b7;hpb=74c78f9abd9e733f0c2f812efec97a76632fcbf8;p=mothur.git diff --git a/suffixtree.cpp b/suffixtree.cpp index 492460d..9cd8351 100644 --- a/suffixtree.cpp +++ b/suffixtree.cpp @@ -35,22 +35,23 @@ inline bool compareParents(SuffixNode* left, SuffixNode* right){// this is neces //******************************************************************************************************************** -SuffixTree::SuffixTree(){} +SuffixTree::SuffixTree(){ m = MothurOut::getInstance(); } //******************************************************************************************************************** SuffixTree::~SuffixTree(){ for(int i=0;igetName(); - sequence = seq->convert2ints(); + seqName = seq.getName(); + sequence = seq.convert2ints(); sequence += '5'; // this essentially concatenates a '$' to the end of the sequence to int seqLength = sequence.length(); // make it a cononical suffix tree @@ -74,7 +75,7 @@ string SuffixTree::getSeqName() { void SuffixTree::print(){ vector hold = nodeVector; sort(hold.begin(), hold.end(), compareParents); - cout << "Address\t\tParent\tNode\tSuffix\tStartC\tEndC\tSuffix" << endl; + m->mothurOut("Address\t\tParent\tNode\tSuffix\tStartC\tEndC\tSuffix"); m->mothurOutEndLine(); for(int i=1;i<=nodeCounter;i++){ hold[i]->print(sequence, i); } @@ -124,7 +125,46 @@ int SuffixTree::countSuffixes(string compareSequence, int& minValue){ // here we return numSuffixes; // change the value and return the number of suffixes } +//******************************************************************************************************************** +int SuffixTree::countSuffixes(string compareSequence){ // here we count the number of suffix parts + // we need to rewrite a user supplied sequence. if the + int numSuffixes = 0; // count exceeds the supplied minValue, bail out. The + int seqLength = compareSequence.length(); // time complexity should be O(L) + int position = 0; + + int presentNode = 0; + + while(position < seqLength){ // while the position in the query sequence isn't at the end... + + int newNode = nodeVector[presentNode]->getChild(compareSequence[position]); // see if the current node has a + // child that matches the next character in the query + if(newNode == -1){ + if(presentNode == 0){ position++; } // if not, go back to the root and increase the count + numSuffixes++; // by one. + presentNode = 0; + } + else{ // if there is, move to that node and see how far down + presentNode = newNode; // it we can get + + for(int i=nodeVector[newNode]->getStartCharPos(); i<=nodeVector[newNode]->getEndCharPos(); i++){ + if(compareSequence[position] == sequence[i]){ + position++; // as long as the query and branch agree, keep going + } + else{ + numSuffixes++; // if there is a mismatch, increase the number of + presentNode = 0; // suffixes and go back to the root + break; + } + } + } + // if we get all the way through the node we'll go to the top of the while loop and find the child node + // that corresponds to what we are interested in + } + numSuffixes--; // the method puts an extra count on numSuffixes + + return numSuffixes; // change the value and return the number of suffixes +} //******************************************************************************************************************** void SuffixTree::canonize(){ // if you have to ask how this works, you don't really want to know and this really