From afcbef163b4f32d0ff25a834cb9af8eef8d06ffa Mon Sep 17 00:00:00 2001 From: westcott Date: Fri, 27 Aug 2010 13:50:03 +0000 Subject: [PATCH] fixed phylo.diversity --- aligncommand.cpp | 3 +- clearcutcommand.cpp | 5 +- makefile | 4 +- mothurout.cpp | 67 +++++++++++++++++----- phylodiversitycommand.cpp | 31 +++++++--- phylodiversitycommand.h | 2 +- preclustercommand.cpp | 117 +++++++++++++++++++++----------------- preclustercommand.h | 7 ++- readotu.cpp | 2 +- readtreecommand.cpp | 82 +++++++++++++------------- readtreecommand.h | 1 + sharedcommand.cpp | 8 +++ sharedrabundvector.cpp | 1 + summarycommand.cpp | 6 +- summarycommand.h | 2 +- tree.cpp | 12 ++-- treegroupscommand.cpp | 17 +++--- 17 files changed, 225 insertions(+), 142 deletions(-) diff --git a/aligncommand.cpp b/aligncommand.cpp index 0f4c89a..5aabeef 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -90,6 +90,8 @@ AlignCommand::AlignCommand(string option) { //go through files and make sure they are good, if not, then disregard them for (int i = 0; i < candidateFileNames.size(); i++) { + //candidateFileNames[i] = m->getFullPathName(candidateFileNames[i]); + if (inputDir != "") { string path = m->hasPath(candidateFileNames[i]); //if the user has not given a path then, add inputdir. else leave path alone. @@ -348,7 +350,6 @@ int AlignCommand::execute(){ #else vector positions = m->divideFile(candidateFileNames[s], processors); - for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } diff --git a/clearcutcommand.cpp b/clearcutcommand.cpp index 7d0d539..eabcc73 100644 --- a/clearcutcommand.cpp +++ b/clearcutcommand.cpp @@ -137,7 +137,6 @@ void ClearcutCommand::help(){ try { m->mothurOut("The clearcut command interfaces mothur with the clearcut program written by Initiative for Bioinformatics and Evolutionary Studies (IBEST) at the University of Idaho.\n"); m->mothurOut("For more information about clearcut refer to http://bioinformatics.hungry.com/clearcut/ \n"); - m->mothurOut("The clearcut executable must be in a folder called clearcut in the same folder as your mothur executable, similar to mothur's requirements for using blast. \n"); m->mothurOut("The clearcut command parameters are phylip, fasta, version, verbose, quiet, seed, norandom, shuffle, neighbor, expblen, expdist, ntrees, matrixout, stdout, kimura, jukes, protein, DNA. \n"); m->mothurOut("The phylip parameter allows you to enter your phylip formatted distance matrix. \n"); m->mothurOut("The fasta parameter allows you to enter your aligned fasta file, if you enter a fastafile you specify if the sequences are DNA or protein using the DNA or protein parameters. \n"); @@ -255,6 +254,10 @@ int ClearcutCommand::execute() { clearcut_main(numArgs, clearcutParameters); + //free memory + for(int i = 0; i < cPara.size(); i++) { delete[] cPara[i]; } + delete[] clearcutParameters; + if (!stdoutWanted) { m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); diff --git a/makefile b/makefile index dc5065d..ffee66e 100644 --- a/makefile +++ b/makefile @@ -15,8 +15,8 @@ CXXFLAGS += -O3 MOTHUR_FILES = "\"../Release\"" -RELEASE_DATE = "\"8/5/2010\"" -VERSION = "\"1.12.3\"" +RELEASE_DATE = "\"8/30/2010\"" +VERSION = "\"1.13.0\"" CXXFLAGS += -DRELEASE_DATE=${RELEASE_DATE} -DVERSION=${VERSION} diff --git a/mothurout.cpp b/mothurout.cpp index fe3ea09..88a18ed 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -257,6 +257,8 @@ int MothurOut::mem_usage(double& vm_usage, double& resident_set) { /***********************************************************************/ int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){ try { + fileName = getFullPathName(fileName); + fileHandle.open(fileName.c_str(), ios::app); if(!fileHandle) { mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine(); @@ -445,6 +447,9 @@ string MothurOut::getExtension(string longName){ /***********************************************************************/ bool MothurOut::isBlank(string fileName){ try { + + fileName = getFullPathName(fileName); + ifstream fileHandle; fileHandle.open(fileName.c_str()); if(!fileHandle) { @@ -829,7 +834,8 @@ vector MothurOut::setFilePosFasta(string filename, int& num) /**************************************************************************************************/ vector MothurOut::setFilePosEachLine(string filename, int& num) { try { - + filename = getFullPathName(filename); + vector positions; ifstream in; openInputFile(filename, in); @@ -851,7 +857,7 @@ vector MothurOut::setFilePosEachLine(string filename, int& nu FILE * pFile; unsigned long int size; - + //get num bytes in file pFile = fopen (filename.c_str(),"rb"); if (pFile==NULL) perror ("Error opening file"); @@ -881,6 +887,8 @@ vector MothurOut::divideFile(string filename, int& proc) { FILE * pFile; unsigned long int size; + filename = getFullPathName(filename); + //get num bytes in file pFile = fopen (filename.c_str(),"rb"); if (pFile==NULL) perror ("Error opening file"); @@ -893,7 +901,7 @@ vector MothurOut::divideFile(string filename, int& proc) { //estimate file breaks unsigned long int chunkSize = 0; chunkSize = size / proc; - + //file to small to divide by processors if (chunkSize == 0) { proc = 1; filePos.push_back(size); return filePos; } @@ -914,15 +922,16 @@ vector MothurOut::divideFile(string filename, int& proc) { //there was not another sequence before the end of the file unsigned long int sanityPos = in.tellg(); - if (sanityPos = -1) { break; } - else { filePos.push_back(newSpot); } + + if (sanityPos == -1) { break; } + else { filePos.push_back(newSpot); } in.close(); } //save end pos filePos.push_back(size); - + //sanity check filePos for (int i = 0; i < (filePos.size()-1); i++) { if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; } @@ -1112,7 +1121,21 @@ void MothurOut::splitAtChar(string& estim, vector& container, char symbo //This function parses the estimator options and puts them in a vector void MothurOut::splitAtDash(string& estim, vector& container) { try { - string individual; + string individual = ""; + int estimLength = estim.size(); + for(int i=0;i& container) { } } //get last one - container.push_back(estim); + container.push_back(estim); */ } catch(exception& e) { errorOut(e, "MothurOut", "splitAtDash"); @@ -1134,17 +1157,31 @@ void MothurOut::splitAtDash(string& estim, vector& container) { //This function parses the label options and puts them in a set void MothurOut::splitAtDash(string& estim, set& container) { try { - string individual; - - while (estim.find_first_of('-') != -1) { - individual = estim.substr(0,estim.find_first_of('-')); - if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string - estim = estim.substr(estim.find_first_of('-')+1, estim.length()); + string individual = ""; + int estimLength = estim.size(); + for(int i=0;i counts; - for (int j = 0; j < globaldata->Groups.size(); j++) { counts[globaldata->Groups[j]] = 0; } + map< string, set > countedBranch; + for (int j = 0; j < globaldata->Groups.size(); j++) { counts[globaldata->Groups[j]] = 0; countedBranch[globaldata->Groups[j]].insert(-2); } //add dummy index to initialize countedBranch sets for(int k = 0; k < numLeafNodes; k++){ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } //calc branch length of randomLeaf k - float br = calcBranchLength(trees[i], randomLeaf[k]); + float br = calcBranchLength(trees[i], randomLeaf[k], countedBranch); //for each group in the groups update the total branch length accounting for the names file vector groups = trees[i]->tree[randomLeaf[k]].getGroup(); + for (int j = 0; j < groups.size(); j++) { int numSeqsInGroupJ = 0; map::iterator it; @@ -201,9 +203,11 @@ int PhyloDiversityCommand::execute(){ if (it != trees[i]->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j numSeqsInGroupJ = it->second; } - - for (int s = (counts[groups[j]]+1); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) { - diversity[groups[j]][s] = diversity[groups[j]][s-1] + ((float) numSeqsInGroupJ * br); + + if (numSeqsInGroupJ != 0) { diversity[groups[j]][(counts[groups[j]]+1)] = diversity[groups[j]][counts[groups[j]]] + br; } + + for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) { + diversity[groups[j]][s] = diversity[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths } counts[groups[j]] += numSeqsInGroupJ; } @@ -268,6 +272,7 @@ void PhyloDiversityCommand::printSumData(map< string, vector >& div, ofst else { score = div[globaldata->Groups[j]][numSampled] / (float)numIters; } out << setprecision(4) << score << '\t'; + }else { out << "NA" << '\t'; } } out << endl; @@ -318,26 +323,34 @@ void PhyloDiversityCommand::printData(set& num, map< string, vector } } //********************************************************************************************************************** -float PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf){ +float PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set >& counted){ try { //calc the branch length //while you aren't at root float sum = 0.0; int index = leaf; - + + vector groups = t->tree[leaf].getGroup(); + while(t->tree[index].getParent() != -1){ //if you have a BL if(t->tree[index].getBranchLength() != -1){ - sum += abs(t->tree[index].getBranchLength()); + if (counted[groups[0]].count(index) == 0) { //you have not already counted this branch + sum += abs(t->tree[index].getBranchLength()); + for (int j = 0; j < groups.size(); j++) { counted[groups[j]].insert(index); } + } } index = t->tree[index].getParent(); } //get last breanch length added if(t->tree[index].getBranchLength() != -1){ - sum += abs(t->tree[index].getBranchLength()); + if (counted[groups[0]].count(index) == 0) { //you have not already counted this branch + sum += abs(t->tree[index].getBranchLength()); + for (int j = 0; j < groups.size(); j++) { counted[groups[j]].insert(index); } + } } return sum; diff --git a/phylodiversitycommand.h b/phylodiversitycommand.h index 76c996c..a7e0d2e 100644 --- a/phylodiversitycommand.h +++ b/phylodiversitycommand.h @@ -33,7 +33,7 @@ class PhyloDiversityCommand : public Command { void printData(set&, map< string, vector >&, ofstream&, int); void printSumData(map< string, vector >&, ofstream&, int); - float calcBranchLength(Tree*, int); + float calcBranchLength(Tree*, int, map< string, set >&); }; #endif diff --git a/preclustercommand.cpp b/preclustercommand.cpp index 0bd9132..a7f2113 100644 --- a/preclustercommand.cpp +++ b/preclustercommand.cpp @@ -125,69 +125,65 @@ int PreClusterCommand::execute(){ if (numSeqs == 0) { m->mothurOut("Error reading fasta file...please correct."); m->mothurOutEndLine(); return 0; } if (diffs > length) { m->mothurOut("Error: diffs is greater than your sequence length."); m->mothurOutEndLine(); return 0; } - string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile)); - string newFastaFile = fileroot + "precluster" + m->getExtension(fastafile); - string newNamesFile = fileroot + "precluster.names"; - ofstream outFasta; - ofstream outNames; - - m->openOutputFile(newFastaFile, outFasta); - m->openOutputFile(newNamesFile, outNames); - + //clear sizes since you only needed this info to build the alignSeqs seqPNode structs +// sizes.clear(); + //sort seqs by number of identical seqs - alignSeqs.sort(comparePriority); - + sort(alignSeqs.begin(), alignSeqs.end(), comparePriority); + int count = 0; - int i = 0; + //think about running through twice... - list::iterator itList; - list::iterator itList2; - for (itList = alignSeqs.begin(); itList != alignSeqs.end();) { + for (int i = 0; i < numSeqs; i++) { - //try to merge it with all smaller seqs - for (itList2 = alignSeqs.begin(); itList2 != alignSeqs.end();) { - - if (m->control_pressed) { outFasta.close(); outNames.close(); remove(newFastaFile.c_str()); remove(newNamesFile.c_str()); return 0; } - + //are you active + // itActive = active.find(alignSeqs[i].seq.getName()); - if (itList->seq.getName() != itList2->seq.getName()) { //you don't want to merge with yourself - //are you within "diff" bases + if (alignSeqs[i].active) { //this sequence has not been merged yet + + //try to merge it with all smaller seqs + for (int j = i+1; j < numSeqs; j++) { - int mismatch = calcMisMatches((*itList).seq.getAligned(), (*itList2).seq.getAligned()); - - if (mismatch <= diffs) { - //merge - (*itList).names += ',' + (*itList2).names; - (*itList).numIdentical += (*itList2).numIdentical; + if (m->control_pressed) { return 0; } + + if (alignSeqs[j].active) { //this sequence has not been merged yet + //are you within "diff" bases + int mismatch = calcMisMatches(alignSeqs[i].seq.getAligned(), alignSeqs[j].seq.getAligned()); - itList2 = alignSeqs.erase(itList2); //itList2--; - count++; - }else{ itList2++; } - }else{ itList2++; } - - } + if (mismatch <= diffs) { + //merge + alignSeqs[i].names += ',' + alignSeqs[j].names; + alignSeqs[i].numIdentical += alignSeqs[j].numIdentical; - //ouptut this sequence - printData(outFasta, outNames, (*itList)); - - //remove sequence - itList = alignSeqs.erase(itList); - - i++; + alignSeqs[j].active = 0; + alignSeqs[j].numIdentical = 0; + count++; + } + }//end if j active + }//end if i != j + //remove from active list + alignSeqs[i].active = 0; + + }//end if active i if(i % 100 == 0) { m->mothurOut(toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine(); } } - - if(i % 100 != 0) { m->mothurOut(toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine(); } - outFasta.close(); - outNames.close(); + if(numSeqs % 100 != 0) { m->mothurOut(toString(numSeqs) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine(); } + - if (m->control_pressed) { remove(newFastaFile.c_str()); remove(newNamesFile.c_str()); return 0; } - - m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); - m->mothurOut("Total number of sequences before precluster was " + toString(numSeqs) + "."); m->mothurOutEndLine(); - m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); + string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile)); + + string newFastaFile = fileroot + "precluster" + m->getExtension(fastafile); + string newNamesFile = fileroot + "precluster.names"; + + if (m->control_pressed) { return 0; } + + m->mothurOut("Total number of sequences before precluster was " + toString(alignSeqs.size()) + "."); m->mothurOutEndLine(); + m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); + printData(newFastaFile, newNamesFile); + + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); if (m->control_pressed) { remove(newFastaFile.c_str()); remove(newNamesFile.c_str()); return 0; } @@ -287,10 +283,25 @@ int PreClusterCommand::calcMisMatches(string seq1, string seq2){ /**************************************************************************************************/ -void PreClusterCommand::printData(ofstream& outFasta, ofstream& outNames, seqPNode thisSeq){ +void PreClusterCommand::printData(string newfasta, string newname){ try { - thisSeq.seq.printSequence(outFasta); - outNames << thisSeq.seq.getName() << '\t' << thisSeq.names << endl; + ofstream outFasta; + ofstream outNames; + + m->openOutputFile(newfasta, outFasta); + m->openOutputFile(newname, outNames); + + + for (int i = 0; i < alignSeqs.size(); i++) { + if (alignSeqs[i].numIdentical != 0) { + alignSeqs[i].seq.printSequence(outFasta); + outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl; + } + } + + outFasta.close(); + outNames.close(); + } catch(exception& e) { m->errorOut(e, "PreClusterCommand", "printData"); diff --git a/preclustercommand.h b/preclustercommand.h index a26fda4..de6a572 100644 --- a/preclustercommand.h +++ b/preclustercommand.h @@ -20,8 +20,9 @@ struct seqPNode { int numIdentical; Sequence seq; string names; + bool active; seqPNode() {} - seqPNode(int n, Sequence s, string nm) : numIdentical(n), seq(s), names(nm) {} + seqPNode(int n, Sequence s, string nm) : numIdentical(n), seq(s), names(nm), active(1) {} ~seqPNode() {} }; /************************************************************/ @@ -38,7 +39,7 @@ private: int diffs, length; bool abort; string fastafile, namefile, outputDir; - list alignSeqs; //maps the number of identical seqs to a sequence + vector alignSeqs; //maps the number of identical seqs to a sequence map names; //represents the names file first column maps to second column map sizes; //this map a seq name to the number of identical seqs in the names file map::iterator itSize; @@ -48,7 +49,7 @@ private: void readNameFile(); //int readNamesFASTA(); int calcMisMatches(string, string); - void printData(ofstream&, ofstream&, seqPNode); //fasta filename, names file name + void printData(string, string); //fasta filename, names file name }; /************************************************************/ diff --git a/readotu.cpp b/readotu.cpp index 3952abc..00448f8 100644 --- a/readotu.cpp +++ b/readotu.cpp @@ -35,7 +35,7 @@ void ReadOTUFile::read(GlobalData* globaldata){ //memory leak prevention //if (globaldata->ginput != NULL) { delete globaldata->ginput; } globaldata->ginput = input; //saving to be used by collector and rarefact commands. - + if ((globaldata->getFormat() == "list") || (globaldata->getFormat() == "rabund") || (globaldata->getFormat() == "sabund")) {//you are reading a list, rabund or sabund file for collect, rarefaction or summary. //cout << input << '\t' << globaldata << endl; diff --git a/readtreecommand.cpp b/readtreecommand.cpp index 0a86f36..fdfdb05 100644 --- a/readtreecommand.cpp +++ b/readtreecommand.cpp @@ -154,46 +154,47 @@ int ReadTreeCommand::execute(){ } -// Sarah, isn't this checking already done when assigning the sequences to the groups? it makes read.tree -// wicked slow... For some reason my tree is coming in here eventhough the number of sequences in the tree -// agrees with the number of lines in the name file and the number of sequences represented by the name file -// agrees with the number of sequences in the group file - - //output any names that are in group file but not in tree + //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size. + int numNamesInTree; + if (namefile != "") { + if (numUniquesInName == globaldata->Treenames.size()) { numNamesInTree = nameMap.size(); } + else { numNamesInTree = globaldata->Treenames.size(); } + }else { numNamesInTree = globaldata->Treenames.size(); } -// if (globaldata->Treenames.size() < treeMap->getNumSeqs()) { -// cout << "in here" << endl; -// for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) { -// //is that name in the tree? -// int count = 0; -// for (int j = 0; j < globaldata->Treenames.size(); j++) { -// if (treeMap->namesOfSeqs[i] == globaldata->Treenames[j]) { break; } //found it -// count++; -// } -// -// if (m->control_pressed) { -// for (int i = 0; i < T.size(); i++) { delete T[i]; } -// globaldata->gTree.clear(); -// delete globaldata->gTreemap; -// return 0; -// } -// -// //then you did not find it so report it -// if (count == globaldata->Treenames.size()) { -// //if it is in your namefile then don't remove -// map::iterator it = nameMap.find(treeMap->namesOfSeqs[i]); -// -// if (it == nameMap.end()) { -// m->mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine(); -// treeMap->removeSeq(treeMap->namesOfSeqs[i]); -// i--; //need this because removeSeq removes name from namesOfSeqs -// } -// } -// } -// -// globaldata->gTreemap = treeMap; -// } + //output any names that are in group file but not in tree + if (numNamesInTree < treeMap->getNumSeqs()) { + for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) { + //is that name in the tree? + int count = 0; + for (int j = 0; j < globaldata->Treenames.size(); j++) { + if (treeMap->namesOfSeqs[i] == globaldata->Treenames[j]) { break; } //found it + count++; + } + + if (m->control_pressed) { + for (int i = 0; i < T.size(); i++) { delete T[i]; } + globaldata->gTree.clear(); + delete globaldata->gTreemap; + return 0; + } + + //then you did not find it so report it + if (count == globaldata->Treenames.size()) { + //if it is in your namefile then don't remove + map::iterator it = nameMap.find(treeMap->namesOfSeqs[i]); + + if (it == nameMap.end()) { + m->mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine(); + treeMap->removeSeq(treeMap->namesOfSeqs[i]); + i--; //need this because removeSeq removes name from namesOfSeqs + } + } + } + + globaldata->gTreemap = treeMap; + } + return 0; } catch(exception& e) { @@ -205,6 +206,7 @@ int ReadTreeCommand::execute(){ int ReadTreeCommand::readNamesFile() { try { globaldata->names.clear(); + numUniquesInName = 0; ifstream in; m->openInputFile(namefile, in); @@ -215,6 +217,8 @@ int ReadTreeCommand::readNamesFile() { while(!in.eof()) { in >> first >> second; m->gobble(in); + numUniquesInName++; + itNames = globaldata->names.find(first); if (itNames == globaldata->names.end()) { globaldata->names[first] = second; @@ -224,7 +228,7 @@ int ReadTreeCommand::readNamesFile() { m->splitAtComma(second, dupNames); for (int i = 0; i < dupNames.size(); i++) { nameMap[dupNames[i]] = dupNames[i]; } - }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); globaldata->names.clear(); return 1; } + }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); globaldata->names.clear(); namefile = ""; return 1; } } in.close(); diff --git a/readtreecommand.h b/readtreecommand.h index b0d10f7..5d5b9b5 100644 --- a/readtreecommand.h +++ b/readtreecommand.h @@ -32,6 +32,7 @@ private: map nameMap; int readNamesFile(); + int numUniquesInName; }; diff --git a/sharedcommand.cpp b/sharedcommand.cpp index 2c1b6af..fb3691e 100644 --- a/sharedcommand.cpp +++ b/sharedcommand.cpp @@ -259,6 +259,7 @@ int SharedCommand::execute(){ globaldata->setGroupFile(""); globaldata->setSharedFile(filename); + if (m->control_pressed) { delete input; globaldata->ginput = NULL; remove(filename.c_str()); @@ -286,11 +287,15 @@ void SharedCommand::printSharedData(vector thislookup) { if (order.size() == 0) { //user has not specified an order so do aplabetically sort(thislookup.begin(), thislookup.end(), compareSharedRabunds); + globaldata->Groups.clear(); + //initialize bin values for (int i = 0; i < thislookup.size(); i++) { out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t'; thislookup[i]->print(out); + globaldata->Groups.push_back(thislookup[i]->getGroup()); + RAbundVector rav = thislookup[i]->getRAbundVector(); m->openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()])); rav.print(*(filehandles[thislookup[i]->getGroup()])); @@ -305,6 +310,7 @@ void SharedCommand::printSharedData(vector thislookup) { myMap[thislookup[i]->getGroup()] = thislookup[i]; } + globaldata->Groups.clear(); //loop through ordered list and print the rabund for (int i = 0; i < order.size(); i++) { @@ -313,6 +319,8 @@ void SharedCommand::printSharedData(vector thislookup) { if(myIt != myMap.end()) { //we found it out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t'; (myIt->second)->print(out); + + globaldata->Groups.push_back((myIt->second)->getGroup()); RAbundVector rav = (myIt->second)->getRAbundVector(); m->openOutputFileAppend(fileroot + (myIt->second)->getGroup() + ".rabund", *(filehandles[(myIt->second)->getGroup()])); diff --git a/sharedrabundvector.cpp b/sharedrabundvector.cpp index c15dfac..6259ba2 100644 --- a/sharedrabundvector.cpp +++ b/sharedrabundvector.cpp @@ -112,6 +112,7 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), if (globaldata->gGroupmap == NULL) { //save group in groupmap + groupmap->namesOfGroups.push_back(groupN); groupmap->groupIndex[groupN] = count; } diff --git a/summarycommand.cpp b/summarycommand.cpp index 70470b0..208544a 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -445,7 +445,7 @@ vector SummaryCommand::parseSharedFile(string filename) { } } //********************************************************************************************************************** -string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector outputNames) { +string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector& outputNames) { try { ofstream out; @@ -501,7 +501,9 @@ string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector< } //close each groups summary file - for (int i=0; i groups; vector parseSharedFile(string); - string createGroupSummaryFile(int, int, vector); + string createGroupSummaryFile(int, int, vector&); }; diff --git a/tree.cpp b/tree.cpp index a104477..dbb3f44 100644 --- a/tree.cpp +++ b/tree.cpp @@ -70,7 +70,7 @@ void Tree::addNamesToCounts() { //go through each leaf and update its pcounts and pgroups - float A = clock(); + //float A = clock(); for (int i = 0; i < numLeaves; i++) { @@ -137,8 +137,8 @@ void Tree::addNamesToCounts() { }//end else }//end for - float B = clock(); - cout << "addNamesToCounts\t" << (B - A) / CLOCKS_PER_SEC << endl; + //float B = clock(); + //cout << "addNamesToCounts\t" << (B - A) / CLOCKS_PER_SEC << endl; } catch(exception& e) { @@ -175,7 +175,7 @@ void Tree::setIndex(string searchName, int index) { /*****************************************************************/ int Tree::assembleTree() { try { - float A = clock(); + //float A = clock(); //if user has given a names file we want to include that info in the pgroups and pcount info. if(globaldata->names.size() != 0) { addNamesToCounts(); } @@ -187,8 +187,8 @@ int Tree::assembleTree() { tree[i].pGroups = (mergeGroups(i)); tree[i].pcount = (mergeGcounts(i)); } - float B = clock(); - cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl; + //float B = clock(); + //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl; return 0; } catch(exception& e) { diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp index eb3c334..9594767 100644 --- a/treegroupscommand.cpp +++ b/treegroupscommand.cpp @@ -244,7 +244,8 @@ int TreeGroupCommand::execute(){ if (format == "sharedfile") { //if the users entered no valid calculators don't execute command if (treeCalculators.size() == 0) { m->mothurOut("You have given no valid calculators."); m->mothurOutEndLine(); return 0; } - + + if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; globaldata->gGroupmap = NULL; } //you have groups read = new ReadOTUFile(globaldata->inputFileName); read->read(&*globaldata); @@ -258,17 +259,17 @@ int TreeGroupCommand::execute(){ //used in tree constructor globaldata->runParse = false; - //clear globaldatas old tree names if any - globaldata->Treenames.clear(); - - //fills globaldatas tree names - globaldata->Treenames = globaldata->Groups; - //create treemap class from groupmap for tree class to use tmap = new TreeMap(); tmap->makeSim(globaldata->gGroupmap); globaldata->gTreemap = tmap; + //clear globaldatas old tree names if any + globaldata->Treenames.clear(); + + //fills globaldatas tree names + globaldata->Treenames = globaldata->Groups; + if (m->control_pressed) { return 0; } //create tree file @@ -606,7 +607,7 @@ int TreeGroupCommand::process(vector thisLookup) { subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); data = treeCalculators[i]->getValues(subset); //saves the calculator outputs - + //cout << thisLookup[k]->getGroup() << '\t' << thisLookup[l]->getGroup() << '\t' << (1.0 - data[0]) << endl; if (m->control_pressed) { return 1; } //save values in similarity matrix -- 2.39.2