]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed phylo.diversity
authorwestcott <westcott>
Fri, 27 Aug 2010 13:50:03 +0000 (13:50 +0000)
committerwestcott <westcott>
Fri, 27 Aug 2010 13:50:03 +0000 (13:50 +0000)
17 files changed:
aligncommand.cpp
clearcutcommand.cpp
makefile
mothurout.cpp
phylodiversitycommand.cpp
phylodiversitycommand.h
preclustercommand.cpp
preclustercommand.h
readotu.cpp
readtreecommand.cpp
readtreecommand.h
sharedcommand.cpp
sharedrabundvector.cpp
summarycommand.cpp
summarycommand.h
tree.cpp
treegroupscommand.cpp

index 0f4c89a28ce7ca2b01eae1de815bebfb78ff6a77..5aabeef9437bd9f5fdd90f6b785e52065afc3115 100644 (file)
@@ -90,6 +90,8 @@ AlignCommand::AlignCommand(string option)  {
                                
                                //go through files and make sure they are good, if not, then disregard them
                                for (int i = 0; i < candidateFileNames.size(); i++) {
+                                       //candidateFileNames[i] = m->getFullPathName(candidateFileNames[i]);
+                                       
                                        if (inputDir != "") {
                                                string path = m->hasPath(candidateFileNames[i]);
                                                //if the user has not given a path then, add inputdir. else leave path alone.
@@ -348,7 +350,6 @@ int AlignCommand::execute(){
 #else
 
                vector<unsigned long int> positions = m->divideFile(candidateFileNames[s], processors);
-                               
                for (int i = 0; i < (positions.size()-1); i++) {
                        lines.push_back(new linePair(positions[i], positions[(i+1)]));
                }       
index 7d0d539eddeeae6e3ee6bfbe3e114c6b90e1bb7a..eabcc7360bfd8c96793a577b3a98953d33199227 100644 (file)
@@ -137,7 +137,6 @@ void ClearcutCommand::help(){
        try {
                m->mothurOut("The clearcut command interfaces mothur with the clearcut program written by Initiative for Bioinformatics and Evolutionary Studies (IBEST) at the University of Idaho.\n");
                m->mothurOut("For more information about clearcut refer to http://bioinformatics.hungry.com/clearcut/ \n");
-               m->mothurOut("The clearcut executable must be in a folder called clearcut in the same folder as your mothur executable, similar to mothur's requirements for using blast. \n");
                m->mothurOut("The clearcut command parameters are phylip, fasta, version, verbose, quiet, seed, norandom, shuffle, neighbor, expblen, expdist, ntrees, matrixout, stdout, kimura, jukes, protein, DNA. \n");
                m->mothurOut("The phylip parameter allows you to enter your phylip formatted distance matrix. \n");
                m->mothurOut("The fasta parameter allows you to enter your aligned fasta file, if you enter a fastafile you specify if the sequences are DNA or protein using the DNA or protein parameters. \n");
@@ -255,6 +254,10 @@ int ClearcutCommand::execute() {
                
                clearcut_main(numArgs, clearcutParameters); 
                
+               //free memory
+               for(int i = 0; i < cPara.size(); i++)  {  delete[] cPara[i];  }
+               delete[] clearcutParameters; 
+               
                if (!stdoutWanted) {    
                        m->mothurOutEndLine();
                        m->mothurOut("Output File Names: "); m->mothurOutEndLine();
index dc5065db04a20db83b186cfcc3fe81e2d5901356..ffee66ec76967b2f69f376706ae72d24e78911e1 100644 (file)
--- a/makefile
+++ b/makefile
@@ -15,8 +15,8 @@ CXXFLAGS += -O3
 
 MOTHUR_FILES = "\"../Release\""
 
-RELEASE_DATE = "\"8/5/2010\""
-VERSION = "\"1.12.3\""
+RELEASE_DATE = "\"8/30/2010\""
+VERSION = "\"1.13.0\""
 
 CXXFLAGS += -DRELEASE_DATE=${RELEASE_DATE} -DVERSION=${VERSION}
 
index fe3ea097e64faf1b0418f22fa3b248b026ad4dd1..88a18ed452314c38c5d2cf6909ea44db465d61af 100644 (file)
@@ -257,6 +257,8 @@ int MothurOut::mem_usage(double& vm_usage, double& resident_set) {
 /***********************************************************************/
 int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){
        try {
+               fileName = getFullPathName(fileName);
+               
                fileHandle.open(fileName.c_str(), ios::app);
                if(!fileHandle) {
                        mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
@@ -445,6 +447,9 @@ string MothurOut::getExtension(string longName){
 /***********************************************************************/
 bool MothurOut::isBlank(string fileName){
        try {
+               
+               fileName = getFullPathName(fileName);
+               
                ifstream fileHandle;
                fileHandle.open(fileName.c_str());
                if(!fileHandle) {
@@ -829,7 +834,8 @@ vector<unsigned long int> MothurOut::setFilePosFasta(string filename, int& num)
 /**************************************************************************************************/
 vector<unsigned long int> MothurOut::setFilePosEachLine(string filename, int& num) {
        try {
-
+                       filename = getFullPathName(filename);
+                       
                        vector<unsigned long int> positions;
                        ifstream in;
                        openInputFile(filename, in);
@@ -851,7 +857,7 @@ vector<unsigned long int> MothurOut::setFilePosEachLine(string filename, int& nu
                
                        FILE * pFile;
                        unsigned long int size;
-               
+                       
                        //get num bytes in file
                        pFile = fopen (filename.c_str(),"rb");
                        if (pFile==NULL) perror ("Error opening file");
@@ -881,6 +887,8 @@ vector<unsigned long int> MothurOut::divideFile(string filename, int& proc) {
                FILE * pFile;
                unsigned long int size;
                
+               filename = getFullPathName(filename);
+               
                //get num bytes in file
                pFile = fopen (filename.c_str(),"rb");
                if (pFile==NULL) perror ("Error opening file");
@@ -893,7 +901,7 @@ vector<unsigned long int> MothurOut::divideFile(string filename, int& proc) {
                //estimate file breaks
                unsigned long int chunkSize = 0;
                chunkSize = size / proc;
-               
+       
                //file to small to divide by processors
                if (chunkSize == 0)  {  proc = 1;       filePos.push_back(size); return filePos;        }
        
@@ -914,15 +922,16 @@ vector<unsigned long int> MothurOut::divideFile(string filename, int& proc) {
                        
                        //there was not another sequence before the end of the file
                        unsigned long int sanityPos = in.tellg();
-                       if (sanityPos = -1) {   break;  }
-                       else {   filePos.push_back(newSpot);  }
+
+                       if (sanityPos == -1) {  break;  }
+                       else {  filePos.push_back(newSpot);  }
                        
                        in.close();
                }
                
                //save end pos
                filePos.push_back(size);
-               
+
                //sanity check filePos
                for (int i = 0; i < (filePos.size()-1); i++) {
                        if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
@@ -1112,7 +1121,21 @@ void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbo
 //This function parses the estimator options and puts them in a vector
 void MothurOut::splitAtDash(string& estim, vector<string>& container) {
        try {
-               string individual;
+               string individual = "";
+               int estimLength = estim.size();
+               for(int i=0;i<estimLength;i++){
+                       if(estim[i] == '-'){
+                               container.push_back(individual);
+                               individual = "";                                
+                       }
+                       else{
+                               individual += estim[i];
+                       }
+               }
+               container.push_back(individual);
+
+       
+       /*      string individual;
                
                while (estim.find_first_of('-') != -1) {
                        individual = estim.substr(0,estim.find_first_of('-'));
@@ -1122,7 +1145,7 @@ void MothurOut::splitAtDash(string& estim, vector<string>& container) {
                        }
                }
                //get last one
-               container.push_back(estim);
+               container.push_back(estim); */
        }
        catch(exception& e) {
                errorOut(e, "MothurOut", "splitAtDash");
@@ -1134,17 +1157,31 @@ void MothurOut::splitAtDash(string& estim, vector<string>& container) {
 //This function parses the label options and puts them in a set
 void MothurOut::splitAtDash(string& estim, set<string>& container) {
        try {
-               string individual;
-               
-               while (estim.find_first_of('-') != -1) {
-                       individual = estim.substr(0,estim.find_first_of('-'));
-                       if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
-                               estim = estim.substr(estim.find_first_of('-')+1, estim.length());
+               string individual = "";
+               int estimLength = estim.size();
+               for(int i=0;i<estimLength;i++){
+                       if(estim[i] == '-'){
                                container.insert(individual);
+                               individual = "";                                
+                       }
+                       else{
+                               individual += estim[i];
                        }
                }
+               container.insert(individual);
+
+       //      string individual;
+               
+       //      while (estim.find_first_of('-') != -1) {
+       //              individual = estim.substr(0,estim.find_first_of('-'));
+       //              if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
+       //                      estim = estim.substr(estim.find_first_of('-')+1, estim.length());
+       //                      container.insert(individual);
+       //              }
+       //      }
                //get last one
-               container.insert(estim);
+       //      container.insert(estim);
+       
        }
        catch(exception& e) {
                errorOut(e, "MothurOut", "splitAtDash");
index 619a7f7786cbe9852f87ea5953ff52459284f5b3..98002e7f9ebbffff8ea4064c87d09ec8197a16ea 100644 (file)
@@ -183,17 +183,19 @@ int PhyloDiversityCommand::execute(){
                
                                //initialize counts
                                map<string, int> counts;
-                               for (int j = 0; j < globaldata->Groups.size(); j++) {  counts[globaldata->Groups[j]] = 0; }
+                               map< string, set<int> > countedBranch;  
+                               for (int j = 0; j < globaldata->Groups.size(); j++) {  counts[globaldata->Groups[j]] = 0; countedBranch[globaldata->Groups[j]].insert(-2);  }  //add dummy index to initialize countedBranch sets
                                
                                for(int k = 0; k < numLeafNodes; k++){
                                                
                                        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
                                        
                                        //calc branch length of randomLeaf k
-                                       float br = calcBranchLength(trees[i], randomLeaf[k]);
+                                       float br = calcBranchLength(trees[i], randomLeaf[k], countedBranch);
                        
                                        //for each group in the groups update the total branch length accounting for the names file
                                        vector<string> groups = trees[i]->tree[randomLeaf[k]].getGroup();
+                                       
                                        for (int j = 0; j < groups.size(); j++) {
                                                int numSeqsInGroupJ = 0;
                                                map<string, int>::iterator it;
@@ -201,9 +203,11 @@ int PhyloDiversityCommand::execute(){
                                                if (it != trees[i]->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
                                                        numSeqsInGroupJ = it->second;
                                                }
-                                       
-                                               for (int s = (counts[groups[j]]+1); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
-                                                       diversity[groups[j]][s] = diversity[groups[j]][s-1] + ((float) numSeqsInGroupJ * br);
+                                               
+                                               if (numSeqsInGroupJ != 0) {     diversity[groups[j]][(counts[groups[j]]+1)] = diversity[groups[j]][counts[groups[j]]] + br;  }
+                                               
+                                               for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
+                                                       diversity[groups[j]][s] = diversity[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
                                                }
                                                counts[groups[j]] += numSeqsInGroupJ;
                                        }
@@ -268,6 +272,7 @@ void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofst
                                        else            {       score = div[globaldata->Groups[j]][numSampled] / (float)numIters;       }
                                        
                                        out << setprecision(4) << score << '\t';
+       
                                }else { out << "NA" << '\t'; }
                        }
                        out << endl;
@@ -318,26 +323,34 @@ void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float>
        }
 }
 //**********************************************************************************************************************
-float PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf){
+float PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set<int> >& counted){
        try {
 
                //calc the branch length
                //while you aren't at root
                float sum = 0.0;
                int index = leaf;
-
+               
+               vector<string> groups = t->tree[leaf].getGroup();
+                                       
                while(t->tree[index].getParent() != -1){
                        
                        //if you have a BL
                        if(t->tree[index].getBranchLength() != -1){
-                               sum += abs(t->tree[index].getBranchLength());
+                               if (counted[groups[0]].count(index) == 0) { //you have not already counted this branch
+                                       sum += abs(t->tree[index].getBranchLength());
+                                       for (int j = 0; j < groups.size(); j++) {  counted[groups[j]].insert(index);  }
+                               }
                        }
                        index = t->tree[index].getParent();
                }
                        
                //get last breanch length added
                if(t->tree[index].getBranchLength() != -1){
-                       sum += abs(t->tree[index].getBranchLength());
+                       if (counted[groups[0]].count(index) == 0) { //you have not already counted this branch
+                               sum += abs(t->tree[index].getBranchLength());
+                               for (int j = 0; j < groups.size(); j++) {  counted[groups[j]].insert(index);  }
+                       }
                }
                
                return sum;
index 76c996c405a92fdb7f5c51d67728c46cf9c902b0..a7e0d2e5d090f3e567446fe4e76d9ee3dd7d776b 100644 (file)
@@ -33,7 +33,7 @@ class PhyloDiversityCommand : public Command {
                
                void printData(set<int>&, map< string, vector<float> >&, ofstream&, int);
                void printSumData(map< string, vector<float> >&, ofstream&, int);
-               float calcBranchLength(Tree*, int);
+               float calcBranchLength(Tree*, int, map< string, set<int> >&);
 };
 
 #endif
index 0bd91328f0b6aab5a1f8f20cdaaefab2876cc15d..a7f211345697a72c18b6fa0aaa1ac40200c46d94 100644 (file)
@@ -125,69 +125,65 @@ int PreClusterCommand::execute(){
                if (numSeqs == 0) { m->mothurOut("Error reading fasta file...please correct."); m->mothurOutEndLine(); return 0;  }
                if (diffs > length) { m->mothurOut("Error: diffs is greater than your sequence length."); m->mothurOutEndLine(); return 0;  }
                
-               string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
-               string newFastaFile = fileroot + "precluster" + m->getExtension(fastafile);
-               string newNamesFile = fileroot + "precluster.names";
-               ofstream outFasta;
-               ofstream outNames;
-               
-               m->openOutputFile(newFastaFile, outFasta);
-               m->openOutputFile(newNamesFile, outNames);
-
+               //clear sizes since you only needed this info to build the alignSeqs seqPNode structs
+//             sizes.clear();
+       
                //sort seqs by number of identical seqs
-               alignSeqs.sort(comparePriority);
-
+               sort(alignSeqs.begin(), alignSeqs.end(), comparePriority);
+       
                int count = 0;
-               int i = 0;
+
                //think about running through twice...
-               list<seqPNode>::iterator itList;
-               list<seqPNode>::iterator itList2;
-               for (itList = alignSeqs.begin(); itList != alignSeqs.end();) {
+               for (int i = 0; i < numSeqs; i++) {
                        
-                       //try to merge it with all smaller seqs
-                       for (itList2 = alignSeqs.begin(); itList2 != alignSeqs.end();) {
-                               
-                               if (m->control_pressed) { outFasta.close(); outNames.close(); remove(newFastaFile.c_str()); remove(newNamesFile.c_str());  return 0; }
-                               
+                       //are you active
+                       //                      itActive = active.find(alignSeqs[i].seq.getName());
                        
-                               if (itList->seq.getName() != itList2->seq.getName()) { //you don't want to merge with yourself
-                                       //are you within "diff" bases
+                       if (alignSeqs[i].active) {  //this sequence has not been merged yet
+                               
+                               //try to merge it with all smaller seqs
+                               for (int j = i+1; j < numSeqs; j++) {
                                        
-                                       int mismatch = calcMisMatches((*itList).seq.getAligned(), (*itList2).seq.getAligned());
-
-                                       if (mismatch <= diffs) {
-                                               //merge
-                                               (*itList).names += ',' + (*itList2).names;
-                                               (*itList).numIdentical += (*itList2).numIdentical;
+                                       if (m->control_pressed) { return 0; }
+                                       
+                                       if (alignSeqs[j].active) {  //this sequence has not been merged yet
+                                               //are you within "diff" bases
+                                               int mismatch = calcMisMatches(alignSeqs[i].seq.getAligned(), alignSeqs[j].seq.getAligned());
                                                
-                                               itList2 = alignSeqs.erase(itList2); //itList2--;
-                                               count++;
-                                       }else{ itList2++; }
-                               }else{ itList2++; }
-
-                       }
+                                               if (mismatch <= diffs) {
+                                                       //merge
+                                                       alignSeqs[i].names += ',' + alignSeqs[j].names;
+                                                       alignSeqs[i].numIdentical += alignSeqs[j].numIdentical;
 
-                       //ouptut this sequence
-                       printData(outFasta, outNames, (*itList));
-                       
-                       //remove sequence
-                       itList = alignSeqs.erase(itList); 
-                       
-                       i++;
+                                                       alignSeqs[j].active = 0;
+                                                       alignSeqs[j].numIdentical = 0;
+                                                       count++;
+                                               }
+                                       }//end if j active
+                               }//end if i != j
                        
+                       //remove from active list 
+                               alignSeqs[i].active = 0;
+                               
+                       }//end if active i
                        if(i % 100 == 0)        { m->mothurOut(toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine(); }
                }
-                               
-               if(i % 100 != 0)        { m->mothurOut(toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine(); }
                
-               outFasta.close();
-               outNames.close();
+               if(numSeqs % 100 != 0)  { m->mothurOut(toString(numSeqs) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine();   }
+       
                
-               if (m->control_pressed) {  remove(newFastaFile.c_str()); remove(newNamesFile.c_str());  return 0; }
-
-               m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences.");
-               m->mothurOut("Total number of sequences before precluster was " + toString(numSeqs) + "."); m->mothurOutEndLine();
-               m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); 
+               string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
+               
+               string newFastaFile = fileroot + "precluster" + m->getExtension(fastafile);
+               string newNamesFile = fileroot + "precluster.names";
+               
+               if (m->control_pressed) { return 0; }
+               
+               m->mothurOut("Total number of sequences before precluster was " + toString(alignSeqs.size()) + "."); m->mothurOutEndLine();
+               m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); 
+               printData(newFastaFile, newNamesFile);
+               
+               m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); 
                
                if (m->control_pressed) { remove(newFastaFile.c_str()); remove(newNamesFile.c_str()); return 0; }
                
@@ -287,10 +283,25 @@ int PreClusterCommand::calcMisMatches(string seq1, string seq2){
 
 /**************************************************************************************************/
 
-void PreClusterCommand::printData(ofstream& outFasta, ofstream& outNames, seqPNode thisSeq){
+void PreClusterCommand::printData(string newfasta, string newname){
        try {
-               thisSeq.seq.printSequence(outFasta); 
-               outNames << thisSeq.seq.getName() << '\t' << thisSeq.names << endl;
+               ofstream outFasta;
+               ofstream outNames;
+               
+               m->openOutputFile(newfasta, outFasta);
+               m->openOutputFile(newname, outNames);
+                               
+               
+               for (int i = 0; i < alignSeqs.size(); i++) {
+                       if (alignSeqs[i].numIdentical != 0) {
+                               alignSeqs[i].seq.printSequence(outFasta); 
+                               outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl;
+                       }
+               }
+               
+               outFasta.close();
+               outNames.close();
+               
        }
        catch(exception& e) {
                m->errorOut(e, "PreClusterCommand", "printData");
index a26fda41cba8e316fd8649604e6934a5960f22f4..de6a5727575a7f35d2605239968a37435faa388f 100644 (file)
@@ -20,8 +20,9 @@ struct seqPNode {
        int numIdentical;
        Sequence seq;
        string names;
+       bool active;
        seqPNode() {}
-       seqPNode(int n, Sequence s, string nm) : numIdentical(n), seq(s), names(nm) {}
+       seqPNode(int n, Sequence s, string nm) : numIdentical(n), seq(s), names(nm), active(1) {}
        ~seqPNode() {}
 };
 /************************************************************/
@@ -38,7 +39,7 @@ private:
        int diffs, length;
        bool abort;
        string fastafile, namefile, outputDir;
-       list<seqPNode> alignSeqs; //maps the number of identical seqs to a sequence
+       vector<seqPNode> alignSeqs; //maps the number of identical seqs to a sequence
        map<string, string> names; //represents the names file first column maps to second column
        map<string, int> sizes;  //this map a seq name to the number of identical seqs in the names file
        map<string, int>::iterator itSize; 
@@ -48,7 +49,7 @@ private:
        void readNameFile();
        //int readNamesFASTA();
        int calcMisMatches(string, string);
-       void printData(ofstream&, ofstream&, seqPNode); //fasta filename, names file name
+       void printData(string, string); //fasta filename, names file name
 };
 
 /************************************************************/
index 3952abca652ef1711532e07dcc859062eeb193c2..00448f8a24cced84a346dfedc434ffa838698371 100644 (file)
@@ -35,7 +35,7 @@ void ReadOTUFile::read(GlobalData* globaldata){
                //memory leak prevention
                //if (globaldata->ginput != NULL) { delete globaldata->ginput;  }
                globaldata->ginput = input;     //saving to be used by collector and rarefact commands.
-       
+
                if ((globaldata->getFormat() == "list") || (globaldata->getFormat() == "rabund") || (globaldata->getFormat() == "sabund")) {//you are reading a list, rabund or sabund file for collect, rarefaction or summary.
 
 //cout << input << '\t' << globaldata << endl;
index 0a86f36dfdced49cb6151205e82c0372556e1209..fdfdb050f22c4ccc281646d1d56c8a5aa6b33806 100644 (file)
@@ -154,46 +154,47 @@ int ReadTreeCommand::execute(){
                }
 
                
-//             Sarah, isn't this checking already done when assigning the sequences to the groups?  it makes read.tree
-//             wicked slow...  For some reason my tree is coming in here eventhough the number of sequences in the tree
-//             agrees with the number of lines in the name file and the number of sequences represented by the name file
-//             agrees with the number of sequences in the group file
-
-               //output any names that are in group file but not in tree
+               //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
+               int numNamesInTree;
+               if (namefile != "")  {  
+                       if (numUniquesInName == globaldata->Treenames.size()) {  numNamesInTree = nameMap.size();  }
+                       else {   numNamesInTree = globaldata->Treenames.size();  }
+               }else {  numNamesInTree = globaldata->Treenames.size();  }
                
-//             if (globaldata->Treenames.size() < treeMap->getNumSeqs()) {
-//                     cout << "in here" << endl;
-//                     for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) {
-//                             //is that name in the tree?
-//                             int count = 0;
-//                             for (int j = 0; j < globaldata->Treenames.size(); j++) {
-//                                     if (treeMap->namesOfSeqs[i] == globaldata->Treenames[j]) { break; } //found it
-//                                     count++;
-//                             }
-//                             
-//                             if (m->control_pressed) {  
-//                                     for (int i = 0; i < T.size(); i++) {  delete T[i];  }
-//                                     globaldata->gTree.clear();
-//                                     delete globaldata->gTreemap;
-//                                     return 0;
-//                             }
-//                             
-//                             //then you did not find it so report it 
-//                             if (count == globaldata->Treenames.size()) { 
-//                                     //if it is in your namefile then don't remove
-//                                     map<string, string>::iterator it = nameMap.find(treeMap->namesOfSeqs[i]);
-//                                     
-//                                     if (it == nameMap.end()) {
-//                                             m->mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
-//                                             treeMap->removeSeq(treeMap->namesOfSeqs[i]);
-//                                             i--; //need this because removeSeq removes name from namesOfSeqs
-//                                     }
-//                             }
-//                     }
-//                     
-//                     globaldata->gTreemap = treeMap;
-//             }
                
+               //output any names that are in group file but not in tree
+               if (numNamesInTree < treeMap->getNumSeqs()) {
+                       for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) {
+                               //is that name in the tree?
+                               int count = 0;
+                               for (int j = 0; j < globaldata->Treenames.size(); j++) {
+                                       if (treeMap->namesOfSeqs[i] == globaldata->Treenames[j]) { break; } //found it
+                                       count++;
+                               }
+                               
+                               if (m->control_pressed) {  
+                                       for (int i = 0; i < T.size(); i++) {  delete T[i];  }
+                                       globaldata->gTree.clear();
+                                       delete globaldata->gTreemap;
+                                       return 0;
+                               }
+                               
+                               //then you did not find it so report it 
+                               if (count == globaldata->Treenames.size()) { 
+                                       //if it is in your namefile then don't remove
+                                       map<string, string>::iterator it = nameMap.find(treeMap->namesOfSeqs[i]);
+                                       
+                                       if (it == nameMap.end()) {
+                                               m->mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
+                                               treeMap->removeSeq(treeMap->namesOfSeqs[i]);
+                                               i--; //need this because removeSeq removes name from namesOfSeqs
+                                       }
+                               }
+                       }
+                       
+                       globaldata->gTreemap = treeMap;
+               }
+
                return 0;
        }
        catch(exception& e) {
@@ -205,6 +206,7 @@ int ReadTreeCommand::execute(){
 int ReadTreeCommand::readNamesFile() {
        try {
                globaldata->names.clear();
+               numUniquesInName = 0;
                
                ifstream in;
                m->openInputFile(namefile, in);
@@ -215,6 +217,8 @@ int ReadTreeCommand::readNamesFile() {
                while(!in.eof()) {
                        in >> first >> second; m->gobble(in);
                        
+                       numUniquesInName++;
+
                        itNames = globaldata->names.find(first);
                        if (itNames == globaldata->names.end()) {  
                                globaldata->names[first] = second; 
@@ -224,7 +228,7 @@ int ReadTreeCommand::readNamesFile() {
                                m->splitAtComma(second, dupNames);
                                
                                for (int i = 0; i < dupNames.size(); i++) {     nameMap[dupNames[i]] = dupNames[i];  }
-                       }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); globaldata->names.clear(); return 1; }                        
+                       }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); globaldata->names.clear(); namefile = ""; return 1; }                 
                }
                in.close();
                
index b0d10f70b57e9fa8ec27622ee9566b67d5c2853c..5d5b9b5eb74f17983d58fd55610218d2c23a8866 100644 (file)
@@ -32,6 +32,7 @@ private:
        map<string, string> nameMap;
        
        int readNamesFile();
+       int numUniquesInName;
 
 };
 
index 2c1b6af3004e7351085e4538f7023e342bc181f9..fb3691e662a7ee2b378d8e1dd13e4221b698106d 100644 (file)
@@ -259,6 +259,7 @@ int SharedCommand::execute(){
                globaldata->setGroupFile("");
                globaldata->setSharedFile(filename);
                
+               
                if (m->control_pressed) { 
                                delete input;  globaldata->ginput = NULL; 
                                remove(filename.c_str()); 
@@ -286,11 +287,15 @@ void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
                if (order.size() == 0) { //user has not specified an order so do aplabetically
                        sort(thislookup.begin(), thislookup.end(), compareSharedRabunds);
                        
+                       globaldata->Groups.clear();
+                       
                        //initialize bin values
                        for (int i = 0; i < thislookup.size(); i++) {
                                out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
                                thislookup[i]->print(out);
                                
+                               globaldata->Groups.push_back(thislookup[i]->getGroup());
+                               
                                RAbundVector rav = thislookup[i]->getRAbundVector();
                                m->openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
                                rav.print(*(filehandles[thislookup[i]->getGroup()]));
@@ -305,6 +310,7 @@ void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
                                myMap[thislookup[i]->getGroup()] = thislookup[i];
                        }
                        
+                       globaldata->Groups.clear();
                        
                        //loop through ordered list and print the rabund
                        for (int i = 0; i < order.size(); i++) {
@@ -313,6 +319,8 @@ void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
                                if(myIt != myMap.end()) { //we found it
                                        out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t';
                                        (myIt->second)->print(out);
+                                       
+                                       globaldata->Groups.push_back((myIt->second)->getGroup());
                                
                                        RAbundVector rav = (myIt->second)->getRAbundVector();
                                        m->openOutputFileAppend(fileroot + (myIt->second)->getGroup() + ".rabund", *(filehandles[(myIt->second)->getGroup()]));
index c15dfac5a1cde0b8fb08173702c44b227e06c2c0..6259ba2b03796ff2dc71bf9125217287eb545f4c 100644 (file)
@@ -112,6 +112,7 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0),
                        
                        if (globaldata->gGroupmap == NULL) { 
                                //save group in groupmap
+       
                                groupmap->namesOfGroups.push_back(groupN);
                                groupmap->groupIndex[groupN] = count;
                        }
index 70470b015684b386947056686789da5077000565..208544adf1bd7ec2f0f59603c4eb86840275cf80 100644 (file)
@@ -445,7 +445,7 @@ vector<string> SummaryCommand::parseSharedFile(string filename) {
        }
 }
 //**********************************************************************************************************************
-string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector<string> outputNames) {
+string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector<string>& outputNames) {
        try {
                
                ofstream out;
@@ -501,7 +501,9 @@ string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector<
                }       
                
                //close each groups summary file
-               for (int i=0; i<outputNames.size(); i++) {  (*(filehandles[outputNames[i]])).close();  }
+               for (int i=0; i<outputNames.size(); i++) {  (*(filehandles[outputNames[i]])).close();  remove(outputNames[i].c_str());  }
+               outputNames.clear();
+               
                out.close();
                
                //return combine file name
index 6b3fa69c67e46cd92d44f0064404d2743dd29b7a..7c7ee2c8eea829a9c6dc95fa794df5fcccc7bda1 100644 (file)
@@ -43,7 +43,7 @@ private:
        vector<string> groups;
        
        vector<string> parseSharedFile(string);
-       string createGroupSummaryFile(int, int, vector<string>);
+       string createGroupSummaryFile(int, int, vector<string>&);
 
 
 };
index a104477d0a90280a528d5cea4deb80ba2133ba09..dbb3f44403a124679f2e44f0de490faaacbc36ee 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -70,7 +70,7 @@ void Tree::addNamesToCounts() {
                                
                //go through each leaf and update its pcounts and pgroups
                
-               float A = clock();
+               //float A = clock();
 
                for (int i = 0; i < numLeaves; i++) {
 
@@ -137,8 +137,8 @@ void Tree::addNamesToCounts() {
                        }//end else
                }//end for              
                
-               float B = clock();
-               cout << "addNamesToCounts\t" << (B - A) / CLOCKS_PER_SEC << endl;       
+               //float B = clock();
+               //cout << "addNamesToCounts\t" << (B - A) / CLOCKS_PER_SEC << endl;     
 
        }
        catch(exception& e) {
@@ -175,7 +175,7 @@ void Tree::setIndex(string searchName, int index) {
 /*****************************************************************/
 int Tree::assembleTree() {
        try {
-               float A = clock();
+               //float A = clock();
 
                //if user has given a names file we want to include that info in the pgroups and pcount info.
                if(globaldata->names.size() != 0) {  addNamesToCounts();  }
@@ -187,8 +187,8 @@ int Tree::assembleTree() {
                        tree[i].pGroups = (mergeGroups(i));
                        tree[i].pcount = (mergeGcounts(i));
                }
-               float B = clock();
-               cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
+               //float B = clock();
+               //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
                return 0;
        }
        catch(exception& e) {
index eb3c33481d4ef68199f2b42e452ea93c1679bbf4..9594767f81ad234a969db760d7cc0be6e1575111 100644 (file)
@@ -244,7 +244,8 @@ int TreeGroupCommand::execute(){
                if (format == "sharedfile") {
                        //if the users entered no valid calculators don't execute command
                        if (treeCalculators.size() == 0) { m->mothurOut("You have given no valid calculators."); m->mothurOutEndLine(); return 0; }
-
+                       
+                       if (globaldata->gGroupmap != NULL) {  delete globaldata->gGroupmap;   globaldata->gGroupmap = NULL;  }
                        //you have groups
                        read = new ReadOTUFile(globaldata->inputFileName);      
                        read->read(&*globaldata); 
@@ -258,17 +259,17 @@ int TreeGroupCommand::execute(){
                        //used in tree constructor 
                        globaldata->runParse = false;
                        
-                       //clear globaldatas old tree names if any
-                       globaldata->Treenames.clear();
-               
-                       //fills globaldatas tree names
-                       globaldata->Treenames = globaldata->Groups;
-               
                        //create treemap class from groupmap for tree class to use
                        tmap = new TreeMap();
                        tmap->makeSim(globaldata->gGroupmap);
                        globaldata->gTreemap = tmap;
                        
+                       //clear globaldatas old tree names if any
+                       globaldata->Treenames.clear();
+                       
+                       //fills globaldatas tree names
+                       globaldata->Treenames = globaldata->Groups;
+               
                        if (m->control_pressed) { return 0; }
                        
                        //create tree file
@@ -606,7 +607,7 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                                                                subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
                                                                
                                                                data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
-                                                               
+                                               //cout << thisLookup[k]->getGroup() << '\t' << thisLookup[l]->getGroup() << '\t' << (1.0 - data[0]) << endl;
                                                                if (m->control_pressed) { return 1; }
                                                                
                                                                //save values in similarity matrix