fixed phylo.diversity

author westcott <westcott>

Fri, 27 Aug 2010 13:50:03 +0000 (13:50 +0000)

committer westcott <westcott>

Fri, 27 Aug 2010 13:50:03 +0000 (13:50 +0000)
author westcott <westcott>
Fri, 27 Aug 2010 13:50:03 +0000 (13:50 +0000)
committer westcott <westcott>
Fri, 27 Aug 2010 13:50:03 +0000 (13:50 +0000)
diff --git a/aligncommand.cpp b/aligncommand.cpp

index 0f4c89a28ce7ca2b01eae1de815bebfb78ff6a77..5aabeef9437bd9f5fdd90f6b785e52065afc3115 100644 (file)
--- a/aligncommand.cpp
+++ b/aligncommand.cpp
@@ -90,6 +90,8 @@ AlignCommand::AlignCommand(string option)  {
                                 
                                 //go through files and make sure they are good, if not, then disregard them
                                 for (int i = 0; i < candidateFileNames.size(); i++) {
+                                       //candidateFileNames[i] = m->getFullPathName(candidateFileNames[i]);
+                                       
                                         if (inputDir != "") {
                                                 string path = m->hasPath(candidateFileNames[i]);
                                                 //if the user has not given a path then, add inputdir. else leave path alone.
@@ -348,7 +350,6 @@ int AlignCommand::execute(){
  #else
  
                 vector<unsigned long int> positions = m->divideFile(candidateFileNames[s], processors);
-                               
                 for (int i = 0; i < (positions.size()-1); i++) {
                         lines.push_back(new linePair(positions[i], positions[(i+1)]));
                 }       
diff --git a/clearcutcommand.cpp b/clearcutcommand.cpp

index 7d0d539eddeeae6e3ee6bfbe3e114c6b90e1bb7a..eabcc7360bfd8c96793a577b3a98953d33199227 100644 (file)
--- a/clearcutcommand.cpp
+++ b/clearcutcommand.cpp
@@ -137,7 +137,6 @@ void ClearcutCommand::help(){
         try {
                 m->mothurOut("The clearcut command interfaces mothur with the clearcut program written by Initiative for Bioinformatics and Evolutionary Studies (IBEST) at the University of Idaho.\n");
                 m->mothurOut("For more information about clearcut refer to http://bioinformatics.hungry.com/clearcut/ \n");
-               m->mothurOut("The clearcut executable must be in a folder called clearcut in the same folder as your mothur executable, similar to mothur's requirements for using blast. \n");
                 m->mothurOut("The clearcut command parameters are phylip, fasta, version, verbose, quiet, seed, norandom, shuffle, neighbor, expblen, expdist, ntrees, matrixout, stdout, kimura, jukes, protein, DNA. \n");
                 m->mothurOut("The phylip parameter allows you to enter your phylip formatted distance matrix. \n");
                 m->mothurOut("The fasta parameter allows you to enter your aligned fasta file, if you enter a fastafile you specify if the sequences are DNA or protein using the DNA or protein parameters. \n");
@@ -255,6 +254,10 @@ int ClearcutCommand::execute() {
                 
                 clearcut_main(numArgs, clearcutParameters); 
                 
+               //free memory
+               for(int i = 0; i < cPara.size(); i++)  {  delete[] cPara[i];  }
+               delete[] clearcutParameters; 
+               
                 if (!stdoutWanted) {    
                         m->mothurOutEndLine();
                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
diff --git a/makefile b/makefile

index dc5065db04a20db83b186cfcc3fe81e2d5901356..ffee66ec76967b2f69f376706ae72d24e78911e1 100644 (file)
--- a/makefile
+++ b/makefile
@@ -15,8 +15,8 @@ CXXFLAGS += -O3
  
  MOTHUR_FILES = "\"../Release\""
  
-RELEASE_DATE = "\"8/5/2010\""
-VERSION = "\"1.12.3\""
+RELEASE_DATE = "\"8/30/2010\""
+VERSION = "\"1.13.0\""
  
  CXXFLAGS += -DRELEASE_DATE=${RELEASE_DATE} -DVERSION=${VERSION}
  
diff --git a/mothurout.cpp b/mothurout.cpp

index fe3ea097e64faf1b0418f22fa3b248b026ad4dd1..88a18ed452314c38c5d2cf6909ea44db465d61af 100644 (file)
--- a/mothurout.cpp
+++ b/mothurout.cpp
@@ -257,6 +257,8 @@ int MothurOut::mem_usage(double& vm_usage, double& resident_set) {
  /***********************************************************************/
  int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){
         try {
+               fileName = getFullPathName(fileName);
+               
                 fileHandle.open(fileName.c_str(), ios::app);
                 if(!fileHandle) {
                         mothurOut("[ERROR]: Could not open " + fileName); mothurOutEndLine();
@@ -445,6 +447,9 @@ string MothurOut::getExtension(string longName){
  /***********************************************************************/
  bool MothurOut::isBlank(string fileName){
         try {
+               
+               fileName = getFullPathName(fileName);
+               
                 ifstream fileHandle;
                 fileHandle.open(fileName.c_str());
                 if(!fileHandle) {
@@ -829,7 +834,8 @@ vector<unsigned long int> MothurOut::setFilePosFasta(string filename, int& num)
  /**************************************************************************************************/
  vector<unsigned long int> MothurOut::setFilePosEachLine(string filename, int& num) {
         try {
-
+                       filename = getFullPathName(filename);
+                       
                         vector<unsigned long int> positions;
                         ifstream in;
                         openInputFile(filename, in);
@@ -851,7 +857,7 @@ vector<unsigned long int> MothurOut::setFilePosEachLine(string filename, int& nu
                 
                         FILE * pFile;
                         unsigned long int size;
-               
+                       
                         //get num bytes in file
                         pFile = fopen (filename.c_str(),"rb");
                         if (pFile==NULL) perror ("Error opening file");
@@ -881,6 +887,8 @@ vector<unsigned long int> MothurOut::divideFile(string filename, int& proc) {
                 FILE * pFile;
                 unsigned long int size;
                 
+               filename = getFullPathName(filename);
+               
                 //get num bytes in file
                 pFile = fopen (filename.c_str(),"rb");
                 if (pFile==NULL) perror ("Error opening file");
@@ -893,7 +901,7 @@ vector<unsigned long int> MothurOut::divideFile(string filename, int& proc) {
                 //estimate file breaks
                 unsigned long int chunkSize = 0;
                 chunkSize = size / proc;
-               
+       
                 //file to small to divide by processors
                 if (chunkSize == 0)  {  proc = 1;       filePos.push_back(size); return filePos;        }
         
@@ -914,15 +922,16 @@ vector<unsigned long int> MothurOut::divideFile(string filename, int& proc) {
                         
                         //there was not another sequence before the end of the file
                         unsigned long int sanityPos = in.tellg();
-                       if (sanityPos = -1) {   break;  }
-                       else {   filePos.push_back(newSpot);  }
+
+                       if (sanityPos == -1) {  break;  }
+                       else {  filePos.push_back(newSpot);  }
                         
                         in.close();
                 }
                 
                 //save end pos
                 filePos.push_back(size);
-               
+
                 //sanity check filePos
                 for (int i = 0; i < (filePos.size()-1); i++) {
                         if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
@@ -1112,7 +1121,21 @@ void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbo
  //This function parses the estimator options and puts them in a vector
  void MothurOut::splitAtDash(string& estim, vector<string>& container) {
         try {
-               string individual;
+               string individual = "";
+               int estimLength = estim.size();
+               for(int i=0;i<estimLength;i++){
+                       if(estim[i] == '-'){
+                               container.push_back(individual);
+                               individual = "";                                
+                       }
+                       else{
+                               individual += estim[i];
+                       }
+               }
+               container.push_back(individual);
+
+       
+       /*      string individual;
                 
                 while (estim.find_first_of('-') != -1) {
                         individual = estim.substr(0,estim.find_first_of('-'));
@@ -1122,7 +1145,7 @@ void MothurOut::splitAtDash(string& estim, vector<string>& container) {
                         }
                 }
                 //get last one
-               container.push_back(estim);
+               container.push_back(estim); */
         }
         catch(exception& e) {
                 errorOut(e, "MothurOut", "splitAtDash");
@@ -1134,17 +1157,31 @@ void MothurOut::splitAtDash(string& estim, vector<string>& container) {
  //This function parses the label options and puts them in a set
  void MothurOut::splitAtDash(string& estim, set<string>& container) {
         try {
-               string individual;
-               
-               while (estim.find_first_of('-') != -1) {
-                       individual = estim.substr(0,estim.find_first_of('-'));
-                       if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
-                               estim = estim.substr(estim.find_first_of('-')+1, estim.length());
+               string individual = "";
+               int estimLength = estim.size();
+               for(int i=0;i<estimLength;i++){
+                       if(estim[i] == '-'){
                                 container.insert(individual);
+                               individual = "";                                
+                       }
+                       else{
+                               individual += estim[i];
                         }
                 }
+               container.insert(individual);
+
+       //      string individual;
+               
+       //      while (estim.find_first_of('-') != -1) {
+       //              individual = estim.substr(0,estim.find_first_of('-'));
+       //              if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string
+       //                      estim = estim.substr(estim.find_first_of('-')+1, estim.length());
+       //                      container.insert(individual);
+       //              }
+       //      }
                 //get last one
-               container.insert(estim);
+       //      container.insert(estim);
+       
         }
         catch(exception& e) {
                 errorOut(e, "MothurOut", "splitAtDash");
diff --git a/phylodiversitycommand.cpp b/phylodiversitycommand.cpp

index 619a7f7786cbe9852f87ea5953ff52459284f5b3..98002e7f9ebbffff8ea4064c87d09ec8197a16ea 100644 (file)
--- a/phylodiversitycommand.cpp
+++ b/phylodiversitycommand.cpp
@@ -183,17 +183,19 @@ int PhyloDiversityCommand::execute(){
                 
                                 //initialize counts
                                 map<string, int> counts;
-                               for (int j = 0; j < globaldata->Groups.size(); j++) {  counts[globaldata->Groups[j]] = 0; }
+                               map< string, set<int> > countedBranch;  
+                               for (int j = 0; j < globaldata->Groups.size(); j++) {  counts[globaldata->Groups[j]] = 0; countedBranch[globaldata->Groups[j]].insert(-2);  }  //add dummy index to initialize countedBranch sets
                                 
                                 for(int k = 0; k < numLeafNodes; k++){
                                                 
                                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
                                         
                                         //calc branch length of randomLeaf k
-                                       float br = calcBranchLength(trees[i], randomLeaf[k]);
+                                       float br = calcBranchLength(trees[i], randomLeaf[k], countedBranch);
                         
                                         //for each group in the groups update the total branch length accounting for the names file
                                         vector<string> groups = trees[i]->tree[randomLeaf[k]].getGroup();
+                                       
                                         for (int j = 0; j < groups.size(); j++) {
                                                 int numSeqsInGroupJ = 0;
                                                 map<string, int>::iterator it;
@@ -201,9 +203,11 @@ int PhyloDiversityCommand::execute(){
                                                 if (it != trees[i]->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
                                                         numSeqsInGroupJ = it->second;
                                                 }
-                                       
-                                               for (int s = (counts[groups[j]]+1); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
-                                                       diversity[groups[j]][s] = diversity[groups[j]][s-1] + ((float) numSeqsInGroupJ * br);
+                                               
+                                               if (numSeqsInGroupJ != 0) {     diversity[groups[j]][(counts[groups[j]]+1)] = diversity[groups[j]][counts[groups[j]]] + br;  }
+                                               
+                                               for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
+                                                       diversity[groups[j]][s] = diversity[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
                                                 }
                                                 counts[groups[j]] += numSeqsInGroupJ;
                                         }
@@ -268,6 +272,7 @@ void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofst
                                         else            {       score = div[globaldata->Groups[j]][numSampled] / (float)numIters;       }
                                         
                                         out << setprecision(4) << score << '\t';
+       
                                 }else { out << "NA" << '\t'; }
                         }
                         out << endl;
@@ -318,26 +323,34 @@ void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float>
         }
  }
  //**********************************************************************************************************************
-float PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf){
+float PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set<int> >& counted){
         try {
  
                 //calc the branch length
                 //while you aren't at root
                 float sum = 0.0;
                 int index = leaf;
-
+               
+               vector<string> groups = t->tree[leaf].getGroup();
+                                       
                 while(t->tree[index].getParent() != -1){
                         
                         //if you have a BL
                         if(t->tree[index].getBranchLength() != -1){
-                               sum += abs(t->tree[index].getBranchLength());
+                               if (counted[groups[0]].count(index) == 0) { //you have not already counted this branch
+                                       sum += abs(t->tree[index].getBranchLength());
+                                       for (int j = 0; j < groups.size(); j++) {  counted[groups[j]].insert(index);  }
+                               }
                         }
                         index = t->tree[index].getParent();
                 }
                         
                 //get last breanch length added
                 if(t->tree[index].getBranchLength() != -1){
-                       sum += abs(t->tree[index].getBranchLength());
+                       if (counted[groups[0]].count(index) == 0) { //you have not already counted this branch
+                               sum += abs(t->tree[index].getBranchLength());
+                               for (int j = 0; j < groups.size(); j++) {  counted[groups[j]].insert(index);  }
+                       }
                 }
                 
                 return sum;
diff --git a/phylodiversitycommand.h b/phylodiversitycommand.h

index 76c996c405a92fdb7f5c51d67728c46cf9c902b0..a7e0d2e5d090f3e567446fe4e76d9ee3dd7d776b 100644 (file)
--- a/phylodiversitycommand.h
+++ b/phylodiversitycommand.h
@@ -33,7 +33,7 @@ class PhyloDiversityCommand : public Command {
                 
                 void printData(set<int>&, map< string, vector<float> >&, ofstream&, int);
                 void printSumData(map< string, vector<float> >&, ofstream&, int);
-               float calcBranchLength(Tree*, int);
+               float calcBranchLength(Tree*, int, map< string, set<int> >&);
  };
  
  #endif
diff --git a/preclustercommand.cpp b/preclustercommand.cpp

index 0bd91328f0b6aab5a1f8f20cdaaefab2876cc15d..a7f211345697a72c18b6fa0aaa1ac40200c46d94 100644 (file)
--- a/preclustercommand.cpp
+++ b/preclustercommand.cpp
@@ -125,69 +125,65 @@ int PreClusterCommand::execute(){
                 if (numSeqs == 0) { m->mothurOut("Error reading fasta file...please correct."); m->mothurOutEndLine(); return 0;  }
                 if (diffs > length) { m->mothurOut("Error: diffs is greater than your sequence length."); m->mothurOutEndLine(); return 0;  }
                 
-               string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
-               string newFastaFile = fileroot + "precluster" + m->getExtension(fastafile);
-               string newNamesFile = fileroot + "precluster.names";
-               ofstream outFasta;
-               ofstream outNames;
-               
-               m->openOutputFile(newFastaFile, outFasta);
-               m->openOutputFile(newNamesFile, outNames);
-
+               //clear sizes since you only needed this info to build the alignSeqs seqPNode structs
+//             sizes.clear();
+       
                 //sort seqs by number of identical seqs
-               alignSeqs.sort(comparePriority);
-
+               sort(alignSeqs.begin(), alignSeqs.end(), comparePriority);
+       
                 int count = 0;
-               int i = 0;
+
                 //think about running through twice...
-               list<seqPNode>::iterator itList;
-               list<seqPNode>::iterator itList2;
-               for (itList = alignSeqs.begin(); itList != alignSeqs.end();) {
+               for (int i = 0; i < numSeqs; i++) {
                         
-                       //try to merge it with all smaller seqs
-                       for (itList2 = alignSeqs.begin(); itList2 != alignSeqs.end();) {
-                               
-                               if (m->control_pressed) { outFasta.close(); outNames.close(); remove(newFastaFile.c_str()); remove(newNamesFile.c_str());  return 0; }
-                               
+                       //are you active
+                       //                      itActive = active.find(alignSeqs[i].seq.getName());
                         
-                               if (itList->seq.getName() != itList2->seq.getName()) { //you don't want to merge with yourself
-                                       //are you within "diff" bases
+                       if (alignSeqs[i].active) {  //this sequence has not been merged yet
+                               
+                               //try to merge it with all smaller seqs
+                               for (int j = i+1; j < numSeqs; j++) {
                                         
-                                       int mismatch = calcMisMatches((*itList).seq.getAligned(), (*itList2).seq.getAligned());
-
-                                       if (mismatch <= diffs) {
-                                               //merge
-                                               (*itList).names += ',' + (*itList2).names;
-                                               (*itList).numIdentical += (*itList2).numIdentical;
+                                       if (m->control_pressed) { return 0; }
+                                       
+                                       if (alignSeqs[j].active) {  //this sequence has not been merged yet
+                                               //are you within "diff" bases
+                                               int mismatch = calcMisMatches(alignSeqs[i].seq.getAligned(), alignSeqs[j].seq.getAligned());
                                                 
-                                               itList2 = alignSeqs.erase(itList2); //itList2--;
-                                               count++;
-                                       }else{ itList2++; }
-                               }else{ itList2++; }
-
-                       }
+                                               if (mismatch <= diffs) {
+                                                       //merge
+                                                       alignSeqs[i].names += ',' + alignSeqs[j].names;
+                                                       alignSeqs[i].numIdentical += alignSeqs[j].numIdentical;
  
-                       //ouptut this sequence
-                       printData(outFasta, outNames, (*itList));
-                       
-                       //remove sequence
-                       itList = alignSeqs.erase(itList); 
-                       
-                       i++;
+                                                       alignSeqs[j].active = 0;
+                                                       alignSeqs[j].numIdentical = 0;
+                                                       count++;
+                                               }
+                                       }//end if j active
+                               }//end if i != j
                         
+                       //remove from active list 
+                               alignSeqs[i].active = 0;
+                               
+                       }//end if active i
                         if(i % 100 == 0)        { m->mothurOut(toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine(); }
                 }
-                               
-               if(i % 100 != 0)        { m->mothurOut(toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine(); }
                 
-               outFasta.close();
-               outNames.close();
+               if(numSeqs % 100 != 0)  { m->mothurOut(toString(numSeqs) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine();   }
+       
                 
-               if (m->control_pressed) {  remove(newFastaFile.c_str()); remove(newNamesFile.c_str());  return 0; }
-
-               m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences.");
-               m->mothurOut("Total number of sequences before precluster was " + toString(numSeqs) + "."); m->mothurOutEndLine();
-               m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); 
+               string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
+               
+               string newFastaFile = fileroot + "precluster" + m->getExtension(fastafile);
+               string newNamesFile = fileroot + "precluster.names";
+               
+               if (m->control_pressed) { return 0; }
+               
+               m->mothurOut("Total number of sequences before precluster was " + toString(alignSeqs.size()) + "."); m->mothurOutEndLine();
+               m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); 
+               printData(newFastaFile, newNamesFile);
+               
+               m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); 
                 
                 if (m->control_pressed) { remove(newFastaFile.c_str()); remove(newNamesFile.c_str()); return 0; }
                 
@@ -287,10 +283,25 @@ int PreClusterCommand::calcMisMatches(string seq1, string seq2){
  
  /**************************************************************************************************/
  
-void PreClusterCommand::printData(ofstream& outFasta, ofstream& outNames, seqPNode thisSeq){
+void PreClusterCommand::printData(string newfasta, string newname){
         try {
-               thisSeq.seq.printSequence(outFasta); 
-               outNames << thisSeq.seq.getName() << '\t' << thisSeq.names << endl;
+               ofstream outFasta;
+               ofstream outNames;
+               
+               m->openOutputFile(newfasta, outFasta);
+               m->openOutputFile(newname, outNames);
+                               
+               
+               for (int i = 0; i < alignSeqs.size(); i++) {
+                       if (alignSeqs[i].numIdentical != 0) {
+                               alignSeqs[i].seq.printSequence(outFasta); 
+                               outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl;
+                       }
+               }
+               
+               outFasta.close();
+               outNames.close();
+               
         }
         catch(exception& e) {
                 m->errorOut(e, "PreClusterCommand", "printData");
diff --git a/preclustercommand.h b/preclustercommand.h

index a26fda41cba8e316fd8649604e6934a5960f22f4..de6a5727575a7f35d2605239968a37435faa388f 100644 (file)
--- a/preclustercommand.h
+++ b/preclustercommand.h
@@ -20,8 +20,9 @@ struct seqPNode {
         int numIdentical;
         Sequence seq;
         string names;
+       bool active;
         seqPNode() {}
-       seqPNode(int n, Sequence s, string nm) : numIdentical(n), seq(s), names(nm) {}
+       seqPNode(int n, Sequence s, string nm) : numIdentical(n), seq(s), names(nm), active(1) {}
         ~seqPNode() {}
  };
  /************************************************************/
@@ -38,7 +39,7 @@ private:
         int diffs, length;
         bool abort;
         string fastafile, namefile, outputDir;
-       list<seqPNode> alignSeqs; //maps the number of identical seqs to a sequence
+       vector<seqPNode> alignSeqs; //maps the number of identical seqs to a sequence
         map<string, string> names; //represents the names file first column maps to second column
         map<string, int> sizes;  //this map a seq name to the number of identical seqs in the names file
         map<string, int>::iterator itSize; 
@@ -48,7 +49,7 @@ private:
         void readNameFile();
         //int readNamesFASTA();
         int calcMisMatches(string, string);
-       void printData(ofstream&, ofstream&, seqPNode); //fasta filename, names file name
+       void printData(string, string); //fasta filename, names file name
  };
  
  /************************************************************/
diff --git a/readotu.cpp b/readotu.cpp

index 3952abca652ef1711532e07dcc859062eeb193c2..00448f8a24cced84a346dfedc434ffa838698371 100644 (file)
--- a/readotu.cpp
+++ b/readotu.cpp
@@ -35,7 +35,7 @@ void ReadOTUFile::read(GlobalData* globaldata){
                 //memory leak prevention
                 //if (globaldata->ginput != NULL) { delete globaldata->ginput;  }
                 globaldata->ginput = input;     //saving to be used by collector and rarefact commands.
-       
+
                 if ((globaldata->getFormat() == "list") || (globaldata->getFormat() == "rabund") || (globaldata->getFormat() == "sabund")) {//you are reading a list, rabund or sabund file for collect, rarefaction or summary.
  
  //cout << input << '\t' << globaldata << endl;
diff --git a/readtreecommand.cpp b/readtreecommand.cpp

index 0a86f36dfdced49cb6151205e82c0372556e1209..fdfdb050f22c4ccc281646d1d56c8a5aa6b33806 100644 (file)
--- a/readtreecommand.cpp
+++ b/readtreecommand.cpp
@@ -154,46 +154,47 @@ int ReadTreeCommand::execute(){
                 }
  
                 
-//             Sarah, isn't this checking already done when assigning the sequences to the groups?  it makes read.tree
-//             wicked slow...  For some reason my tree is coming in here eventhough the number of sequences in the tree
-//             agrees with the number of lines in the name file and the number of sequences represented by the name file
-//             agrees with the number of sequences in the group file
-
-               //output any names that are in group file but not in tree
+               //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
+               int numNamesInTree;
+               if (namefile != "")  {  
+                       if (numUniquesInName == globaldata->Treenames.size()) {  numNamesInTree = nameMap.size();  }
+                       else {   numNamesInTree = globaldata->Treenames.size();  }
+               }else {  numNamesInTree = globaldata->Treenames.size();  }
                 
-//             if (globaldata->Treenames.size() < treeMap->getNumSeqs()) {
-//                     cout << "in here" << endl;
-//                     for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) {
-//                             //is that name in the tree?
-//                             int count = 0;
-//                             for (int j = 0; j < globaldata->Treenames.size(); j++) {
-//                                     if (treeMap->namesOfSeqs[i] == globaldata->Treenames[j]) { break; } //found it
-//                                     count++;
-//                             }
-//                             
-//                             if (m->control_pressed) {  
-//                                     for (int i = 0; i < T.size(); i++) {  delete T[i];  }
-//                                     globaldata->gTree.clear();
-//                                     delete globaldata->gTreemap;
-//                                     return 0;
-//                             }
-//                             
-//                             //then you did not find it so report it 
-//                             if (count == globaldata->Treenames.size()) { 
-//                                     //if it is in your namefile then don't remove
-//                                     map<string, string>::iterator it = nameMap.find(treeMap->namesOfSeqs[i]);
-//                                     
-//                                     if (it == nameMap.end()) {
-//                                             m->mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
-//                                             treeMap->removeSeq(treeMap->namesOfSeqs[i]);
-//                                             i--; //need this because removeSeq removes name from namesOfSeqs
-//                                     }
-//                             }
-//                     }
-//                     
-//                     globaldata->gTreemap = treeMap;
-//             }
                 
+               //output any names that are in group file but not in tree
+               if (numNamesInTree < treeMap->getNumSeqs()) {
+                       for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) {
+                               //is that name in the tree?
+                               int count = 0;
+                               for (int j = 0; j < globaldata->Treenames.size(); j++) {
+                                       if (treeMap->namesOfSeqs[i] == globaldata->Treenames[j]) { break; } //found it
+                                       count++;
+                               }
+                               
+                               if (m->control_pressed) {  
+                                       for (int i = 0; i < T.size(); i++) {  delete T[i];  }
+                                       globaldata->gTree.clear();
+                                       delete globaldata->gTreemap;
+                                       return 0;
+                               }
+                               
+                               //then you did not find it so report it 
+                               if (count == globaldata->Treenames.size()) { 
+                                       //if it is in your namefile then don't remove
+                                       map<string, string>::iterator it = nameMap.find(treeMap->namesOfSeqs[i]);
+                                       
+                                       if (it == nameMap.end()) {
+                                               m->mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
+                                               treeMap->removeSeq(treeMap->namesOfSeqs[i]);
+                                               i--; //need this because removeSeq removes name from namesOfSeqs
+                                       }
+                               }
+                       }
+                       
+                       globaldata->gTreemap = treeMap;
+               }
+
                 return 0;
         }
         catch(exception& e) {
@@ -205,6 +206,7 @@ int ReadTreeCommand::execute(){
  int ReadTreeCommand::readNamesFile() {
         try {
                 globaldata->names.clear();
+               numUniquesInName = 0;
                 
                 ifstream in;
                 m->openInputFile(namefile, in);
@@ -215,6 +217,8 @@ int ReadTreeCommand::readNamesFile() {
                 while(!in.eof()) {
                         in >> first >> second; m->gobble(in);
                         
+                       numUniquesInName++;
+
                         itNames = globaldata->names.find(first);
                         if (itNames == globaldata->names.end()) {  
                                 globaldata->names[first] = second; 
@@ -224,7 +228,7 @@ int ReadTreeCommand::readNamesFile() {
                                 m->splitAtComma(second, dupNames);
                                 
                                 for (int i = 0; i < dupNames.size(); i++) {     nameMap[dupNames[i]] = dupNames[i];  }
-                       }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); globaldata->names.clear(); return 1; }                        
+                       }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); globaldata->names.clear(); namefile = ""; return 1; }                 
                 }
                 in.close();
                 
diff --git a/readtreecommand.h b/readtreecommand.h

index b0d10f70b57e9fa8ec27622ee9566b67d5c2853c..5d5b9b5eb74f17983d58fd55610218d2c23a8866 100644 (file)
--- a/readtreecommand.h
+++ b/readtreecommand.h
@@ -32,6 +32,7 @@ private:
         map<string, string> nameMap;
         
         int readNamesFile();
+       int numUniquesInName;
  
  };
  
diff --git a/sharedcommand.cpp b/sharedcommand.cpp

index 2c1b6af3004e7351085e4538f7023e342bc181f9..fb3691e662a7ee2b378d8e1dd13e4221b698106d 100644 (file)
--- a/sharedcommand.cpp
+++ b/sharedcommand.cpp
@@ -259,6 +259,7 @@ int SharedCommand::execute(){
                 globaldata->setGroupFile("");
                 globaldata->setSharedFile(filename);
                 
+               
                 if (m->control_pressed) { 
                                 delete input;  globaldata->ginput = NULL; 
                                 remove(filename.c_str()); 
@@ -286,11 +287,15 @@ void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
                 if (order.size() == 0) { //user has not specified an order so do aplabetically
                         sort(thislookup.begin(), thislookup.end(), compareSharedRabunds);
                         
+                       globaldata->Groups.clear();
+                       
                         //initialize bin values
                         for (int i = 0; i < thislookup.size(); i++) {
                                 out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
                                 thislookup[i]->print(out);
                                 
+                               globaldata->Groups.push_back(thislookup[i]->getGroup());
+                               
                                 RAbundVector rav = thislookup[i]->getRAbundVector();
                                 m->openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()]));
                                 rav.print(*(filehandles[thislookup[i]->getGroup()]));
@@ -305,6 +310,7 @@ void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
                                 myMap[thislookup[i]->getGroup()] = thislookup[i];
                         }
                         
+                       globaldata->Groups.clear();
                         
                         //loop through ordered list and print the rabund
                         for (int i = 0; i < order.size(); i++) {
@@ -313,6 +319,8 @@ void SharedCommand::printSharedData(vector<SharedRAbundVector*> thislookup) {
                                 if(myIt != myMap.end()) { //we found it
                                         out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t';
                                         (myIt->second)->print(out);
+                                       
+                                       globaldata->Groups.push_back((myIt->second)->getGroup());
                                 
                                         RAbundVector rav = (myIt->second)->getRAbundVector();
                                         m->openOutputFileAppend(fileroot + (myIt->second)->getGroup() + ".rabund", *(filehandles[(myIt->second)->getGroup()]));
diff --git a/sharedrabundvector.cpp b/sharedrabundvector.cpp

index c15dfac5a1cde0b8fb08173702c44b227e06c2c0..6259ba2b03796ff2dc71bf9125217287eb545f4c 100644 (file)
--- a/sharedrabundvector.cpp
+++ b/sharedrabundvector.cpp
@@ -112,6 +112,7 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0),
                         
                         if (globaldata->gGroupmap == NULL) { 
                                 //save group in groupmap
+       
                                 groupmap->namesOfGroups.push_back(groupN);
                                 groupmap->groupIndex[groupN] = count;
                         }
diff --git a/summarycommand.cpp b/summarycommand.cpp

index 70470b015684b386947056686789da5077000565..208544adf1bd7ec2f0f59603c4eb86840275cf80 100644 (file)
--- a/summarycommand.cpp
+++ b/summarycommand.cpp
@@ -445,7 +445,7 @@ vector<string> SummaryCommand::parseSharedFile(string filename) {
         }
  }
  //**********************************************************************************************************************
-string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector<string> outputNames) {
+string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector<string>& outputNames) {
         try {
                 
                 ofstream out;
@@ -501,7 +501,9 @@ string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector<
                 }       
                 
                 //close each groups summary file
-               for (int i=0; i<outputNames.size(); i++) {  (*(filehandles[outputNames[i]])).close();  }
+               for (int i=0; i<outputNames.size(); i++) {  (*(filehandles[outputNames[i]])).close();  remove(outputNames[i].c_str());  }
+               outputNames.clear();
+               
                 out.close();
                 
                 //return combine file name
diff --git a/summarycommand.h b/summarycommand.h

index 6b3fa69c67e46cd92d44f0064404d2743dd29b7a..7c7ee2c8eea829a9c6dc95fa794df5fcccc7bda1 100644 (file)
--- a/summarycommand.h
+++ b/summarycommand.h
@@ -43,7 +43,7 @@ private:
         vector<string> groups;
         
         vector<string> parseSharedFile(string);
-       string createGroupSummaryFile(int, int, vector<string>);
+       string createGroupSummaryFile(int, int, vector<string>&);
  
  
  };
diff --git a/tree.cpp b/tree.cpp

index a104477d0a90280a528d5cea4deb80ba2133ba09..dbb3f44403a124679f2e44f0de490faaacbc36ee 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -70,7 +70,7 @@ void Tree::addNamesToCounts() {
                                 
                 //go through each leaf and update its pcounts and pgroups
                 
-               float A = clock();
+               //float A = clock();
  
                 for (int i = 0; i < numLeaves; i++) {
  
@@ -137,8 +137,8 @@ void Tree::addNamesToCounts() {
                         }//end else
                 }//end for              
                 
-               float B = clock();
-               cout << "addNamesToCounts\t" << (B - A) / CLOCKS_PER_SEC << endl;       
+               //float B = clock();
+               //cout << "addNamesToCounts\t" << (B - A) / CLOCKS_PER_SEC << endl;     
  
         }
         catch(exception& e) {
@@ -175,7 +175,7 @@ void Tree::setIndex(string searchName, int index) {
  /*****************************************************************/
  int Tree::assembleTree() {
         try {
-               float A = clock();
+               //float A = clock();
  
                 //if user has given a names file we want to include that info in the pgroups and pcount info.
                 if(globaldata->names.size() != 0) {  addNamesToCounts();  }
@@ -187,8 +187,8 @@ int Tree::assembleTree() {
                         tree[i].pGroups = (mergeGroups(i));
                         tree[i].pcount = (mergeGcounts(i));
                 }
-               float B = clock();
-               cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
+               //float B = clock();
+               //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
                 return 0;
         }
         catch(exception& e) {
diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp

index eb3c33481d4ef68199f2b42e452ea93c1679bbf4..9594767f81ad234a969db760d7cc0be6e1575111 100644 (file)
--- a/treegroupscommand.cpp
+++ b/treegroupscommand.cpp
@@ -244,7 +244,8 @@ int TreeGroupCommand::execute(){
                 if (format == "sharedfile") {
                         //if the users entered no valid calculators don't execute command
                         if (treeCalculators.size() == 0) { m->mothurOut("You have given no valid calculators."); m->mothurOutEndLine(); return 0; }
-
+                       
+                       if (globaldata->gGroupmap != NULL) {  delete globaldata->gGroupmap;   globaldata->gGroupmap = NULL;  }
                         //you have groups
                         read = new ReadOTUFile(globaldata->inputFileName);      
                         read->read(&*globaldata); 
@@ -258,17 +259,17 @@ int TreeGroupCommand::execute(){
                         //used in tree constructor 
                         globaldata->runParse = false;
                         
-                       //clear globaldatas old tree names if any
-                       globaldata->Treenames.clear();
-               
-                       //fills globaldatas tree names
-                       globaldata->Treenames = globaldata->Groups;
-               
                         //create treemap class from groupmap for tree class to use
                         tmap = new TreeMap();
                         tmap->makeSim(globaldata->gGroupmap);
                         globaldata->gTreemap = tmap;
                         
+                       //clear globaldatas old tree names if any
+                       globaldata->Treenames.clear();
+                       
+                       //fills globaldatas tree names
+                       globaldata->Treenames = globaldata->Groups;
+               
                         if (m->control_pressed) { return 0; }
                         
                         //create tree file
@@ -606,7 +607,7 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                                                                 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
                                                                 
                                                                 data = treeCalculators[i]->getValues(subset); //saves the calculator outputs
-                                                               
+                                               //cout << thisLookup[k]->getGroup() << '\t' << thisLookup[l]->getGroup() << '\t' << (1.0 - data[0]) << endl;
                                                                 if (m->control_pressed) { return 1; }
                                                                 
                                                                 //save values in similarity matrix
author	westcott <westcott>
	Fri, 27 Aug 2010 13:50:03 +0000 (13:50 +0000)
committer	westcott <westcott>
	Fri, 27 Aug 2010 13:50:03 +0000 (13:50 +0000)
aligncommand.cpp		patch \| blob \| history
clearcutcommand.cpp		patch \| blob \| history
makefile		patch \| blob \| history
mothurout.cpp		patch \| blob \| history
phylodiversitycommand.cpp		patch \| blob \| history
phylodiversitycommand.h		patch \| blob \| history
preclustercommand.cpp		patch \| blob \| history
preclustercommand.h		patch \| blob \| history
readotu.cpp		patch \| blob \| history
readtreecommand.cpp		patch \| blob \| history
readtreecommand.h		patch \| blob \| history
sharedcommand.cpp		patch \| blob \| history
sharedrabundvector.cpp		patch \| blob \| history
summarycommand.cpp		patch \| blob \| history
summarycommand.h		patch \| blob \| history
tree.cpp		patch \| blob \| history
treegroupscommand.cpp		patch \| blob \| history