]> git.donarmstrong.com Git - mothur.git/commitdiff
added some debug stuff to chimera.uchime. optimized phylo.diversity to improve raref...
authorSarah Westcott <mothur.westcott@gmail.com>
Fri, 11 May 2012 17:47:32 +0000 (13:47 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Fri, 11 May 2012 17:47:32 +0000 (13:47 -0400)
chimerauchimecommand.cpp
phylodiversitycommand.cpp
phylodiversitycommand.h

index 026e91b01f188ce180c3b645de8451138efe0e02..e28775f016c4e992d2d8f2d23904c93c6e3f74ef 100644 (file)
@@ -12,7 +12,7 @@
 //#include "uc.h"
 #include "sequence.hpp"
 #include "referencedb.h"
-
+#include "systemcommand.h"
 
 //**********************************************************************************************************************
 vector<string> ChimeraUchimeCommand::setParameters(){  
@@ -463,10 +463,20 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                        string uchimeCommand;
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
+            if (m->debug) { 
+                m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); 
+                Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine();
+                newCommand->execute();
+                delete newCommand;
+                m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); 
+                newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
+                newCommand->execute();
+                delete newCommand;
+            }
 #else
                        uchimeCommand = path + "uchime.exe";
 #endif
-                       
+        
                        //test to make sure uchime exists
                        ifstream in;
                        uchimeCommand = m->getFullPathName(uchimeCommand);
index 3db101a89f3c60e93ebb7961ac75bdea5524f630..29e2ef0ba39ba4fbe73bf88949bf7a4756d50578 100644 (file)
@@ -212,6 +212,8 @@ int PhyloDiversityCommand::execute(){
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
+        int start = time(NULL);
+        
                m->setTreeFile(treefile);
         TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
         vector<Tree*> trees = reader->getTrees();
@@ -323,6 +325,9 @@ int PhyloDiversityCommand::execute(){
        
                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } return 0; }
 
+        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
+
+        
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
@@ -429,39 +434,50 @@ int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, ma
        try {
                int numLeafNodes = randomLeaf.size();
                vector<string> mGroups = m->getGroups();
-       
+        
+        map<string, int> rootForGroup = getRootForGroups(t); //maps groupName to root node in tree. "root" for group may not be the trees root and we don't want to include the extra branches.
+        
                for (int l = 0; l < numIters; l++) {
                                random_shuffle(randomLeaf.begin(), randomLeaf.end());
-               
+            cout << l << endl;
                                //initialize counts
                                map<string, int> counts;
-                               map< string, set<int> > countedBranch;  
-                               for (int j = 0; j < mGroups.size(); j++) {  counts[mGroups[j]] = 0; countedBranch[mGroups[j]].insert(-2);  }  //add dummy index to initialize countedBranch sets
+                vector< map<string, bool> > countedBranch;
+                for (int i = 0; i < t->getNumNodes(); i++) {
+                    map<string, bool> temp;
+                    for (int j = 0; j < mGroups.size(); j++) { temp[mGroups[j]] = false; }
+                    countedBranch.push_back(temp);
+                }
+            
+                               for (int j = 0; j < mGroups.size(); j++) {  counts[mGroups[j]] = 0;   }  
                                
                                for(int k = 0; k < numLeafNodes; k++){
                                                
                                        if (m->control_pressed) { return 0; }
                                        
                                        //calc branch length of randomLeaf k
-                                       vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch);
+                    vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch, rootForGroup);
                        
                                        //for each group in the groups update the total branch length accounting for the names file
                                        vector<string> groups = t->tree[randomLeaf[k]].getGroup();
                                        
                                        for (int j = 0; j < groups.size(); j++) {
-                                               int numSeqsInGroupJ = 0;
-                                               map<string, int>::iterator it;
-                                               it = t->tree[randomLeaf[k]].pcount.find(groups[j]);
-                                               if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
-                                                       numSeqsInGroupJ = it->second;
-                                               }
-                                               
-                                               if (numSeqsInGroupJ != 0) {     div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j];  }
-                                               
-                                               for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
-                                                       div[groups[j]][s] = div[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
-                                               }
-                                               counts[groups[j]] += numSeqsInGroupJ;
+                        
+                        if (m->inUsersGroups(groups[j], mGroups)) {
+                            int numSeqsInGroupJ = 0;
+                            map<string, int>::iterator it;
+                            it = t->tree[randomLeaf[k]].pcount.find(groups[j]);
+                            if (it != t->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
+                                numSeqsInGroupJ = it->second;
+                            }
+                            
+                            if (numSeqsInGroupJ != 0) {        div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j];  }
+                            
+                            for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
+                                div[groups[j]][s] = div[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
+                            }
+                            counts[groups[j]] += numSeqsInGroupJ;
+                        }
                                        }
                                }
                                
@@ -556,9 +572,9 @@ void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float>
 }
 //**********************************************************************************************************************
 //need a vector of floats one branch length for every group the node represents.
-vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set<int> >& counted){
+vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, vector< map<string, bool> >& counted, map<string, int> roots){
        try {
-
+        
                //calc the branch length
                //while you aren't at root
                vector<float> sums; 
@@ -567,80 +583,38 @@ vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< st
                vector<string> groups = t->tree[leaf].getGroup();
                sums.resize(groups.size(), 0.0);
                
-               map<string, map<int, double> > tempTotals; //maps node to total Branch Length
-               map< string, set<int> > tempCounted;
-               set<int>::iterator it;
-       
-               //you are a leaf
+        
+        //you are a leaf
                if(t->tree[index].getBranchLength() != -1){     
                        for (int k = 0; k < groups.size(); k++) { 
                                sums[k] += abs(t->tree[index].getBranchLength());       
-                               counted[groups[k]].insert(index);
                        }
                }
-               
-               for (int k = 0; k < groups.size(); k++) { 
-                       tempTotals[groups[k]][index] = 0.0;     
-               }
-               
-               index = t->tree[index].getParent();     
-                       
+        
+        
+        index = t->tree[index].getParent();    
+        
                //while you aren't at root
                while(t->tree[index].getParent() != -1){
-
+            
                        if (m->control_pressed) {  return sums; }
                        
-                       int pcountSize = 0;     
                        for (int k = 0; k < groups.size(); k++) {
-                               map<string, int>::iterator itGroup = t->tree[index].pcount.find(groups[k]);
-                               if (itGroup != t->tree[index].pcount.end()) { pcountSize++;  } 
-                       
-                               //do both your chidren have have descendants from the users groups? 
-                               int lc = t->tree[index].getLChild();
-                               int rc = t->tree[index].getRChild();
-                       
-                               int LpcountSize = 0;
-                               itGroup = t->tree[lc].pcount.find(groups[k]);
-                               if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++;  } 
-                                                       
-                               int RpcountSize = 0;
-                               itGroup = t->tree[rc].pcount.find(groups[k]);
-                               if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++;  } 
-                                                               
-                               //if yes, add your childrens tempTotals
-                               if ((LpcountSize != 0) && (RpcountSize != 0)) {
-                                       sums[k] += tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; 
-                                       
-                                       for (it = tempCounted[groups[k]].begin(); it != tempCounted[groups[k]].end(); it++) { counted[groups[k]].insert(*it); }
-
-                                       //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
-                                       if (t->tree[index].getBranchLength() != -1) {
-                                               if (counted[groups[k]].count(index) == 0) {
-                                                       tempTotals[groups[k]][index] = abs(t->tree[index].getBranchLength());
-                                                       tempCounted[groups[k]].insert(index);
-                                               }else{
-                                                       tempTotals[groups[k]][index] = 0.0;
-                                               }
-                                       }else {
-                                               tempTotals[groups[k]][index] = 0.0;
-                                       }
-                               }else { //if no, your tempTotal is your childrens temp totals + your branch length
-                                       tempTotals[groups[k]][index] = tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; 
-                                                                       
-                                       if (counted[groups[k]].count(index) == 0) {
-                                               tempTotals[groups[k]][index] += abs(t->tree[index].getBranchLength());
-                                               tempCounted[groups[k]].insert(index);
-                                       }
-
-                               }
-                               //cout << "temptotal = "<< tempTotals[i] << endl;
-                       }
-                       
-                       index = t->tree[index].getParent();     
-               }
-
+                
+                if (index >= roots[groups[k]]) { counted[index][groups[k]] = true; } //if you are at this groups "root", then say we are done
+                
+                if (!counted[index][groups[k]]){ //if counted[index][groups[k] is true this groups has already added all br from here to root, so quit early
+                    if (t->tree[index].getBranchLength() != -1) {
+                        sums[k] += abs(t->tree[index].getBranchLength());
+                    }
+                    counted[index][groups[k]] = true;
+                }
+            }
+            index = t->tree[index].getParent();        
+        }
+        
                return sums;
-
+        
        }
        catch(exception& e) {
                m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength");
@@ -648,6 +622,63 @@ vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< st
        }
 }
 //**********************************************************************************************************************
+map<string, int> PhyloDiversityCommand::getRootForGroups(Tree* t){
+       try {
+               map<string, int> roots; //maps group to root for group, may not be root of tree
+               map<string, bool> done;
+       
+               //initialize root for all groups to -1
+               for (int k = 0; k < (t->getTreeMap())->getNamesOfGroups().size(); k++) { done[(t->getTreeMap())->getNamesOfGroups()[k]] = false; }
+        
+        for (int i = 0; i < t->getNumLeaves(); i++) {
+            
+            vector<string> groups = t->tree[i].getGroup();
+            
+            int index = t->tree[i].getParent();
+            
+            for (int j = 0; j < groups.size(); j++) {
+                
+                if (done[groups[j]] == false) { //we haven't found the root for this group yet
+                    
+                    done[groups[j]] = true;
+                    roots[groups[j]] = i; //set root to self to start
+                    
+                    //while you aren't at root
+                    while(t->tree[index].getParent() != -1){
+                        
+                        if (m->control_pressed) {  return roots; }
+                        
+                        //do both your chidren have have descendants from the users groups? 
+                        int lc = t->tree[index].getLChild();
+                        int rc = t->tree[index].getRChild();
+                        
+                        int LpcountSize = 0;
+                        map<string, int>:: iterator itGroup = t->tree[lc].pcount.find(groups[j]);
+                        if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++;  } 
+                        
+                        int RpcountSize = 0;
+                        itGroup = t->tree[rc].pcount.find(groups[j]);
+                        if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++;  } 
+                        
+                        if ((LpcountSize != 0) && (RpcountSize != 0)) { //possible root
+                            roots[groups[j]] = index;
+                        }else { ;}
+                        
+                        index = t->tree[index].getParent();    
+                    }
+                }
+            }
+        }
+        
+        return roots;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloDiversityCommand", "getRootForGroups");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 
 
 
index 5d0cccf249099ac51d86b58e41e5098793ef3f3c..52cd3e27953ebac12619df1148e0e9bf932c8261 100644 (file)
@@ -39,10 +39,11 @@ private:
                string groups, outputDir, treefile, groupfile, namefile;
                vector<string> Groups, outputNames; //holds groups to be used, and outputFile names
                
+        map<string, int> getRootForGroups(Tree* t);
                int readNamesFile();
                void printData(set<int>&, map< string, vector<float> >&, ofstream&, int);
                void printSumData(map< string, vector<float> >&, ofstream&, int);
-               vector<float> calcBranchLength(Tree*, int, map< string, set<int> >&);
+        vector<float> calcBranchLength(Tree*, int, vector< map<string, bool> >&, map<string, int>);
                int driver(Tree*, map< string, vector<float> >&, map<string, vector<float> >&, int, int, vector<int>&, set<int>&, ofstream&, ofstream&, bool);
                int createProcesses(vector<int>&, Tree*, map< string, vector<float> >&, map<string, vector<float> >&, int, int, vector<int>&, set<int>&, ofstream&, ofstream&);