]> git.donarmstrong.com Git - mothur.git/blobdiff - phylodiversitycommand.cpp
working on testing for 1.13
[mothur.git] / phylodiversitycommand.cpp
index 5a2d33fd724d94949bb03adb617639a1ce28450c..6be1fddb597dd5ab1c61f69ce59cf590c2307e37 100644 (file)
@@ -34,7 +34,7 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
                        }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
-                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = hasPath(globaldata->getTreeFile());         }
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(globaldata->getTreeFile());              }
                        
                        if (globaldata->gTree.size() == 0) {//no trees were read
                                m->mothurOut("You must execute the read.tree command, before you may execute the phylo.diversity command."); m->mothurOutEndLine(); abort = true;  }
@@ -47,22 +47,22 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
                        convert(temp, iters); 
                        
                        temp = validParameter.validFile(parameters, "rarefy", false);                   if (temp == "not found") { temp = "F"; }
-                       rarefy = isTrue(temp);
+                       rarefy = m->isTrue(temp);
                        if (!rarefy) { iters = 1;  }
                        
                        temp = validParameter.validFile(parameters, "summary", false);                  if (temp == "not found") { temp = "T"; }
-                       summary = isTrue(temp);
+                       summary = m->isTrue(temp);
                        
                        temp = validParameter.validFile(parameters, "scale", false);                    if (temp == "not found") { temp = "F"; }
-                       scale = isTrue(temp);
+                       scale = m->isTrue(temp);
                        
                        temp = validParameter.validFile(parameters, "collect", false);                  if (temp == "not found") { temp = "F"; }
-                       collect = isTrue(temp);
+                       collect = m->isTrue(temp);
                        
                        groups = validParameter.validFile(parameters, "groups", false);                 
                        if (groups == "not found") { groups = ""; Groups = globaldata->gTreemap->namesOfGroups;  globaldata->Groups = Groups;  }
                        else { 
-                               splitAtDash(groups, Groups);
+                               m->splitAtDash(groups, Groups);
                                globaldata->Groups = Groups;
                        }
                        
@@ -124,20 +124,20 @@ int PhyloDiversityCommand::execute(){
                        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
                        
                        ofstream outSum, outRare, outCollect;
-                       string outSumFile = outputDir + getRootName(getSimpleName(globaldata->getTreeFile()))  + toString(i+1) + ".phylodiv.summary";
-                       string outRareFile = outputDir + getRootName(getSimpleName(globaldata->getTreeFile()))  + toString(i+1) + ".phylodiv.rarefaction";
-                       string outCollectFile = outputDir + getRootName(getSimpleName(globaldata->getTreeFile()))  + toString(i+1) + ".phylodiv";
+                       string outSumFile = outputDir + m->getRootName(m->getSimpleName(globaldata->getTreeFile()))  + toString(i+1) + ".phylodiv.summary";
+                       string outRareFile = outputDir + m->getRootName(m->getSimpleName(globaldata->getTreeFile()))  + toString(i+1) + ".phylodiv.rarefaction";
+                       string outCollectFile = outputDir + m->getRootName(m->getSimpleName(globaldata->getTreeFile()))  + toString(i+1) + ".phylodiv";
                        
-                       if (summary)    { openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile);                                }
-                       if (rarefy)             { openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile);                             }
-                       if (collect)    { openOutputFile(outCollectFile, outCollect); outputNames.push_back(outCollectFile);    }
+                       if (summary)    { m->openOutputFile(outSumFile, outSum); outputNames.push_back(outSumFile);                             }
+                       if (rarefy)             { m->openOutputFile(outRareFile, outRare); outputNames.push_back(outRareFile);                          }
+                       if (collect)    { m->openOutputFile(outCollectFile, outCollect); outputNames.push_back(outCollectFile); }
                        
                        int numLeafNodes = trees[i]->getNumLeaves();
                        
                        //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
                        vector<int> randomLeaf;
                        for (int j = 0; j < numLeafNodes; j++) {  
-                               if (inUsersGroups(trees[i]->tree[j].getGroup(), globaldata->Groups) == true) { //is this a node from the group the user selected.
+                               if (m->inUsersGroups(trees[i]->tree[j].getGroup(), globaldata->Groups) == true) { //is this a node from the group the user selected.
                                        randomLeaf.push_back(j); 
                                }
                        }
@@ -183,17 +183,19 @@ int PhyloDiversityCommand::execute(){
                
                                //initialize counts
                                map<string, int> counts;
-                               for (int j = 0; j < globaldata->Groups.size(); j++) {  counts[globaldata->Groups[j]] = 0; }
+                               map< string, set<int> > countedBranch;  
+                               for (int j = 0; j < globaldata->Groups.size(); j++) {  counts[globaldata->Groups[j]] = 0; countedBranch[globaldata->Groups[j]].insert(-2);  }  //add dummy index to initialize countedBranch sets
                                
                                for(int k = 0; k < numLeafNodes; k++){
                                                
                                        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());         } return 0; }
                                        
                                        //calc branch length of randomLeaf k
-                                       float br = calcBranchLength(trees[i], randomLeaf[k]);
+                                       float br = calcBranchLength(trees[i], randomLeaf[k], countedBranch);
                        
                                        //for each group in the groups update the total branch length accounting for the names file
                                        vector<string> groups = trees[i]->tree[randomLeaf[k]].getGroup();
+                                       
                                        for (int j = 0; j < groups.size(); j++) {
                                                int numSeqsInGroupJ = 0;
                                                map<string, int>::iterator it;
@@ -201,9 +203,11 @@ int PhyloDiversityCommand::execute(){
                                                if (it != trees[i]->tree[randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
                                                        numSeqsInGroupJ = it->second;
                                                }
-                                       
-                                               for (int s = (counts[groups[j]]+1); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
-                                                       diversity[groups[j]][s] = diversity[groups[j]][s-1] + ((float) numSeqsInGroupJ * br);
+                                               
+                                               if (numSeqsInGroupJ != 0) {     diversity[groups[j]][(counts[groups[j]]+1)] = diversity[groups[j]][counts[groups[j]]] + br;  }
+                                               
+                                               for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
+                                                       diversity[groups[j]][s] = diversity[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
                                                }
                                                counts[groups[j]] += numSeqsInGroupJ;
                                        }
@@ -246,33 +250,22 @@ int PhyloDiversityCommand::execute(){
 void PhyloDiversityCommand::printSumData(map< string, vector<float> >& div, ofstream& out, int numIters){
        try {
                
-               out << "numSampled\t";
-               for (int i = 0; i < globaldata->Groups.size(); i++) { out << globaldata->Groups[i] << '\t';  }
-               out << endl;
+               out << "Groups\tnumSampled\tphyloDiversity" << endl;
                
                out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
-               
-               set<int> num;
-               //find end points to output
-               for (map<string, vector<float> >::iterator itEnds = div.begin(); itEnds != div.end(); itEnds++) {       num.insert(itEnds->second.size()-1);  }
-               
-               for (set<int>::iterator it = num.begin(); it != num.end(); it++) {  
-                       int numSampled = *it;
-                       
-                       out << numSampled << '\t';  
                        
-                       for (int j = 0; j < globaldata->Groups.size(); j++) {
-                               if (numSampled < div[globaldata->Groups[j]].size()) { 
-                                       float score;
-                                       if (scale)      {  score = (div[globaldata->Groups[j]][numSampled] / (float)numIters) / (float)numSampled;      }
-                                       else            {       score = div[globaldata->Groups[j]][numSampled] / (float)numIters;       }
-                                       
-                                       out << setprecision(4) << score << '\t';
-                               }else { out << "NA" << '\t'; }
-                       }
-                       out << endl;
-               }
+               for (int j = 0; j < globaldata->Groups.size(); j++) {
+                       int numSampled = (div[globaldata->Groups[j]].size()-1);
+                       out << globaldata->Groups[j] << '\t' << numSampled << '\t';
                
+                        
+                       float score;
+                       if (scale)      {  score = (div[globaldata->Groups[j]][numSampled] / (float)numIters) / (float)numSampled;      }
+                       else            {       score = div[globaldata->Groups[j]][numSampled] / (float)numIters;       }
+                               
+                       out << setprecision(4) << score << endl;
+               }
+                                       
                out.close();
                
        }
@@ -318,26 +311,34 @@ void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float>
        }
 }
 //**********************************************************************************************************************
-float PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf){
+float PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set<int> >& counted){
        try {
 
                //calc the branch length
                //while you aren't at root
                float sum = 0.0;
                int index = leaf;
-
+               
+               vector<string> groups = t->tree[leaf].getGroup();
+                                       
                while(t->tree[index].getParent() != -1){
                        
                        //if you have a BL
                        if(t->tree[index].getBranchLength() != -1){
-                               sum += abs(t->tree[index].getBranchLength());
+                               if (counted[groups[0]].count(index) == 0) { //you have not already counted this branch
+                                       sum += abs(t->tree[index].getBranchLength());
+                                       for (int j = 0; j < groups.size(); j++) {  counted[groups[j]].insert(index);  }
+                               }
                        }
                        index = t->tree[index].getParent();
                }
                        
                //get last breanch length added
                if(t->tree[index].getBranchLength() != -1){
-                       sum += abs(t->tree[index].getBranchLength());
+                       if (counted[groups[0]].count(index) == 0) { //you have not already counted this branch
+                               sum += abs(t->tree[index].getBranchLength());
+                               for (int j = 0; j < groups.size(); j++) {  counted[groups[j]].insert(index);  }
+                       }
                }
                
                return sum;