]> git.donarmstrong.com Git - mothur.git/commitdiff
sped up unifrac unweighted.
authorwestcott <westcott>
Mon, 30 Aug 2010 15:14:18 +0000 (15:14 +0000)
committerwestcott <westcott>
Mon, 30 Aug 2010 15:14:18 +0000 (15:14 +0000)
sharedutilities.cpp
unifracunweightedcommand.cpp
unifracweightedcommand.cpp
unweighted.cpp

index 4339a254efbfd9af51796e5c1d473fffa9ffaefa..6dffee95e1c9a2e17bbda649424d0cb53402e513 100644 (file)
@@ -226,6 +226,7 @@ void SharedUtil::setGroups(vector<string>& userGroups, vector<string>& allGroups
                        }
                        //rip extra - off allgroups 
                        label = label.substr(0, label.length()-1);
+                       if ((mode != "weighted") && (allGroups.size() > 10)) {  label = "merged";  }
                }
                
                if (mode == "weighted") {
index 5fd26df8ccdd6e5510856ca26bd7574caa11ed43..68c943cb4e4c57efcc23eafe45f0da47fa6d4495 100644 (file)
@@ -58,7 +58,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
                        string temp = validParameter.validFile(parameters, "distance", false);                  if (temp == "not found") { temp = "false"; }
                        phylip = m->isTrue(temp);
                        
-                       temp = validParameter.validFile(parameters, "random", false);                                   if (temp == "not found") { temp = "true"; }
+                       temp = validParameter.validFile(parameters, "random", false);                                   if (temp == "not found") { temp = "f"; }
                        random = m->isTrue(temp);
                        
                        if (!random) {  iters = 0;  } //turn off random calcs
@@ -105,7 +105,7 @@ void UnifracUnweightedCommand::help(){
                m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n");
                m->mothurOut("The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
                m->mothurOut("The distance parameter allows you to create a distance file from the results. The default is false.\n");
-               m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is true, meaning compare your trees with randomly generated trees.\n");
+               m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n");
                m->mothurOut("The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n");
                m->mothurOut("Example unifrac.unweighted(groups=A-B-C, iters=500).\n");
                m->mothurOut("The default value for groups is all the groups in your groupfile, and iters is 1000.\n");
@@ -199,21 +199,23 @@ int UnifracUnweightedCommand::execute() {
                                        validScores[randomData[k]] = randomData[k];
                                }
                        }
-               
+       
                        for(int a = 0; a < numComp; a++) {
                                float rcumul = 1.0000;
-                               //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
-                               for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
-                                       //make rscoreFreq map and rCumul
-                                       map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
-                                       rCumul[a][it->first] = rcumul;
-                                       //get percentage of random trees with that info
-                                       if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
-                                       else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
-                               }
                                
-                               if (random) {   UWScoreSig[a].push_back(rCumul[a][userData[a]]);        }
-                               else            {       UWScoreSig[a].push_back(0.0);                                           }
+                               if (random) {
+                                       //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
+                                       for (map<float,float>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
+                                               //make rscoreFreq map and rCumul
+                                               map<float,float>::iterator it2 = rscoreFreq[a].find(it->first);
+                                               rCumul[a][it->first] = rcumul;
+                                               //get percentage of random trees with that info
+                                               if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul-= it2->second;  }
+                                               else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
+                                       }
+                                       UWScoreSig[a].push_back(rCumul[a][userData[a]]);
+                               }else           {       UWScoreSig[a].push_back(0.0);                                           }
+       
                        }
                        
                        
index 9cda8f2ec14c4accb670c174602fb5c9ab2d5502..0201776b041e3637351bd3402c43cbdebf0223ce 100644 (file)
@@ -58,7 +58,7 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                        string temp = validParameter.validFile(parameters, "distance", false);                  if (temp == "not found") { temp = "false"; }
                        phylip = m->isTrue(temp);
                
-                       temp = validParameter.validFile(parameters, "random", false);                                   if (temp == "not found") { temp = "true"; }
+                       temp = validParameter.validFile(parameters, "random", false);                                   if (temp == "not found") { temp = "F"; }
                        random = m->isTrue(temp);
                        
                        if (!random) {  iters = 0;  } //turn off random calcs
@@ -97,7 +97,7 @@ void UnifracWeightedCommand::help(){
                m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n");
                m->mothurOut("The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
                m->mothurOut("The distance parameter allows you to create a distance file from the results. The default is false.\n");
-               m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is true, meaning compare your trees with randomly generated trees.\n");
+               m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n");
                m->mothurOut("The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n");
                m->mothurOut("Example unifrac.weighted(groups=A-B-C, iters=500).\n");
                m->mothurOut("The default value for groups is all the groups in your groupfile, and iters is 1000.\n");
index 1f6ad9632bc1dccba63c3308cbb4065b6dc4eca6..d9b3b586ad7487b0782277955c3ad53e40642851 100644 (file)
@@ -19,9 +19,6 @@ EstOutput Unweighted::getValues(Tree* t) {
                double UniqueBL;  //a branch length is unique if it's chidren are from the same group
                double totalBL; //all branch lengths
                double UW;              //Unweighted Value = UniqueBL / totalBL;
-               map<string, int>::iterator it;  //iterator to traverse pgroups
-               map<string, int> copyIpcount;
-
        
                //if the users enters no groups then give them the score of all groups
                int numGroups = globaldata->Groups.size();
@@ -43,7 +40,7 @@ EstOutput Unweighted::getValues(Tree* t) {
                                UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
                                totalBL = 0.00; //all branch lengths
                                UW = 0.00;              //Unweighted Value = UniqueBL / totalBL;
-                               copyIpcount.clear();
+                               //copyIpcount.clear();
                                
                                //groups in this combo
                                groups.push_back(globaldata->Groups[a]); groups.push_back(globaldata->Groups[l]);
@@ -51,27 +48,25 @@ EstOutput Unweighted::getValues(Tree* t) {
                                for(int i=0;i<t->getNumNodes();i++){
                                        if (m->control_pressed) {  return data; }
        
-                                       copyIpcount = t->tree[i].pcount;
-                                       for (it = copyIpcount.begin(); it != copyIpcount.end();) {
-                                               if (m->inUsersGroups(it->first, groups) != true) {      
-                                                       copyIpcount.erase(it++);        
-                                               }else { it++;  }
+                                       //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
+                                       //pcountSize = 2, not unique to one group
+                                       //pcountSize = 1, unique to one group
+                                       
+                                       int pcountSize = 0;
+                                       for (int j = 0; j < groups.size(); j++) {
+                                               map<string, int>::iterator itGroup = t->tree[i].pcount.find(groups[j]);
+                                               if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
                                        }
+                                       
+                                       if (pcountSize == 0) { }
+                                       else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(t->tree[i].getBranchLength());   }
                        
-                                       //if i's children are from the same group then i's pcount size will be 1 
-                                       //if copyIpcount.size() = 0 they are from a branch that is entirely from a group the user doesn't want
-                                       if (copyIpcount.size() == 0) { }
-                                       else if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() == 1)) {  UniqueBL += abs(t->tree[i].getBranchLength());   }
-                       
-                                       //add i's BL to total if it is from the groups the user wants
-                                       if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() != 0)) {  
+                                       if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0)) {  
                                                totalBL += abs(t->tree[i].getBranchLength()); 
                                        }
-                       
                                }
                
                                UW = (UniqueBL / totalBL);  
-               //cout << globaldata->Groups[a] << globaldata->Groups[l] << '\t' << UniqueBL << '\t' << totalBL << endl;
        
                                if (isnan(UW) || isinf(UW)) { UW = 0; }
        
@@ -99,29 +94,27 @@ EstOutput Unweighted::getValues(Tree* t) {
                        UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
                        totalBL = 0.00; //all branch lengths
                        UW = 0.00;              //Unweighted Value = UniqueBL / totalBL;
-                       copyIpcount.clear();
                                
                        for(int i=0;i<t->getNumNodes();i++){
                        
                                if (m->control_pressed) {  return data; }
                                
-                               copyIpcount = t->tree[i].pcount;
-                               for (it = copyIpcount.begin(); it != copyIpcount.end();) {
-                                       if (m->inUsersGroups(it->first, groups) != true) {      
-                                               copyIpcount.erase(it++);        
-                                       }else {  it++;  }
+                               //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
+                               //pcountSize = 2, not unique to one group
+                               //pcountSize = 1, unique to one group
+                               
+                               int pcountSize = 0;
+                               for (int j = 0; j < groups.size(); j++) {
+                                       map<string, int>::iterator itGroup = t->tree[i].pcount.find(groups[j]);
+                                       if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
                                }
-                       
-                               //if i's children are from the same group then i's pcount size will be 1 
-                               //if copyIpcount.size() = 0 they are from a branch that is entirely from a group the user doesn't want
-                               if (copyIpcount.size() == 0) { }
-                               else if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() == 1)) {  UniqueBL += abs(t->tree[i].getBranchLength());   }
-                       
-                               //add i's BL to total if it is from the groups the user wants
-                               if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() != 0)) {  
+                               
+                               if (pcountSize == 0) { }
+                               else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(t->tree[i].getBranchLength());   }
+                                       
+                               if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0)) {  
                                        totalBL += abs(t->tree[i].getBranchLength()); 
                                }
-                       
                        }
                
                        UW = (UniqueBL / totalBL);  
@@ -150,8 +143,6 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) {
                double UniqueBL;  //a branch length is unique if it's chidren are from the same group
                double totalBL; //all branch lengths
                double UW;              //Unweighted Value = UniqueBL / totalBL;
-               map<string, int>::iterator it;  //iterator to traverse pgroups
-               map<string, int> copyIpcount;
                copyTree = new Tree;
 
                //if the users enters no groups then give them the score of all groups
@@ -174,7 +165,6 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) {
                                UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
                                totalBL = 0.00; //all branch lengths
                                UW = 0.00;              //Unweighted Value = UniqueBL / totalBL;
-                               copyIpcount.clear();
                                
                                //copy random tree passed in
                                copyTree->getCopy(t);
@@ -187,33 +177,29 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) {
                                
                                if (m->control_pressed) { delete copyTree; return data; }
                                
-                               //copyTree->createNewickFile("random"+groupA+toString(count));
-                               
                                for(int i=0;i<copyTree->getNumNodes();i++){
+                       
+                                       if (m->control_pressed) {  return data; }
                                        
-                                       if (m->control_pressed) { delete copyTree; return data; }
+                                       //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
+                                       //pcountSize = 2, not unique to one group
+                                       //pcountSize = 1, unique to one group
                                        
-                                       /**********************************************************************/
-                                       //This section adds in all lengths that are non leaf
-                                       copyIpcount = copyTree->tree[i].pcount;
-                                       for (it = copyIpcount.begin(); it != copyIpcount.end();) {
-                                               if (m->inUsersGroups(it->first, groups) != true) {      
-                                                       copyIpcount.erase(it++);        
-                                               }else { it++;  }
+                                       int pcountSize = 0;
+                                       for (int j = 0; j < groups.size(); j++) {
+                                               map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(groups[j]);
+                                               if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
                                        }
-                       
-                                       //if i's children are from the same group then i's pcount size will be 1 
-                                       //if copyIpcount.size() = 0 they are from a branch that is entirely from a group the user doesn't want
-                                       if (copyIpcount.size() == 0) { }
-                                       else if ((copyTree->tree[i].getBranchLength() != -1) && (copyIpcount.size() == 1)) {  UniqueBL += abs(copyTree->tree[i].getBranchLength());     }
-                       
-                                       //add i's BL to total if it is from the groups the user wants
-                                       if ((copyTree->tree[i].getBranchLength() != -1) && (copyIpcount.size() != 0)) {  
+                                       
+                                       if (pcountSize == 0) { }
+                                       else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(copyTree->tree[i].getBranchLength());     }
+                                               
+                                       if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0)) {  
                                                totalBL += abs(copyTree->tree[i].getBranchLength()); 
                                        }
-                       
                                }
-               
+
+                               
                                UW = (UniqueBL / totalBL);  
        
                                if (isnan(UW) || isinf(UW)) { UW = 0; }
@@ -242,7 +228,6 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) {
                        UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
                        totalBL = 0.00; //all branch lengths
                        UW = 0.00;              //Unweighted Value = UniqueBL / totalBL;
-                       copyIpcount.clear();
                
                        //copy random tree passed in
                        copyTree->getCopy(t);
@@ -253,25 +238,25 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) {
                        if (m->control_pressed) { delete copyTree; return data; }
 
                        for(int i=0;i<copyTree->getNumNodes();i++){
-                               if (m->control_pressed) { delete copyTree; return data; }
                        
-                               copyIpcount = copyTree->tree[i].pcount;
-                               for (it = copyIpcount.begin(); it != copyIpcount.end();) {
-                                               if (m->inUsersGroups(it->first, groups) != true) {      
-                                                       copyIpcount.erase(it++);        
-                                               }else { it++;  }
+                               if (m->control_pressed) {  return data; }
+                               
+                               //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
+                               //pcountSize = 2, not unique to one group
+                               //pcountSize = 1, unique to one group
+                               
+                               int pcountSize = 0;
+                               for (int j = 0; j < groups.size(); j++) {
+                                       map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(groups[j]);
+                                       if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
                                }
-                       
-                               //if i's children are from the same group then i's pcount size will be 1 
-                               //if copyIpcount.size() = 0 they are from a branch that is entirely from a group the user doesn't want
-                               if (copyIpcount.size() == 0) { }
-                               else if ((copyTree->tree[i].getBranchLength() != -1) && (copyIpcount.size() == 1)) {  abs(UniqueBL += copyTree->tree[i].getBranchLength());     }
-                       
-                               //add i's BL to total if it is from the groups the user wants
-                               if ((copyTree->tree[i].getBranchLength() != -1) && (copyIpcount.size() != 0)) {  
+                               
+                               if (pcountSize == 0) { }
+                               else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(copyTree->tree[i].getBranchLength());     }
+                                       
+                               if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0)) {  
                                        totalBL += abs(copyTree->tree[i].getBranchLength()); 
                                }
-                       
                        }
                
                        UW = (UniqueBL / totalBL);