]> git.donarmstrong.com Git - mothur.git/blobdiff - unweighted.cpp
working on parallelizing unifrac.unweighted.
[mothur.git] / unweighted.cpp
index 1a2573b6d5738055ddd7d69448a321369d72bb53..ba88049d7e9914231b326861c3abbccdf2af01b2 100644 (file)
 
 /**************************************************************************************************/
 
-EstOutput Unweighted::getValues(Tree* t) {
+EstOutput Unweighted::getValues(Tree* t, int p, string o) {
        try {
                globaldata = GlobalData::getInstance();
+               processors = p;
+               outputDir = o;
+                       
+               //if the users enters no groups then give them the score of all groups
+               int numGroups = globaldata->Groups.size();
                
-               //clear out old values
-               data.resize(1,0); 
+               //calculate number of comparsions
+               int numComp = 0;
+               vector< vector<string> > namesOfGroupCombos;
+               for (int r=0; r<numGroups; r++) { 
+                       for (int l = r+1; l < numGroups; l++) {
+                               numComp++;
+                               vector<string> groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]);
+                               namesOfGroupCombos.push_back(groups);
+                       }
+               }
                
-               float UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
-               float totalBL = 0.00;   //all branch lengths
-               float UW = 0.00;                //Unweighted Value = UniqueBL / totalBL;
+               if (numComp != 1) {
+                       vector<string> groups;
+                       if (numGroups == 0) {
+                               //get score for all users groups
+                               for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
+                                       if (tmap->namesOfGroups[i] != "xxx") {
+                                               groups.push_back(tmap->namesOfGroups[i]);
+                                       }
+                               }
+                               namesOfGroupCombos.push_back(groups);
+                       }else {
+                               for (int i = 0; i < globaldata->Groups.size(); i++) {
+                                       groups.push_back(globaldata->Groups[i]);
+                               }
+                               namesOfGroupCombos.push_back(groups);
+                       }
+               }
+
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       if(processors == 1){
+                               data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
+                       }else{
+                               int numPairs = namesOfGroupCombos.size();
+                               
+                               int numPairsPerProcessor = numPairs / processors;
+                               
+                               for (int i = 0; i < processors; i++) {
+                                       int startPos = i * numPairsPerProcessor;
+                                       if(i == processors - 1){
+                                               numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
+                                       }
+                                       lines.push_back(new linePair(startPos, numPairsPerProcessor));
+                               }
+
+                               data = createProcesses(t, namesOfGroupCombos);
+                               
+                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+                       }
+               #else
+                       data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
+               #endif
                
-               map<string, int>::iterator it;  //iterator to traverse pgroups
-               map<string, int> copyLCpcount;
-               map<string, int> copyRCpcount;
-               map<string, int> copyIpcount;
-       
-               for(int i=t->getNumLeaves();i<t->getNumNodes();i++){
+               return data;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Unweighted", "getValues");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos) {
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 1;
+               int num = 0;
+               vector<int> processIDS;
                
-                       int lc = t->tree[i].getLChild();  //lc = vector index of left child
-                       int rc = t->tree[i].getRChild();  //rc = vector index of right child
+               EstOutput results;
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
                        
-                       /**********************************************************************/
-                       //This section adds in all lengths that are non leaf
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               EstOutput myresults;
+                               myresults = driver(t, namesOfGroupCombos, lines[process]->start, lines[process]->num);
+                               
+                               if (m->control_pressed) { exit(0); }
+                               
+                               m->mothurOut("Merging results."); m->mothurOutEndLine();
+                               
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = outputDir + toString(getpid()) + ".unweighted.results.temp";
+                               m->openOutputFile(tempFile, out);
+                               out << myresults.size() << endl;
+                               for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
+                               out.close();
+                               
+                               exit(0);
+                       }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
+               }
+               
+               results = driver(t, namesOfGroupCombos, lines[0]->start, lines[0]->num);
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<(processors-1);i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+               
+               if (m->control_pressed) { return results; }
+               
+               //get data created by processes
+               for (int i=0;i<(processors-1);i++) { 
+                       ifstream in;
+                       string s = outputDir + toString(processIDS[i]) + ".unweighted.results.temp";
+                       m->openInputFile(s, in);
                        
-                       //copy left childs pGroups and remove groups that the user doesn't want
-                       copyIpcount = t->tree[i].pcount;
-                       for (it = copyIpcount.begin(); it != copyIpcount.end(); it++) {
-                               if (inUsersGroups(it->first, globaldata->Groups) != true) {     copyIpcount.erase(it->first);   }
+                       //get quantiles
+                       if (!in.eof()) {
+                               int num;
+                               in >> num; m->gobble(in);
+                               
+                               if (m->control_pressed) { break; }
+                               
+                               double w; 
+                               for (int j = 0; j < num; j++) {
+                                       in >> w;
+                                       results.push_back(w);
+                               }
+                               m->gobble(in);
                        }
+                       in.close();
+                       remove(s.c_str());
+               }
+               
+               m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
+               
+               return results;
+#endif         
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Unweighted", "createProcesses");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num) { 
+ try {
+               
+               EstOutput results; results.resize(num);
+               
+               int count = 0;
+               int total = num;
+               int twentyPercent = (total * 0.20);
 
-                       //copy left childs pGroups and remove groups that the user doesn't want
-                       copyLCpcount = t->tree[lc].pcount;
-                       for (it = copyLCpcount.begin(); it != copyLCpcount.end(); it++) {
-                               if (inUsersGroups(it->first, globaldata->Groups) != true) {     copyLCpcount.erase(it->first);  }
+               for (int h = start; h < (start+num); h++) {
+               
+                       if (m->control_pressed) { return results; }
+               
+                       double UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
+                       double totalBL = 0.00;  //all branch lengths
+                       double UW = 0.00;               //Unweighted Value = UniqueBL / totalBL;
+                               
+                       for(int i=0;i<t->getNumNodes();i++){
+                       
+                               if (m->control_pressed) {  return data; }
+                               
+                               //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
+                               //pcountSize = 2, not unique to one group
+                               //pcountSize = 1, unique to one group
+                               
+                               int pcountSize = 0;
+                               for (int j = 0; j < namesOfGroupCombos[h].size(); j++) {
+                                       map<string, int>::iterator itGroup = t->tree[i].pcount.find(namesOfGroupCombos[h][j]);
+                                       if (itGroup != t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
+                               }
+                               
+                               if (pcountSize == 0) { }
+                               else if ((t->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(t->tree[i].getBranchLength());   }
+                                       
+                               if ((t->tree[i].getBranchLength() != -1) && (pcountSize != 0)) {  
+                                       totalBL += abs(t->tree[i].getBranchLength()); 
+                               }
                        }
+               
+                       UW = (UniqueBL / totalBL);  
+       
+                       if (isnan(UW) || isinf(UW)) { UW = 0; }
+       
+                       results[count] = UW;
+                       count++;
 
-                       //copy right childs pGroups and remove groups that the user doesn't want
-                       copyRCpcount = t->tree[rc].pcount;
-                       for (it = copyRCpcount.begin(); it != copyRCpcount.end(); it++) {
-                               if (inUsersGroups(it->first, globaldata->Groups) != true) {     copyRCpcount.erase(it->first);  }
+                       //report progress
+                       if((count) % twentyPercent == 0){       m->mothurOut("Percentage complete: " + toString(int((count / (float)total) * 100.0))); m->mothurOutEndLine();           }
+               }
+               
+               m->mothurOut("Percentage complete: 100"); m->mothurOutEndLine();
+               
+               return results; 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Unweighted", "driver");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) { 
+ try {
+       globaldata = GlobalData::getInstance();
+               
+               vector<string> groups;
+               double UniqueBL;  //a branch length is unique if it's chidren are from the same group
+               double totalBL; //all branch lengths
+               double UW;              //Unweighted Value = UniqueBL / totalBL;
+               copyTree = new Tree;
+
+               //if the users enters no groups then give them the score of all groups
+               int numGroups = globaldata->Groups.size();
+
+               //calculate number of comparsions
+               int numComp = 0;
+               for (int r=0; r<numGroups; r++) { 
+                       for (int l = r+1; l < numGroups; l++) {
+                               numComp++;
                        }
+               }
+
+               //numComp+1 for AB, AC, BC, ABC
+               data.resize(numComp+1,0);
+               
+               int count = 0;
+               for (int a=0; a<numGroups; a++) { 
+                       for (int l = a+1; l < numGroups; l++) {
+                               UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
+                               totalBL = 0.00; //all branch lengths
+                               UW = 0.00;              //Unweighted Value = UniqueBL / totalBL;
+                               
+                               //copy random tree passed in
+                               copyTree->getCopy(t);
+                                                               
+                               //groups in this combo
+                               groups.push_back(globaldata->Groups[a]); groups.push_back(globaldata->Groups[l]);
+                               
+                               //swap labels in the groups you want to compare
+                               copyTree->assembleRandomUnifracTree(groups[0], groups[1]);
+                               
+                               if (m->control_pressed) { delete copyTree; return data; }
+                               
+                               for(int i=0;i<copyTree->getNumNodes();i++){
+                       
+                                       if (m->control_pressed) {  return data; }
+                                       
+                                       //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
+                                       //pcountSize = 2, not unique to one group
+                                       //pcountSize = 1, unique to one group
+                                       
+                                       int pcountSize = 0;
+                                       for (int j = 0; j < groups.size(); j++) {
+                                               map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(groups[j]);
+                                               if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; } 
+                                       }
+                                       
+                                       if (pcountSize == 0) { }
+                                       else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(copyTree->tree[i].getBranchLength());     }
+                                               
+                                       if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0)) {  
+                                               totalBL += abs(copyTree->tree[i].getBranchLength()); 
+                                       }
+                               }
+
+                               
+                               UW = (UniqueBL / totalBL);  
+       
+                               if (isnan(UW) || isinf(UW)) { UW = 0; }
        
-                       //if i's children are from the same group and i has a BL then add i's length to unique
-                       //if copyRCpcount.size() = 0 && copyLCpcount.size() = 0 they are from a branch that is entirely from a group the user doesn't want
-                       if ((copyRCpcount.size() == 0) && (copyLCpcount.size() == 0)) { }
-                       else {
-                               if ((copyRCpcount == copyLCpcount) && (t->tree[i].getBranchLength() != -1)) {  UniqueBL += t->tree[i].getBranchLength();        }
-                               //if either childs groups = 0 then all of there groups were not valid making the parent unique
-                               else if (((copyRCpcount.size() == 0) || (copyLCpcount.size() == 0)) && (t->tree[i].getBranchLength() != -1)) {  UniqueBL += t->tree[i].getBranchLength();       }
+                               data[count] = UW;
+                               count++;
+                               groups.clear();
                        }
-                       
-                       //add i's BL to total if it is from the groups the user wants
-                       if ((t->tree[i].getBranchLength() != -1) && (copyIpcount.size() != 0)) {  
-                               totalBL += t->tree[i].getBranchLength(); 
+               }
+               
+               
+               if (numComp != 1) {
+                       if (numGroups == 0) {
+                               //get score for all users groups
+                               for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
+                                       if (tmap->namesOfGroups[i] != "xxx") {
+                                               groups.push_back(tmap->namesOfGroups[i]);
+                                       }
+                               }
+                       }else {
+                               for (int i = 0; i < globaldata->Groups.size(); i++) {
+                                       groups.push_back(globaldata->Groups[i]);
+                               }
                        }
+               
+                       UniqueBL=0.0000;  //a branch length is unique if it's chidren are from the same group
+                       totalBL = 0.00; //all branch lengths
+                       UW = 0.00;              //Unweighted Value = UniqueBL / totalBL;
+               
+                       //copy random tree passed in
+                       copyTree->getCopy(t);
+                               
+                       //swap labels in all the groups you want to compare
+                       copyTree->assembleRandomUnifracTree(groups);
                        
-                       /**********************************************************************/
-                       //This section adds in all lengths that are leaf
+                       if (m->control_pressed) { delete copyTree; return data; }
+
+                       for(int i=0;i<copyTree->getNumNodes();i++){
                        
-                       //if i's chidren are leaves
-                       if (t->tree[rc].getRChild() == -1) {
-                               //if rc is a valid group and rc has a BL
-                               if ((inUsersGroups(t->tree[rc].getGroup(), globaldata->Groups) == true) && (t->tree[rc].getBranchLength() != -1)) {
-                                       UniqueBL += t->tree[rc].getBranchLength();
-                                       totalBL += t->tree[rc].getBranchLength(); 
+                               if (m->control_pressed) {  return data; }
+                               
+                               //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
+                               //pcountSize = 2, not unique to one group
+                               //pcountSize = 1, unique to one group
+                               
+                               int pcountSize = 0;
+                               for (int j = 0; j < groups.size(); j++) {
+                                       map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(groups[j]);
+                                       if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } } 
                                }
-                       }
-                       
-                       if (t->tree[lc].getLChild() == -1) {
-                               //if lc is a valid group and lc has a BL
-                               if ((inUsersGroups(t->tree[lc].getGroup(), globaldata->Groups) == true) && (t->tree[lc].getBranchLength() != -1)) {
-                                       UniqueBL += t->tree[lc].getBranchLength();
-                                       totalBL += t->tree[lc].getBranchLength(); 
+                               
+                               if (pcountSize == 0) { }
+                               else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1)) {  UniqueBL += abs(copyTree->tree[i].getBranchLength());     }
+                                       
+                               if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0)) {  
+                                       totalBL += abs(copyTree->tree[i].getBranchLength()); 
                                }
                        }
-                       
-                       /**********************************************************************/
-               
-               }
                
-               UW = (UniqueBL / totalBL);  
+                       UW = (UniqueBL / totalBL);  
        
-               if (isnan(UW) || isinf(UW)) { UW = 0; }
+                       if (isnan(UW) || isinf(UW)) { UW = 0; }
        
-               data[0] = UW;
+                       data[count] = UW;
+               }
+               
+               delete copyTree;
                
                return data;
        
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the Unweighted class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "Unweighted", "getValues");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the Unweighted class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-
 }
 
+/**************************************************************************************************/
+
+