]> git.donarmstrong.com Git - mothur.git/blobdiff - unifracweightedcommand.cpp
parsimony command now uses groups, fixed bug with unweighted groups
[mothur.git] / unifracweightedcommand.cpp
index ff2c2450bdd35c005c1f29c3be03741a26da834c..fef08ff5268517744f2b185317352df28e607dd2 100644 (file)
@@ -22,8 +22,30 @@ UnifracWeightedCommand::UnifracWeightedCommand() {
                openOutputFile(sumFile, outSum);
                distFile = globaldata->getTreeFile() + ".wdistrib";
                openOutputFile(distFile, outDist);
-
-               numGroups = tmap->getNumGroups();
+               
+               //if the user has not entered specific groups to analyze then do them all
+               if (globaldata->Groups.size() == 0) {
+                       numGroups = tmap->getNumGroups();
+               }else {
+                       //check that groups are valid
+                       for (int i = 0; i < globaldata->Groups.size(); i++) {
+                               if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
+                                       cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
+                                       // erase the invalid group from globaldata->Groups
+                                       globaldata->Groups.erase (globaldata->Groups.begin()+i);
+                               }
+                       }
+                       
+                       //if the user only entered invalid groups
+                       if (globaldata->Groups.size() == 0) { 
+                               numGroups = tmap->getNumGroups();
+                               cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; 
+                       }else if (globaldata->Groups.size() == 1) { 
+                               cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl;
+                               numGroups = tmap->getNumGroups();
+                               globaldata->Groups.clear();
+                       }else { numGroups = globaldata->Groups.size(); }
+               }
                
                //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
                numComp = 0;
@@ -32,7 +54,11 @@ UnifracWeightedCommand::UnifracWeightedCommand() {
                        numComp += i; 
                        for (int l = n; l < numGroups; l++) {
                                //set group comparison labels
-                               groupComb.push_back(tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]);
+                               if (globaldata->Groups.size() != 0) {
+                                       groupComb.push_back(globaldata->Groups[i-1]+globaldata->Groups[l]);
+                               }else {
+                                       groupComb.push_back(tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]);
+                               }
                        }
                        n++;
                }
@@ -96,7 +122,7 @@ int UnifracWeightedCommand::execute() {
                        //copy T[i]'s info.
                        randT->getCopy(T[i]); 
                        
-                       //get pscores for random trees
+                       //get scores for random trees
                        for (int j = 0; j < iters; j++) {
                                //create a random tree with same topology as T[i], but different labels
                                randT->assembleRandomUnifracTree();
@@ -125,14 +151,14 @@ int UnifracWeightedCommand::execute() {
                        
                        //find the signifigance of the score for summary file
                        for (int t = 0; t < numComp; t++) {
-                               float rcumul = 0.0000;
+                               float rcumul = 1.0000;
                                for (it = validScores[t].begin(); it != validScores[t].end(); it++) { 
                                        //make rscoreFreq map and rCumul
                                        it2 = rscoreFreq[t].find(it->first);
+                                       rCumul[t][it->first] = rcumul;
                                        //get percentage of random trees with that info
-                                       if (it2 != rscoreFreq[t].end()) {  rscoreFreq[t][it->first] /= iters; rcumul+= it2->second;  }
+                                       if (it2 != rscoreFreq[t].end()) {  rscoreFreq[t][it->first] /= iters; rcumul-= it2->second;  }
                                        else { rscoreFreq[t][it->first] = 0.0000; } //no random trees with that score
-                                       rCumul[t][it->first] = rcumul;
                                }
                        }
                        
@@ -149,24 +175,24 @@ int UnifracWeightedCommand::execute() {
                
                rCumul.resize(numComp);
                for (int b = 0; b < numComp; b++) {
-                       float ucumul = 0.0000;
-                       float rcumul = 0.0000;
+                       float ucumul = 1.0000;
+                       float rcumul = 1.0000;
                        //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
                        for (it = validScores[b].begin(); it != validScores[b].end(); it++) { 
                                it2 = uscoreFreq[b].find(it->first);
-                               //user data has that score 
-                               if (it2 != uscoreFreq[b].end()) { uscoreFreq[b][it->first] /= T.size(); ucumul+= it2->second;  }
-                               else { uscoreFreq[b][it->first] = 0.0000; } //no user trees with that score
                                //make uCumul map
                                uCumul[b][it->first] = ucumul;
+                               //user data has that score 
+                               if (it2 != uscoreFreq[b].end()) { uscoreFreq[b][it->first] /= T.size(); ucumul-= it2->second;  }
+                               else { uscoreFreq[b][it->first] = 0.0000; } //no user trees with that score
                        
                                //make rscoreFreq map and rCumul
                                it2 = totalrscoreFreq[b].find(it->first);
+                               rCumul[b][it->first] = rcumul;
                                //get percentage of random trees with that info
-                               if (it2 != totalrscoreFreq[b].end()) {  totalrscoreFreq[b][it->first] /= (iters * T.size()); rcumul+= it2->second;  }
+                               if (it2 != totalrscoreFreq[b].end()) {  totalrscoreFreq[b][it->first] /= (iters * T.size()); rcumul-= it2->second;  }
                                else { totalrscoreFreq[b][it->first] = 0.0000; } //no random trees with that score
-                               rCumul[b][it->first] = rcumul;
-                       }
+                                                       }
                }
                
                printWeightedFile();
@@ -175,6 +201,9 @@ int UnifracWeightedCommand::execute() {
                //reset randomTree parameter to 0
                globaldata->setRandomTree("0");
                
+               //clear out users groups
+               globaldata->Groups.clear();
+               
                delete randT;
                
                return 0;