]> git.donarmstrong.com Git - mothur.git/commitdiff
changed unifrac.weighted()
authorwestcott <westcott>
Tue, 24 Feb 2009 16:34:41 +0000 (16:34 +0000)
committerwestcott <westcott>
Tue, 24 Feb 2009 16:34:41 +0000 (16:34 +0000)
globaldata.cpp
globaldata.hpp
parsimony.cpp
parsimonycommand.cpp
parsimonycommand.h
unifracweightedcommand.cpp
unifracweightedcommand.h
weighted.cpp

index 66a44f25b7246af237194e634eeeeb9514623899..015293d727e5e94c51f1ce2bbe5a7ff6c6a7aee9 100644 (file)
@@ -110,7 +110,6 @@ void GlobalData::parseGlobalData(string commandString, string optionText){
                                if (key == "fileroot" )         { fileroot = value;             }
                                if (key == "abund" )        { abund = value;        }
                                if (key == "random" )           { randomtree = value;   }
-                               if (key == "groups" )           { groups = value;       }
                                if (key == "calc")                      { calc = value;         }
 
                                
@@ -160,7 +159,6 @@ void GlobalData::parseGlobalData(string commandString, string optionText){
                        if (key == "fileroot" )         { fileroot = value;             }
                        if (key == "abund" )        { abund = value;        }
                        if (key == "random" )           { randomtree = value;   }
-                       if (key == "groups" )           { groups = value;       }
                        if (key == "calc")                      { calc = value;         }
 
 
@@ -261,6 +259,7 @@ string GlobalData::getJumble()                      {       return jumble;          }
 string GlobalData::getFreq()                   {       return freq;            }
 string GlobalData::getAbund()           {   return abund;       }
 string GlobalData::getRandomTree()             {       return randomtree;      }
+string GlobalData::getGroups()                 {       return groups;          }
 void GlobalData::setListFile(string file)      {       listfile = file;        inputFileName = file;}
 void GlobalData::setRabundFile(string file)    {       rabundfile = file;      inputFileName = file;}
 void GlobalData::setSabundFile(string file)    {       sabundfile = file;      inputFileName = file;}
@@ -269,7 +268,8 @@ void GlobalData::setColumnFile(string file) {       columnfile = file;    inputFileNam
 void GlobalData::setNameFile(string file)              {       namefile = file;                }
 void GlobalData::setFormat(string Format)              {       format = Format;                }
 void GlobalData::setRandomTree(string Random)  {       randomtree = Random;    }
-void GlobalData::setCalc(string Calc)                  {       calc = Calc;    }
+void GlobalData::setGroups(string g)                   {       groups = g;                             }
+void GlobalData::setCalc(string Calc)                  {       calc = Calc;                    }
 
 /*******************************************************/
 
index 1bccc34f58fa6fcaf136bab32cd74f5093babda7..309ab4a09c712132617c1fab8bd96e7849f4643b 100644 (file)
@@ -57,6 +57,7 @@ public:
        string getFreq();
        string getAbund();
        string getRandomTree();
+       string getGroups();
 
        void setListFile(string);
        void setPhylipFile(string);
@@ -66,6 +67,7 @@ public:
        void setSabundFile(string);
        void setFormat(string);
        void setRandomTree(string);
+       void setGroups(string);
        void setCalc(string);
 
        
index 93efe2858aba11e852bd716f365bc653beea8b0d..da8412ce71d590fe57366fb453126db00ff0f715 100644 (file)
@@ -22,7 +22,7 @@ EstOutput Parsimony::getValues(Tree* t) {
                for(int i=t->getNumLeaves();i<t->getNumNodes();i++){
                        t->tree[i].pGroups = (t->mergeUserGroups(i));
                }
-               //hjkl
+               
                for(int i=t->getNumLeaves();i<t->getNumNodes();i++){
                        int lc = t->tree[i].getLChild();
                        int rc = t->tree[i].getRChild();
@@ -42,8 +42,8 @@ EstOutput Parsimony::getValues(Tree* t) {
                        t->tree[i].printNode();
                } 
                
-               string hold;
-               cin >> hold;
+               //string hold;
+               //cin >> hold;
                
                data[0] = score;
                
index 59dca88751edfb115cc3c83fcc5c85935b52b9d3..1a493bd86df990266d4504a5e938a7e78e721f44 100644 (file)
@@ -25,8 +25,6 @@ ParsimonyCommand::ParsimonyCommand() {
                        openOutputFile(parsFile, out);
                        sumFile = globaldata->getTreeFile() + ".psummary";
                        openOutputFile(sumFile, outSum);
-                       distFile = globaldata->getTreeFile() + ".pdistrib";
-                       openOutputFile(distFile, outDist);
                        //set users groups to analyze
                        setGroups();
                }else { //user wants random distribution
@@ -57,10 +55,6 @@ int ParsimonyCommand::execute() {
                userData.resize(1,0);  //data[0] = pscore.
                randomData.resize(1,0);  //data[0] = pscore.
                
-               //format output
-               outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
-               outDist << "RandomTree#" << '\t' << "ParsScore" << endl;
-               
                if (randomtree == "") {
                        copyUserTree = new Tree();
                        //get pscores for users trees
@@ -104,9 +98,6 @@ int ParsimonyCommand::execute() {
                                //add randoms score to validscores
                                validScores[randomData[0]] = randomData[0];
                                
-                               //output info to pdistrib file
-                               outDist << j+1 << '\t'<< '\t' << randomData[0] << endl;
-                                       
                                delete randT;
                        }
                }else {
@@ -170,7 +161,7 @@ int ParsimonyCommand::execute() {
                //reset randomTree parameter to ""
                globaldata->setRandomTree("");
                //reset groups parameter
-               globaldata->Groups.clear();
+               globaldata->Groups.clear();  globaldata->setGroups("");
                
                return 0;
                
@@ -297,23 +288,29 @@ void ParsimonyCommand::setGroups() {
        try {
                //if the user has not entered specific groups to analyze then do them all
                if (globaldata->Groups.size() != 0) {
-                       //check that groups are valid
-                       for (int i = 0; i < globaldata->Groups.size(); i++) {
-                               if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
-                                       cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
-                                       // erase the invalid group from globaldata->Groups
-                                       globaldata->Groups.erase (globaldata->Groups.begin()+i);
+                       if (globaldata->Groups[0] != "all") {
+                               //check that groups are valid
+                               for (int i = 0; i < globaldata->Groups.size(); i++) {
+                                       if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
+                                               cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
+                                               // erase the invalid group from globaldata->Groups
+                                               globaldata->Groups.erase (globaldata->Groups.begin()+i);
+                                       }
                                }
-                       }
                        
-                       //if the user only entered invalid groups
-                       if (globaldata->Groups.size() == 0) { 
-                               cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; 
+                               //if the user only entered invalid groups
+                               if (globaldata->Groups.size() == 0) { 
+                                       cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; 
+                                       for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
+                                               globaldata->Groups.push_back(tmap->namesOfGroups[i]);
+                                       }
+                               }
+                       }else{//user has enter "all" and wants the default groups
                                for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
                                        globaldata->Groups.push_back(tmap->namesOfGroups[i]);
                                }
+                               globaldata->setGroups("");
                        }
-                                       
                }else {
                        for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
                                globaldata->Groups.push_back(tmap->namesOfGroups[i]);
index 2881dc61e200540588691de3f205420139510561..ba30d4016886123c1d8b33967899d699619b30b4 100644 (file)
@@ -32,7 +32,7 @@ class ParsimonyCommand : public Command {
                TreeMap* tmap; 
                TreeMap* savetmap;
                Parsimony* pars;
-               string parsFile, sumFile, distFile, randomtree;
+               string parsFile, sumFile, randomtree;
                int iters, numGroups;
                vector<int> numEachGroup; //vector containing the number of sequences in each group the users wants for random distrib.
                vector<float> userTreeScores; //scores for users trees
@@ -47,7 +47,7 @@ class ParsimonyCommand : public Command {
                map<int, float>::iterator it;
                map<int, float>::iterator it2;
                
-               ofstream out, outSum, outDist;
+               ofstream out, outSum;
                
                void printParsimonyFile();  
                void printUSummaryFile();
index fef08ff5268517744f2b185317352df28e607dd2..4ec2773d38eba93933015d8be8a77b8d078124bc 100644 (file)
@@ -20,49 +20,8 @@ UnifracWeightedCommand::UnifracWeightedCommand() {
                openOutputFile(weightedFile, out);
                sumFile = globaldata->getTreeFile() + ".wsummary";
                openOutputFile(sumFile, outSum);
-               distFile = globaldata->getTreeFile() + ".wdistrib";
-               openOutputFile(distFile, outDist);
-               
-               //if the user has not entered specific groups to analyze then do them all
-               if (globaldata->Groups.size() == 0) {
-                       numGroups = tmap->getNumGroups();
-               }else {
-                       //check that groups are valid
-                       for (int i = 0; i < globaldata->Groups.size(); i++) {
-                               if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
-                                       cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
-                                       // erase the invalid group from globaldata->Groups
-                                       globaldata->Groups.erase (globaldata->Groups.begin()+i);
-                               }
-                       }
-                       
-                       //if the user only entered invalid groups
-                       if (globaldata->Groups.size() == 0) { 
-                               numGroups = tmap->getNumGroups();
-                               cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; 
-                       }else if (globaldata->Groups.size() == 1) { 
-                               cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl;
-                               numGroups = tmap->getNumGroups();
-                               globaldata->Groups.clear();
-                       }else { numGroups = globaldata->Groups.size(); }
-               }
-               
-               //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
-               numComp = 0;
-               int n = 1;
-               for (int i=1; i<numGroups; i++) { 
-                       numComp += i; 
-                       for (int l = n; l < numGroups; l++) {
-                               //set group comparison labels
-                               if (globaldata->Groups.size() != 0) {
-                                       groupComb.push_back(globaldata->Groups[i-1]+globaldata->Groups[l]);
-                               }else {
-                                       groupComb.push_back(tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]);
-                               }
-                       }
-                       n++;
-               }
-                       
+                               
+               setGroups();    //sets the groups the user wants to analyze                     
                convert(globaldata->getIters(), iters);  //how many random trees to generate
                weighted = new Weighted(tmap);
 
@@ -88,11 +47,6 @@ int UnifracWeightedCommand::execute() {
                totalrscoreFreq.resize(numComp); 
                uCumul.resize(numComp);         
                
-               //format output
-               outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
-               outDist << "Tree#" << '\t' << "Iter" << '\t' << "Groups"<< '\t' << "WScore" << endl;
-
-               
                //create new tree with same num nodes and leaves as users
                randT = new Tree();
                
@@ -141,9 +95,6 @@ int UnifracWeightedCommand::execute() {
                                        
                                        //add random score to valid scores
                                        validScores[p][randomData[p]] = randomData[p];
-                                       
-                                       //output info to uwdistrib file
-                                       outDist << i+1 << '\t' << '\t'<< j+1 << '\t' << '\t' << groupComb[p] << '\t'<< randomData[p] << endl;
                                }
                        }
                        
@@ -198,9 +149,6 @@ int UnifracWeightedCommand::execute() {
                printWeightedFile();
                printWSummaryFile();
                
-               //reset randomTree parameter to 0
-               globaldata->setRandomTree("0");
-               
                //clear out users groups
                globaldata->Groups.clear();
                
@@ -255,6 +203,7 @@ void UnifracWeightedCommand::printWSummaryFile() {
        try {
                //column headers
                outSum << "Tree#" << '\t' << "Groups" << '\t' << '\t' << "WScore" << '\t' << '\t' << "WSig" <<  endl;
+               cout << "Tree#" << '\t' << "Groups" << '\t' << '\t' << "WScore" << '\t' << '\t' << "WSig" <<  endl;
                
                //format output
                outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
@@ -264,6 +213,7 @@ void UnifracWeightedCommand::printWSummaryFile() {
                for (int i = 0; i < T.size(); i++) { 
                        for (int j = 0; j < numComp; j++) {
                                outSum << setprecision(6) << i+1 << '\t' << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << WScoreSig[count] << endl; 
+                               cout << setprecision(6) << i+1 << '\t' << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << WScoreSig[count] << endl; 
                                count++;
                        }
                }
@@ -307,3 +257,61 @@ void UnifracWeightedCommand::saveRandomScores() {
 }
 
 /***********************************************************/
+void UnifracWeightedCommand::setGroups() {
+       try {
+               //if the user has not entered specific groups to analyze then do them all
+               if (globaldata->Groups.size() == 0) {
+                       numGroups = tmap->getNumGroups();
+               }else {
+                       if (globaldata->getGroups() != "all") {
+                               //check that groups are valid
+                               for (int i = 0; i < globaldata->Groups.size(); i++) {
+                                       if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
+                                               cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
+                                               // erase the invalid group from globaldata->Groups
+                                               globaldata->Groups.erase (globaldata->Groups.begin()+i);
+                                       }
+                               }
+                       
+                               //if the user only entered invalid groups
+                               if (globaldata->Groups.size() == 0) { 
+                                       numGroups = tmap->getNumGroups();
+                                       cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl; 
+                               }else if (globaldata->Groups.size() == 1) { 
+                                       cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl;
+                                       numGroups = tmap->getNumGroups();
+                                       globaldata->Groups.clear();
+                               }else { numGroups = globaldata->Groups.size(); }
+                       }else { //users wants all groups
+                               numGroups = tmap->getNumGroups();
+                               globaldata->Groups.clear();
+                               globaldata->setGroups("");
+                       }
+               }
+               
+               //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
+               numComp = 0;
+               int n = 1;
+               for (int i=1; i<numGroups; i++) { 
+                       numComp += i; 
+                       for (int l = n; l < numGroups; l++) {
+                               //set group comparison labels
+                               if (globaldata->Groups.size() != 0) {
+                                       groupComb.push_back(globaldata->Groups[i-1]+globaldata->Groups[l]);
+                               }else {
+                                       groupComb.push_back(tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]);
+                               }
+                       }
+                       n++;
+               }
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the UnifracWeightedCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+}
+
index 1b8b12bf789da9892a78017e7f888e0ad54ace01..9deef732bfe100e27b99fb2e30551de0256e453e 100644 (file)
@@ -34,7 +34,7 @@ class UnifracWeightedCommand : public Command {
                Tree* randT;  //random tree
                TreeMap* tmap;
                Weighted* weighted;
-               string weightedFile, sumFile, distFile;
+               string weightedFile, sumFile;
                int iters, numGroups, numComp;
                EstOutput userData;                     //weighted score info for user tree
                EstOutput randomData;           //weighted score info for random trees
@@ -47,11 +47,12 @@ class UnifracWeightedCommand : public Command {
                map<float, float>::iterator it;
                map<float, float>::iterator it2;
                
-               ofstream outSum, outDist, out;
+               ofstream outSum, out;
                
                void printWSummaryFile();
                void printWeightedFile();  
-               void saveRandomScores(); 
+               void saveRandomScores();
+               void setGroups(); 
 };
 
 
index bcdee111a4763db3dae42d5750066e7ab47417cd..b8e8974657127994a919490bc02cf2564e4179a8 100644 (file)
@@ -15,6 +15,7 @@ EstOutput Weighted::getValues(Tree* t) {
     try {
                globaldata = GlobalData::getInstance();
                int numGroups;
+               vector<double> D;
                
                //if the user has not entered specific groups to analyze then do them all
                if (globaldata->Groups.size() == 0) {
@@ -30,56 +31,66 @@ EstOutput Weighted::getValues(Tree* t) {
                                //initialize weighted scores
                                if (globaldata->Groups.size() == 0) {
                                        WScore[tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]] = 0.0;
+                                       D.push_back(0.0000);  //initialize a spot in D for each combination
                                }else {
                                        WScore[globaldata->Groups[i-1]+globaldata->Groups[l]] = 0.0;
+                                       D.push_back(0.0000); //initialize a spot in D for each combination
                                }
                        }
                }
-
-               data.clear(); //clear out old values
-       
-               double D = 0.0000;
-       
-               for(int i=0;i<t->getNumLeaves();i++){
-                       int index = i;
-                       double sum = 0.0000;
                
-                       //while you aren't at root
-                       while(t->tree[index].getParent() != -1){
-               
-                               if(t->tree[index].pGroups.size() != 0){
-                                       sum += t->tree[index].getBranchLength();
-                               }
-                       
-                               //old_index = you
-                               int old_index = index; 
-                               //index = your parent   
-                               index = t->tree[index].getParent();
-                       
-                               //if you grandparent is the root 
-                               if(t->tree[index].getParent() == -1 && t->tree[old_index].pGroups.size() != 0){
-                                       int lc = t->tree[t->tree[index].getLChild()].pGroups.size();
-                                       int rc = t->tree[t->tree[index].getRChild()].pGroups.size();
-                               
-                               
-                                       if(lc == 0 || rc == 0){
-                                               sum -= t->tree[old_index].getBranchLength();
-                                       }
-                               }
-                       }
-       
-                       if(t->tree[i].getGroup() != ""){
-                               sum /= (double)tmap->seqsPerGroup[t->tree[i].getGroup()];
-                               D += sum; 
-                       }
-               }
-       
+               data.clear(); //clear out old values
        
                for(int i=0;i<t->getNumNodes();i++){
                        //calculate weighted score for each of the group comb i.e. with groups A,B,C = AB, AC, BC.
                        n = 1;
                        for (int b=1; b<numGroups; b++) { 
                                for (int l = n; l < numGroups; l++) {
+                               
+                               /********************************************************/
+                               //calculate a D value for each group combo
+                                       for(int v=0;v<t->getNumLeaves();v++){
+                                               int index = v;
+                                               double sum = 0.0000;
+               
+                                               //while you aren't at root
+                                               while(t->tree[index].getParent() != -1){
+                                                       
+                                                       //if you have a BL
+                                                       if(t->tree[index].getBranchLength() != -1){
+                                                               sum += t->tree[index].getBranchLength();
+                                                       }
+                       
+                                                       index = t->tree[index].getParent();
+                                               }
+                                               
+                                               //get last breanch length added
+                                               if(t->tree[index].getBranchLength() != -1){
+                                                               sum += t->tree[index].getBranchLength();
+                                               }
+                                               
+                                               if (globaldata->Groups.size() == 0) {
+                                                       //is this sum from a sequence which is in one of the users groups
+                                                       if (inUsersGroups(t->tree[v].getGroup(), tmap->namesOfGroups) == true) {
+                                                               //is this sum from a sequence which is in this groupCombo
+                                                               if ((t->tree[v].getGroup() == tmap->namesOfGroups[b-1]) || (t->tree[v].getGroup() == tmap->namesOfGroups[l])) {
+                                                                       sum /= (double)tmap->seqsPerGroup[t->tree[v].getGroup()];
+                                                                       D[n-1] += sum; 
+                                                               }
+                                                       }
+                                               }else {
+                                                       //is this sum from a sequence which is in one of the users groups
+                                                       if (inUsersGroups(t->tree[v].getGroup(), globaldata->Groups) == true) {
+                                                               //is this sum from a sequence which is in this groupCombo
+                                                               if ((t->tree[v].getGroup() == globaldata->Groups[b-1]) || (t->tree[v].getGroup() == globaldata->Groups[l])) {
+                                                                       sum /= (double)tmap->seqsPerGroup[t->tree[v].getGroup()];
+                                                                       D[n-1] += sum; 
+                                                               }
+                                                       }
+                                               }
+                                       }
+                               /*********************************************************/
+                               //calculate a u value for each combo
                                        double u;
                                        //the user has not entered specific groups
                                        if (globaldata->Groups.size() == 0) {
@@ -123,6 +134,7 @@ EstOutput Weighted::getValues(Tree* t) {
                                                //save groupcombs u value
                                                WScore[globaldata->Groups[b-1]+globaldata->Groups[l]] += u;
                                        }
+                               /*********************************************************/
                                }
                                n++;
                        }
@@ -135,9 +147,9 @@ EstOutput Weighted::getValues(Tree* t) {
                        for (int l = n; l < numGroups; l++) {
                                //the user has not entered specific groups
                                if (globaldata->Groups.size() == 0) {
-                                       UN = (WScore[tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]] / D);
+                                       UN = (WScore[tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]] / D[n-1]);
                                }else {//they have entered specific groups
-                                       UN = (WScore[globaldata->Groups[i-1]+globaldata->Groups[l]] / D);
+                                       UN = (WScore[globaldata->Groups[i-1]+globaldata->Groups[l]] / D[n-1]);
                                }
                                if (isnan(UN) || isinf(UN)) { UN = 0; } 
                                data.push_back(UN);