]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed craig nelsons weighted bug and paralellized parsimony
authorwestcott <westcott>
Fri, 3 Dec 2010 11:08:08 +0000 (11:08 +0000)
committerwestcott <westcott>
Fri, 3 Dec 2010 11:08:08 +0000 (11:08 +0000)
bayesian.cpp
bayesian.h
classifyseqscommand.cpp
mothur
parsimony.cpp
parsimony.h
parsimonycommand.cpp
parsimonycommand.h
weighted.cpp
weighted.h

index 1eee7f22fe3d75f206eacec5169c999432336ff4..df83cde86fa61b87b5498c0eb35569a92bb9bef4 100644 (file)
@@ -15,7 +15,7 @@
 Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i) : 
 Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
        try {
-                                       
+               
                /************calculate the probablity that each word will be in a specific taxonomy*************/
                string tfileroot = tfile.substr(0,tfile.find_last_of(".")+1);
                string tempfileroot = m->getRootName(m->getSimpleName(tempFile));
@@ -205,6 +205,7 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
 /**************************************************************************************************/
 Bayesian::~Bayesian() {
        try {
+               
                 delete phyloTree; 
                 if (database != NULL) {  delete database; }
        }
@@ -223,7 +224,7 @@ string Bayesian::getTaxonomy(Sequence* seq) {
                //get words contained in query
                //getKmerString returns a string where the index in the string is hte kmer number 
                //and the character at that index can be converted to be the number of times that kmer was seen
-
+               
                string queryKmerString = kmer.getKmerString(seq->getUnaligned()); 
 
                vector<int> queryKmers;
@@ -235,14 +236,16 @@ string Bayesian::getTaxonomy(Sequence* seq) {
                
                if (queryKmers.size() == 0) {  m->mothurOut(seq->getName() + "is bad."); m->mothurOutEndLine(); return "bad seq"; }
                
+               
                int index = getMostProbableTaxonomy(queryKmers);
                
                if (m->control_pressed) { return tax; }
-//cout << seq->getName() << '\t' << index << endl;                                     
+                                       
                //bootstrap - to set confidenceScore
                int numToSelect = queryKmers.size() / 8;
+       
                tax = bootstrapResults(queryKmers, index, numToSelect);
-                                               
+                               
                return tax;     
        }
        catch(exception& e) {
@@ -255,6 +258,17 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
        try {
                                
                map<int, int> confidenceScores; 
+               
+               //initialize confidences to 0 
+               int seqIndex = tax;
+               TaxNode seq = phyloTree->get(tax);
+               confidenceScores[tax] = 0;
+               
+               while (seq.level != 0) { //while you are not at the root
+                       seqIndex = seq.parent;
+                       confidenceScores[seqIndex] = 0;
+                       seq = phyloTree->get(seq.parent);
+               }
                                
                map<int, int>::iterator itBoot;
                map<int, int>::iterator itBoot2;
@@ -264,7 +278,6 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
                        if (m->control_pressed) { return "control"; }
                        
                        vector<int> temp;
-                                               
                        for (int j = 0; j < numToSelect; j++) {
                                int index = int(rand() % kmers.size());
                                
@@ -274,17 +287,15 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
                        
                        //get taxonomy
                        int newTax = getMostProbableTaxonomy(temp);
+                       //int newTax = 1;
                        TaxNode taxonomyTemp = phyloTree->get(newTax);
-       
+                       
                        //add to confidence results
                        while (taxonomyTemp.level != 0) { //while you are not at the root
-                               
                                itBoot2 = confidenceScores.find(newTax); //is this a classification we already have a count on
                                
-                               if (itBoot2 == confidenceScores.end()) { //not already in confidence scores
-                                       confidenceScores[newTax] = 1;
-                               }else{
-                                       confidenceScores[newTax]++;
+                               if (itBoot2 != confidenceScores.end()) { //this is a classification we need a confidence for
+                                       (itBoot2->second)++;
                                }
                                
                                newTax = taxonomyTemp.parent;
@@ -305,7 +316,7 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
                                
                                int confidence = 0;
                                if (itBoot2 != confidenceScores.end()) { //already in confidence scores
-                                       confidence = confidenceScores[seqTaxIndex];
+                                       confidence = itBoot2->second;
                                }
                                
                                if (((confidence/(float)iters) * 100) >= confidenceThreshold) {
@@ -339,7 +350,7 @@ int Bayesian::getMostProbableTaxonomy(vector<int> queryKmer) {
                        for (int i = 0; i < queryKmer.size(); i++) {
                                prob += wordGenusProb[queryKmer[i]][k];
                        }
-               
+                       
                        //is this the taxonomy with the greatest probability?
                        if (prob > maxProbability) { 
                                indexofGenus = genusNodes[k];
index 012def96d8d2d04c8c258963668e261f125a9871..a1f693bb8f815dd0a6cb1d6155e9c032caf6fcea 100644 (file)
@@ -25,7 +25,7 @@ public:
        
 private:
        vector< vector<float> > wordGenusProb;  //vector of maps from genus to probability
-                                                                                               //wordGenusProb[0][392] = probability that a sequence within genus that's index in the tree is 392 would contain kmer 0;
+                                                                               //wordGenusProb[0][392] = probability that a sequence within genus that's index in the tree is 392 would contain kmer 0;
        
        vector<int> genusTotals;
        vector<int> genusNodes;  //indexes in phyloTree where genus' are located
index 2d3838774afe52a382c52426e6db4243ebd64c87..9030948074003f663978ab10ff0128adc0ad7c37 100644 (file)
@@ -829,13 +829,14 @@ int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempT
 
                bool done = false;
                int count = 0;
-       
+               
                while (!done) {
                        if (m->control_pressed) { return 0; }
                
                        Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
                        
                        if (candidateSeq->getName() != "") {
+                       
                                taxonomy = classify->getTaxonomy(candidateSeq);
                                
                                if (m->control_pressed) { delete candidateSeq; return 0; }
@@ -867,7 +868,7 @@ int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempT
                }
                //report progress
                if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
-                               
+                       
                inFASTA.close();
                outTax.close();
                outTaxSimple.close();
diff --git a/mothur b/mothur
index 6bdaec62890bdc8c83758a2145d2c63e9b7302d1..39270c06604caadaf65f3297a8bb8839064fcbe7 100755 (executable)
Binary files a/mothur and b/mothur differ
index efc7d3a4dfcdcb9db47dc24de5a7a412953a7aba..4ec5e1ba7d31495ebef7b5efaf2b7e6bbc82ad7c 100644 (file)
 
 /**************************************************************************************************/
 
-EstOutput Parsimony::getValues(Tree* t) {
+EstOutput Parsimony::getValues(Tree* t, int p, string o) {
        try {
                globaldata = GlobalData::getInstance();
-               vector<string> groups;
-               
-               copyTree = new Tree();
+               processors = p;
+               outputDir = o;
                
                //if the users enters no groups then give them the score of all groups
                int numGroups = globaldata->Groups.size();
                
                //calculate number of comparsions
                int numComp = 0;
+               vector< vector<string> > namesOfGroupCombos;
                for (int r=0; r<numGroups; r++) { 
                        for (int l = r+1; l < numGroups; l++) {
                                numComp++;
+                               vector<string> groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]);
+                               //cout << globaldata->Groups[r] << '\t' << globaldata->Groups[l] << endl;
+                               namesOfGroupCombos.push_back(groups);
                        }
                }
 
                //numComp+1 for AB, AC, BC, ABC
-               data.resize(numComp+1,0);
-               
-               int count = 0;
-               for (int a=0; a<numGroups; a++) { 
-                       for (int l = 0; l < a; l++) {
-                               int score = 0;
-                               
-                               //groups in this combo
-                               groups.push_back(globaldata->Groups[a]); groups.push_back(globaldata->Groups[l]);
-                               
-                               //copy users tree so that you can redo pgroups 
-                               copyTree->getCopy(t);
-
-                               //create pgroups that reflect the groups the user want to use
-                               for(int i=copyTree->getNumLeaves();i<copyTree->getNumNodes();i++){
-                                       copyTree->tree[i].pGroups = (copyTree->mergeUserGroups(i, groups));
-                               }
-               
-                               for(int i=copyTree->getNumLeaves();i<copyTree->getNumNodes();i++){
-                               
-                                       if (m->control_pressed) { return data; }
-                                       
-                                       int lc = copyTree->tree[i].getLChild();
-                                       int rc = copyTree->tree[i].getRChild();
-                       
-                                       int iSize = copyTree->tree[i].pGroups.size();
-                                       int rcSize = copyTree->tree[rc].pGroups.size();
-                                       int lcSize = copyTree->tree[lc].pGroups.size();
-               
-                                       //if isize are 0 then that branch is to be ignored
-                                       if (iSize == 0) { }
-                                       else if ((rcSize == 0) || (lcSize == 0)) { }
-                                       //if you have more groups than either of your kids then theres been a change.
-                                       else if(iSize > rcSize || iSize > lcSize){
-                                               score++;
-                                       }
-                               } 
-                               
-                               data[count] = score;
-                               count++;
-                               groups.clear();
-                       }
-               }
-               
                if (numComp != 1) {
+                       vector<string> groups;
                        if (numGroups == 0) {
                                //get score for all users groups
                                for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
                                        if (tmap->namesOfGroups[i] != "xxx") {
                                                groups.push_back(tmap->namesOfGroups[i]);
+                                               //cout << tmap->namesOfGroups[i] << endl;
                                        }
                                }
+                               namesOfGroupCombos.push_back(groups);
                        }else {
                                for (int i = 0; i < globaldata->Groups.size(); i++) {
                                        groups.push_back(globaldata->Groups[i]);
+                                       //cout << globaldata->Groups[i] << endl;
+                               }
+                               namesOfGroupCombos.push_back(groups);
+                       }
+               }
+               
+       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               if(processors == 1){
+                       data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
+               }else{
+                       lines.clear();
+                       int numPairs = namesOfGroupCombos.size();
+                       
+                       int numPairsPerProcessor = numPairs / processors;
+                       
+                       for (int i = 0; i < processors; i++) {
+                               int startPos = i * numPairsPerProcessor;
+                               
+                               if(i == processors - 1){
+                                       numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
                                }
+                               
+                               lines.push_back(linePair(startPos, numPairsPerProcessor));
                        }
                        
+                       data = createProcesses(t, namesOfGroupCombos);
+               }
+       #else
+               data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size());
+       #endif
+               
+               return data;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Parsimony", "getValues");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+EstOutput Parsimony::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos) {
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 1;
+               int num = 0;
+               vector<int> processIDS;
+               
+               EstOutput results;
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               EstOutput myresults;
+                               myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num);
+                               
+                               if (m->control_pressed) { exit(0); }
+                               
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = outputDir + toString(getpid()) + ".pars.results.temp";
+                               m->openOutputFile(tempFile, out);
+                               out << myresults.size() << endl;
+                               for (int i = 0; i < myresults.size(); i++) {  out << myresults[i] << '\t';  } out << endl;
+                               out.close();
+                               
+                               exit(0);
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0); 
+                       }
+               }
+               
+               results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num);
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+               
+               if (m->control_pressed) { return results; }
+                       
+               //get data created by processes
+               for (int i=0;i<processIDS.size();i++) { 
+                       ifstream in;
+                       string s = outputDir + toString(processIDS[i]) + ".pars.results.temp";
+                       m->openInputFile(s, in);
+                       
+                       //get scores
+                       if (!in.eof()) {
+                               int num;
+                               in >> num; m->gobble(in);
+                               
+                               if (m->control_pressed) { break; }
+                               
+                               double w; 
+                               for (int j = 0; j < num; j++) {
+                                       in >> w;
+                                       results.push_back(w);
+                               }
+                               m->gobble(in);
+                       }
+                       in.close();
+                       remove(s.c_str());
+               }
+               
+               return results;
+#endif         
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Parsimony", "createProcesses");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+EstOutput Parsimony::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num) { 
+       try {
+               
+               EstOutput results; results.resize(num);
+               
+               Tree* copyTree = new Tree();
+               int count = 0;
+               
+               for (int h = start; h < (start+num); h++) {
+                                       
+                       if (m->control_pressed) { delete copyTree; return results; }
+       
+                       int score = 0;
+                       
+                       //groups in this combo
+                       vector<string> groups = namesOfGroupCombos[h];
+                       
                        //copy users tree so that you can redo pgroups 
                        copyTree->getCopy(t);
-                       int score = 0;
-               
+                       
                        //create pgroups that reflect the groups the user want to use
                        for(int i=copyTree->getNumLeaves();i<copyTree->getNumNodes();i++){
                                copyTree->tree[i].pGroups = (copyTree->mergeUserGroups(i, groups));
                        }
-               
-//                     map<string,int>::iterator it;
                        
                        for(int i=copyTree->getNumLeaves();i<copyTree->getNumNodes();i++){
-                       
+                               
                                if (m->control_pressed) { return data; }
                                
                                int lc = copyTree->tree[i].getLChild();
                                int rc = copyTree->tree[i].getRChild();
-                       
+                               
                                int iSize = copyTree->tree[i].pGroups.size();
                                int rcSize = copyTree->tree[rc].pGroups.size();
                                int lcSize = copyTree->tree[lc].pGroups.size();
                                
-                                       
                                //if isize are 0 then that branch is to be ignored
                                if (iSize == 0) { }
                                else if ((rcSize == 0) || (lcSize == 0)) { }
@@ -119,17 +213,17 @@ EstOutput Parsimony::getValues(Tree* t) {
                                        score++;
                                }
                        } 
-               
-                       data[count] = score;
-
+                       
+                       results[count] = score;
+                       count++;
                }
-               
+                                       
                delete copyTree;
-               
-               return data;
+                       
+               return results; 
        }
        catch(exception& e) {
-               m->errorOut(e, "Parsimony", "getValues");
+               m->errorOut(e, "Parsimony", "driver");
                exit(1);
        }
 }
index 74ebefdc2d185eaa325a8ec4beff12fc04733cff..fc905f2d3c432ab1ce4a05007e13ca0f14264fd0 100644 (file)
@@ -22,15 +22,25 @@ class Parsimony : public TreeCalculator  {
        public:
                Parsimony(TreeMap* t) : tmap(t) {};
                ~Parsimony() {};
-               EstOutput getValues(Tree*);
+               EstOutput getValues(Tree*, int, string);
                //EstOutput getValues(Tree*, string, string) { return data; }
                
        private:
+               struct linePair {
+                       int start;
+                       int num;
+                       linePair(int i, int j) : start(i), num(j) {}
+               };
+               vector<linePair> lines;
+       
                GlobalData* globaldata;
-               Tree* copyTree;
                EstOutput data;
                TreeMap* tmap;
-               map<string, int>::iterator it;
+               int processors;
+               string outputDir;
+       
+               EstOutput driver(Tree*, vector< vector<string> >, int, int); 
+               EstOutput createProcesses(Tree*, vector< vector<string> >);
 };
 
 /***********************************************************************/
index 3d6818b30bac00f2a7137e2f158cb0e415106a9b..c97e92ed7253cf1cdf00882a6e1155c1b86fe60a 100644 (file)
@@ -12,7 +12,7 @@
 //**********************************************************************************************************************
 vector<string> ParsimonyCommand::getValidParameters(){ 
        try {
-               string Array[] =  {"random","groups","iters","outputdir","inputdir"};
+               string Array[] =  {"random","groups","iters","processors","outputdir","inputdir"};
                vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                return myArray;
        }
@@ -69,7 +69,7 @@ ParsimonyCommand::ParsimonyCommand(string option)  {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"random","groups","iters","outputdir","inputdir"};
+                       string Array[] =  {"random","groups","processors","iters","outputdir","inputdir"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -96,7 +96,7 @@ ParsimonyCommand::ParsimonyCommand(string option)  {
                        }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
-                       string outputDir = validParameter.validFile(parameters, "outputdir", false);            if (outputDir == "not found"){  outputDir = ""; }
+                       string outputDir = validParameter.validFile(parameters, "outputdir", false);            if (outputDir == "not found"){  outputDir = ""; if (randomtree == "")  { outputDir += m->hasPath(globaldata->inputFileName); } }
                        
                        //check for optional parameter and set defaults
                        // ...at some point should added some additional type checking...
@@ -109,6 +109,9 @@ ParsimonyCommand::ParsimonyCommand(string option)  {
                                
                        itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
                        convert(itersString, iters); 
+                       
+                       string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = "1";                             }
+                       convert(temp, processors); 
                                                
                        if (abort == false) {
                                //randomtree will tell us if user had their own treefile or if they just want the random distribution
@@ -162,10 +165,11 @@ ParsimonyCommand::ParsimonyCommand(string option)  {
 void ParsimonyCommand::help(){
        try {
                m->mothurOut("The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter.\n");
-               m->mothurOut("The parsimony command parameters are random, groups and iters.  No parameters are required.\n");
+               m->mothurOut("The parsimony command parameters are random, groups, processors and iters.  No parameters are required.\n");
                m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n");
                m->mothurOut("The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
                m->mothurOut("The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n");
+               m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
                m->mothurOut("Example parsimony(random=out, iters=500).\n");
                m->mothurOut("The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n");
                m->mothurOut("and iters is 1000.  The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n");
@@ -209,7 +213,7 @@ int ParsimonyCommand::execute() {
                if (randomtree == "") {
                        //get pscores for users trees
                        for (int i = 0; i < T.size(); i++) {
-                               userData = pars->getValues(T[i]);  //data = AB, AC, BC, ABC.
+                               userData = pars->getValues(T[i], processors, outputDir);  //data = AB, AC, BC, ABC.
                                
                                if (m->control_pressed) { 
                                        delete reading; delete pars; delete util; delete output;
@@ -247,7 +251,7 @@ int ParsimonyCommand::execute() {
                                randT->assembleRandomTree();
 
                                //get pscore of random tree
-                               randomData = pars->getValues(randT);
+                               randomData = pars->getValues(randT, processors, outputDir);
                                
                                if (m->control_pressed) { 
                                        delete reading; delete pars; delete util; delete output; delete randT;
@@ -296,7 +300,7 @@ int ParsimonyCommand::execute() {
 
 
                                //get pscore of random tree
-                               randomData = pars->getValues(randT);
+                               randomData = pars->getValues(randT, processors, outputDir);
                                
                                if (m->control_pressed) { 
                                        delete reading; delete pars; delete util; delete output; delete randT;
index 04acd302891f36df23399ac9d9b1694535c7005e..9e756b884fb087f95ca0d43c23027c8caaa33b3f 100644 (file)
@@ -44,7 +44,7 @@ private:
        Parsimony* pars;
        vector<string> groupComb; // AB. AC, BC...
        string sumFile, randomtree, allGroups, outputDir;
-       int iters, numGroups, numComp, counter;
+       int iters, numGroups, numComp, counter, processors;
        vector<int> numEachGroup; //vector containing the number of sequences in each group the users wants for random distrib.
        vector< vector<float> > userTreeScores; //scores for users trees for each comb.
        vector< vector<float> > UScoreSig;  //tree score signifigance when compared to random trees - percentage of random trees with that score or lower.
index 1ac274619bf2c8971ea7f17ddb03c8cf98dde0ed..1ce6230df933eab46bee132a76ec32c2c4e9a666 100644 (file)
@@ -184,7 +184,7 @@ EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos,
                        
                                D[count] += weightedSum;
                        }
-                       
+
                        //adding the wieghted sums from group l
                        for (int j = 0; j < t->groupNodeInfo[groupB].size(); j++) { //the leaf nodes that have seqs from group l
                                map<string, int>::iterator it = t->tree[t->groupNodeInfo[groupB][j]].pcount.find(groupB);
@@ -195,6 +195,7 @@ EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos,
                        
                                D[count] += weightedSum;
                        }
+                       
                        count++;
                }
                
@@ -205,8 +206,6 @@ EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos,
                        m->mothurOut("Processing combo: " + toString(h)); m->mothurOutEndLine();
                        
                        int numLeaves = t->getNumLeaves();
-                       map<int, double> tempTotals; //maps node to total Branch Length
-                       map<int, int> nodePcountSize; //maps node to pcountSize
                        
                        string groupA = namesOfGroupCombos[h][0]; 
                        string groupB = namesOfGroupCombos[h][1];
@@ -217,13 +216,12 @@ EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos,
                                if (m->control_pressed) { return data; }
                                
                                double u;
-                               int pcountSize = 0;
+                               //int pcountSize = 0;
                                //does this node have descendants from groupA
                                it = t->tree[i].pcount.find(groupA);
                                //if it does u = # of its descendants with a certain group / total number in tree with a certain group
                                if (it != t->tree[i].pcount.end()) {
                                        u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA];
-                                       pcountSize++;
                                }else { u = 0.00; }
                                
                                
@@ -232,40 +230,14 @@ EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos,
                                //if it does subtract their percentage from u
                                if (it != t->tree[i].pcount.end()) {
                                        u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB];
-                                       pcountSize++;
                                }
                                
-                               u = abs(u * t->tree[i].getBranchLength());
-                               
-                               nodePcountSize[i] = pcountSize;
-                               
-                               //if you are a leaf from a users group add to total
-                               if (i < numLeaves) {
-                                       if ((t->tree[i].getBranchLength() != -1) && pcountSize != 0) { 
-                                               //cout << "added to total" << endl; 
-                                               WScore[(groupA+groupB)] += u; 
-                                       }
-                                       tempTotals[i] = 0.0;  //we don't care about you, or we have already added you
-                               }else{ //if you are not a leaf 
-                                       //do both your chidren have have descendants from the users groups? 
-                                       int lc = t->tree[i].getLChild();
-                                       int rc = t->tree[i].getRChild();
-                                       
-                                       //if yes, add your childrens tempTotals
-                                       if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) {
-                                               WScore[(groupA+groupB)] += tempTotals[lc] + tempTotals[rc]; 
-                                               //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
-                                               if (t->tree[i].getBranchLength() != -1) {
-                                                       tempTotals[i] = u;
-                                               }else {
-                                                       tempTotals[i] = 0.0;
-                                               }
-                                       }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0;  //we don't care about you
-                                       }else { //if no, your tempTotal is your childrens temp totals + your branch length
-                                               tempTotals[i] = tempTotals[lc] + tempTotals[rc] + u; 
-                                       }
-                                       //cout << "temptotal = "<< tempTotals[i] << endl;
+                               //if this is not the root then add it
+                               if (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0) {
+                                       u = abs(u * t->tree[i].getBranchLength());
+                                       WScore[(groupA+groupB)] += u; 
                                }
+                               
                        }
                }
                
@@ -327,66 +299,35 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) {
                }
                
                int numLeaves = t->getNumLeaves();
-               map<int, double> tempTotals; //maps node to total Branch Length
-               map<int, int> nodePcountSize; //maps node to pcountSize
                
                //calculate u for the group comb 
                for(int i=0;i<t->getNumNodes();i++){
-               
+                
                        if (m->control_pressed) { return data; }
                        
                        double u;
-                       int pcountSize = 0;
+                       //int pcountSize = 0;
                        //does this node have descendants from groupA
                        it = t->tree[i].pcount.find(groupA);
                        //if it does u = # of its descendants with a certain group / total number in tree with a certain group
                        if (it != t->tree[i].pcount.end()) {
                                u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA];
-                               pcountSize++;
                        }else { u = 0.00; }
-               
-                                               
+                       
+                       
                        //does this node have descendants from group l
                        it = t->tree[i].pcount.find(groupB);
                        //if it does subtract their percentage from u
                        if (it != t->tree[i].pcount.end()) {
                                u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB];
-                               pcountSize++;
                        }
-                                               
-                       u = abs(u * t->tree[i].getBranchLength());
                        
-                       nodePcountSize[i] = pcountSize;
-                               
-                       //if you are a leaf from a users group add to total
-                       if (i < numLeaves) {
-                               if ((t->tree[i].getBranchLength() != -1) && pcountSize != 0) { 
-                               //cout << "added to total" << endl; 
-                                       WScore[(groupA+groupB)] += u; 
-                               }
-                               tempTotals[i] = 0.0;  //we don't care about you, or we have already added you
-                       }else{ //if you are not a leaf 
-                               //do both your chidren have have descendants from the users groups? 
-                               int lc = t->tree[i].getLChild();
-                               int rc = t->tree[i].getRChild();
-                               
-                               //if yes, add your childrens tempTotals
-                               if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) {
-                                       WScore[(groupA+groupB)] += tempTotals[lc] + tempTotals[rc]; 
-                                       //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
-                                       if (t->tree[i].getBranchLength() != -1) {
-                                               tempTotals[i] = u;
-                                       }else {
-                                               tempTotals[i] = 0.0;
-                                       }
-                               }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0;  //we don't care about you
-                               }else { //if no, your tempTotal is your childrens temp totals + your branch length
-                                       tempTotals[i] = tempTotals[lc] + tempTotals[rc] + u; 
-                               }
-                               //cout << "temptotal = "<< tempTotals[i] << endl;
+                       //if this is not the root then add it
+                       if (rootForGrouping[groups].count(i) == 0) {
+                               u = abs(u * t->tree[i].getBranchLength());
+                               WScore[(groupA+groupB)] += u; 
                        }
-               }
-               
+               }               
                /********************************************************/
                
                //calculate weighted score for the group combination
@@ -408,72 +349,66 @@ double Weighted::getLengthToRoot(Tree* t, int v, string groupA, string groupB) {
        try {
                
                double sum = 0.0;
-               map<int, double> tempTotals; //maps node to total Branch Length
-               map<int, int> nodePcountSize; //maps node to pcountSize
-               map<int, int>::iterator itCount;
-
                int index = v;
        
                //you are a leaf
                if(t->tree[index].getBranchLength() != -1){     sum += abs(t->tree[index].getBranchLength());   }
-               tempTotals[index] = 0.0;
+               double tempTotal = 0.0;
                index = t->tree[index].getParent();     
+               
+               vector<string> grouping; grouping.push_back(groupA); grouping.push_back(groupB);
+               
+               rootForGrouping[grouping].insert(index);
                        
                //while you aren't at root
                while(t->tree[index].getParent() != -1){
 
                        if (m->control_pressed) {  return sum; }
                                
-                       int pcountSize = 0;
-                       map<string, int>::iterator itGroup = t->tree[index].pcount.find(groupA);
-                       if (itGroup != t->tree[index].pcount.end()) { pcountSize++;  } 
-                       itGroup = t->tree[index].pcount.find(groupB);
-                       if (itGroup != t->tree[index].pcount.end()) { pcountSize++;  } 
-               
-                       nodePcountSize[index] = pcountSize;
-                       
-                       //do both your chidren have have descendants from the users groups? 
-                       int lc = t->tree[index].getLChild();
-                       int rc = t->tree[index].getRChild();
+                       //am I the root for this grouping? if so I want to stop "early"
+                       //does my sibling have descendants from the users groups? 
+                       int parent = t->tree[index].getParent();
+                       int lc = t->tree[parent].getLChild();
+                       int rc = t->tree[parent].getRChild();
                        
-                       itCount = nodePcountSize.find(lc); 
-                       if (itCount == nodePcountSize.end()) { 
-                               int LpcountSize = 0;
-                               itGroup = t->tree[lc].pcount.find(groupA);
-                               if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++;  } 
-                               itGroup = t->tree[lc].pcount.find(groupB);
-                               if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++;  } 
-                               nodePcountSize[lc] = LpcountSize;
-                       }
+                       int sib = lc;
+                       if (lc == index) { sib = rc; }
                        
-                       itCount = nodePcountSize.find(rc); 
-                       if (itCount == nodePcountSize.end()) { 
-                               int RpcountSize = 0;
-                               itGroup = t->tree[rc].pcount.find(groupA);
-                               if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++;  } 
-                               itGroup = t->tree[rc].pcount.find(groupB);
-                               if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++;  } 
-                               nodePcountSize[rc] = RpcountSize;
-                       }
-                               
-                       //if yes, add your childrens tempTotals
-                       if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) {
-                               sum += tempTotals[lc] + tempTotals[rc]; 
-
-                               //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
+                       map<string, int>::iterator itGroup;
+                       int pcountSize = 0;
+                       itGroup = t->tree[sib].pcount.find(groupA);
+                       if (itGroup != t->tree[sib].pcount.end()) { pcountSize++;  } 
+                       itGroup = t->tree[sib].pcount.find(groupB);
+                       if (itGroup != t->tree[sib].pcount.end()) { pcountSize++;  } 
+                                                       
+                       //if yes, I am not the root so add me
+                       if (pcountSize != 0) {
                                if (t->tree[index].getBranchLength() != -1) {
-                                       tempTotals[index] = abs(t->tree[index].getBranchLength());
+                                       sum += abs(t->tree[index].getBranchLength()) + tempTotal;
+                                       tempTotal = 0.0;
                                }else {
-                                       tempTotals[index] = 0.0;
+                                       sum += tempTotal;
+                                       tempTotal = 0.0;
+                               }
+                               rootForGrouping[grouping].clear();
+                               rootForGrouping[grouping].insert(parent);
+                       }else { //if no, I may be the root so add my br to tempTotal until I am proven innocent
+                               if (t->tree[index].getBranchLength() != -1) {
+                                       tempTotal += abs(t->tree[index].getBranchLength()); 
                                }
-                       }else { //if no, your tempTotal is your childrens temp totals + your branch length
-                               tempTotals[index] = tempTotals[lc] + tempTotals[rc] + abs(t->tree[index].getBranchLength()); 
                        }
-                       //cout << "temptotal = "<< tempTotals[i] << endl;
                        
-                       index = t->tree[index].getParent();     
+                       index = parent; 
                }
-
+               
+               //get all nodes above the root to add so we don't add their u values above
+               index = *(rootForGrouping[grouping].begin());
+               while(t->tree[index].getParent() != -1){
+                       int parent = t->tree[index].getParent();
+                       rootForGrouping[grouping].insert(parent);
+                       index = parent;
+               }
+               
                return sum;
        }
        catch(exception& e) {
@@ -484,4 +419,3 @@ double Weighted::getLengthToRoot(Tree* t, int v, string groupA, string groupB) {
 /**************************************************************************************************/
 
 
-
index ca9f0378ffc96b545912742e10aab2242651558a..11cd26b2d9642cb2679a0f3dfc06d930c15127f0 100644 (file)
@@ -40,6 +40,7 @@ class Weighted : public TreeCalculator  {
                map<string, double> WScore; //a score for each group combination i.e. AB, AC, BC.
                int processors;
                string outputDir;
+               map< vector<string>, set<int> > rootForGrouping;  //maps a grouping combo to the root for that combo
                
                EstOutput driver(Tree*, vector< vector<string> >, int, int); 
                EstOutput createProcesses(Tree*, vector< vector<string> >);