From: westcott Date: Fri, 3 Dec 2010 11:08:08 +0000 (+0000) Subject: fixed craig nelsons weighted bug and paralellized parsimony X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=c53ef46b40b97c00e32bfd8c3924ce8c51b5cd7b fixed craig nelsons weighted bug and paralellized parsimony --- diff --git a/bayesian.cpp b/bayesian.cpp index 1eee7f2..df83cde 100644 --- a/bayesian.cpp +++ b/bayesian.cpp @@ -15,7 +15,7 @@ Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i) : Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) { try { - + /************calculate the probablity that each word will be in a specific taxonomy*************/ string tfileroot = tfile.substr(0,tfile.find_last_of(".")+1); string tempfileroot = m->getRootName(m->getSimpleName(tempFile)); @@ -205,6 +205,7 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) { /**************************************************************************************************/ Bayesian::~Bayesian() { try { + delete phyloTree; if (database != NULL) { delete database; } } @@ -223,7 +224,7 @@ string Bayesian::getTaxonomy(Sequence* seq) { //get words contained in query //getKmerString returns a string where the index in the string is hte kmer number //and the character at that index can be converted to be the number of times that kmer was seen - + string queryKmerString = kmer.getKmerString(seq->getUnaligned()); vector queryKmers; @@ -235,14 +236,16 @@ string Bayesian::getTaxonomy(Sequence* seq) { if (queryKmers.size() == 0) { m->mothurOut(seq->getName() + "is bad."); m->mothurOutEndLine(); return "bad seq"; } + int index = getMostProbableTaxonomy(queryKmers); if (m->control_pressed) { return tax; } -//cout << seq->getName() << '\t' << index << endl; + //bootstrap - to set confidenceScore int numToSelect = queryKmers.size() / 8; + tax = bootstrapResults(queryKmers, index, numToSelect); - + return tax; } catch(exception& e) { @@ -255,6 +258,17 @@ string Bayesian::bootstrapResults(vector kmers, int tax, int numToSelect) { try { map confidenceScores; + + //initialize confidences to 0 + int seqIndex = tax; + TaxNode seq = phyloTree->get(tax); + confidenceScores[tax] = 0; + + while (seq.level != 0) { //while you are not at the root + seqIndex = seq.parent; + confidenceScores[seqIndex] = 0; + seq = phyloTree->get(seq.parent); + } map::iterator itBoot; map::iterator itBoot2; @@ -264,7 +278,6 @@ string Bayesian::bootstrapResults(vector kmers, int tax, int numToSelect) { if (m->control_pressed) { return "control"; } vector temp; - for (int j = 0; j < numToSelect; j++) { int index = int(rand() % kmers.size()); @@ -274,17 +287,15 @@ string Bayesian::bootstrapResults(vector kmers, int tax, int numToSelect) { //get taxonomy int newTax = getMostProbableTaxonomy(temp); + //int newTax = 1; TaxNode taxonomyTemp = phyloTree->get(newTax); - + //add to confidence results while (taxonomyTemp.level != 0) { //while you are not at the root - itBoot2 = confidenceScores.find(newTax); //is this a classification we already have a count on - if (itBoot2 == confidenceScores.end()) { //not already in confidence scores - confidenceScores[newTax] = 1; - }else{ - confidenceScores[newTax]++; + if (itBoot2 != confidenceScores.end()) { //this is a classification we need a confidence for + (itBoot2->second)++; } newTax = taxonomyTemp.parent; @@ -305,7 +316,7 @@ string Bayesian::bootstrapResults(vector kmers, int tax, int numToSelect) { int confidence = 0; if (itBoot2 != confidenceScores.end()) { //already in confidence scores - confidence = confidenceScores[seqTaxIndex]; + confidence = itBoot2->second; } if (((confidence/(float)iters) * 100) >= confidenceThreshold) { @@ -339,7 +350,7 @@ int Bayesian::getMostProbableTaxonomy(vector queryKmer) { for (int i = 0; i < queryKmer.size(); i++) { prob += wordGenusProb[queryKmer[i]][k]; } - + //is this the taxonomy with the greatest probability? if (prob > maxProbability) { indexofGenus = genusNodes[k]; diff --git a/bayesian.h b/bayesian.h index 012def9..a1f693b 100644 --- a/bayesian.h +++ b/bayesian.h @@ -25,7 +25,7 @@ public: private: vector< vector > wordGenusProb; //vector of maps from genus to probability - //wordGenusProb[0][392] = probability that a sequence within genus that's index in the tree is 392 would contain kmer 0; + //wordGenusProb[0][392] = probability that a sequence within genus that's index in the tree is 392 would contain kmer 0; vector genusTotals; vector genusNodes; //indexes in phyloTree where genus' are located diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index 2d38387..9030948 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -829,13 +829,14 @@ int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempT bool done = false; int count = 0; - + while (!done) { if (m->control_pressed) { return 0; } Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA); if (candidateSeq->getName() != "") { + taxonomy = classify->getTaxonomy(candidateSeq); if (m->control_pressed) { delete candidateSeq; return 0; } @@ -867,7 +868,7 @@ int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempT } //report progress if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); } - + inFASTA.close(); outTax.close(); outTaxSimple.close(); diff --git a/mothur b/mothur index 6bdaec6..39270c0 100755 Binary files a/mothur and b/mothur differ diff --git a/parsimony.cpp b/parsimony.cpp index efc7d3a..4ec5e1b 100644 --- a/parsimony.cpp +++ b/parsimony.cpp @@ -11,106 +11,200 @@ /**************************************************************************************************/ -EstOutput Parsimony::getValues(Tree* t) { +EstOutput Parsimony::getValues(Tree* t, int p, string o) { try { globaldata = GlobalData::getInstance(); - vector groups; - - copyTree = new Tree(); + processors = p; + outputDir = o; //if the users enters no groups then give them the score of all groups int numGroups = globaldata->Groups.size(); //calculate number of comparsions int numComp = 0; + vector< vector > namesOfGroupCombos; for (int r=0; r groups; groups.push_back(globaldata->Groups[r]); groups.push_back(globaldata->Groups[l]); + //cout << globaldata->Groups[r] << '\t' << globaldata->Groups[l] << endl; + namesOfGroupCombos.push_back(groups); } } //numComp+1 for AB, AC, BC, ABC - data.resize(numComp+1,0); - - int count = 0; - for (int a=0; aGroups[a]); groups.push_back(globaldata->Groups[l]); - - //copy users tree so that you can redo pgroups - copyTree->getCopy(t); - - //create pgroups that reflect the groups the user want to use - for(int i=copyTree->getNumLeaves();igetNumNodes();i++){ - copyTree->tree[i].pGroups = (copyTree->mergeUserGroups(i, groups)); - } - - for(int i=copyTree->getNumLeaves();igetNumNodes();i++){ - - if (m->control_pressed) { return data; } - - int lc = copyTree->tree[i].getLChild(); - int rc = copyTree->tree[i].getRChild(); - - int iSize = copyTree->tree[i].pGroups.size(); - int rcSize = copyTree->tree[rc].pGroups.size(); - int lcSize = copyTree->tree[lc].pGroups.size(); - - //if isize are 0 then that branch is to be ignored - if (iSize == 0) { } - else if ((rcSize == 0) || (lcSize == 0)) { } - //if you have more groups than either of your kids then theres been a change. - else if(iSize > rcSize || iSize > lcSize){ - score++; - } - } - - data[count] = score; - count++; - groups.clear(); - } - } - if (numComp != 1) { + vector groups; if (numGroups == 0) { //get score for all users groups for (int i = 0; i < tmap->namesOfGroups.size(); i++) { if (tmap->namesOfGroups[i] != "xxx") { groups.push_back(tmap->namesOfGroups[i]); + //cout << tmap->namesOfGroups[i] << endl; } } + namesOfGroupCombos.push_back(groups); }else { for (int i = 0; i < globaldata->Groups.size(); i++) { groups.push_back(globaldata->Groups[i]); + //cout << globaldata->Groups[i] << endl; + } + namesOfGroupCombos.push_back(groups); + } + } + + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ + data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size()); + }else{ + lines.clear(); + int numPairs = namesOfGroupCombos.size(); + + int numPairsPerProcessor = numPairs / processors; + + for (int i = 0; i < processors; i++) { + int startPos = i * numPairsPerProcessor; + + if(i == processors - 1){ + numPairsPerProcessor = numPairs - i * numPairsPerProcessor; } + + lines.push_back(linePair(startPos, numPairsPerProcessor)); } + data = createProcesses(t, namesOfGroupCombos); + } + #else + data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size()); + #endif + + return data; + + } + catch(exception& e) { + m->errorOut(e, "Parsimony", "getValues"); + exit(1); + } +} +/**************************************************************************************************/ + +EstOutput Parsimony::createProcesses(Tree* t, vector< vector > namesOfGroupCombos) { + try { +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + int process = 1; + int num = 0; + vector processIDS; + + EstOutput results; + + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + EstOutput myresults; + myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num); + + if (m->control_pressed) { exit(0); } + + //pass numSeqs to parent + ofstream out; + string tempFile = outputDir + toString(getpid()) + ".pars.results.temp"; + m->openOutputFile(tempFile, out); + out << myresults.size() << endl; + for (int i = 0; i < myresults.size(); i++) { out << myresults[i] << '\t'; } out << endl; + out.close(); + + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } + } + + results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num); + + //force parent to wait until all the processes are done + for (int i=0;icontrol_pressed) { return results; } + + //get data created by processes + for (int i=0;iopenInputFile(s, in); + + //get scores + if (!in.eof()) { + int num; + in >> num; m->gobble(in); + + if (m->control_pressed) { break; } + + double w; + for (int j = 0; j < num; j++) { + in >> w; + results.push_back(w); + } + m->gobble(in); + } + in.close(); + remove(s.c_str()); + } + + return results; +#endif + } + catch(exception& e) { + m->errorOut(e, "Parsimony", "createProcesses"); + exit(1); + } +} +/**************************************************************************************************/ +EstOutput Parsimony::driver(Tree* t, vector< vector > namesOfGroupCombos, int start, int num) { + try { + + EstOutput results; results.resize(num); + + Tree* copyTree = new Tree(); + int count = 0; + + for (int h = start; h < (start+num); h++) { + + if (m->control_pressed) { delete copyTree; return results; } + + int score = 0; + + //groups in this combo + vector groups = namesOfGroupCombos[h]; + //copy users tree so that you can redo pgroups copyTree->getCopy(t); - int score = 0; - + //create pgroups that reflect the groups the user want to use for(int i=copyTree->getNumLeaves();igetNumNodes();i++){ copyTree->tree[i].pGroups = (copyTree->mergeUserGroups(i, groups)); } - -// map::iterator it; for(int i=copyTree->getNumLeaves();igetNumNodes();i++){ - + if (m->control_pressed) { return data; } int lc = copyTree->tree[i].getLChild(); int rc = copyTree->tree[i].getRChild(); - + int iSize = copyTree->tree[i].pGroups.size(); int rcSize = copyTree->tree[rc].pGroups.size(); int lcSize = copyTree->tree[lc].pGroups.size(); - //if isize are 0 then that branch is to be ignored if (iSize == 0) { } else if ((rcSize == 0) || (lcSize == 0)) { } @@ -119,17 +213,17 @@ EstOutput Parsimony::getValues(Tree* t) { score++; } } - - data[count] = score; - + + results[count] = score; + count++; } - + delete copyTree; - - return data; + + return results; } catch(exception& e) { - m->errorOut(e, "Parsimony", "getValues"); + m->errorOut(e, "Parsimony", "driver"); exit(1); } } diff --git a/parsimony.h b/parsimony.h index 74ebefd..fc905f2 100644 --- a/parsimony.h +++ b/parsimony.h @@ -22,15 +22,25 @@ class Parsimony : public TreeCalculator { public: Parsimony(TreeMap* t) : tmap(t) {}; ~Parsimony() {}; - EstOutput getValues(Tree*); + EstOutput getValues(Tree*, int, string); //EstOutput getValues(Tree*, string, string) { return data; } private: + struct linePair { + int start; + int num; + linePair(int i, int j) : start(i), num(j) {} + }; + vector lines; + GlobalData* globaldata; - Tree* copyTree; EstOutput data; TreeMap* tmap; - map::iterator it; + int processors; + string outputDir; + + EstOutput driver(Tree*, vector< vector >, int, int); + EstOutput createProcesses(Tree*, vector< vector >); }; /***********************************************************************/ diff --git a/parsimonycommand.cpp b/parsimonycommand.cpp index 3d6818b..c97e92e 100644 --- a/parsimonycommand.cpp +++ b/parsimonycommand.cpp @@ -12,7 +12,7 @@ //********************************************************************************************************************** vector ParsimonyCommand::getValidParameters(){ try { - string Array[] = {"random","groups","iters","outputdir","inputdir"}; + string Array[] = {"random","groups","iters","processors","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); return myArray; } @@ -69,7 +69,7 @@ ParsimonyCommand::ParsimonyCommand(string option) { else { //valid paramters for this command - string Array[] = {"random","groups","iters","outputdir","inputdir"}; + string Array[] = {"random","groups","processors","iters","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -96,7 +96,7 @@ ParsimonyCommand::ParsimonyCommand(string option) { } //if the user changes the output directory command factory will send this info to us in the output parameter - string outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + string outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; if (randomtree == "") { outputDir += m->hasPath(globaldata->inputFileName); } } //check for optional parameter and set defaults // ...at some point should added some additional type checking... @@ -109,6 +109,9 @@ ParsimonyCommand::ParsimonyCommand(string option) { itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; } convert(itersString, iters); + + string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; } + convert(temp, processors); if (abort == false) { //randomtree will tell us if user had their own treefile or if they just want the random distribution @@ -162,10 +165,11 @@ ParsimonyCommand::ParsimonyCommand(string option) { void ParsimonyCommand::help(){ try { m->mothurOut("The parsimony command can only be executed after a successful read.tree command, unless you use the random parameter.\n"); - m->mothurOut("The parsimony command parameters are random, groups and iters. No parameters are required.\n"); + m->mothurOut("The parsimony command parameters are random, groups, processors and iters. No parameters are required.\n"); m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n"); m->mothurOut("The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n"); m->mothurOut("The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n"); + m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n"); m->mothurOut("Example parsimony(random=out, iters=500).\n"); m->mothurOut("The default value for random is "" (meaning you want to use the trees in your inputfile, randomtree=out means you just want the random distribution of trees outputted to out.rd_parsimony),\n"); m->mothurOut("and iters is 1000. The parsimony command output two files: .parsimony and .psummary their descriptions are in the manual.\n"); @@ -209,7 +213,7 @@ int ParsimonyCommand::execute() { if (randomtree == "") { //get pscores for users trees for (int i = 0; i < T.size(); i++) { - userData = pars->getValues(T[i]); //data = AB, AC, BC, ABC. + userData = pars->getValues(T[i], processors, outputDir); //data = AB, AC, BC, ABC. if (m->control_pressed) { delete reading; delete pars; delete util; delete output; @@ -247,7 +251,7 @@ int ParsimonyCommand::execute() { randT->assembleRandomTree(); //get pscore of random tree - randomData = pars->getValues(randT); + randomData = pars->getValues(randT, processors, outputDir); if (m->control_pressed) { delete reading; delete pars; delete util; delete output; delete randT; @@ -296,7 +300,7 @@ int ParsimonyCommand::execute() { //get pscore of random tree - randomData = pars->getValues(randT); + randomData = pars->getValues(randT, processors, outputDir); if (m->control_pressed) { delete reading; delete pars; delete util; delete output; delete randT; diff --git a/parsimonycommand.h b/parsimonycommand.h index 04acd30..9e756b8 100644 --- a/parsimonycommand.h +++ b/parsimonycommand.h @@ -44,7 +44,7 @@ private: Parsimony* pars; vector groupComb; // AB. AC, BC... string sumFile, randomtree, allGroups, outputDir; - int iters, numGroups, numComp, counter; + int iters, numGroups, numComp, counter, processors; vector numEachGroup; //vector containing the number of sequences in each group the users wants for random distrib. vector< vector > userTreeScores; //scores for users trees for each comb. vector< vector > UScoreSig; //tree score signifigance when compared to random trees - percentage of random trees with that score or lower. diff --git a/weighted.cpp b/weighted.cpp index 1ac2746..1ce6230 100644 --- a/weighted.cpp +++ b/weighted.cpp @@ -184,7 +184,7 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, D[count] += weightedSum; } - + //adding the wieghted sums from group l for (int j = 0; j < t->groupNodeInfo[groupB].size(); j++) { //the leaf nodes that have seqs from group l map::iterator it = t->tree[t->groupNodeInfo[groupB][j]].pcount.find(groupB); @@ -195,6 +195,7 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, D[count] += weightedSum; } + count++; } @@ -205,8 +206,6 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, m->mothurOut("Processing combo: " + toString(h)); m->mothurOutEndLine(); int numLeaves = t->getNumLeaves(); - map tempTotals; //maps node to total Branch Length - map nodePcountSize; //maps node to pcountSize string groupA = namesOfGroupCombos[h][0]; string groupB = namesOfGroupCombos[h][1]; @@ -217,13 +216,12 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, if (m->control_pressed) { return data; } double u; - int pcountSize = 0; + //int pcountSize = 0; //does this node have descendants from groupA it = t->tree[i].pcount.find(groupA); //if it does u = # of its descendants with a certain group / total number in tree with a certain group if (it != t->tree[i].pcount.end()) { u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA]; - pcountSize++; }else { u = 0.00; } @@ -232,40 +230,14 @@ EstOutput Weighted::driver(Tree* t, vector< vector > namesOfGroupCombos, //if it does subtract their percentage from u if (it != t->tree[i].pcount.end()) { u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB]; - pcountSize++; } - u = abs(u * t->tree[i].getBranchLength()); - - nodePcountSize[i] = pcountSize; - - //if you are a leaf from a users group add to total - if (i < numLeaves) { - if ((t->tree[i].getBranchLength() != -1) && pcountSize != 0) { - //cout << "added to total" << endl; - WScore[(groupA+groupB)] += u; - } - tempTotals[i] = 0.0; //we don't care about you, or we have already added you - }else{ //if you are not a leaf - //do both your chidren have have descendants from the users groups? - int lc = t->tree[i].getLChild(); - int rc = t->tree[i].getRChild(); - - //if yes, add your childrens tempTotals - if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) { - WScore[(groupA+groupB)] += tempTotals[lc] + tempTotals[rc]; - //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl; - if (t->tree[i].getBranchLength() != -1) { - tempTotals[i] = u; - }else { - tempTotals[i] = 0.0; - } - }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0; //we don't care about you - }else { //if no, your tempTotal is your childrens temp totals + your branch length - tempTotals[i] = tempTotals[lc] + tempTotals[rc] + u; - } - //cout << "temptotal = "<< tempTotals[i] << endl; + //if this is not the root then add it + if (rootForGrouping[namesOfGroupCombos[h]].count(i) == 0) { + u = abs(u * t->tree[i].getBranchLength()); + WScore[(groupA+groupB)] += u; } + } } @@ -327,66 +299,35 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) { } int numLeaves = t->getNumLeaves(); - map tempTotals; //maps node to total Branch Length - map nodePcountSize; //maps node to pcountSize //calculate u for the group comb for(int i=0;igetNumNodes();i++){ - + if (m->control_pressed) { return data; } double u; - int pcountSize = 0; + //int pcountSize = 0; //does this node have descendants from groupA it = t->tree[i].pcount.find(groupA); //if it does u = # of its descendants with a certain group / total number in tree with a certain group if (it != t->tree[i].pcount.end()) { u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA]; - pcountSize++; }else { u = 0.00; } - - + + //does this node have descendants from group l it = t->tree[i].pcount.find(groupB); //if it does subtract their percentage from u if (it != t->tree[i].pcount.end()) { u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB]; - pcountSize++; } - - u = abs(u * t->tree[i].getBranchLength()); - nodePcountSize[i] = pcountSize; - - //if you are a leaf from a users group add to total - if (i < numLeaves) { - if ((t->tree[i].getBranchLength() != -1) && pcountSize != 0) { - //cout << "added to total" << endl; - WScore[(groupA+groupB)] += u; - } - tempTotals[i] = 0.0; //we don't care about you, or we have already added you - }else{ //if you are not a leaf - //do both your chidren have have descendants from the users groups? - int lc = t->tree[i].getLChild(); - int rc = t->tree[i].getRChild(); - - //if yes, add your childrens tempTotals - if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) { - WScore[(groupA+groupB)] += tempTotals[lc] + tempTotals[rc]; - //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl; - if (t->tree[i].getBranchLength() != -1) { - tempTotals[i] = u; - }else { - tempTotals[i] = 0.0; - } - }else if ((nodePcountSize[lc] == 0) && (nodePcountSize[rc] == 0)) { tempTotals[i] = 0.0; //we don't care about you - }else { //if no, your tempTotal is your childrens temp totals + your branch length - tempTotals[i] = tempTotals[lc] + tempTotals[rc] + u; - } - //cout << "temptotal = "<< tempTotals[i] << endl; + //if this is not the root then add it + if (rootForGrouping[groups].count(i) == 0) { + u = abs(u * t->tree[i].getBranchLength()); + WScore[(groupA+groupB)] += u; } - } - + } /********************************************************/ //calculate weighted score for the group combination @@ -408,72 +349,66 @@ double Weighted::getLengthToRoot(Tree* t, int v, string groupA, string groupB) { try { double sum = 0.0; - map tempTotals; //maps node to total Branch Length - map nodePcountSize; //maps node to pcountSize - map::iterator itCount; - int index = v; //you are a leaf if(t->tree[index].getBranchLength() != -1){ sum += abs(t->tree[index].getBranchLength()); } - tempTotals[index] = 0.0; + double tempTotal = 0.0; index = t->tree[index].getParent(); + + vector grouping; grouping.push_back(groupA); grouping.push_back(groupB); + + rootForGrouping[grouping].insert(index); //while you aren't at root while(t->tree[index].getParent() != -1){ if (m->control_pressed) { return sum; } - int pcountSize = 0; - map::iterator itGroup = t->tree[index].pcount.find(groupA); - if (itGroup != t->tree[index].pcount.end()) { pcountSize++; } - itGroup = t->tree[index].pcount.find(groupB); - if (itGroup != t->tree[index].pcount.end()) { pcountSize++; } - - nodePcountSize[index] = pcountSize; - - //do both your chidren have have descendants from the users groups? - int lc = t->tree[index].getLChild(); - int rc = t->tree[index].getRChild(); + //am I the root for this grouping? if so I want to stop "early" + //does my sibling have descendants from the users groups? + int parent = t->tree[index].getParent(); + int lc = t->tree[parent].getLChild(); + int rc = t->tree[parent].getRChild(); - itCount = nodePcountSize.find(lc); - if (itCount == nodePcountSize.end()) { - int LpcountSize = 0; - itGroup = t->tree[lc].pcount.find(groupA); - if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; } - itGroup = t->tree[lc].pcount.find(groupB); - if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; } - nodePcountSize[lc] = LpcountSize; - } + int sib = lc; + if (lc == index) { sib = rc; } - itCount = nodePcountSize.find(rc); - if (itCount == nodePcountSize.end()) { - int RpcountSize = 0; - itGroup = t->tree[rc].pcount.find(groupA); - if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; } - itGroup = t->tree[rc].pcount.find(groupB); - if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; } - nodePcountSize[rc] = RpcountSize; - } - - //if yes, add your childrens tempTotals - if ((nodePcountSize[lc] != 0) && (nodePcountSize[rc] != 0)) { - sum += tempTotals[lc] + tempTotals[rc]; - - //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl; + map::iterator itGroup; + int pcountSize = 0; + itGroup = t->tree[sib].pcount.find(groupA); + if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; } + itGroup = t->tree[sib].pcount.find(groupB); + if (itGroup != t->tree[sib].pcount.end()) { pcountSize++; } + + //if yes, I am not the root so add me + if (pcountSize != 0) { if (t->tree[index].getBranchLength() != -1) { - tempTotals[index] = abs(t->tree[index].getBranchLength()); + sum += abs(t->tree[index].getBranchLength()) + tempTotal; + tempTotal = 0.0; }else { - tempTotals[index] = 0.0; + sum += tempTotal; + tempTotal = 0.0; + } + rootForGrouping[grouping].clear(); + rootForGrouping[grouping].insert(parent); + }else { //if no, I may be the root so add my br to tempTotal until I am proven innocent + if (t->tree[index].getBranchLength() != -1) { + tempTotal += abs(t->tree[index].getBranchLength()); } - }else { //if no, your tempTotal is your childrens temp totals + your branch length - tempTotals[index] = tempTotals[lc] + tempTotals[rc] + abs(t->tree[index].getBranchLength()); } - //cout << "temptotal = "<< tempTotals[i] << endl; - index = t->tree[index].getParent(); + index = parent; } - + + //get all nodes above the root to add so we don't add their u values above + index = *(rootForGrouping[grouping].begin()); + while(t->tree[index].getParent() != -1){ + int parent = t->tree[index].getParent(); + rootForGrouping[grouping].insert(parent); + index = parent; + } + return sum; } catch(exception& e) { @@ -484,4 +419,3 @@ double Weighted::getLengthToRoot(Tree* t, int v, string groupA, string groupB) { /**************************************************************************************************/ - diff --git a/weighted.h b/weighted.h index ca9f037..11cd26b 100644 --- a/weighted.h +++ b/weighted.h @@ -40,6 +40,7 @@ class Weighted : public TreeCalculator { map WScore; //a score for each group combination i.e. AB, AC, BC. int processors; string outputDir; + map< vector, set > rootForGrouping; //maps a grouping combo to the root for that combo EstOutput driver(Tree*, vector< vector >, int, int); EstOutput createProcesses(Tree*, vector< vector >);