X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=parsimonycommand.cpp;h=c7340c7b1704c585b70e2677c0701aaf7edb4e33;hb=9651e8e7172d86707b34af15e95ec60ad4c3c3f9;hp=4632019151fa0a93d7dba5c14017940d04aa1653;hpb=d037597badc8d18e235c59f0c1114180edb7f98f;p=mothur.git diff --git a/parsimonycommand.cpp b/parsimonycommand.cpp index 4632019..c7340c7 100644 --- a/parsimonycommand.cpp +++ b/parsimonycommand.cpp @@ -15,27 +15,28 @@ ParsimonyCommand::ParsimonyCommand() { globaldata = GlobalData::getInstance(); //randomtree will tell us if user had their own treefile or if they just want the random distribution - convert(globaldata->getRandomTree(), randomtree); + randomtree = globaldata->getRandomTree(); //user has entered their own tree - if (randomtree == 0) { + if (randomtree == "") { T = globaldata->gTree; tmap = globaldata->gTreemap; parsFile = globaldata->getTreeFile() + ".parsimony"; - openOutputFile(parsFile, out); + parsFileout = globaldata->getTreeFile() + "temp" + ".parsimony"; sumFile = globaldata->getTreeFile() + ".psummary"; openOutputFile(sumFile, outSum); - distFile = globaldata->getTreeFile() + ".pdistrib"; - openOutputFile(distFile, outDist); - }else { //user wants random distribution + savetmap = globaldata->gTreemap; getUserInput(); - parsFile = "rd_parsimony"; - openOutputFile(parsFile, out); + parsFile = randomtree; + parsFileout = globaldata->getTreeFile() + "temp"; } + //set users groups to analyze + setGroups(); convert(globaldata->getIters(), iters); //how many random trees to generate pars = new Parsimony(tmap); + counter = 0; } catch(exception& e) { @@ -50,32 +51,36 @@ ParsimonyCommand::ParsimonyCommand() { /***********************************************************/ int ParsimonyCommand::execute() { try { - + //get pscore for users tree - userData.resize(1,0); //data[0] = pscore. - randomData.resize(1,0); //data[0] = pscore. - - //format output - outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); - outDist << "RandomTree#" << '\t' << "ParsScore" << endl; - - if (randomtree == 0) { + userData.resize(numComp,0); //data = AB, AC, BC, ABC. + randomData.resize(numComp,0); //data = AB, AC, BC, ABC. + rscoreFreq.resize(numComp); + uscoreFreq.resize(numComp); + rCumul.resize(numComp); + uCumul.resize(numComp); + userTreeScores.resize(numComp); + UScoreSig.resize(numComp); + + if (randomtree == "") { //get pscores for users trees for (int i = 0; i < T.size(); i++) { - cout << "Processing tree " << i+1 << endl; - userData = pars->getValues(T[i]); //userData[0] = pscore - //update uscoreFreq - it = uscoreFreq.find(userData[0]); - if (it == uscoreFreq.end()) {//new score - uscoreFreq[userData[0]] = 1; - }else{ uscoreFreq[userData[0]]++; } - - //add users score to valid scores - validScores[userData[0]] = userData[0]; + userData = pars->getValues(T[i]); //data = AB, AC, BC, ABC. - //save score for summary file - userTreeScores.push_back(userData[0]); - + //output scores for each combination + for(int k = 0; k < numComp; k++) { + //update uscoreFreq + it = uscoreFreq[k].find(userData[k]); + if (it == uscoreFreq[k].end()) {//new score + uscoreFreq[k][userData[k]] = 1; + }else{ uscoreFreq[k][userData[k]]++; } + + //add users score to valid scores + validScores[userData[k]] = userData[k]; + + //save score for summary file + userTreeScores[k].push_back(userData[k]); + } } //get pscores for random trees @@ -86,21 +91,20 @@ int ParsimonyCommand::execute() { randT->assembleRandomTree(); //get pscore of random tree randomData = pars->getValues(randT); + + for(int r = 0; r < numComp; r++) { + //add trees pscore to map of scores + it2 = rscoreFreq[r].find(randomData[r]); + if (it2 != rscoreFreq[r].end()) {//already have that score + rscoreFreq[r][randomData[r]]++; + }else{//first time we have seen this score + rscoreFreq[r][randomData[r]] = 1; + } - //add trees pscore to map of scores - it2 = rscoreFreq.find(randomData[0]); - if (it2 != rscoreFreq.end()) {//already have that score - rscoreFreq[randomData[0]]++; - }else{//first time we have seen this score - rscoreFreq[randomData[0]] = 1; + //add randoms score to validscores + validScores[randomData[r]] = randomData[r]; } - - //add randoms score to validscores - validScores[randomData[0]] = randomData[0]; - //output info to pdistrib file - outDist << j+1 << '\t'<< '\t' << randomData[0] << endl; - delete randT; } }else { @@ -112,54 +116,62 @@ int ParsimonyCommand::execute() { randT->assembleRandomTree(); //get pscore of random tree randomData = pars->getValues(randT); + + for(int r = 0; r < numComp; r++) { + //add trees pscore to map of scores + it2 = rscoreFreq[r].find(randomData[r]); + if (it2 != rscoreFreq[r].end()) {//already have that score + rscoreFreq[r][randomData[r]]++; + }else{//first time we have seen this score + rscoreFreq[r][randomData[r]] = 1; + } - //add trees pscore to map of scores - it2 = rscoreFreq.find(randomData[0]); - if (it2 != rscoreFreq.end()) {//already have that score - rscoreFreq[randomData[0]]++; - }else{//first time we have seen this score - rscoreFreq[randomData[0]] = 1; + //add randoms score to validscores + validScores[randomData[r]] = randomData[r]; } - - //add randoms score to validscores - validScores[randomData[0]] = randomData[0]; - + delete randT; } } - float rcumul = 0.0000; - float ucumul = 0.0000; - - //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. - for (it = validScores.begin(); it != validScores.end(); it++) { - if (randomtree == 0) { - it2 = uscoreFreq.find(it->first); - //user data has that score - if (it2 != uscoreFreq.end()) { uscoreFreq[it->first] /= T.size(); ucumul+= it2->second; } - else { uscoreFreq[it->first] = 0.0000; } //no user trees with that score - //make uCumul map - uCumul[it->first] = ucumul; + for(int a = 0; a < numComp; a++) { + float rcumul = 0.0000; + float ucumul = 0.0000; + //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. + for (it = validScores.begin(); it != validScores.end(); it++) { + if (randomtree == "") { + it2 = uscoreFreq[a].find(it->first); + //user data has that score + if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second; } + else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score + //make uCumul map + uCumul[a][it->first] = ucumul; + } + + //make rscoreFreq map and rCumul + it2 = rscoreFreq[a].find(it->first); + //get percentage of random trees with that info + if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul+= it2->second; } + else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score + rCumul[a][it->first] = rcumul; } - //make rscoreFreq map and rCumul - it2 = rscoreFreq.find(it->first); - //get percentage of random trees with that info - if (it2 != rscoreFreq.end()) { rscoreFreq[it->first] /= iters; rcumul+= it2->second; } - else { rscoreFreq[it->first] = 0.0000; } //no random trees with that score - rCumul[it->first] = rcumul; + //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file + for (int h = 0; h < userTreeScores[a].size(); h++) { + UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]); + } } - //find the signifigance of each user trees score when compared to the random trees and save for printing the summary file - for (int h = 0; h < userTreeScores.size(); h++) { - UScoreSig.push_back(rCumul[userTreeScores[h]]); - } - printParsimonyFile(); - printUSummaryFile(); + if (randomtree == "") { printUSummaryFile(); } + + //reset globaldata's treemap if you just did random distrib + if (randomtree != "") { globaldata->gTreemap = savetmap; } - //reset randomTree parameter to 0 - globaldata->setRandomTree("0"); + //reset randomTree parameter to "" + globaldata->setRandomTree(""); + //reset groups parameter + globaldata->Groups.clear(); globaldata->setGroups(""); return 0; @@ -177,27 +189,29 @@ int ParsimonyCommand::execute() { /***********************************************************/ void ParsimonyCommand::printParsimonyFile() { try { - //column headers - if (randomtree == 0) { - out << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; - }else { - out << "Score" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; - } + vector data; //format output out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); - - //print each line - for (it = validScores.begin(); it != validScores.end(); it++) { - if (randomtree == 0) { - out << setprecision(6) << it->first << '\t' << '\t' << uscoreFreq[it->first] << '\t' << uCumul[it->first] << '\t' << rscoreFreq[it->first] << '\t' << rCumul[it->first] << endl; - }else{ - out << setprecision(6) << it->first << '\t' << '\t' << rscoreFreq[it->first] << '\t' << rCumul[it->first] << endl; - } - } + + for(int a = 0; a < numComp; a++) { + initFile(groupComb[a]); + //print each line + for (it = validScores.begin(); it != validScores.end(); it++) { + if (randomtree == "") { + data.push_back(it->first); data.push_back(uscoreFreq[a][it->first]); data.push_back(uCumul[a][it->first]); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); + }else{ + data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); + } + output(data); + data.clear(); + } + resetFile(); + } out.close(); - + inFile.close(); + remove(parsFileout.c_str()); } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function printParsimonyFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -212,14 +226,24 @@ void ParsimonyCommand::printParsimonyFile() { void ParsimonyCommand::printUSummaryFile() { try { //column headers - outSum << "Tree#" << '\t' << "ParsScore" << '\t' << '\t' << "ParsSig" << endl; + outSum << "Tree#" << '\t' << "Groups" << '\t' << "ParsScore" << '\t' << "ParsSig" << endl; + cout << "Tree#" << '\t' << "Groups" << '\t' << "ParsScore" << '\t' << "ParsSig" << endl; //format output outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint); + //print each line for (int i = 0; i< T.size(); i++) { - outSum << setprecision(6) << i+1 << '\t' << '\t' << userTreeScores[i] << '\t' << UScoreSig[i] << endl; + for(int a = 0; a < numComp; a++) { + if (UScoreSig[a][i] > (1/(float)iters)) { + outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << '\t' << userTreeScores[a][i] << setprecision(globaldata->getIters().length()) << '\t' << UScoreSig[a][i] << endl; + cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << '\t' << userTreeScores[a][i] << setprecision(globaldata->getIters().length()) << '\t' << UScoreSig[a][i] << endl; + }else { + outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(globaldata->getIters().length()) << '\t' << "<" << (1/float(iters)) << endl; + cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << userTreeScores[a][i] << setprecision(globaldata->getIters().length()) << '\t' << "<" << (1/float(iters)) << endl; + } + } } outSum.close(); @@ -254,6 +278,7 @@ void ParsimonyCommand::getUserInput() { //set tmaps seqsPerGroup tmap->seqsPerGroup[toString(i)] = num; + tmap->namesOfGroups.push_back(toString(i)); //set tmaps namesOfSeqs for (int j = 0; j < num; j++) { @@ -282,3 +307,178 @@ void ParsimonyCommand::getUserInput() { } /***********************************************************/ +void ParsimonyCommand::setGroups() { + try { + string allGroups = ""; + numGroups = 0; + //if the user has not entered specific groups to analyze then do them all + if (globaldata->Groups.size() != 0) { + if (globaldata->Groups[0] != "all") { + //check that groups are valid + for (int i = 0; i < globaldata->Groups.size(); i++) { + if (tmap->isValidGroup(globaldata->Groups[i]) != true) { + cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl; + // erase the invalid group from globaldata->Groups + globaldata->Groups.erase(globaldata->Groups.begin()+i); + } + } + + //if the user only entered invalid groups + if (globaldata->Groups.size() == 0) { + cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; + for (int i = 0; i < tmap->namesOfGroups.size(); i++) { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + numGroups++; + allGroups += tmap->namesOfGroups[i] + "-"; + } + allGroups = allGroups.substr(0, allGroups.length()-1); + }else { + for (int i = 0; i < globaldata->Groups.size(); i++) { + allGroups += globaldata->Groups[i] + "-"; + numGroups++; + } + allGroups = allGroups.substr(0, allGroups.length()-1); + } + }else{//user has enter "all" and wants the default groups + globaldata->Groups.clear(); + for (int i = 0; i < tmap->namesOfGroups.size(); i++) { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + numGroups++; + allGroups += tmap->namesOfGroups[i] + "-"; + } + allGroups = allGroups.substr(0, allGroups.length()-1); + globaldata->setGroups(""); + } + }else { + for (int i = 0; i < tmap->namesOfGroups.size(); i++) { + allGroups += tmap->namesOfGroups[i] + "-"; + } + allGroups = allGroups.substr(0, allGroups.length()-1); + numGroups = 1; + } + + //calculate number of comparsions + numComp = 0; + for (int r=0; rGroups[r]+ "-" +globaldata->Groups[l]); + numComp++; + } + } + + //ABC + if (numComp != 1) { + groupComb.push_back(allGroups); + numComp++; + } + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ParsimonyCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + +} +/*****************************************************************/ + +void ParsimonyCommand::initFile(string label){ + try { + if(counter != 0){ + openOutputFile(parsFileout, out); + openInputFile(parsFile, inFile); + + string inputBuffer; + getline(inFile, inputBuffer); + + if (randomtree == "") { + out << inputBuffer << '\t' << label + "Score" << '\t' << label + "UserFreq" << '\t' << label + "UserCumul" << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl; + }else { + out << inputBuffer << '\t' << "Score" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; + } + }else{ + openOutputFile(parsFileout, out); + //column headers + if (randomtree == "") { + out << label + "Score" << '\t' << label + "UserFreq" << '\t' << label + "UserCumul" << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl; + }else { + out << "Score" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; + } + } + + out.setf(ios::fixed, ios::floatfield); + out.setf(ios::showpoint); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ParsimonyCommand class function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ + +void ParsimonyCommand::output(vector data){ + try { + if(counter != 0){ + string inputBuffer; + getline(inFile, inputBuffer); + + if (randomtree == "") { + out << inputBuffer << '\t' << setprecision(6) << data[0] << setprecision(globaldata->getIters().length()) << '\t' << data[1] << '\t' << data[2] << '\t' << data[3] << '\t' << data[4] << endl; + }else{ + out << inputBuffer << '\t' << setprecision(6) << data[0] << setprecision(globaldata->getIters().length()) << '\t' << data[1] << '\t' << data[2] << endl; + } + } + else{ + if (randomtree == "") { + out << setprecision(6) << data[0] << setprecision(globaldata->getIters().length()) << '\t' << data[1] << '\t' << data[2] << '\t' << data[3] << '\t' << data[4] << endl; + }else{ + out << setprecision(6) << data[0] << setprecision(globaldata->getIters().length()) << '\t' << data[1] << '\t' << data[2] << endl; + } + } + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ParsimonyCommand class function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ + +void ParsimonyCommand::resetFile(){ + try { + if(counter != 0){ + out.close(); + inFile.close(); + } + else{ + out.close(); + } + counter = 1; + + remove(parsFile.c_str()); + rename(parsFileout.c_str(), parsFile.c_str()); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ParsimonyCommand class Function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ParsimonyCommand class function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +