X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=unifracunweightedcommand.cpp;h=9d170631592cc9faf9c54fbd5bde5c710efe05f6;hb=c5c7502f435e1413c19e373dab1dfebcaa67588d;hp=8adff2d896f811d46e80b806acf45261d759c69b;hpb=d037597badc8d18e235c59f0c1114180edb7f98f;p=mothur.git diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index 8adff2d..9d17063 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -16,13 +16,21 @@ UnifracUnweightedCommand::UnifracUnweightedCommand() { T = globaldata->gTree; tmap = globaldata->gTreemap; - unweightedFile = globaldata->getTreeFile() + ".unweighted"; - openOutputFile(unweightedFile, out); sumFile = globaldata->getTreeFile() + ".uwsummary"; openOutputFile(sumFile, outSum); - distFile = globaldata->getTreeFile() + ".uwdistrib"; - openOutputFile(distFile, outDist); + util = new SharedUtil(); + util->setGroups(globaldata->Groups, tmap->namesOfGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze + util->getCombos(groupComb, globaldata->Groups, numComp); + globaldata->setGroups(""); + + //ABC + if (numComp != 1) { + groupComb.push_back(allGroups); + numComp++; + } + + convert(globaldata->getIters(), iters); //how many random trees to generate unweighted = new Unweighted(tmap); @@ -39,105 +47,86 @@ UnifracUnweightedCommand::UnifracUnweightedCommand() { /***********************************************************/ int UnifracUnweightedCommand::execute() { try { + + userData.resize(numComp,0); //data[0] = unweightedscore + randomData.resize(numComp,0); //data[0] = unweightedscore + //create new tree with same num nodes and leaves as users - //get unweighted for users tree - userData.resize(1,0); //data[0] = unweightedscore - randomData.resize(1,0); //data[0] = unweightedscore - - //format output - outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); - outDist << "Tree#" << '\t' << "Iter" << '\t' << "UWScore" << endl; + outSum << "Tree#" << '\t' << "Groups" << '\t' << "UWScore" <<'\t' << "UWSig" << endl; + cout << "Tree#" << '\t' << "Groups" << '\t' << "UWScore" << '\t' << "UWSig" << endl; - //create new tree with same num nodes and leaves as users - randT = new Tree(); - //get pscores for users trees for (int i = 0; i < T.size(); i++) { - cout << "Processing tree " << i+1 << endl; - userData = unweighted->getValues(T[i]); //userData[0] = unweightedscore + counter = 0; + unweightedFile = globaldata->getTreeFile() + toString(i+1) + ".unweighted"; + unweightedFileout = globaldata->getTreeFile() + "temp." + toString(i+1) + ".unweighted"; - //update uscoreFreq - it = uscoreFreq.find(userData[0]); - if (it == uscoreFreq.end()) {//new score - uscoreFreq[userData[0]] = 1; - }else{ uscoreFreq[userData[0]]++; } + outSum << i+1 << '\t'; + cout << i+1 << '\t'; - //add users score to valid scores - validScores[userData[0]] = userData[0]; - - //saves users score - utreeScores.push_back(userData[0]); + //get unweighted for users tree + rscoreFreq.resize(numComp); + rCumul.resize(numComp); + utreeScores.resize(numComp); + UWScoreSig.resize(numComp); + + userData = unweighted->getValues(T[i]); //userData[0] = unweightedscore - //copy T[i]'s info. - randT->getCopy(T[i]); + //output scores for each combination + for(int k = 0; k < numComp; k++) { + //saves users score + utreeScores[k].push_back(userData[k]); + + } //get unweighted scores for random trees for (int j = 0; j < iters; j++) { - //create a random tree with same topology as T[i], but different labels - randT->assembleRandomUnifracTree(); - //get pscore of random tree - randomData = unweighted->getValues(randT); - - //add trees unweighted score to map of scores - it2 = rscoreFreq.find(randomData[0]); - if (it2 != rscoreFreq.end()) {//already have that score - rscoreFreq[randomData[0]]++; - }else{//first time we have seen this score - rscoreFreq[randomData[0]] = 1; - } + //we need a different getValues because when we swap the labels we only want to swap those in each parwise comparison + randomData = unweighted->getValues(T[i], "", ""); - //add randoms score to validscores - validScores[randomData[0]] = randomData[0]; + for(int k = 0; k < numComp; k++) { + //add trees unweighted score to map of scores + it2 = rscoreFreq[k].find(randomData[k]); + if (it2 != rscoreFreq[k].end()) {//already have that score + rscoreFreq[k][randomData[k]]++; + }else{//first time we have seen this score + rscoreFreq[k][randomData[k]] = 1; + } + + //add randoms score to validscores + validScores[randomData[k]] = randomData[k]; + } - //output info to uwdistrib file - outDist << i+1 << '\t' << '\t'<< j+1 << '\t' << '\t' << randomData[0] << endl; } - - //find the signifigance of the score - float rcumul = 0.0000; - for (it = rscoreFreq.begin(); it != rscoreFreq.end(); it++) { + + for(int a = 0; a < numComp; a++) { + float rcumul = 1.0000; + //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. + for (it = validScores.begin(); it != validScores.end(); it++) { + //make rscoreFreq map and rCumul + it2 = rscoreFreq[a].find(it->first); + rCumul[a][it->first] = rcumul; //get percentage of random trees with that info - rscoreFreq[it->first] /= iters; - rcumul+= it->second; - rCumul[it->first] = rcumul; + if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul-= it2->second; } + else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score } - - //save the signifigance of the users score for printing later - UWScoreSig.push_back(rCumul[userData[0]]); - - saveRandomScores(); //save all random scores for unweighted file - - //clear random data - rscoreFreq.clear(); //you clear this because in the summary file you want the unweighted signifinance to be relative to these 1000 trees. - rCumul.clear(); + UWScoreSig[a].push_back(rCumul[a][userData[a]]); } - float ucumul = 0.0000; - float rcumul = 0.0000; - //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. - for (it = validScores.begin(); it != validScores.end(); it++) { - it2 = uscoreFreq.find(it->first); - //user data has that score - if (it2 != uscoreFreq.end()) { uscoreFreq[it->first] /= T.size(); ucumul+= it2->second; } - else { uscoreFreq[it->first] = 0.0000; } //no user trees with that score - //make uCumul map - uCumul[it->first] = ucumul; - - //make rscoreFreq map and rCumul - it2 = totalrscoreFreq.find(it->first); - //get percentage of random trees with that info - if (it2 != totalrscoreFreq.end()) { totalrscoreFreq[it->first] /= (iters*T.size()); rcumul+= it2->second; } - else { totalrscoreFreq[it->first] = 0.0000; } //no random trees with that score - rCumul[it->first] = rcumul; - } + printUnweightedFile(); printUWSummaryFile(); - //reset randomTree parameter to 0 - globaldata->setRandomTree("0"); - - delete randT; + rscoreFreq.clear(); + rCumul.clear(); + validScores.clear(); + utreeScores.clear(); + UWScoreSig.clear(); + } + //reset groups parameter + globaldata->Groups.clear(); + outSum.close(); return 0; @@ -154,19 +143,22 @@ int UnifracUnweightedCommand::execute() { /***********************************************************/ void UnifracUnweightedCommand::printUnweightedFile() { try { - //column headers + vector data; - out << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; - - //format output - out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); - - //print each line - for (it = validScores.begin(); it != validScores.end(); it++) { - out << setprecision(6) << it->first << '\t' << '\t' << uscoreFreq[it->first] << '\t' << uCumul[it->first] << '\t' << totalrscoreFreq[it->first] << '\t' << rCumul[it->first] << endl; - } + for(int a = 0; a < numComp; a++) { + initFile(groupComb[a]); + //print each line + for (it = validScores.begin(); it != validScores.end(); it++) { + data.push_back(it->first); data.push_back(rscoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]); + output(data); + data.clear(); + } + resetFile(); + } out.close(); + inFile.close(); + remove(unweightedFileout.c_str()); } catch(exception& e) { @@ -182,18 +174,22 @@ void UnifracUnweightedCommand::printUnweightedFile() { /***********************************************************/ void UnifracUnweightedCommand::printUWSummaryFile() { try { - //column headers - outSum << "Tree#" << '\t' << "UWScore" << '\t' << '\t' << "UWSig" << endl; - + //format output outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint); - + //print each line - for (int i = 0; i< T.size(); i++) { - outSum << setprecision(6) << i+1 << '\t' << '\t' << utreeScores[i] << '\t' << UWScoreSig[i] << endl; + + for(int a = 0; a < numComp; a++) { + if (UWScoreSig[a][0] > (1/(float)iters)) { + outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(globaldata->getIters().length()) << UWScoreSig[a][0] << endl; + cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(globaldata->getIters().length()) << UWScoreSig[a][0] << endl; + }else { + outSum << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << endl; + cout << setprecision(6) << groupComb[a] << '\t' << utreeScores[a][0] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << endl; + } } - outSum.close(); } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -204,29 +200,85 @@ void UnifracUnweightedCommand::printUWSummaryFile() { exit(1); } } -/***********************************************************/ -void UnifracUnweightedCommand::saveRandomScores() { +/*****************************************************************/ + +void UnifracUnweightedCommand::initFile(string label){ try { - //update total map with new random scores - for (it = rscoreFreq.begin(); it != rscoreFreq.end(); it++) { - //does this score already exist in the total map - it2 = totalrscoreFreq.find(it->first); - //if yes then add them - if (it2 != totalrscoreFreq.end()) { - it2->second += it->second; - }else{ //its a new score - totalrscoreFreq[it->first] = 1; - } + if(counter != 0){ + openOutputFile(unweightedFileout, out); + openInputFile(unweightedFile, inFile); + + string inputBuffer; + getline(inFile, inputBuffer); + + out << inputBuffer << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl; + }else{ + openOutputFile(unweightedFileout, out); + out << label + "Score" << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl; } + + out.setf(ios::fixed, ios::floatfield); + out.setf(ios::showpoint); } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function saveRandomScores. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the UnifracUnweightedCommand class function saveRandomScores. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the UnifracUnweightedCommand class function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } -/***********************************************************/ \ No newline at end of file +/***********************************************************************/ + +void UnifracUnweightedCommand::output(vector data){ + try { + if(counter != 0){ + string inputBuffer; + getline(inFile, inputBuffer); +// out << inputBuffer << setprecision(6) << '\t' << data[0] << setprecision(globaldata->getIters().length()) << '\t' << data[1] << '\t' << data[2] << endl; + + out << inputBuffer << '\t' << setprecision(6) << data[0] << setprecision(globaldata->getIters().length()) << '\t' << data[1] << '\t' << data[2] << endl; + } + else{ + out << setprecision(6) << data[0] << setprecision(globaldata->getIters().length()) << '\t' << data[1] << '\t' << data[2] << endl; + } + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the UnifracUnweightedCommand class function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +}; + +/***********************************************************************/ + +void UnifracUnweightedCommand::resetFile(){ + try { + if(counter != 0){ + out.close(); + inFile.close(); + } + else{ + out.close(); + } + counter = 1; + + remove(unweightedFile.c_str()); + rename(unweightedFileout.c_str(), unweightedFile.c_str()); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the UnifracUnweightedCommand class function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +