X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=unifracunweightedcommand.cpp;h=c345ca16bc478ee9635846c54abb8ab68ec67b69;hb=fc7cf3aac8fd6106fd725b43baa8ab5ca6f836f8;hp=9a1ad0c5c0b548fbedd5f453bffa07cbf3784b6e;hpb=cfeefe1f7d36e9ffd3fba5f2f2b906724393e442;p=mothur.git diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index 9a1ad0c..c345ca1 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -18,10 +18,14 @@ UnifracUnweightedCommand::UnifracUnweightedCommand() { tmap = globaldata->gTreemap; unweightedFile = globaldata->getTreeFile() + ".unweighted"; openOutputFile(unweightedFile, out); + //column headers + out << "Comb" << '\t' << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; + sumFile = globaldata->getTreeFile() + ".uwsummary"; openOutputFile(sumFile, outSum); - distFile = globaldata->getTreeFile() + ".uwdistrib"; - openOutputFile(distFile, outDist); + //column headers + outSum << "Tree#" << '\t' << "Comb" << '\t' << "UWScore" << '\t' << '\t' << "UWSig" << endl; + setGroups(); //sets users groups to analyze convert(globaldata->getIters(), iters); //how many random trees to generate unweighted = new Unweighted(tmap); @@ -39,112 +43,104 @@ UnifracUnweightedCommand::UnifracUnweightedCommand() { /***********************************************************/ int UnifracUnweightedCommand::execute() { try { - - //get unweighted for users tree - userData.resize(1,0); //data[0] = unweightedscore - randomData.resize(1,0); //data[0] = unweightedscore - - //format output - outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); - - outDist << "Groups Used "; - for (int m = 0; m < globaldata->Groups.size(); m++) { - outDist << globaldata->Groups[m] << " "; - } - outDist << endl; - - outDist << "Tree#" << '\t' << "Iter" << '\t' << "UWScore" << endl; - + + userData.resize(numComp,0); //data[0] = unweightedscore + randomData.resize(numComp,0); //data[0] = unweightedscore //create new tree with same num nodes and leaves as users randT = new Tree(); //get pscores for users trees for (int i = 0; i < T.size(); i++) { + //get unweighted for users tree + rscoreFreq.resize(numComp); + uscoreFreq.resize(numComp); + rCumul.resize(numComp); + uCumul.resize(numComp); + validScores.resize(numComp); + utreeScores.resize(numComp); + UWScoreSig.resize(numComp); + cout << "Processing tree " << i+1 << endl; + outSum << "Tree#" << i+1 << endl; + out << "Tree#" << i+1 << endl; userData = unweighted->getValues(T[i]); //userData[0] = unweightedscore - //update uscoreFreq - it = uscoreFreq.find(userData[0]); - if (it == uscoreFreq.end()) {//new score - uscoreFreq[userData[0]] = 1; - }else{ uscoreFreq[userData[0]]++; } + //output scores for each combination + for(int k = 0; k < numComp; k++) { + //update uscoreFreq + it = uscoreFreq[k].find(userData[k]); + if (it == uscoreFreq[k].end()) {//new score + uscoreFreq[k][userData[k]] = 1; + }else{ uscoreFreq[k][userData[k]]++; } - //add users score to valid scores - validScores[userData[0]] = userData[0]; + //add users score to valid scores + validScores[k][userData[k]] = userData[k]; - //saves users score - utreeScores.push_back(userData[0]); + //saves users score + utreeScores[k].push_back(userData[k]); + } //copy T[i]'s info. randT->getCopy(T[i]); //get unweighted scores for random trees for (int j = 0; j < iters; j++) { - //create a random tree with same topology as T[i], but different labels - randT->assembleRandomUnifracTree(); - //get pscore of random tree - randomData = unweighted->getValues(randT); + int count = 0; + for (int r=0; rgetValues(randT, "", ""); - //add trees unweighted score to map of scores - it2 = rscoreFreq.find(randomData[0]); - if (it2 != rscoreFreq.end()) {//already have that score - rscoreFreq[randomData[0]]++; - }else{//first time we have seen this score - rscoreFreq[randomData[0]] = 1; - } + //add trees unweighted score to map of scores + it2 = rscoreFreq[count].find(randomData[count]); + if (it2 != rscoreFreq[count].end()) {//already have that score + rscoreFreq[count][randomData[count]]++; + }else{//first time we have seen this score + rscoreFreq[count][randomData[count]] = 1; + } - //add randoms score to validscores - validScores[randomData[0]] = randomData[0]; - - //output info to uwdistrib file - outDist << i+1 << '\t' << '\t'<< j+1 << '\t' << '\t' << randomData[0] << endl; + //add randoms score to validscores + validScores[count][randomData[count]] = randomData[count]; + count++; + } + } } - - saveRandomScores(); //save all random scores for unweighted file - - //find the signifigance of the score + + for(int a = 0; a < numComp; a++) { + float ucumul = 1.0000; float rcumul = 1.0000; - for (it = rscoreFreq.begin(); it != rscoreFreq.end(); it++) { - rCumul[it->first] = rcumul; + //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. + for (it = validScores[a].begin(); it != validScores[a].end(); it++) { + it2 = uscoreFreq[a].find(it->first); + //make uCumul map + uCumul[a][it->first] = ucumul; + //user data has that score + if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul-= it2->second; } + else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score + + //make rscoreFreq map and rCumul + it2 = rscoreFreq[a].find(it->first); + rCumul[a][it->first] = rcumul; //get percentage of random trees with that info - rscoreFreq[it->first] /= iters; - rcumul-= it->second; + if (it2 != rscoreFreq[a].end()) { rscoreFreq[a][it->first] /= iters; rcumul-= it2->second; } + else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score } - - //save the signifigance of the users score for printing later - UWScoreSig.push_back(rCumul[userData[0]]); - - - //clear random data - rscoreFreq.clear(); //you clear this because in the summary file you want the unweighted signifinance to be relative to these 1000 trees. - rCumul.clear(); - } - - float ucumul = 1.0000; - float rcumul = 1.0000; - //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. - for (it = validScores.begin(); it != validScores.end(); it++) { - it2 = uscoreFreq.find(it->first); - //make uCumul map - uCumul[it->first] = ucumul; - //user data has that score - if (it2 != uscoreFreq.end()) { uscoreFreq[it->first] /= T.size(); ucumul-= it2->second; } - else { uscoreFreq[it->first] = 0.0000; } //no user trees with that score - - //make rscoreFreq map and rCumul - it2 = totalrscoreFreq.find(it->first); - rCumul[it->first] = rcumul; - //get percentage of random trees with that info - if (it2 != totalrscoreFreq.end()) { totalrscoreFreq[it->first] /= (iters*T.size()); rcumul-= it2->second; } - else { totalrscoreFreq[it->first] = 0.0000; } //no random trees with that score - + UWScoreSig[a].push_back(rCumul[a][userData[a]]); } printUnweightedFile(); printUWSummaryFile(); + rscoreFreq.clear(); + uscoreFreq.clear(); + rCumul.clear(); + uCumul.clear(); + validScores.clear(); + utreeScores.clear(); + UWScoreSig.clear(); + } //reset groups parameter - globaldata->Groups.clear(); + globaldata->Groups.clear(); globaldata->setGroups(""); delete randT; @@ -163,24 +159,15 @@ int UnifracUnweightedCommand::execute() { /***********************************************************/ void UnifracUnweightedCommand::printUnweightedFile() { try { - //column headers - - out << "Groups Used "; - for (int m = 0; m < globaldata->Groups.size(); m++) { - out << globaldata->Groups[m] << " "; - } - out << endl; - - out << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; - //format output out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); - //print each line - for (it = validScores.begin(); it != validScores.end(); it++) { - out << setprecision(6) << it->first << '\t' << '\t' << uscoreFreq[it->first] << '\t' << uCumul[it->first] << '\t' << totalrscoreFreq[it->first] << '\t' << rCumul[it->first] << endl; - } - + for(int a = 0; a < numComp; a++) { + //print each line + for (it = validScores[a].begin(); it != validScores[a].end(); it++) { + out << setprecision(6) << groupComb[a] << '\t' << it->first << '\t' << '\t' << uscoreFreq[a][it->first] << '\t' << uCumul[a][it->first] << '\t' << rscoreFreq[a][it->first] << '\t' << rCumul[a][it->first] << endl; + } + } out.close(); } @@ -197,22 +184,16 @@ void UnifracUnweightedCommand::printUnweightedFile() { /***********************************************************/ void UnifracUnweightedCommand::printUWSummaryFile() { try { - //column headers - - outSum << "Groups Used "; - for (int m = 0; m < globaldata->Groups.size(); m++) { - outSum << globaldata->Groups[m] << " "; - } - outSum << endl; - - outSum << "Tree#" << '\t' << "UWScore" << '\t' << '\t' << "UWSig" << endl; - + //format output outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint); //print each line for (int i = 0; i< T.size(); i++) { - outSum << setprecision(6) << i+1 << '\t' << '\t' << utreeScores[i] << '\t' << UWScoreSig[i] << endl; + for(int a = 0; a < numComp; a++) { + outSum << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << '\t' << utreeScores[a][i] << '\t' << UWScoreSig[a][i] << endl; + cout << setprecision(6) << i+1 << '\t' << groupComb[a] << '\t' << '\t' << utreeScores[a][i] << '\t' << UWScoreSig[a][i] << endl; + } } outSum.close(); @@ -226,57 +207,66 @@ void UnifracUnweightedCommand::printUWSummaryFile() { exit(1); } } -/***********************************************************/ -void UnifracUnweightedCommand::saveRandomScores() { - try { - for (it = rscoreFreq.begin(); it != rscoreFreq.end(); it++) { - //does this score already exist in the total map - it2 = totalrscoreFreq.find(it->first); - //if yes then add them - if (it2 != totalrscoreFreq.end()) { - totalrscoreFreq[it->first] += rscoreFreq[it->first]; - }else{ //its a new score - totalrscoreFreq[it->first] = rscoreFreq[it->first]; - } - } - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function saveRandomScores. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the UnifracUnweightedCommand class function saveRandomScores. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -} - /***********************************************************/ void UnifracUnweightedCommand::setGroups() { try { + string allGroups = ""; + numGroups = 0; //if the user has not entered specific groups to analyze then do them all if (globaldata->Groups.size() != 0) { - //check that groups are valid - for (int i = 0; i < globaldata->Groups.size(); i++) { - if (tmap->isValidGroup(globaldata->Groups[i]) != true) { - cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl; - // erase the invalid group from globaldata->Groups - globaldata->Groups.erase (globaldata->Groups.begin()+i); + if (globaldata->Groups[0] != "all") { + //check that groups are valid + for (int i = 0; i < globaldata->Groups.size(); i++) { + if (tmap->isValidGroup(globaldata->Groups[i]) != true) { + cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl; + // erase the invalid group from globaldata->Groups + globaldata->Groups.erase(globaldata->Groups.begin()+i); + } } - } - //if the user only entered invalid groups - if (globaldata->Groups.size() == 0) { - cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; + //if the user only entered invalid groups + if (globaldata->Groups.size() == 0) { + cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; + for (int i = 0; i < tmap->namesOfGroups.size(); i++) { + globaldata->Groups.push_back(tmap->namesOfGroups[i]); + numGroups++; + allGroups += tmap->namesOfGroups[i]; + } + }else { + for (int i = 0; i < globaldata->Groups.size(); i++) { + allGroups += tmap->namesOfGroups[i]; + numGroups++; + } + } + }else{//user has enter "all" and wants the default groups for (int i = 0; i < tmap->namesOfGroups.size(); i++) { globaldata->Groups.push_back(tmap->namesOfGroups[i]); + numGroups++; + allGroups += tmap->namesOfGroups[i]; } + globaldata->setGroups(""); } - }else { for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - globaldata->Groups.push_back(tmap->namesOfGroups[i]); + allGroups += tmap->namesOfGroups[i]; } + numGroups = 1; + } + + //calculate number of comparsions + numComp = 0; + for (int r=0; rGroups[r]+globaldata->Groups[l]); + numComp++; + } + } + + //ABC + if (numComp != 1) { + groupComb.push_back(allGroups); + numComp++; } } catch(exception& e) {