X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=unifracweightedcommand.cpp;h=8fc5b3bcedd56c76200dfcb8889374906c9f28b3;hp=4ec2773d38eba93933015d8be8a77b8d078124bc;hb=5a3592c6478d5d786ec20e4bee71854ad92fdb8c;hpb=8062b1d0ef60062809448074066f6b6a0f6619a8 diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index 4ec2773..8fc5b3b 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -18,6 +18,9 @@ UnifracWeightedCommand::UnifracWeightedCommand() { tmap = globaldata->gTreemap; weightedFile = globaldata->getTreeFile() + ".weighted"; openOutputFile(weightedFile, out); + //column headers + out << "Group" << '\t' << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; + sumFile = globaldata->getTreeFile() + ".wsummary"; openOutputFile(sumFile, outSum); @@ -42,111 +45,87 @@ int UnifracWeightedCommand::execute() { //get weighted for users tree userData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC... randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC... - uscoreFreq.resize(numComp); - validScores.resize(numComp); - totalrscoreFreq.resize(numComp); - uCumul.resize(numComp); - + //create new tree with same num nodes and leaves as users randT = new Tree(); //get pscores for users trees for (int i = 0; i < T.size(); i++) { - rscoreFreq.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC... - rCumul.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC... - + rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC... + uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC... + validScores.resize(numComp); + cout << "Processing tree " << i+1 << endl; userData = weighted->getValues(T[i]); //userData[0] = weightedscore //save users score for (int s=0; sgetCopy(T[i]); - //get scores for random trees for (int j = 0; j < iters; j++) { - //create a random tree with same topology as T[i], but different labels - randT->assembleRandomUnifracTree(); - //get pscore of random tree - randomData = weighted->getValues(randT); - - //save ramdoms score - for (int p=0; pgetCopy(T[i]); + + if (globaldata->Groups.size() != 0) { + //create a random tree with same topology as T[i], but different labels + randT->assembleRandomUnifracTree(globaldata->Groups[r], globaldata->Groups[l]); + //get wscore of random tree + randomData = weighted->getValues(randT, globaldata->Groups[r], globaldata->Groups[l]); + }else { + //create a random tree with same topology as T[i], but different labels + randT->assembleRandomUnifracTree(tmap->namesOfGroups[r], tmap->namesOfGroups[l]); + //get wscore of random tree + randomData = weighted->getValues(randT, tmap->namesOfGroups[r], tmap->namesOfGroups[l]); + } + + //save scores + rScores[count].push_back(randomData[0]); + validScores[count][randomData[0]] = randomData[0]; + count++; } - - //add random score to valid scores - validScores[p][randomData[p]] = randomData[p]; + n++; } } - - saveRandomScores(); //save all random scores for weighted file + + removeValidScoresDuplicates(); //find the signifigance of the score for summary file - for (int t = 0; t < numComp; t++) { - float rcumul = 1.0000; - for (it = validScores[t].begin(); it != validScores[t].end(); it++) { - //make rscoreFreq map and rCumul - it2 = rscoreFreq[t].find(it->first); - rCumul[t][it->first] = rcumul; - //get percentage of random trees with that info - if (it2 != rscoreFreq[t].end()) { rscoreFreq[t][it->first] /= iters; rcumul-= it2->second; } - else { rscoreFreq[t][it->first] = 0.0000; } //no random trees with that score - } - } - - //save the signifigance of the users score for printing later for (int f = 0; f < numComp; f++) { - WScoreSig.push_back(rCumul[f][userData[f]]); - } + //sort random scores + sort(rScores[f].begin(), rScores[f].end()); + + //the index of the score higher than yours is returned + //so if you have 1000 random trees the index returned is 100 + //then there are 900 trees with a score greater then you. + //giving you a signifigance of 0.900 + int index = findIndex(userData[f]); if (index == -1) { cout << "error in UnifracWeightedCommand" << endl; exit(1); } //error code + //the signifigance is the number of trees with the users score or higher + WScoreSig.push_back((iters-index)/(float)iters); + } - //clear random data - rscoreFreq.clear(); - rCumul.clear(); - } - - rCumul.resize(numComp); - for (int b = 0; b < numComp; b++) { - float ucumul = 1.0000; - float rcumul = 1.0000; - //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print. - for (it = validScores[b].begin(); it != validScores[b].end(); it++) { - it2 = uscoreFreq[b].find(it->first); - //make uCumul map - uCumul[b][it->first] = ucumul; - //user data has that score - if (it2 != uscoreFreq[b].end()) { uscoreFreq[b][it->first] /= T.size(); ucumul-= it2->second; } - else { uscoreFreq[b][it->first] = 0.0000; } //no user trees with that score + out << "Tree# " << i << endl; + //printWeightedFile(); - //make rscoreFreq map and rCumul - it2 = totalrscoreFreq[b].find(it->first); - rCumul[b][it->first] = rcumul; - //get percentage of random trees with that info - if (it2 != totalrscoreFreq[b].end()) { totalrscoreFreq[b][it->first] /= (iters * T.size()); rcumul-= it2->second; } - else { totalrscoreFreq[b][it->first] = 0.0000; } //no random trees with that score - } + //clear data + rScores.clear(); + uScores.clear(); + validScores.clear(); } - printWeightedFile(); printWSummaryFile(); //clear out users groups @@ -166,21 +145,18 @@ int UnifracWeightedCommand::execute() { exit(1); } } -/***********************************************************/ +/*********************************************************** void UnifracWeightedCommand::printWeightedFile() { try { - //column headers - - out << "Group" << '\t' << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl; - + //format output out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); //for each group for (int e = 0; e < numComp; e++) { //print each line in that group - for (it = validScores[e].begin(); it != validScores[e].end(); it++) { - out << setprecision(6) << groupComb[e] << '\t' << it->first << '\t' << '\t' << uscoreFreq[e][it->first] << '\t' << uCumul[e][it->first] << '\t' << totalrscoreFreq[e][it->first] << '\t' << rCumul[e][it->first] << endl; + for (i = 0; i < validScores[e].size(); i++) { + out << setprecision(6) << groupComb[e] << '\t' << validScores[e][i] << '\t' << '\t' << uscoreFreq[e][it->first] << '\t' << uCumul[e][it->first] << '\t' << rscoreFreq[e][it->first] << '\t' << rCumul[e][it->first] << endl; } } @@ -230,28 +206,44 @@ void UnifracWeightedCommand::printWSummaryFile() { } /***********************************************************/ -void UnifracWeightedCommand::saveRandomScores() { +void UnifracWeightedCommand::removeValidScoresDuplicates() { try { for (int e = 0; e < numComp; e++) { - //update total map with new random scores - for (it = rscoreFreq[e].begin(); it != rscoreFreq[e].end(); it++) { - //does this score already exist in the total map - it2 = totalrscoreFreq[e].find(it->first); - //if yes then add them - if (it2 != totalrscoreFreq[e].end()) { - totalrscoreFreq[e][it->first] += rscoreFreq[e][it->first]; - }else{ //its a new score - totalrscoreFreq[e][it->first] = rscoreFreq[e][it->first]; - } + //sort valid scores + sort(validScores[e].begin(), validScores[e].end()); + + for (int i = 0; i< validScores[e].size()-1; i++) { + if (validScores[e][i] == validScores[e][i+1]) { validScores[e].erase(validScores[e].begin()+i); } + } + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function removeValidScoresDuplicates. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the UnifracWeightedCommand class function removeValidScoresDuplicates. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************/ +int UnifracWeightedCommand::findIndex(float score) { + try{ + for (int e = 0; e < numComp; e++) { + for (int i = 0; i < rScores[e].size(); i++) { +//cout << rScores[e][i] << " number " << i << endl; + if (rScores[e][i] >= score) { return i; } } } + return -1; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function saveRandomScores. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the UnifracWeightedCommand class function saveRandomScores. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the UnifracWeightedCommand class function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } }