2 * unifracunweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracunweightedcommand.h"
12 /***********************************************************/
13 UnifracUnweightedCommand::UnifracUnweightedCommand() {
15 globaldata = GlobalData::getInstance();
17 T = globaldata->gTree;
18 tmap = globaldata->gTreemap;
19 unweightedFile = globaldata->getTreeFile() + ".unweighted";
20 openOutputFile(unweightedFile, out);
21 sumFile = globaldata->getTreeFile() + ".uwsummary";
22 openOutputFile(sumFile, outSum);
23 distFile = globaldata->getTreeFile() + ".uwdistrib";
24 openOutputFile(distFile, outDist);
25 setGroups(); //sets users groups to analyze
26 convert(globaldata->getIters(), iters); //how many random trees to generate
27 unweighted = new Unweighted(tmap);
31 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
35 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
39 /***********************************************************/
40 int UnifracUnweightedCommand::execute() {
43 //get unweighted for users tree
44 userData.resize(1,0); //data[0] = unweightedscore
45 randomData.resize(1,0); //data[0] = unweightedscore
48 outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
50 outDist << "Groups Used ";
51 for (int m = 0; m < globaldata->Groups.size(); m++) {
52 outDist << globaldata->Groups[m] << " ";
56 outDist << "Tree#" << '\t' << "Iter" << '\t' << "UWScore" << endl;
58 //create new tree with same num nodes and leaves as users
61 //get pscores for users trees
62 for (int i = 0; i < T.size(); i++) {
63 cout << "Processing tree " << i+1 << endl;
64 userData = unweighted->getValues(T[i]); //userData[0] = unweightedscore
67 it = uscoreFreq.find(userData[0]);
68 if (it == uscoreFreq.end()) {//new score
69 uscoreFreq[userData[0]] = 1;
70 }else{ uscoreFreq[userData[0]]++; }
72 //add users score to valid scores
73 validScores[userData[0]] = userData[0];
76 utreeScores.push_back(userData[0]);
81 //get unweighted scores for random trees
82 for (int j = 0; j < iters; j++) {
83 //create a random tree with same topology as T[i], but different labels
84 randT->assembleRandomUnifracTree();
85 //get pscore of random tree
86 randomData = unweighted->getValues(randT);
88 //add trees unweighted score to map of scores
89 it2 = rscoreFreq.find(randomData[0]);
90 if (it2 != rscoreFreq.end()) {//already have that score
91 rscoreFreq[randomData[0]]++;
92 }else{//first time we have seen this score
93 rscoreFreq[randomData[0]] = 1;
96 //add randoms score to validscores
97 validScores[randomData[0]] = randomData[0];
99 //output info to uwdistrib file
100 outDist << i+1 << '\t' << '\t'<< j+1 << '\t' << '\t' << randomData[0] << endl;
103 saveRandomScores(); //save all random scores for unweighted file
105 //find the signifigance of the score
106 float rcumul = 1.0000;
107 for (it = rscoreFreq.begin(); it != rscoreFreq.end(); it++) {
108 rCumul[it->first] = rcumul;
109 //get percentage of random trees with that info
110 rscoreFreq[it->first] /= iters;
114 //save the signifigance of the users score for printing later
115 UWScoreSig.push_back(rCumul[userData[0]]);
119 rscoreFreq.clear(); //you clear this because in the summary file you want the unweighted signifinance to be relative to these 1000 trees.
123 float ucumul = 1.0000;
124 float rcumul = 1.0000;
125 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
126 for (it = validScores.begin(); it != validScores.end(); it++) {
127 it2 = uscoreFreq.find(it->first);
129 uCumul[it->first] = ucumul;
130 //user data has that score
131 if (it2 != uscoreFreq.end()) { uscoreFreq[it->first] /= T.size(); ucumul-= it2->second; }
132 else { uscoreFreq[it->first] = 0.0000; } //no user trees with that score
134 //make rscoreFreq map and rCumul
135 it2 = totalrscoreFreq.find(it->first);
136 rCumul[it->first] = rcumul;
137 //get percentage of random trees with that info
138 if (it2 != totalrscoreFreq.end()) { totalrscoreFreq[it->first] /= (iters*T.size()); rcumul-= it2->second; }
139 else { totalrscoreFreq[it->first] = 0.0000; } //no random trees with that score
143 printUnweightedFile();
144 printUWSummaryFile();
146 //reset groups parameter
147 globaldata->Groups.clear();
154 catch(exception& e) {
155 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
159 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
163 /***********************************************************/
164 void UnifracUnweightedCommand::printUnweightedFile() {
168 out << "Groups Used ";
169 for (int m = 0; m < globaldata->Groups.size(); m++) {
170 out << globaldata->Groups[m] << " ";
174 out << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl;
177 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
180 for (it = validScores.begin(); it != validScores.end(); it++) {
181 out << setprecision(6) << it->first << '\t' << '\t' << uscoreFreq[it->first] << '\t' << uCumul[it->first] << '\t' << totalrscoreFreq[it->first] << '\t' << rCumul[it->first] << endl;
187 catch(exception& e) {
188 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUnweightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
192 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function printUnweightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
197 /***********************************************************/
198 void UnifracUnweightedCommand::printUWSummaryFile() {
202 outSum << "Groups Used ";
203 for (int m = 0; m < globaldata->Groups.size(); m++) {
204 outSum << globaldata->Groups[m] << " ";
208 outSum << "Tree#" << '\t' << "UWScore" << '\t' << '\t' << "UWSig" << endl;
211 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
214 for (int i = 0; i< T.size(); i++) {
215 outSum << setprecision(6) << i+1 << '\t' << '\t' << utreeScores[i] << '\t' << UWScoreSig[i] << endl;
220 catch(exception& e) {
221 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
225 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
229 /***********************************************************/
230 void UnifracUnweightedCommand::saveRandomScores() {
232 for (it = rscoreFreq.begin(); it != rscoreFreq.end(); it++) {
233 //does this score already exist in the total map
234 it2 = totalrscoreFreq.find(it->first);
235 //if yes then add them
236 if (it2 != totalrscoreFreq.end()) {
237 totalrscoreFreq[it->first] += rscoreFreq[it->first];
238 }else{ //its a new score
239 totalrscoreFreq[it->first] = rscoreFreq[it->first];
243 catch(exception& e) {
244 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function saveRandomScores. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
248 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function saveRandomScores. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
253 /***********************************************************/
255 void UnifracUnweightedCommand::setGroups() {
257 //if the user has not entered specific groups to analyze then do them all
258 if (globaldata->Groups.size() != 0) {
259 //check that groups are valid
260 for (int i = 0; i < globaldata->Groups.size(); i++) {
261 if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
262 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
263 // erase the invalid group from globaldata->Groups
264 globaldata->Groups.erase (globaldata->Groups.begin()+i);
268 //if the user only entered invalid groups
269 if (globaldata->Groups.size() == 0) {
270 cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl;
271 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
272 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
277 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
278 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
282 catch(exception& e) {
283 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
287 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
292 /*****************************************************************/