2 * unifracunweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracunweightedcommand.h"
12 /***********************************************************/
13 UnifracUnweightedCommand::UnifracUnweightedCommand() {
15 globaldata = GlobalData::getInstance();
17 T = globaldata->gTree;
18 tmap = globaldata->gTreemap;
19 unweightedFile = globaldata->getTreeFile() + ".unweighted";
20 openOutputFile(unweightedFile, out);
21 sumFile = globaldata->getTreeFile() + ".uwsummary";
22 openOutputFile(sumFile, outSum);
23 distFile = globaldata->getTreeFile() + ".uwdistrib";
24 openOutputFile(distFile, outDist);
25 setGroups(); //sets users groups to analyze
26 convert(globaldata->getIters(), iters); //how many random trees to generate
27 unweighted = new Unweighted(tmap);
31 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
35 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
39 /***********************************************************/
40 int UnifracUnweightedCommand::execute() {
43 //get unweighted for users tree
44 userData.resize(1,0); //data[0] = unweightedscore
45 randomData.resize(1,0); //data[0] = unweightedscore
48 outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
50 outDist << "Groups Used ";
51 for (int m = 0; m < globaldata->Groups.size(); m++) {
52 outDist << globaldata->Groups[m] << " ";
56 outDist << "Tree#" << '\t' << "Iter" << '\t' << "UWScore" << endl;
58 //create new tree with same num nodes and leaves as users
61 //get pscores for users trees
62 for (int i = 0; i < T.size(); i++) {
63 cout << "Processing tree " << i+1 << endl;
64 userData = unweighted->getValues(T[i]); //userData[0] = unweightedscore
67 it = uscoreFreq.find(userData[0]);
68 if (it == uscoreFreq.end()) {//new score
69 uscoreFreq[userData[0]] = 1;
70 }else{ uscoreFreq[userData[0]]++; }
72 //add users score to valid scores
73 validScores[userData[0]] = userData[0];
76 utreeScores.push_back(userData[0]);
81 //get unweighted scores for random trees
82 for (int j = 0; j < iters; j++) {
83 //create a random tree with same topology as T[i], but different labels
84 randT->assembleRandomUnifracTree();
85 //get pscore of random tree
86 randomData = unweighted->getValues(randT);
88 //add trees unweighted score to map of scores
89 it2 = rscoreFreq.find(randomData[0]);
90 if (it2 != rscoreFreq.end()) {//already have that score
91 rscoreFreq[randomData[0]]++;
92 }else{//first time we have seen this score
93 rscoreFreq[randomData[0]] = 1;
96 //add randoms score to validscores
97 validScores[randomData[0]] = randomData[0];
99 //output info to uwdistrib file
100 outDist << i+1 << '\t' << '\t'<< j+1 << '\t' << '\t' << randomData[0] << endl;
103 saveRandomScores(); //save all random scores for unweighted file
105 //find the signifigance of the score
106 float rcumul = 1.0000;
107 for (it = rscoreFreq.begin(); it != rscoreFreq.end(); it++) {
108 rCumul[it->first] = rcumul;
109 //get percentage of random trees with that info
110 rscoreFreq[it->first] /= iters;
115 //save the signifigance of the users score for printing later
116 UWScoreSig.push_back(rCumul[userData[0]]);
120 rscoreFreq.clear(); //you clear this because in the summary file you want the unweighted signifinance to be relative to these 1000 trees.
124 float ucumul = 1.0000;
125 float rcumul = 1.0000;
126 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
127 for (it = validScores.begin(); it != validScores.end(); it++) {
128 it2 = uscoreFreq.find(it->first);
130 uCumul[it->first] = ucumul;
131 //user data has that score
132 if (it2 != uscoreFreq.end()) { uscoreFreq[it->first] /= T.size(); ucumul-= it2->second; }
133 else { uscoreFreq[it->first] = 0.0000; } //no user trees with that score
135 //make rscoreFreq map and rCumul
136 it2 = totalrscoreFreq.find(it->first);
137 rCumul[it->first] = rcumul;
138 //get percentage of random trees with that info
139 if (it2 != totalrscoreFreq.end()) { totalrscoreFreq[it->first] /= (iters*T.size()); rcumul-= it2->second; }
140 else { totalrscoreFreq[it->first] = 0.0000; } //no random trees with that score
144 printUnweightedFile();
145 printUWSummaryFile();
147 //reset groups parameter
148 globaldata->Groups.clear();
155 catch(exception& e) {
156 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
160 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
164 /***********************************************************/
165 void UnifracUnweightedCommand::printUnweightedFile() {
169 out << "Groups Used ";
170 for (int m = 0; m < globaldata->Groups.size(); m++) {
171 out << globaldata->Groups[m] << " ";
175 out << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl;
178 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
181 for (it = validScores.begin(); it != validScores.end(); it++) {
182 out << setprecision(6) << it->first << '\t' << '\t' << uscoreFreq[it->first] << '\t' << uCumul[it->first] << '\t' << totalrscoreFreq[it->first] << '\t' << rCumul[it->first] << endl;
188 catch(exception& e) {
189 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUnweightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
193 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function printUnweightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
198 /***********************************************************/
199 void UnifracUnweightedCommand::printUWSummaryFile() {
203 outSum << "Groups Used ";
204 for (int m = 0; m < globaldata->Groups.size(); m++) {
205 outSum << globaldata->Groups[m] << " ";
209 outSum << "Tree#" << '\t' << "UWScore" << '\t' << '\t' << "UWSig" << endl;
212 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
215 for (int i = 0; i< T.size(); i++) {
216 outSum << setprecision(6) << i+1 << '\t' << '\t' << utreeScores[i] << '\t' << UWScoreSig[i] << endl;
221 catch(exception& e) {
222 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
226 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
230 /***********************************************************/
231 void UnifracUnweightedCommand::saveRandomScores() {
233 for (it = rscoreFreq.begin(); it != rscoreFreq.end(); it++) {
234 //does this score already exist in the total map
235 it2 = totalrscoreFreq.find(it->first);
236 //if yes then add them
237 if (it2 != totalrscoreFreq.end()) {
238 totalrscoreFreq[it->first] += rscoreFreq[it->first];
239 }else{ //its a new score
240 totalrscoreFreq[it->first] = rscoreFreq[it->first];
244 catch(exception& e) {
245 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function saveRandomScores. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
249 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function saveRandomScores. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
254 /***********************************************************/
256 void UnifracUnweightedCommand::setGroups() {
258 //if the user has not entered specific groups to analyze then do them all
259 if (globaldata->Groups.size() != 0) {
260 //check that groups are valid
261 for (int i = 0; i < globaldata->Groups.size(); i++) {
262 if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
263 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
264 // erase the invalid group from globaldata->Groups
265 globaldata->Groups.erase (globaldata->Groups.begin()+i);
269 //if the user only entered invalid groups
270 if (globaldata->Groups.size() == 0) {
271 cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl;
272 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
273 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
278 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
279 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
283 catch(exception& e) {
284 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
288 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
293 /*****************************************************************/