2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
12 /***********************************************************/
// Constructor. Pulls the tree list and tree map from the GlobalData singleton,
// opens the summary output "<tree file>.wsummary", resolves the groups to
// compare via setGroups(), converts the iteration setting into iters, and
// allocates the Weighted calculator.
// NOTE(review): parts of this function (the try/catch scaffolding) are not
// visible in this view.
13 UnifracWeightedCommand::UnifracWeightedCommand() {
15 globaldata = GlobalData::getInstance();
// Trees to score, and the map handed to the Weighted calculator below.
17 T = globaldata->gTree;
18 tmap = globaldata->gTreemap;
// Summary file name is derived from the tree file name; outSum is later
// written by printWSummaryFile().
19 sumFile = globaldata->getTreeFile() + ".wsummary";
20 openOutputFile(sumFile, outSum);
// setGroups() fills globaldata->Groups, numGroups and groupComb.
// NOTE(review): numComp is presumably set there as well — its assignment is
// not visible in this view; confirm it happens before execute() uses it.
22 setGroups(); //sets the groups the user wants to analyze
23 convert(globaldata->getIters(), iters); //how many random trees to generate
// NOTE(review): allocated with new; no matching delete is visible in this
// view — confirm it is released elsewhere.
24 weighted = new Weighted(tmap);
// Error reporting for the (not visible) catch handlers; messages go to stdout.
28 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function UnifracWeightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
32 cout << "An unknown error has occurred in the UnifracWeightedCommand class function UnifracWeightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
36 /***********************************************************/
// Runs the weighted analysis. For every tree in T it:
//   1. scores the user's tree with weighted->getValues(T[i]),
//   2. scores `iters` label-randomized trees for each pair of groups,
//   3. sorts the random scores and stores (iters - index) / iters in WScoreSig,
//      where index comes from findIndex(),
//   4. calls calculateFreqsCumuls().
// Finally it clears globaldata->Groups.
// NOTE(review): several statements of this function are not visible in this
// view (e.g. where randT is allocated, where `count` is declared and advanced,
// and the return statement).
37 int UnifracWeightedCommand::execute() {
40 //get weighted for users tree
41 userData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
42 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
44 //create new tree with same num nodes and leaves as users
47 //get weighted scores for users trees
48 for (int i = 0; i < T.size(); i++) {
// One inner vector of scores per group comparison.
50 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
51 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
// Per-tree file names: "<treefile><n>.weighted" and the temp file
// "<treefile>temp.<n>.weighted" (n is the 1-based tree number).
52 weightedFile = globaldata->getTreeFile() + toString(i+1) + ".weighted";
53 weightedFileout = globaldata->getTreeFile() + "temp." + toString(i+1) + ".weighted";
55 cout << "Processing tree " << i+1 << endl;
56 userData = weighted->getValues(T[i]); //userData[0] = weightedscore
59 for (int s=0; s<numComp; s++) {
60 //add users score to vector of user scores
61 uScores[s].push_back(userData[s]);
63 //save users tree score for summary file
// utreeScores is appended to for every tree, so it holds numComp entries per
// tree in tree order; printWSummaryFile() reads it back.
64 utreeScores.push_back(userData[s]);
67 //get scores for random trees
68 for (int j = 0; j < iters; j++) {
// Visit every unordered pair of groups (r < l).
70 for (int r=0; r<numGroups; r++) {
71 for (int l = r+1; l < numGroups; l++) {
75 //create a random tree with same topology as T[i], but different labels
76 randT->assembleRandomUnifracTree(globaldata->Groups[r], globaldata->Groups[l]);
77 //get wscore of random tree
78 randomData = weighted->getValues(randT, globaldata->Groups[r], globaldata->Groups[l]);
// Only element 0 of the pairwise result is kept.
// NOTE(review): `count` is presumably the running pair index, reset per
// iteration and incremented here — those lines are not visible; confirm.
81 rScores[count].push_back(randomData[0]);
87 //removeValidScoresDuplicates();
88 //find the significance of the score for summary file
89 for (int f = 0; f < numComp; f++) {
// findIndex() scans from the front and stops at the first score >= the user's
// score, so the vector is sorted ascending first.
91 sort(rScores[f].begin(), rScores[f].end());
93 //the index of the score higher than yours is returned
94 //so if you have 1000 random trees the index returned is 100
95 //then there are 900 trees with a score greater than yours.
96 //giving you a significance of 0.900
// NOTE(review): the visible return paths of findIndex() yield a value in
// [0, size]; -1 could only come from a path not visible in this view.
97 int index = findIndex(userData[f], f); if (index == -1) { cout << "error in UnifracWeightedCommand" << endl; exit(1); } //error code
99 //the significance is the number of trees with the users score or higher
// Cast to float avoids integer division.
100 WScoreSig.push_back((iters-index)/(float)iters);
103 //out << "Tree# " << i << endl;
104 calculateFreqsCumuls();
115 //clear out users groups
116 globaldata->Groups.clear();
123 catch(exception& e) {
124 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
128 cout << "An unknown error has occurred in the UnifracWeightedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
132 /***********************************************************/
// For each group comparison, starts a new set of columns via initFile() and
// collects, for every score in validScores, the triple
// (score, rScoreFreq[a][score], rCumul[a][score]); afterwards the temp file
// weightedFileout is deleted.
// NOTE(review): the statements that consume `data` (presumably a call to
// output() followed by clearing it) and any resetFile()/close calls are not
// visible in this view — confirm.
133 void UnifracWeightedCommand::printWeightedFile() {
137 for(int a = 0; a < numComp; a++) {
// groupComb[a] is the label used for this comparison's column headers.
138 initFile(groupComb[a]);
140 for (it = validScores.begin(); it != validScores.end(); it++) {
// operator[] on the maps inserts a default entry if the score is missing;
// calculateFreqsCumuls() writes an entry for every validScores key beforehand.
141 data.push_back(it->first); data.push_back(rScoreFreq[a][it->first]); data.push_back(rCumul[a][it->first]);
150 remove(weightedFileout.c_str());
153 catch(exception& e) {
154 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
158 cout << "An unknown error has occurred in the UnifracWeightedCommand class function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
164 /***********************************************************/
// Writes the summary table to both outSum and stdout: a header row, then one
// row per (tree, group comparison) with the 1-based tree number, the group
// label, the user's tree score and its significance.
// NOTE(review): `count` indexes utreeScores/WScoreSig; its declaration and
// increment are not visible in this view — presumably it advances once per row.
165 void UnifracWeightedCommand::printWSummaryFile() {
168 outSum << "Tree#" << '\t' << "Groups" << '\t' << '\t' << "WScore" << '\t' << '\t' << "WSig" << endl;
169 cout << "Tree#" << '\t' << "Groups" << '\t' << '\t' << "WScore" << '\t' << '\t' << "WSig" << endl;
// Fixed-point output with a visible decimal point for the file stream.
172 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
176 for (int i = 0; i < T.size(); i++) {
177 for (int j = 0; j < numComp; j++) {
// Precision is the number of characters in the iterations setting string.
178 outSum << setprecision(globaldata->getIters().length()) << i+1 << '\t' << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << WScoreSig[count] << endl;
179 cout << setprecision(globaldata->getIters().length()) << i+1 << '\t' << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << WScoreSig[count] << endl;
185 catch(exception& e) {
// NOTE(review): both messages below name printWeightedFile although this is
// printWSummaryFile — looks like a copy/paste slip in the error text.
186 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
190 cout << "An unknown error has occurred in the UnifracWeightedCommand class function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
195 /***********************************************************/
// Returns the position of the first entry in rScores[index] that is >= score,
// or rScores[index].size() when every entry is smaller.
//   score - the value to locate (the user's tree score at the call site)
//   index - which group comparison's random scores to search
// The scan is linear from the front; execute() sorts rScores[index] before
// calling, so the result equals the number of random scores below `score`.
196 int UnifracWeightedCommand::findIndex(float score, int index) {
198 for (int i = 0; i < rScores[index].size(); i++) {
199 if (rScores[index][i] >= score) { return i; }
// No random score reached `score`.
201 return rScores[index].size();
203 catch(exception& e) {
204 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
208 cout << "An unknown error has occurred in the UnifracWeightedCommand class function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
213 /***********************************************************/
// Decides which groups are compared and builds the pairwise labels.
// Fills globaldata->Groups and numGroups, then appends one label per
// unordered pair of groups to groupComb.
// Sources for the group list, as far as visible here:
//   - Groups empty: every group in tmap->namesOfGroups;
//   - getGroups() != "all": the user's list with invalid names removed,
//     falling back to every group when fewer than two valid names remain;
//   - otherwise ("all"): every group, and the groups setting is reset to "".
// NOTE(review): the lines joining these branches (closing braces / else) and
// the numComp computation are not visible in this view.
214 void UnifracWeightedCommand::setGroups() {
216 //if the user has not entered specific groups to analyze then do them all
217 if (globaldata->Groups.size() == 0) {
218 numGroups = tmap->getNumGroups();
219 for (int i=0; i < numGroups; i++) {
220 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
223 if (globaldata->getGroups() != "all") {
224 //check that groups are valid
225 for (int i = 0; i < globaldata->Groups.size(); i++) {
226 if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
227 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
228 // erase the invalid group from globaldata->Groups
// NOTE(review): erase shifts the following element into position i and the
// loop then increments i, so the element right after an invalid group looks
// like it is never validated — confirm no compensating statement exists in
// the lines not visible here.
229 globaldata->Groups.erase (globaldata->Groups.begin()+i);
233 //if the user only entered invalid groups
234 if (globaldata->Groups.size() == 0) {
235 numGroups = tmap->getNumGroups();
236 for (int i=0; i < numGroups; i++) {
237 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
239 cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl;
// A single valid group cannot form a pair, so fall back to all groups.
240 }else if (globaldata->Groups.size() == 1) {
241 cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl;
242 numGroups = tmap->getNumGroups();
243 globaldata->Groups.clear();
244 for (int i=0; i < numGroups; i++) {
245 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
247 }else { numGroups = globaldata->Groups.size(); }
248 }else { //user wants all groups
249 numGroups = tmap->getNumGroups();
250 globaldata->Groups.clear();
251 globaldata->setGroups("");
252 for (int i=0; i < numGroups; i++) {
253 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
258 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
260 for (int i=0; i<numGroups; i++) {
262 for (int l = i+1; l < numGroups; l++) {
263 //set group comparison labels
// The label is the two group names concatenated with no separator.
264 groupComb.push_back(globaldata->Groups[i]+globaldata->Groups[l]);
268 catch(exception& e) {
269 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
273 cout << "An unknown error has occurred in the UnifracWeightedCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
278 /***********************************************************/
// Builds, for each group comparison, two maps keyed by random-tree score:
//   rScoreFreq[c][score] - how often the score occurred, divided by iters
//   rCumul[c][score]     - 1 minus the summed normalized frequencies of all
//                          scores visited before this one
// validScores collects every distinct score seen in any comparison so that
// all comparisons end up with the same set of keys.
// NOTE(review): the statements that clear the old values and the else line
// of the counting branch are not visible in this view.
280 void UnifracWeightedCommand::calculateFreqsCumuls() {
282 //clear out old tree values
284 rScoreFreq.resize(numComp);
286 rCumul.resize(numComp);
289 //calculate frequency
290 for (int f = 0; f < numComp; f++) {
291 for (int i = 0; i < rScores[f].size(); i++) { //looks like 0,0,1,1,1,2,4,7... you want to make a map that say rScoreFreq[0] = 2, rScoreFreq[1] = 3...
// Register the score as a key (the mapped value is just the score itself).
292 validScores[rScores[f][i]] = rScores[f][i];
// Increment an existing count, otherwise start the count at 1.
293 it = rScoreFreq[f].find(rScores[f][i]);
294 if (it != rScoreFreq[f].end()) {
295 rScoreFreq[f][rScores[f][i]]++;
297 rScoreFreq[f][rScores[f][i]] = 1;
303 for(int a = 0; a < numComp; a++) {
// Running cumulative value; starts at 1 and shrinks as scores are consumed.
// NOTE(review): assumes validScores iterates in ascending score order
// (presumably a std::map) — confirm against the header.
304 float rcumul = 1.0000;
305 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
306 for (it = validScores.begin(); it != validScores.end(); it++) {
307 //make rscoreFreq map and rCumul
308 it2 = rScoreFreq[a].find(it->first);
// Stored before the subtraction, so the entry includes this score's own share.
309 rCumul[a][it->first] = rcumul;
310 //get percentage of random trees with that info
// it2 refers to the entry that was just divided, so the normalized frequency
// (not the raw count) is subtracted from rcumul.
311 if (it2 != rScoreFreq[a].end()) { rScoreFreq[a][it->first] /= iters; rcumul-= it2->second; }
312 else { rScoreFreq[a][it->first] = 0.0000; } //no random trees with that score
317 catch(exception& e) {
// NOTE(review): the messages below say "calculateFreqsCums" while the
// function is named calculateFreqsCumuls.
318 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function calculateFreqsCums. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
322 cout << "An unknown error has occurred in the UnifracWeightedCommand class function calculateFreqsCums. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
328 /*****************************************************************/
// Prepares the temp output file for one group comparison and writes its
// header row with three columns named after `label`: <label>Score,
// <label>RandFreq and <label>RandCumul.
//   label - the group-comparison label used as the column-name prefix
// Two paths are visible: one reads the first line of the existing weighted
// file and appends the new column names to it, the other writes only the new
// column names.
// NOTE(review): the condition choosing between the two paths is not visible
// in this view.
330 void UnifracWeightedCommand::initFile(string label){
// Path 1: extend the header already present in weightedFile.
333 openOutputFile(weightedFileout, out);
334 openInputFile(weightedFile, inFile);
337 getline(inFile, inputBuffer);
339 out << inputBuffer << '\t' << label + "Score" << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;
// Path 2: start a fresh header with no earlier columns.
341 openOutputFile(weightedFileout, out);
342 out << label + "Score" << '\t' << label + "RandFreq" << '\t' << label + "RandCumul" << endl;
// Fixed-point notation with a visible decimal point for the data rows.
345 out.setf(ios::fixed, ios::floatfield);
346 out.setf(ios::showpoint);
348 catch(exception& e) {
349 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
353 cout << "An unknown error has occurred in the UnifracWeightedCommand class function initFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
358 /***********************************************************************/
// Writes one data row to `out` containing data[0], data[1] and data[2],
// tab-separated, with precision equal to the length of the iterations
// setting string.
//   data - must hold at least three values; it is taken by value (copied)
// Two paths are visible: one first reads the next line of inFile and prefixes
// the row with it, the other writes the three values alone.
// NOTE(review): the condition choosing between the two paths is not visible
// in this view.
360 void UnifracWeightedCommand::output(vector<double> data){
// Path 1: append the new columns to the matching line of the existing file.
364 getline(inFile, inputBuffer);
366 out << inputBuffer << setprecision(globaldata->getIters().length()) << '\t' << data[0] << '\t' << data[1] << '\t' << data[2] << endl;
// Path 2: no earlier columns to carry over.
369 out << setprecision(globaldata->getIters().length()) << data[0] << '\t' << data[1] << '\t' << data[2] << endl;
373 catch(exception& e) {
374 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
378 cout << "An unknown error has occurred in the UnifracWeightedCommand class function output. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
383 /***********************************************************************/
// Replaces the weighted file with the temp file: deletes weightedFile, then
// renames weightedFileout to weightedFile.
// NOTE(review): the statements preceding the remove() are not visible in this
// view (presumably the streams are closed first) — confirm. The return values
// of remove() and rename() are not checked.
385 void UnifracWeightedCommand::resetFile(){
396 remove(weightedFile.c_str());
397 rename(weightedFileout.c_str(), weightedFile.c_str());
399 catch(exception& e) {
400 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
404 cout << "An unknown error has occurred in the UnifracWeightedCommand class function resetFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";