2 * unifracweightedcommand.cpp
5 * Created by Sarah Westcott on 2/9/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "unifracweightedcommand.h"
12 /***********************************************************/
13 UnifracWeightedCommand::UnifracWeightedCommand() {
15 globaldata = GlobalData::getInstance();
17 T = globaldata->gTree;
18 tmap = globaldata->gTreemap;
19 weightedFile = globaldata->getTreeFile() + ".weighted";
20 openOutputFile(weightedFile, out);
22 out << "Group" << '\t' << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl;
24 sumFile = globaldata->getTreeFile() + ".wsummary";
25 openOutputFile(sumFile, outSum);
27 setGroups(); //sets the groups the user wants to analyze
28 convert(globaldata->getIters(), iters); //how many random trees to generate
29 weighted = new Weighted(tmap);
33 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function UnifracWeightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
37 cout << "An unknown error has occurred in the UnifracWeightedCommand class function UnifracWeightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
41 /***********************************************************/
42 int UnifracWeightedCommand::execute() {
45 //get weighted for users tree
46 userData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
47 randomData.resize(numComp,0); //data[0] = weightedscore AB, data[1] = weightedscore AC...
49 //create new tree with same num nodes and leaves as users
52 //get pscores for users trees
53 for (int i = 0; i < T.size(); i++) {
54 rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
55 uScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
56 validScores.resize(numComp);
58 cout << "Processing tree " << i+1 << endl;
59 userData = weighted->getValues(T[i]); //userData[0] = weightedscore
62 for (int s=0; s<numComp; s++) {
63 //add users score to vector of user scores
64 uScores[s].push_back(userData[s]);
66 //add users score to vector of valid scores
67 validScores[s].push_back(userData[s]);
69 //save users tree score for summary file
70 utreeScores.push_back(userData[s]);
73 //get scores for random trees
74 for (int j = 0; j < iters; j++) {
77 for (int r=0; r<numGroups; r++) {
78 for (int l = r+1; l < numGroups; l++) {
82 if (globaldata->Groups.size() != 0) {
83 //create a random tree with same topology as T[i], but different labels
84 randT->assembleRandomUnifracTree(globaldata->Groups[r], globaldata->Groups[l]);
85 //get wscore of random tree
86 randomData = weighted->getValues(randT, globaldata->Groups[r], globaldata->Groups[l]);
88 //create a random tree with same topology as T[i], but different labels
89 randT->assembleRandomUnifracTree(tmap->namesOfGroups[r], tmap->namesOfGroups[l]);
90 //get wscore of random tree
91 randomData = weighted->getValues(randT, tmap->namesOfGroups[r], tmap->namesOfGroups[l]);
93 // randT->createNewickFile("hold"+toString(r)+toString(l)+toString(j));
96 rScores[count].push_back(randomData[0]);
97 validScores[count][randomData[0]] = randomData[0];
104 removeValidScoresDuplicates();
105 //find the signifigance of the score for summary file
106 for (int f = 0; f < numComp; f++) {
108 sort(rScores[f].begin(), rScores[f].end());
110 //the index of the score higher than yours is returned
111 //so if you have 1000 random trees the index returned is 100
112 //then there are 900 trees with a score greater then you.
113 //giving you a signifigance of 0.900
114 int index = findIndex(userData[f], f); if (index == -1) { cout << "error in UnifracWeightedCommand" << endl; exit(1); } //error code
116 //the signifigance is the number of trees with the users score or higher
117 WScoreSig.push_back((iters-index)/(float)iters);
120 out << "Tree# " << i << endl;
121 //printWeightedFile();
131 //clear out users groups
132 globaldata->Groups.clear();
139 catch(exception& e) {
140 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
144 cout << "An unknown error has occurred in the UnifracWeightedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
148 /***********************************************************
149 void UnifracWeightedCommand::printWeightedFile() {
153 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
156 for (int e = 0; e < numComp; e++) {
157 //print each line in that group
158 for (i = 0; i < validScores[e].size(); i++) {
159 out << setprecision(6) << groupComb[e] << '\t' << validScores[e][i] << '\t' << '\t' << uscoreFreq[e][it->first] << '\t' << uCumul[e][it->first] << '\t' << rscoreFreq[e][it->first] << '\t' << rCumul[e][it->first] << endl;
166 catch(exception& e) {
167 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
171 cout << "An unknown error has occurred in the UnifracWeightedCommand class function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
177 /***********************************************************/
178 void UnifracWeightedCommand::printWSummaryFile() {
181 outSum << "Tree#" << '\t' << "Groups" << '\t' << '\t' << "WScore" << '\t' << '\t' << "WSig" << endl;
182 cout << "Tree#" << '\t' << "Groups" << '\t' << '\t' << "WScore" << '\t' << '\t' << "WSig" << endl;
185 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
189 for (int i = 0; i < T.size(); i++) {
190 for (int j = 0; j < numComp; j++) {
191 outSum << setprecision(6) << i+1 << '\t' << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << WScoreSig[count] << endl;
192 cout << setprecision(6) << i+1 << '\t' << '\t' << groupComb[j] << '\t' << utreeScores[count] << '\t' << WScoreSig[count] << endl;
198 catch(exception& e) {
199 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
203 cout << "An unknown error has occurred in the UnifracWeightedCommand class function printWeightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
208 /***********************************************************/
209 void UnifracWeightedCommand::removeValidScoresDuplicates() {
211 for (int e = 0; e < numComp; e++) {
213 sort(validScores[e].begin(), validScores[e].end());
215 for (int i = 0; i< validScores[e].size()-1; i++) {
216 if (validScores[e][i] == validScores[e][i+1]) { validScores[e].erase(validScores[e].begin()+i); }
220 catch(exception& e) {
221 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function removeValidScoresDuplicates. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
225 cout << "An unknown error has occurred in the UnifracWeightedCommand class function removeValidScoresDuplicates. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
230 /***********************************************************/
231 int UnifracWeightedCommand::findIndex(float score, int index) {
233 for (int i = 0; i < rScores[index].size(); i++) {
234 if (rScores[index][i] >= score) { return i; }
236 return rScores[index].size();
238 catch(exception& e) {
239 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
243 cout << "An unknown error has occurred in the UnifracWeightedCommand class function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
248 /***********************************************************/
249 void UnifracWeightedCommand::setGroups() {
251 //if the user has not entered specific groups to analyze then do them all
252 if (globaldata->Groups.size() == 0) {
253 numGroups = tmap->getNumGroups();
255 if (globaldata->getGroups() != "all") {
256 //check that groups are valid
257 for (int i = 0; i < globaldata->Groups.size(); i++) {
258 if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
259 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
260 // erase the invalid group from globaldata->Groups
261 globaldata->Groups.erase (globaldata->Groups.begin()+i);
265 //if the user only entered invalid groups
266 if (globaldata->Groups.size() == 0) {
267 numGroups = tmap->getNumGroups();
268 cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl;
269 }else if (globaldata->Groups.size() == 1) {
270 cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl;
271 numGroups = tmap->getNumGroups();
272 globaldata->Groups.clear();
273 }else { numGroups = globaldata->Groups.size(); }
274 }else { //users wants all groups
275 numGroups = tmap->getNumGroups();
276 globaldata->Groups.clear();
277 globaldata->setGroups("");
281 //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
284 for (int i=1; i<numGroups; i++) {
286 for (int l = n; l < numGroups; l++) {
287 //set group comparison labels
288 if (globaldata->Groups.size() != 0) {
289 groupComb.push_back(globaldata->Groups[i-1]+globaldata->Groups[l]);
291 groupComb.push_back(tmap->namesOfGroups[i-1]+tmap->namesOfGroups[l]);
297 catch(exception& e) {
298 cout << "Standard Error: " << e.what() << " has occurred in the UnifracWeightedCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
302 cout << "An unknown error has occurred in the UnifracWeightedCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";