]> git.donarmstrong.com Git - mothur.git/blob - unifracunweightedcommand.cpp
parsimony command now uses groups, fixed bug with unweighted groups
[mothur.git] / unifracunweightedcommand.cpp
1 /*
2  *  unifracunweightedcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 2/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "unifracunweightedcommand.h"
11
12 /***********************************************************/
13 UnifracUnweightedCommand::UnifracUnweightedCommand() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 
17                 T = globaldata->gTree;
18                 tmap = globaldata->gTreemap;
19                 unweightedFile = globaldata->getTreeFile() + ".unweighted";
20                 openOutputFile(unweightedFile, out);
21                 sumFile = globaldata->getTreeFile() + ".uwsummary";
22                 openOutputFile(sumFile, outSum);
23                 distFile = globaldata->getTreeFile() + ".uwdistrib";
24                 openOutputFile(distFile, outDist);
25
26                 //if the user has not entered specific groups to analyze then do them all
27                 if (globaldata->Groups.size() != 0) {
28                         //check that groups are valid
29                         for (int i = 0; i < globaldata->Groups.size(); i++) {
30                                 if (tmap->isValidGroup(globaldata->Groups[i]) != true) {
31                                         cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
32                                         // erase the invalid group from globaldata->Groups
33                                         globaldata->Groups.erase (globaldata->Groups.begin()+i);
34                                 }
35                         }
36                         
37                         //if the user only entered invalid groups
38                         if (globaldata->Groups.size() == 0) { 
39                                 cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; 
40                                 for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
41                                         globaldata->Groups.push_back(tmap->namesOfGroups[i]);
42                                 }
43                         }
44                 }else {
45                         for (int i = 0; i < tmap->namesOfGroups.size(); i++) {
46                                 globaldata->Groups.push_back(tmap->namesOfGroups[i]);
47                         }
48                 }
49                 
50                 convert(globaldata->getIters(), iters);  //how many random trees to generate
51                 unweighted = new Unweighted(tmap);
52
53         }
54         catch(exception& e) {
55                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
56                 exit(1);
57         }
58         catch(...) {
59                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function UnifracUnweightedCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
60                 exit(1);
61         }
62 }
63 /***********************************************************/
64 int UnifracUnweightedCommand::execute() {
65         try {
66                 
67                 //get unweighted for users tree
68                 userData.resize(1,0);  //data[0] = unweightedscore 
69                 randomData.resize(1,0); //data[0] = unweightedscore
70                 
71                 //format output
72                 outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
73                 outDist << "Tree#" << '\t' << "Iter" << '\t' << "UWScore" << endl;
74                 
75                 //create new tree with same num nodes and leaves as users
76                 randT = new Tree();
77                         
78                 //get pscores for users trees
79                 for (int i = 0; i < T.size(); i++) {
80                         cout << "Processing tree " << i+1 << endl;
81                         userData = unweighted->getValues(T[i]);  //userData[0] = unweightedscore
82                         
83                         //update uscoreFreq
84                         it = uscoreFreq.find(userData[0]);
85                         if (it == uscoreFreq.end()) {//new score
86                                 uscoreFreq[userData[0]] = 1;
87                         }else{ uscoreFreq[userData[0]]++; }
88                         
89                         //add users score to valid scores
90                         validScores[userData[0]] = userData[0];
91                         
92                         //saves users score
93                         utreeScores.push_back(userData[0]);
94                         
95                         //copy T[i]'s info.
96                         randT->getCopy(T[i]); 
97                         
98                         //get unweighted scores for random trees
99                         for (int j = 0; j < iters; j++) {
100                                 //create a random tree with same topology as T[i], but different labels
101                                 randT->assembleRandomUnifracTree();
102                                 //get pscore of random tree
103                                 randomData = unweighted->getValues(randT);
104                         
105                                 //add trees unweighted score to map of scores
106                                 it2 = rscoreFreq.find(randomData[0]);
107                                 if (it2 != rscoreFreq.end()) {//already have that score
108                                         rscoreFreq[randomData[0]]++;
109                                 }else{//first time we have seen this score
110                                         rscoreFreq[randomData[0]] = 1;
111                                 }
112                                 
113                                 //add randoms score to validscores
114                                 validScores[randomData[0]] = randomData[0];
115                                 
116                                 //output info to uwdistrib file
117                                 outDist << i+1 << '\t' << '\t'<< j+1 << '\t' << '\t' << randomData[0] << endl;
118                         }
119                         
120                         saveRandomScores(); //save all random scores for unweighted file
121                         
122                         //find the signifigance of the score
123                         float rcumul = 1.0000;
124                         for (it = rscoreFreq.begin(); it != rscoreFreq.end(); it++) { 
125                                 rCumul[it->first] = rcumul;
126                                 //get percentage of random trees with that info
127                                 rscoreFreq[it->first] /= iters; 
128                                 rcumul-= it->second;  
129                                 
130                         }
131                         
132                         //save the signifigance of the users score for printing later
133                         UWScoreSig.push_back(rCumul[userData[0]]);
134                         
135                         
136                         //clear random data
137                         rscoreFreq.clear();  //you clear this because in the summary file you want the unweighted signifinance to be relative to these 1000 trees.
138                         rCumul.clear();
139                 }
140                 
141                 float ucumul = 1.0000;
142                 float rcumul = 1.0000;
143                 //this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
144                 for (it = validScores.begin(); it != validScores.end(); it++) { 
145                         it2 = uscoreFreq.find(it->first);
146                         //make uCumul map
147                         uCumul[it->first] = ucumul;
148                         //user data has that score 
149                         if (it2 != uscoreFreq.end()) { uscoreFreq[it->first] /= T.size(); ucumul-= it2->second;  }
150                         else { uscoreFreq[it->first] = 0.0000; } //no user trees with that score
151                                                 
152                         //make rscoreFreq map and rCumul
153                         it2 = totalrscoreFreq.find(it->first);
154                         rCumul[it->first] = rcumul;
155                         //get percentage of random trees with that info
156                         if (it2 != totalrscoreFreq.end()) {  totalrscoreFreq[it->first] /= (iters*T.size()); rcumul-= it2->second;  }
157                         else { totalrscoreFreq[it->first] = 0.0000; } //no random trees with that score
158                         
159                 }
160                 
161                 printUnweightedFile();
162                 printUWSummaryFile();
163                 
164                 //reset groups parameter
165                 globaldata->Groups.clear();
166                 
167                 delete randT;
168                 
169                 return 0;
170                 
171         }
172         catch(exception& e) {
173                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
174                 exit(1);
175         }
176         catch(...) {
177                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
178                 exit(1);
179         }
180 }
181 /***********************************************************/
182 void UnifracUnweightedCommand::printUnweightedFile() {
183         try {
184                 //column headers
185                 
186                 out << "Score" << '\t' << "UserFreq" << '\t' << "UserCumul" << '\t' << "RandFreq" << '\t' << "RandCumul" << endl;
187                                 
188                 //format output
189                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
190                 
191                 //print each line
192                 for (it = validScores.begin(); it != validScores.end(); it++) { 
193                         out << setprecision(6) << it->first << '\t' << '\t' << uscoreFreq[it->first] << '\t' << uCumul[it->first] << '\t' << totalrscoreFreq[it->first] << '\t' << rCumul[it->first] << endl; 
194                 } 
195                 
196                 out.close();
197                 
198         }
199         catch(exception& e) {
200                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUnweightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
201                 exit(1);
202         }
203         catch(...) {
204                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function printUnweightedFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
205                 exit(1);
206         }
207 }
208
209 /***********************************************************/
210 void UnifracUnweightedCommand::printUWSummaryFile() {
211         try {
212                 //column headers
213                 outSum << "Tree#" << '\t'  <<  "UWScore" << '\t' << '\t' << "UWSig" <<  endl;
214                 
215                 //format output
216                 outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
217                 
218                 //print each line
219                 for (int i = 0; i< T.size(); i++) {
220                         outSum << setprecision(6) << i+1 << '\t' << '\t' << utreeScores[i] << '\t' << UWScoreSig[i] << endl; 
221                 }
222                 
223                 outSum.close();
224         }
225         catch(exception& e) {
226                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
227                 exit(1);
228         }
229         catch(...) {
230                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function printUWSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
231                 exit(1);
232         }
233 }
234 /***********************************************************/
235 void UnifracUnweightedCommand::saveRandomScores() {
236         try {
237                 for (it = rscoreFreq.begin(); it != rscoreFreq.end(); it++) { 
238                         //does this score already exist in the total map
239                         it2 = totalrscoreFreq.find(it->first);
240                         //if yes then add them
241                         if (it2 != totalrscoreFreq.end()) { 
242                                 totalrscoreFreq[it->first] += rscoreFreq[it->first];
243                         }else{ //its a new score
244                                 totalrscoreFreq[it->first] = rscoreFreq[it->first];
245                         }
246                 }
247         }
248         catch(exception& e) {
249                 cout << "Standard Error: " << e.what() << " has occurred in the UnifracUnweightedCommand class Function saveRandomScores. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
250                 exit(1);
251         }
252         catch(...) {
253                 cout << "An unknown error has occurred in the UnifracUnweightedCommand class function saveRandomScores. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
254                 exit(1);
255         }
256 }
257
258 /***********************************************************/