]> git.donarmstrong.com Git - mothur.git/blob - mergetaxsummarycommand.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / mergetaxsummarycommand.cpp
1 //
2 //  mergetaxsummarycommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 2/13/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #include "mergetaxsummarycommand.h"
10
11
12 //**********************************************************************************************************************
13 vector<string> MergeTaxSummaryCommand::setParameters(){ 
14         try {
15                 CommandParameter pinput("input", "String", "", "", "", "", "","",false,true,true); parameters.push_back(pinput);
16                 CommandParameter poutput("output", "String", "", "", "", "", "","",false,true,true); parameters.push_back(poutput);
17                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
18                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
19                 
20                 vector<string> myArray;
21                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
22                 return myArray;
23         }
24         catch(exception& e) {
25                 m->errorOut(e, "MergeTaxSummaryCommand", "setParameters");
26                 exit(1);
27         }
28 }
29 //**********************************************************************************************************************
30 string MergeTaxSummaryCommand::getHelpString(){ 
31         try {
32                 string helpString = "";
33                 helpString += "The merge.taxsummary command takes a list of tax.summary files separated by dashes and merges them into one file."; 
34                 helpString += "The merge.taxsummary command parameters are input and output."; 
35                 helpString += "Example merge.taxsummary(input=small.tax.summary-large.tax.summary, output=all.tax.summary).";
36                 helpString += "Note: No spaces between parameter labels (i.e. output), '=' and parameters (i.e.yourOutputFileName).\n";
37                 return helpString;
38         }
39         catch(exception& e) {
40                 m->errorOut(e, "MergeTaxSummaryCommand", "getHelpString");
41                 exit(1);
42         }
43 }
44 //**********************************************************************************************************************
45 MergeTaxSummaryCommand::MergeTaxSummaryCommand(){       
46         try {
47                 abort = true; calledHelp = true; 
48                 setParameters();
49                 vector<string> tempOutNames;
50                 outputTypes["taxsummary"] = tempOutNames;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "MergeTaxSummaryCommand", "MergeTaxSummaryCommand");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58
59 MergeTaxSummaryCommand::MergeTaxSummaryCommand(string option)  {
60         try {
61                 abort = false; calledHelp = false;   
62                 
63                 if(option == "help") { help();  abort = true; calledHelp = true;    }
64                 else if(option == "citation") { citation(); abort = true; calledHelp = true;   }
65                 else {
66                         vector<string> myArray = setParameters();
67                         
68                         OptionParser parser(option);
69                         map<string,string> parameters = parser.getParameters();
70                         
71                         ValidParameters validParameter;
72                         
73                         //check to make sure all parameters are valid for command
74                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
75                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
76                         }
77                         
78                         //initialize outputTypes
79                         vector<string> tempOutNames;
80                         outputTypes["taxsummary"] = tempOutNames;
81                         
82                         //if the user changes the input directory command factory will send this info to us in the output parameter 
83                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
84                         if (inputDir == "not found"){   inputDir = "";          }
85                         
86                         string fileList = validParameter.validFile(parameters, "input", false);                 
87                         if(fileList == "not found") { m->mothurOut("you must enter two or more file names"); m->mothurOutEndLine();  abort=true;  }
88                         else{   m->splitAtDash(fileList, fileNames);    }
89                         
90                         //if the user changes the output directory command factory will send this info to us in the output parameter 
91                         string outputDir = validParameter.validFile(parameters, "outputdir", false);            if (outputDir == "not found")   {       outputDir = "";         }
92                         
93                         
94                         numInputFiles = fileNames.size();
95                         ifstream testFile;
96                         if(numInputFiles == 0){
97                                 m->mothurOut("you must enter two or more file names and you entered " + toString(fileNames.size()) +  " file names"); m->mothurOutEndLine();
98                                 abort=true;  
99                         }
100                         else{
101                                 for(int i=0;i<numInputFiles;i++){
102                                         if (inputDir != "") {
103                         string path = m->hasPath(fileNames[i]);
104                         //if the user has not given a path then, add inputdir. else leave path alone.
105                         if (path == "") {       fileNames[i] = inputDir + fileNames[i];         }
106                     }
107                     
108                     int ableToOpen;
109                     ifstream in;
110                     ableToOpen = m->openInputFile(fileNames[i], in, "noerror");
111                     in.close(); 
112                     
113                     //if you can't open it, try default location
114                     if (ableToOpen == 1) {
115                         if (m->getDefaultPath() != "") { //default path is set
116                             string tryPath = m->getDefaultPath() + m->getSimpleName(fileNames[i]);
117                             m->mothurOut("Unable to open " + fileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
118                             ifstream in2;
119                             ableToOpen = m->openInputFile(tryPath, in2, "noerror");
120                             in2.close();
121                             fileNames[i] = tryPath;
122                         }
123                     }
124                     
125                     //if you can't open it, try output location
126                     if (ableToOpen == 1) {
127                         if (m->getOutputDir() != "") { //default path is set
128                             string tryPath = m->getOutputDir() + m->getSimpleName(fileNames[i]);
129                             m->mothurOut("Unable to open " + fileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
130                             ifstream in2;
131                             ableToOpen = m->openInputFile(tryPath, in2, "noerror");
132                             in2.close();
133                             fileNames[i] = tryPath;
134                         }
135                     }
136                     
137                     
138                     
139                     if (ableToOpen == 1) { 
140                         m->mothurOut("Unable to open " + fileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
141                         //erase from file list
142                         fileNames.erase(fileNames.begin()+i);
143                         i--;
144                     }
145                                 }
146                         }   
147                         
148                         outputFileName = validParameter.validFile(parameters, "output", false);                 
149                         if (outputFileName == "not found") { m->mothurOut("you must enter an output file name"); m->mothurOutEndLine();  abort=true;  }
150                         else if (outputDir != "") { outputFileName = outputDir + m->getSimpleName(outputFileName);   }
151             
152                 }
153         
154         }
155         catch(exception& e) {
156                 m->errorOut(e, "MergeTaxSummaryCommand", "MergeTaxSummaryCommand");
157                 exit(1);
158         }
159 }
160 //**********************************************************************************************************************
161
162 int MergeTaxSummaryCommand::execute(){
163         try {
164                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
165                 
166         outputFileName = m->getFullPathName(outputFileName);
167                 m->mothurRemove(outputFileName);
168         
169         vector<rawTaxNode> tree;
170         tree.push_back(rawTaxNode("Root"));
171                 tree[0].rank = "0";
172         bool hasGroups = true;
173         set<string> groups;
174        
175         for (int i = 0; i < fileNames.size(); i++) {
176             
177             ifstream in;
178             m->openInputFile(fileNames[i], in);
179             string temp = m->getline(in);
180             vector<string> headers = m->splitWhiteSpace(temp);
181             
182             vector<string> thisFilesGroups;
183             if (headers.size() == 5) { hasGroups = false; }
184             else {  for (int j = 5; j < headers.size(); j++) { groups.insert(headers[j]); thisFilesGroups.push_back(headers[j]); } }
185             
186             int level, daugterLevels, total;
187             string rankId, tax; 
188             map<int, int> levelToCurrentNode;
189             levelToCurrentNode[0] = 0;
190             while (!in.eof()) {
191                 
192                 if (m->control_pressed) {   return 0;  }
193                 
194                 in >> level >> rankId >> tax >> daugterLevels >> total; m->gobble(in);
195                 map<string, int> groupCounts;
196                 if (thisFilesGroups.size() != 0) {  
197                     for (int j = 0; j < thisFilesGroups.size(); j++) {  
198                         int tempNum; in >> tempNum; m->gobble(in);
199                         groupCounts[thisFilesGroups[j]] = tempNum; 
200                     } 
201                 }
202                 
203                 if (level == 0) {}
204                 else { 
205                     map<int, int>::iterator itParent = levelToCurrentNode.find(level-1);
206                     int parent = 0;
207                     if (itParent == levelToCurrentNode.end()) { m->mothurOut("[ERROR]: situation I didnt expect.\n"); }
208                     else { parent = itParent->second; }
209                     
210                     levelToCurrentNode[level] = addTaxToTree(tree, level, parent, tax, total, groupCounts);
211                 } 
212             }
213             in.close();
214         }
215         
216         if (!hasGroups && (groups.size() != 0)) { groups.clear();  m->mothurOut("[WARNING]: not all files contain group breakdown, ignoring group counts.\n");  }
217         
218         ofstream out;
219         m->openOutputFile(outputFileName, out);
220         print(out, tree, groups);
221                         
222                 if (m->control_pressed) {  m->mothurRemove(outputFileName); return 0;  }
223                 
224                 m->mothurOutEndLine();
225                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
226                 m->mothurOut(outputFileName); m->mothurOutEndLine();    outputNames.push_back(outputFileName); outputTypes["taxsummary"].push_back(outputFileName);
227                 m->mothurOutEndLine();
228         
229                 return 0;
230         }
231         catch(exception& e) {
232                 m->errorOut(e, "MergeTaxSummaryCommand", "execute");
233                 exit(1);
234         }
235 }
236 /**************************************************************************************************/
237
238 int MergeTaxSummaryCommand::addTaxToTree(vector<rawTaxNode>& tree, int level, int currentNode, string taxon, int total, map<string, int> groups){
239         try {
240                 map<string, int>::iterator childPointer;
241                 
242         childPointer = tree[currentNode].children.find(taxon);
243         int nodeToIncrement = 0;
244                         
245         if(childPointer != tree[currentNode].children.end()){   //if the node already exists, increment counts
246             nodeToIncrement = childPointer->second;
247             tree[nodeToIncrement].total += total;
248             
249             for (map<string, int>::iterator itGroups = groups.begin(); itGroups != groups.end(); itGroups++) {
250                 map<string, int>::iterator it = tree[nodeToIncrement].groupCount.find(itGroups->first);
251                 if (it == tree[nodeToIncrement].groupCount.end()) { tree[nodeToIncrement].groupCount[itGroups->first] = itGroups->second; }
252                 else {   it->second += itGroups->second;  }
253             }
254         }
255         else{                                                                                   //otherwise, create it
256             tree.push_back(rawTaxNode(taxon));
257             tree[currentNode].children[taxon] = tree.size()-1;
258             tree[tree.size()-1].parent = currentNode;
259             nodeToIncrement = tree.size()-1;
260             tree[nodeToIncrement].total = total;
261             tree[nodeToIncrement].level = level;
262             for (map<string, int>::iterator itGroups = groups.begin(); itGroups != groups.end(); itGroups++) {
263                  tree[nodeToIncrement].groupCount[itGroups->first] = itGroups->second; 
264             }
265         }
266         
267                 return nodeToIncrement;
268         }
269         catch(exception& e) {
270                 m->errorOut(e, "MergeTaxSummaryCommand", "addSeqToTree");
271                 exit(1);
272         }
273 }
274 /**************************************************************************************************/
275
276 int MergeTaxSummaryCommand::assignRank(int index, vector<rawTaxNode>& tree){
277         try {
278                 map<string,int>::iterator it;
279                 int counter = 1;
280                 
281                 for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
282             if (m->control_pressed) { return 0; }
283                         tree[it->second].rank = tree[index].rank + '.' + toString(counter);
284                         counter++;
285             
286                         assignRank(it->second, tree);
287                 }
288         
289         return 0;
290         }
291         catch(exception& e) {
292                 m->errorOut(e, "MergeTaxSummaryCommand", "assignRank");
293                 exit(1);
294         }
295 }
296 /**************************************************************************************************/
297
298 int MergeTaxSummaryCommand::print(ofstream& out, vector<rawTaxNode>& tree, set<string> groups){
299         try {
300                 
301                 assignRank(0, tree); 
302         vector<string> mGroups;
303                 //print labels
304                 out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t";
305                 for (set<string>::iterator it = groups.begin(); it != groups.end(); it++) { out << (*it) << '\t'; }             
306                 out << endl;
307         
308         for (set<string>::iterator it2 = groups.begin(); it2 != groups.end(); it2++) {  tree[0].groupCount[*it2] = 0;  }
309             
310         map<string,int>::iterator it;
311                 for(it=tree[0].children.begin();it!=tree[0].children.end();it++){   
312             tree[0].total += tree[it->second].total;
313                         for (set<string>::iterator it2 = groups.begin(); it2 != groups.end(); it2++) { 
314                 map<string, int>:: iterator itGroups = tree[it->second].groupCount.find(*it2);
315                 if (itGroups != tree[it->second].groupCount.end()) { 
316                     tree[0].groupCount[*it2] += itGroups->second;
317                 }
318             }
319                 }
320
321                 
322                 //print root
323                 out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << tree[0].children.size() << "\t" << tree[0].total << "\t";
324                 
325         for (set<string>::iterator it = groups.begin(); it != groups.end(); it++) { 
326             map<string, int>:: iterator itGroups = tree[0].groupCount.find(*it);
327             int num = 0;
328             if (itGroups != tree[0].groupCount.end()) { num = itGroups->second; }
329             out << num << '\t';
330         }
331         out << endl;
332                 
333                 //print rest
334                 print(0, out, tree, groups);
335         
336         return 0;
337                 
338         }
339         catch(exception& e) {
340                 m->errorOut(e, "MergeTaxSummaryCommand", "print");
341                 exit(1);
342         }
343 }
344 /**************************************************************************************************/
345 int MergeTaxSummaryCommand::print(int i, ofstream& out, vector<rawTaxNode>& tree, set<string> groups){
346         try {
347                 map<string,int>::iterator it;
348                 for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
349                         
350             //print root
351             out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << tree[it->second].children.size() << "\t" << tree[it->second].total << "\t";
352             
353             for (set<string>::iterator it2 = groups.begin(); it2 != groups.end(); it2++) { 
354                 map<string, int>:: iterator itGroups = tree[it->second].groupCount.find(*it2);
355                 int num = 0;
356                 if (itGroups != tree[it->second].groupCount.end()) { num = itGroups->second; }
357                 out << num << '\t';
358             }
359             out << endl;
360
361                         print(it->second, out, tree, groups);
362                 }
363         
364         return 0;
365         }
366         catch(exception& e) {
367                 m->errorOut(e, "MergeTaxSummaryCommand", "print");
368                 exit(1);
369         }
370 }
371 //**********************************************************************************************************************
372
373