2 // mergetaxsummarycommand.cpp
5 // Created by Sarah Westcott on 2/13/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "mergetaxsummarycommand.h"
12 //**********************************************************************************************************************
// Registers the parameters this command accepts (input, output, inputdir,
// outputdir) in the `parameters` container and gathers their names.
// NOTE(review): the declared return type is vector<string>; presumably
// myArray is what gets returned -- confirm against the full function body.
13 vector<string> MergeTaxSummaryCommand::setParameters(){
// One CommandParameter per accepted option, each appended to `parameters`.
15 CommandParameter pinput("input", "String", "", "", "", "", "","",false,true,true); parameters.push_back(pinput);
16 CommandParameter poutput("output", "String", "", "", "", "", "","",false,true,true); parameters.push_back(poutput);
17 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
18 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
// Collect the name of every registered parameter.
20 vector<string> myArray;
21 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Forward the exception object to m->errorOut, labelled with this class and method.
25 m->errorOut(e, "MergeTaxSummaryCommand", "setParameters");
29 //**********************************************************************************************************************
// Builds the help text for merge.taxsummary: what the command does, which
// parameters it takes, a usage example and a note on spacing.
// NOTE(review): the declared return type is string; presumably helpString is
// what gets returned -- confirm against the full function body.
30 string MergeTaxSummaryCommand::getHelpString(){
32 string helpString = "";
// Every sentence ends with '\n' so the pieces do not run together once
// concatenated (previously only the last sentence had a terminator).
33 helpString += "The merge.taxsummary command takes a list of tax.summary files separated by dashes and merges them into one file.\n";
34 helpString += "The merge.taxsummary command parameters are input and output.\n";
35 helpString += "Example merge.taxsummary(input=small.tax.summary-large.tax.summary, output=all.tax.summary).\n";
36 helpString += "Note: No spaces between parameter labels (i.e. output), '=' and parameters (i.e.yourOutputFileName).\n";
// Forward the exception object to m->errorOut, labelled with this class and method.
40 m->errorOut(e, "MergeTaxSummaryCommand", "getHelpString");
44 //**********************************************************************************************************************
// Default constructor: marks the command as aborted/help-only and registers
// an empty list of output names under the "taxsummary" output type.
45 MergeTaxSummaryCommand::MergeTaxSummaryCommand(){
// Both flags are set, so execute() on this instance returns immediately.
47 abort = true; calledHelp = true;
// Register the output type with no file names yet.
49 vector<string> tempOutNames;
50 outputTypes["taxsummary"] = tempOutNames;
// Forward the exception object to m->errorOut, labelled with this class and method.
53 m->errorOut(e, "MergeTaxSummaryCommand", "MergeTaxSummaryCommand");
57 //**********************************************************************************************************************
// Constructs the command from an option string.
// Handles the special "help" and "citation" options, validates the supplied
// parameter names, resolves the dash-separated list of input files (falling
// back to the default path and then the output directory when a file cannot
// be opened), and determines the output file name. `abort` is set whenever a
// required value is missing or a parameter is invalid.
59 MergeTaxSummaryCommand::MergeTaxSummaryCommand(string option) {
61 abort = false; calledHelp = false;
// "help" and "citation" print their text and mark the command as not runnable.
63 if(option == "help") { help(); abort = true; calledHelp = true; }
64 else if(option == "citation") { citation(); abort = true; calledHelp = true; }
// Names of the parameters this command accepts.
66 vector<string> myArray = setParameters();
// Parse the option string into name/value pairs. This local map has the same
// name as the `parameters` container that setParameters() fills.
68 OptionParser parser(option);
69 map<string,string> parameters = parser.getParameters();
71 ValidParameters validParameter;
73 //check to make sure all parameters are valid for command
74 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
75 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
78 //initialize outputTypes
79 vector<string> tempOutNames;
80 outputTypes["taxsummary"] = tempOutNames;
82 //if the user changes the input directory command factory will send this info to us in the output parameter
// The value "not found" is treated as "parameter not supplied".
83 string inputDir = validParameter.validFile(parameters, "inputdir", false);
84 if (inputDir == "not found"){ inputDir = ""; }
// "input" is required; its value is split at dashes into fileNames.
86 string fileList = validParameter.validFile(parameters, "input", false);
87 if(fileList == "not found") { m->mothurOut("you must enter two or more file names"); m->mothurOutEndLine(); abort=true; }
88 else{ m->splitAtDash(fileList, fileNames); }
90 //if the user changes the output directory command factory will send this info to us in the output parameter
91 string outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found") { outputDir = ""; }
// The file count is captured once here and used as the loop bound below.
94 numInputFiles = fileNames.size();
// NOTE(review): the message asks for "two or more" file names, but this
// condition only rejects an empty list -- confirm the intended minimum.
96 if(numInputFiles == 0){
97 m->mothurOut("you must enter two or more file names and you entered " + toString(fileNames.size()) + " file names"); m->mothurOutEndLine();
// Resolve and test-open every input file.
// NOTE(review): the bound is the count captured above, while unreadable
// entries are erased from fileNames inside the loop. Unless i and/or
// numInputFiles are adjusted after the erase, later iterations could skip an
// entry or index past the end of fileNames -- verify.
101 for(int i=0;i<numInputFiles;i++){
102 if (inputDir != "") {
103 string path = m->hasPath(fileNames[i]);
104 //if the user has not given a path then, add inputdir. else leave path alone.
105 if (path == "") { fileNames[i] = inputDir + fileNames[i]; }
// A result of 1 from openInputFile is treated as "could not open" below.
110 ableToOpen = m->openInputFile(fileNames[i], in, "noerror");
113 //if you can't open it, try default location
114 if (ableToOpen == 1) {
115 if (m->getDefaultPath() != "") { //default path is set
116 string tryPath = m->getDefaultPath() + m->getSimpleName(fileNames[i]);
117 m->mothurOut("Unable to open " + fileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
119 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
// From here on the entry refers to the candidate path.
121 fileNames[i] = tryPath;
125 //if you can't open it, try output location
126 if (ableToOpen == 1) {
127 if (m->getOutputDir() != "") { //output directory is set
128 string tryPath = m->getOutputDir() + m->getSimpleName(fileNames[i]);
129 m->mothurOut("Unable to open " + fileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
131 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
// From here on the entry refers to the candidate path.
133 fileNames[i] = tryPath;
// Still unreadable after all fallbacks: warn and drop the file from the list.
139 if (ableToOpen == 1) {
140 m->mothurOut("Unable to open " + fileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
141 //erase from file list
142 fileNames.erase(fileNames.begin()+i);
// "output" is required. When an output directory was given, the output file
// is placed there using only the simple name of the requested file.
148 outputFileName = validParameter.validFile(parameters, "output", false);
149 if (outputFileName == "not found") { m->mothurOut("you must enter an output file name"); m->mothurOutEndLine(); abort=true; }
150 else if (outputDir != "") { outputFileName = outputDir + m->getSimpleName(outputFileName); }
// Forward any std::exception to m->errorOut, labelled with this class and method.
155 catch(exception& e) {
156 m->errorOut(e, "MergeTaxSummaryCommand", "MergeTaxSummaryCommand");
160 //**********************************************************************************************************************
// Merges the tax.summary files listed in fileNames into one taxonomy tree and
// writes the result to outputFileName.
// Returns 0 when only help was requested or when the run is interrupted via
// m->control_pressed, and 2 when the command was aborted for another reason.
162 int MergeTaxSummaryCommand::execute(){
164 if (abort == true) { if (calledHelp) { return 0; } return 2; }
// Resolve the output path and call mothurRemove on it before merging
// (presumably to clear a stale copy -- confirm mothurRemove semantics).
166 outputFileName = m->getFullPathName(outputFileName);
167 m->mothurRemove(outputFileName);
// The tree is a flat vector of nodes; index 0 holds the "Root" node.
169 vector<rawTaxNode> tree;
170 tree.push_back(rawTaxNode("Root"));
// Cleared as soon as one input file has no per-group columns.
172 bool hasGroups = true;
// Process each input file in turn.
175 for (int i = 0; i < fileNames.size(); i++) {
// The first line is the header; split it into column names.
178 m->openInputFile(fileNames[i], in);
179 string temp = m->getline(in);
180 vector<string> headers = m->splitWhiteSpace(temp);
// Exactly five columns means no group breakdown; any columns from index 5
// onwards are group names, recorded both globally and for this file.
182 vector<string> thisFilesGroups;
183 if (headers.size() == 5) { hasGroups = false; }
184 else { for (int j = 5; j < headers.size(); j++) { groups.insert(headers[j]); thisFilesGroups.push_back(headers[j]); } }
186 int level, daugterLevels, total;
// Maps a tax level to the tree index of the node most recently added at that
// level; level 0 maps to the root at index 0.
188 map<int, int> levelToCurrentNode;
189 levelToCurrentNode[0] = 0;
// Stop early if the user interrupted the run.
192 if (m->control_pressed) { return 0; }
// Read the five fixed columns of one row.
194 in >> level >> rankId >> tax >> daugterLevels >> total; m->gobble(in);
// Then one count per group, in this file's header order.
195 map<string, int> groupCounts;
196 if (thisFilesGroups.size() != 0) {
197 for (int j = 0; j < thisFilesGroups.size(); j++) {
198 int tempNum; in >> tempNum; m->gobble(in);
199 groupCounts[thisFilesGroups[j]] = tempNum;
// The parent is the node last recorded one level up; a missing entry is
// reported as an error.
205 map<int, int>::iterator itParent = levelToCurrentNode.find(level-1);
207 if (itParent == levelToCurrentNode.end()) { m->mothurOut("[ERROR]: situation I didnt expect.\n"); }
208 else { parent = itParent->second; }
// Add or update the taxon under its parent and remember it as the current
// node for this level.
210 levelToCurrentNode[level] = addTaxToTree(tree, level, parent, tax, total, groupCounts);
// If any file lacked group columns, drop all group information and warn.
216 if (!hasGroups && (groups.size() != 0)) { groups.clear(); m->mothurOut("[WARNING]: not all files contain group breakdown, ignoring group counts.\n"); }
// Write the merged tree.
219 m->openOutputFile(outputFileName, out);
220 print(out, tree, groups);
// On interruption, call mothurRemove on the output file and stop.
222 if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
// Report the output file and register it under the "taxsummary" type.
224 m->mothurOutEndLine();
225 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
226 m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["taxsummary"].push_back(outputFileName);
227 m->mothurOutEndLine();
// Forward any std::exception to m->errorOut, labelled with this class and method.
231 catch(exception& e) {
232 m->errorOut(e, "MergeTaxSummaryCommand", "execute");
236 /**************************************************************************************************/
// Adds `taxon` as a child of tree[currentNode], or updates it if that child
// already exists, and returns the tree index of the affected node.
//   tree        - flat node storage; a new node is appended when needed
//   level       - tax level stored on a newly created node
//   currentNode - index of the parent node in `tree`
//   taxon       - child name, used as the key in the parent's children map
//   total       - count added to (existing node) or assigned to (new node) total
//   groups      - per-group counts merged into the node's groupCount map
238 int MergeTaxSummaryCommand::addTaxToTree(vector<rawTaxNode>& tree, int level, int currentNode, string taxon, int total, map<string, int> groups){
240 map<string, int>::iterator childPointer;
// Look the taxon up among the parent's children.
242 childPointer = tree[currentNode].children.find(taxon);
243 int nodeToIncrement = 0;
245 if(childPointer != tree[currentNode].children.end()){ //if the node already exists, increment counts
246 nodeToIncrement = childPointer->second;
247 tree[nodeToIncrement].total += total;
// Merge group counts: insert groups the node has not seen, add to the rest.
249 for (map<string, int>::iterator itGroups = groups.begin(); itGroups != groups.end(); itGroups++) {
250 map<string, int>::iterator it = tree[nodeToIncrement].groupCount.find(itGroups->first);
251 if (it == tree[nodeToIncrement].groupCount.end()) { tree[nodeToIncrement].groupCount[itGroups->first] = itGroups->second; }
252 else { it->second += itGroups->second; }
255 else{ //otherwise, create it
// Append the node, link it to its parent, and seed its counts.
256 tree.push_back(rawTaxNode(taxon));
257 tree[currentNode].children[taxon] = tree.size()-1;
258 tree[tree.size()-1].parent = currentNode;
259 nodeToIncrement = tree.size()-1;
260 tree[nodeToIncrement].total = total;
261 tree[nodeToIncrement].level = level;
262 for (map<string, int>::iterator itGroups = groups.begin(); itGroups != groups.end(); itGroups++) {
263 tree[nodeToIncrement].groupCount[itGroups->first] = itGroups->second;
267 return nodeToIncrement;
// Forward any std::exception to m->errorOut; the label names this function
// (it previously said "addSeqToTree", which does not exist in this class).
269 catch(exception& e) {
270 m->errorOut(e, "MergeTaxSummaryCommand", "addTaxToTree");
274 /**************************************************************************************************/
// Assigns a rank string to every descendant of tree[index]: each child's rank
// is the parent's rank, a '.', and the string form of `counter`; the function
// then recurses into that child.
// NOTE(review): `counter` is neither declared nor updated in the lines shown
// here -- confirm where it is initialised and incremented.
276 int MergeTaxSummaryCommand::assignRank(int index, vector<rawTaxNode>& tree){
278 map<string,int>::iterator it;
// Visit the children in the iteration order of the children map.
281 for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
// Stop early (returning 0) if the user interrupted the run.
282 if (m->control_pressed) { return 0; }
283 tree[it->second].rank = tree[index].rank + '.' + toString(counter);
// Descend into the child's own subtree.
286 assignRank(it->second, tree);
// Forward any std::exception to m->errorOut, labelled with this class and method.
291 catch(exception& e) {
292 m->errorOut(e, "MergeTaxSummaryCommand", "assignRank");
296 /**************************************************************************************************/
// Writes the merged summary: the header line, the root row, and then every
// other node through the recursive print(int, ...) overload.
// Before writing, the root's total and per-group counts are accumulated from
// its direct children.
//   out    - output stream to write to
//   tree   - node storage; tree[0] is the root and is modified here
//   groups - group names that become the extra columns
298 int MergeTaxSummaryCommand::print(ofstream& out, vector<rawTaxNode>& tree, set<string> groups){
302 vector<string> mGroups;
// Header: five fixed columns followed by one column per group.
304 out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t";
305 for (set<string>::iterator it = groups.begin(); it != groups.end(); it++) { out << (*it) << '\t'; }
// Start every group count on the root at zero.
308 for (set<string>::iterator it2 = groups.begin(); it2 != groups.end(); it2++) { tree[0].groupCount[*it2] = 0; }
// Sum the direct children's totals and group counts into the root.
310 map<string,int>::iterator it;
311 for(it=tree[0].children.begin();it!=tree[0].children.end();it++){
312 tree[0].total += tree[it->second].total;
313 for (set<string>::iterator it2 = groups.begin(); it2 != groups.end(); it2++) {
314 map<string, int>:: iterator itGroups = tree[it->second].groupCount.find(*it2);
// Only groups the child actually has contribute to the root's count.
315 if (itGroups != tree[it->second].groupCount.end()) {
316 tree[0].groupCount[*it2] += itGroups->second;
// Root row: level, rank, name, number of children, total.
323 out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << tree[0].children.size() << "\t" << tree[0].total << "\t";
// Look up the root's count for each group column.
325 for (set<string>::iterator it = groups.begin(); it != groups.end(); it++) {
326 map<string, int>:: iterator itGroups = tree[0].groupCount.find(*it);
328 if (itGroups != tree[0].groupCount.end()) { num = itGroups->second; }
// Emit the rest of the tree, starting from the root's children.
334 print(0, out, tree, groups);
// Forward any std::exception to m->errorOut, labelled with this class and method.
339 catch(exception& e) {
340 m->errorOut(e, "MergeTaxSummaryCommand", "print");
344 /**************************************************************************************************/
// Recursive helper: for every child of tree[i], writes the child's row and
// then descends into that child's own children.
//   i      - index of the node whose children are written
//   out    - output stream to write to
//   tree   - node storage
//   groups - group names, in the column order used for the counts
345 int MergeTaxSummaryCommand::print(int i, ofstream& out, vector<rawTaxNode>& tree, set<string> groups){
347 map<string,int>::iterator it;
// Children are visited in the iteration order of the children map.
348 for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
// Row: level, rank, name, number of children, total.
351 out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << tree[it->second].children.size() << "\t" << tree[it->second].total << "\t";
// Look up this node's count for each group column.
353 for (set<string>::iterator it2 = groups.begin(); it2 != groups.end(); it2++) {
354 map<string, int>:: iterator itGroups = tree[it->second].groupCount.find(*it2);
356 if (itGroups != tree[it->second].groupCount.end()) { num = itGroups->second; }
// Descend into this child's subtree.
361 print(it->second, out, tree, groups);
// Forward any std::exception to m->errorOut, labelled with this class and method.
366 catch(exception& e) {
367 m->errorOut(e, "MergeTaxSummaryCommand", "print");
371 //**********************************************************************************************************************