]> git.donarmstrong.com Git - mothur.git/blob - mergegroupscommand.cpp
changing command name classify.shared to classifyrf.shared
[mothur.git] / mergegroupscommand.cpp
1 /*
2  *  mergegroupscommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 1/24/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mergegroupscommand.h"
11 #include "sharedutilities.h"
12
13 //**********************************************************************************************************************
14 vector<string> MergeGroupsCommand::setParameters(){     
15         try {
16                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none","shared",false,false,true); parameters.push_back(pshared);
17                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "none","group",false,false,true); parameters.push_back(pgroup);
18                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
19                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
20                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
23                 
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "MergeGroupsCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string MergeGroupsCommand::getHelpString(){     
35         try {
36                 string helpString = "";
37                 helpString += "The merge.groups command reads a shared or group file and a design file and merges the groups that are in the same grouping in the design file.\n";
38                 helpString += "The merge.groups command outputs a .shared file. \n";
39                 helpString += "The merge.groups command parameters are shared, group, groups, label and design.  The design parameter is required.\n";
40                 helpString += "The design parameter allows you to assign your groups to sets. It is required. \n";
41                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
42                 helpString += "The groups parameter allows you to specify which of the groups in your shared or group file you would like included. The group names are separated by dashes.\n";
43                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
44                 helpString += "The merge.groups command should be in the following format: merge.groups(design=yourDesignFile, shared=yourSharedFile).\n";
45                 helpString += "Example merge.groups(design=temp.design, groups=A-B-C, shared=temp.shared).\n";
46                 helpString += "The default value for groups is all the groups in your sharedfile, and all labels in your inputfile will be used.\n";
47                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
48                 return helpString;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "MergeGroupsCommand", "getHelpString");
52                 exit(1);
53         }
54 }
55
56 //**********************************************************************************************************************
57 string MergeGroupsCommand::getOutputPattern(string type) {
58     try {
59         string pattern = "";
60         
61         if (type == "shared") {  pattern = "[filename],merge,[extension]"; } 
62         else if (type == "group") {  pattern = "[filename],merge,[extension]"; } 
63         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
64         
65         return pattern;
66     }
67     catch(exception& e) {
68         m->errorOut(e, "MergeGroupsCommand", "getOutputPattern");
69         exit(1);
70     }
71 }
72 //**********************************************************************************************************************
73 MergeGroupsCommand::MergeGroupsCommand(){       
74         try {
75                 abort = true; calledHelp = true; 
76                 setParameters();
77                 vector<string> tempOutNames;
78                 outputTypes["shared"] = tempOutNames;
79                 outputTypes["group"] = tempOutNames;
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "MergeGroupsCommand", "MetaStatsCommand");
83                 exit(1);
84         }
85 }
86 //**********************************************************************************************************************
87
88 MergeGroupsCommand::MergeGroupsCommand(string option) {
89         try {
90                 abort = false; calledHelp = false;   
91                 allLines = 1;
92                 
93                 //allow user to run help
94                 if(option == "help") { help(); abort = true; calledHelp = true; }
95                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
96                 
97                 else {
98                         vector<string> myArray = setParameters();
99                         
100                         OptionParser parser(option);
101                         map<string,string> parameters = parser.getParameters();
102                         
103                         ValidParameters validParameter;
104                         
105                         //check to make sure all parameters are valid for command
106                         map<string,string>::iterator it;
107                         for (it = parameters.begin(); it != parameters.end(); it++) { 
108                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
109                         }
110                         
111                         //initialize outputTypes
112                         vector<string> tempOutNames;
113                         outputTypes["shared"] = tempOutNames;
114                         outputTypes["group"] = tempOutNames;
115                         
116                         //if the user changes the output directory command factory will send this info to us in the output parameter 
117                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
118                         
119                         //if the user changes the input directory command factory will send this info to us in the output parameter 
120                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
121                         if (inputDir == "not found"){   inputDir = "";          }
122                         else {
123                                 string path;
124                                 it = parameters.find("design");
125                                 //user has given a template file
126                                 if(it != parameters.end()){ 
127                                         path = m->hasPath(it->second);
128                                         //if the user has not given a path then, add inputdir. else leave path alone.
129                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
130                                 }
131                                 
132                                 it = parameters.find("shared");
133                                 //user has given a template file
134                                 if(it != parameters.end()){ 
135                                         path = m->hasPath(it->second);
136                                         //if the user has not given a path then, add inputdir. else leave path alone.
137                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
138                                 }
139                                 
140                                 it = parameters.find("group");
141                                 //user has given a template file
142                                 if(it != parameters.end()){ 
143                                         path = m->hasPath(it->second);
144                                         //if the user has not given a path then, add inputdir. else leave path alone.
145                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
146                                 }
147                                 
148                         }
149                         
150                         //check for required parameters
151                         designfile = validParameter.validFile(parameters, "design", true);
152                         if (designfile == "not open") { abort = true; }
153                         else if (designfile == "not found") {                           
154                                 //if there is a current shared file, use it
155                                 designfile = m->getDesignFile(); 
156                                 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
157                                 else {  m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
158                         }else { m->setDesignFile(designfile); } 
159                         
160                         sharedfile = validParameter.validFile(parameters, "shared", true);
161                         if (sharedfile == "not open") { abort = true; sharedfile = ""; }
162                         else if (sharedfile == "not found") {  sharedfile = ""; }
163                         else { m->setSharedFile(sharedfile); }  
164                         
165                         groupfile = validParameter.validFile(parameters, "group", true);
166                         if (groupfile == "not open") { abort = true; groupfile = ""; }
167                         else if (groupfile == "not found") {  groupfile = ""; }
168                         else { m->setGroupFile(groupfile); }    
169                         
170                         //check for optional parameter and set defaults
171                         // ...at some point should added some additional type checking...
172                         label = validParameter.validFile(parameters, "label", false);                   
173                         if (label == "not found") { label = ""; }
174                         else { 
175                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
176                                 else { allLines = 1;  }
177                         }
178                         
179                         groups = validParameter.validFile(parameters, "groups", false);                 
180                         if (groups == "not found") { groups = "all";  }
181                         m->splitAtDash(groups, Groups);
182                         m->setGroups(Groups);
183                         
184                         if ((sharedfile == "") && (groupfile == "")) { 
185                                 //give priority to group, then shared
186                                 groupfile = m->getGroupFile(); 
187                                 if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
188                                 else { 
189                                         sharedfile = m->getSharedFile(); 
190                                         if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
191                                         else { 
192                                                 m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
193                                         }
194                                 }
195                         }
196                 }
197                 
198         }
199         catch(exception& e) {
200                 m->errorOut(e, "MergeGroupsCommand", "MergeGroupsCommand");
201                 exit(1);
202         }
203 }
204 //**********************************************************************************************************************
205
206 int MergeGroupsCommand::execute(){
207         try {
208                 
209                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
210         
211                 designMap = new GroupMap(designfile);
212                 designMap->readDesignMap();
213                 
214                 if (groupfile != "") { processGroupFile(designMap); }
215                 if (sharedfile != "") { processSharedFile(designMap); }
216
217                 //reset groups parameter
218                 m->clearGroups();  
219                 delete designMap;
220                 
221                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0;}
222                 
223                 
224                 //set shared file as new current sharedfile
225                 string current = "";
226                 itTypes = outputTypes.find("shared");
227                 if (itTypes != outputTypes.end()) {
228                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
229                 }
230                 
231                 itTypes = outputTypes.find("group");
232                 if (itTypes != outputTypes.end()) {
233                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
234                 }
235                 
236                 m->mothurOutEndLine();
237                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
238                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
239                 m->mothurOutEndLine();
240                 
241                 return 0;
242         }
243         catch(exception& e) {
244                 m->errorOut(e, "MergeGroupsCommand", "execute");
245                 exit(1);
246         }
247 }
248 //**********************************************************************************************************************
249
250 int MergeGroupsCommand::process(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
251         try {
252                 
253                 map<string, SharedRAbundVector> merged;
254                 map<string, SharedRAbundVector>::iterator it;
255                 
256                 for (int i = 0; i < thisLookUp.size(); i++) {
257                         
258                         if (m->control_pressed) { return 0; }
259                         
260                         //what grouping does this group belong to
261                         string grouping = designMap->getGroup(thisLookUp[i]->getGroup());
262                         if (grouping == "not found") { m->mothurOut("[ERROR]: " + thisLookUp[i]->getGroup() + " is not in your design file. Ignoring!"); m->mothurOutEndLine(); grouping = "NOTFOUND"; }
263                         
264                         else {
265                                 //do we already have a member of this grouping?
266                                 it = merged.find(grouping);
267                                 
268                                 if (it == merged.end()) { //nope, so create it
269                                         merged[grouping] = *thisLookUp[i];
270                                         merged[grouping].setGroup(grouping);
271                                 }else { //yes, merge it
272                                         
273                                         for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
274                                                 int abund = (it->second).getAbundance(j);
275                                                 abund += thisLookUp[i]->getAbundance(j);
276                                                 
277                                                 (it->second).set(j, abund, grouping);
278                                         }
279                                 }
280                         }
281                 }
282                 
283                 //print new file
284                 for (it = merged.begin(); it != merged.end(); it++) {
285                         out << (it->second).getLabel() << '\t' << it->first << '\t';
286                         (it->second).print(out);
287                 }
288                 
289                 return 0;
290                 
291         }
292         catch(exception& e) {
293                 m->errorOut(e, "MergeGroupsCommand", "process");
294                 exit(1);
295         }
296 }
297 //**********************************************************************************************************************
298
299 int MergeGroupsCommand::processSharedFile(GroupMap*& designMap){
300         try {
301                 
302                 string thisOutputDir = outputDir;
303                 if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
304         map<string, string> variables; 
305         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile));
306         variables["[extension]"] = m->getExtension(sharedfile);
307                 string outputFileName = getOutputFileName("shared", variables);
308         outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
309                 
310                 ofstream out;
311                 m->openOutputFile(outputFileName, out);
312                 
313                 InputData input(sharedfile, "sharedfile");
314                 lookup = input.getSharedRAbundVectors();
315                 string lastLabel = lookup[0]->getLabel();
316                 
317                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
318                 set<string> processedLabels;
319                 set<string> userLabels = labels;
320                 
321                 //as long as you are not at the end of the file or done wih the lines you want
322                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
323                         
324                         if (m->control_pressed) {  out.close(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->clearGroups();  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]); } return 0; }
325                         
326                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
327                                 
328                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
329                                 
330                                 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
331                                 process(lookup, out);
332                                 
333                                 processedLabels.insert(lookup[0]->getLabel());
334                                 userLabels.erase(lookup[0]->getLabel());
335                         }
336                         
337                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
338                                 string saveLabel = lookup[0]->getLabel();
339                                 
340                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
341                                 lookup = input.getSharedRAbundVectors(lastLabel);
342                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
343                                 
344                                 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
345                                 process(lookup, out);
346                                 
347                                 processedLabels.insert(lookup[0]->getLabel());
348                                 userLabels.erase(lookup[0]->getLabel());
349                                 
350                                 //restore real lastlabel to save below
351                                 lookup[0]->setLabel(saveLabel);
352                         }
353                         
354                         lastLabel = lookup[0]->getLabel();
355                         //prevent memory leak
356                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
357                         
358                         if (m->control_pressed) {  out.close(); m->clearGroups();   delete designMap;  for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); } return 0; }
359                         
360                         //get next line to process
361                         lookup = input.getSharedRAbundVectors();                                
362                 }
363                 
364                 if (m->control_pressed) { out.close(); m->clearGroups();  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]); }  return 0; }
365                 
366                 //output error messages about any remaining user labels
367                 set<string>::iterator it;
368                 bool needToRun = false;
369                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
370                         m->mothurOut("Your file does not include the label " + *it); 
371                         if (processedLabels.count(lastLabel) != 1) {
372                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
373                                 needToRun = true;
374                         }else {
375                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
376                         }
377                 }
378                 
379                 //run last label if you need to
380                 if (needToRun == true)  {
381                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
382                         lookup = input.getSharedRAbundVectors(lastLabel);
383                         
384                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
385                         
386                         if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
387                         process(lookup, out);
388                         
389                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
390                 }
391                 
392                 out.close();
393                 
394                                 
395                 return 0;
396                 
397         }
398         catch(exception& e) {
399                 m->errorOut(e, "MergeGroupsCommand", "processSharedFile");
400                 exit(1);
401         }
402 }
403 //**********************************************************************************************************************
404
405 int MergeGroupsCommand::processGroupFile(GroupMap*& designMap){
406         try {
407                 
408                 string thisOutputDir = outputDir;
409                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
410         map<string, string> variables; 
411         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
412         variables["[extension]"] = m->getExtension(groupfile);
413                 string outputFileName = getOutputFileName("group", variables);
414                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
415                 
416                 ofstream out;
417                 m->openOutputFile(outputFileName, out);
418                 
419                 //read groupfile
420                 GroupMap groupMap(groupfile);
421                 groupMap.readMap();
422                 
423                 //fill Groups - checks for "all" and for any typo groups
424                 SharedUtil* util = new SharedUtil();
425                 vector<string> nameGroups = groupMap.getNamesOfGroups();
426                 util->setGroups(Groups, nameGroups);
427                 delete util;
428                 
429                 vector<string> namesOfSeqs = groupMap.getNamesSeqs();
430                 bool error = false;
431                 
432                 for (int i = 0; i < namesOfSeqs.size(); i++) {
433                         
434                         if (m->control_pressed) { break; }
435                         
436                         string thisGroup = groupMap.getGroup(namesOfSeqs[i]);
437                         
438                         //are you in a group the user wants
439                         if (m->inUsersGroups(thisGroup, Groups)) {
440                                 string thisGrouping = designMap->getGroup(thisGroup);
441                                 
442                                 if (thisGrouping == "not found") { m->mothurOut("[ERROR]: " + namesOfSeqs[i] + " is from group " + thisGroup + " which is not in your design file, please correct."); m->mothurOutEndLine();  error = true; }
443                                 else {
444                                         out << namesOfSeqs[i] << '\t' << thisGrouping << endl;
445                                 }
446                         }
447                 }
448                 
449                 if (error) { m->control_pressed = true; }
450
451                 out.close();
452                 
453                 return 0;
454                 
455         }
456         catch(exception& e) {
457                 m->errorOut(e, "MergeGroupsCommand", "processGroupFile");
458                 exit(1);
459         }
460 }
461 //**********************************************************************************************************************
462
463
464