]> git.donarmstrong.com Git - mothur.git/blob - mergegroupscommand.cpp
Revert to previous commit
[mothur.git] / mergegroupscommand.cpp
1 /*
2  *  mergegroupscommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 1/24/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mergegroupscommand.h"
11 #include "sharedutilities.h"
12
13 //**********************************************************************************************************************
14 vector<string> MergeGroupsCommand::setParameters(){     
15         try {
16                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
17                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pgroup);
18                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
19                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
20                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
23                 
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "MergeGroupsCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string MergeGroupsCommand::getHelpString(){     
35         try {
36                 string helpString = "";
37                 helpString += "The merge.groups command reads a shared or group file and a design file and merges the groups that are in the same grouping in the design file.\n";
38                 helpString += "The merge.groups command outputs a .shared file. \n";
39                 helpString += "The merge.groups command parameters are shared, group, groups, label and design.  The design parameter is required.\n";
40                 helpString += "The design parameter allows you to assign your groups to sets. It is required. \n";
41                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
42                 helpString += "The groups parameter allows you to specify which of the groups in your shared or group file you would like included. The group names are separated by dashes.\n";
43                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
44                 helpString += "The merge.groups command should be in the following format: merge.groups(design=yourDesignFile, shared=yourSharedFile).\n";
45                 helpString += "Example merge.groups(design=temp.design, groups=A-B-C, shared=temp.shared).\n";
46                 helpString += "The default value for groups is all the groups in your sharedfile, and all labels in your inputfile will be used.\n";
47                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
48                 return helpString;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "MergeGroupsCommand", "getHelpString");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 MergeGroupsCommand::MergeGroupsCommand(){       
57         try {
58                 abort = true; calledHelp = true; 
59                 setParameters();
60                 vector<string> tempOutNames;
61                 outputTypes["shared"] = tempOutNames;
62                 outputTypes["group"] = tempOutNames;
63         }
64         catch(exception& e) {
65                 m->errorOut(e, "MergeGroupsCommand", "MetaStatsCommand");
66                 exit(1);
67         }
68 }
69 //**********************************************************************************************************************
70
71 MergeGroupsCommand::MergeGroupsCommand(string option) {
72         try {
73                 abort = false; calledHelp = false;   
74                 allLines = 1;
75                 
76                 //allow user to run help
77                 if(option == "help") { help(); abort = true; calledHelp = true; }
78                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
79                 
80                 else {
81                         vector<string> myArray = setParameters();
82                         
83                         OptionParser parser(option);
84                         map<string,string> parameters = parser.getParameters();
85                         
86                         ValidParameters validParameter;
87                         
88                         //check to make sure all parameters are valid for command
89                         map<string,string>::iterator it;
90                         for (it = parameters.begin(); it != parameters.end(); it++) { 
91                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
92                         }
93                         
94                         //initialize outputTypes
95                         vector<string> tempOutNames;
96                         outputTypes["shared"] = tempOutNames;
97                         outputTypes["group"] = tempOutNames;
98                         
99                         //if the user changes the output directory command factory will send this info to us in the output parameter 
100                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
101                         
102                         //if the user changes the input directory command factory will send this info to us in the output parameter 
103                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
104                         if (inputDir == "not found"){   inputDir = "";          }
105                         else {
106                                 string path;
107                                 it = parameters.find("design");
108                                 //user has given a template file
109                                 if(it != parameters.end()){ 
110                                         path = m->hasPath(it->second);
111                                         //if the user has not given a path then, add inputdir. else leave path alone.
112                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
113                                 }
114                                 
115                                 it = parameters.find("shared");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
121                                 }
122                                 
123                                 it = parameters.find("group");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
129                                 }
130                                 
131                         }
132                         
133                         //check for required parameters
134                         designfile = validParameter.validFile(parameters, "design", true);
135                         if (designfile == "not open") { abort = true; }
136                         else if (designfile == "not found") {                           
137                                 //if there is a current shared file, use it
138                                 designfile = m->getDesignFile(); 
139                                 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
140                                 else {  m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
141                         }else { m->setDesignFile(designfile); } 
142                         
143                         sharedfile = validParameter.validFile(parameters, "shared", true);
144                         if (sharedfile == "not open") { abort = true; sharedfile = ""; }
145                         else if (sharedfile == "not found") {  sharedfile = ""; }
146                         else { m->setSharedFile(sharedfile); }  
147                         
148                         groupfile = validParameter.validFile(parameters, "group", true);
149                         if (groupfile == "not open") { abort = true; groupfile = ""; }
150                         else if (groupfile == "not found") {  groupfile = ""; }
151                         else { m->setGroupFile(groupfile); }    
152                         
153                         //check for optional parameter and set defaults
154                         // ...at some point should added some additional type checking...
155                         label = validParameter.validFile(parameters, "label", false);                   
156                         if (label == "not found") { label = ""; }
157                         else { 
158                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
159                                 else { allLines = 1;  }
160                         }
161                         
162                         groups = validParameter.validFile(parameters, "groups", false);                 
163                         if (groups == "not found") { groups = "all";  }
164                         m->splitAtDash(groups, Groups);
165                         m->setGroups(Groups);
166                         
167                         if ((sharedfile == "") && (groupfile == "")) { 
168                                 //give priority to group, then shared
169                                 groupfile = m->getGroupFile(); 
170                                 if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
171                                 else { 
172                                         sharedfile = m->getSharedFile(); 
173                                         if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
174                                         else { 
175                                                 m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
176                                         }
177                                 }
178                         }
179                 }
180                 
181         }
182         catch(exception& e) {
183                 m->errorOut(e, "MergeGroupsCommand", "MergeGroupsCommand");
184                 exit(1);
185         }
186 }
187 //**********************************************************************************************************************
188
189 int MergeGroupsCommand::execute(){
190         try {
191                 
192                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
193         
194                 designMap = new GroupMap(designfile);
195                 designMap->readDesignMap();
196                 
197                 if (groupfile != "") { processGroupFile(designMap); }
198                 if (sharedfile != "") { processSharedFile(designMap); }
199
200                 //reset groups parameter
201                 m->clearGroups();  
202                 delete designMap;
203                 
204                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0;}
205                 
206                 
207                 //set shared file as new current sharedfile
208                 string current = "";
209                 itTypes = outputTypes.find("shared");
210                 if (itTypes != outputTypes.end()) {
211                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
212                 }
213                 
214                 itTypes = outputTypes.find("group");
215                 if (itTypes != outputTypes.end()) {
216                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
217                 }
218                 
219                 m->mothurOutEndLine();
220                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
221                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
222                 m->mothurOutEndLine();
223                 
224                 return 0;
225         }
226         catch(exception& e) {
227                 m->errorOut(e, "MergeGroupsCommand", "execute");
228                 exit(1);
229         }
230 }
231 //**********************************************************************************************************************
232
233 int MergeGroupsCommand::process(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
234         try {
235                 
236                 map<string, SharedRAbundVector> merged;
237                 map<string, SharedRAbundVector>::iterator it;
238                 
239                 for (int i = 0; i < thisLookUp.size(); i++) {
240                         
241                         if (m->control_pressed) { return 0; }
242                         
243                         //what grouping does this group belong to
244                         string grouping = designMap->getGroup(thisLookUp[i]->getGroup());
245                         if (grouping == "not found") { m->mothurOut("[ERROR]: " + thisLookUp[i]->getGroup() + " is not in your design file. Ignoring!"); m->mothurOutEndLine(); grouping = "NOTFOUND"; }
246                         
247                         else {
248                                 //do we already have a member of this grouping?
249                                 it = merged.find(grouping);
250                                 
251                                 if (it == merged.end()) { //nope, so create it
252                                         merged[grouping] = *thisLookUp[i];
253                                         merged[grouping].setGroup(grouping);
254                                 }else { //yes, merge it
255                                         
256                                         for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
257                                                 int abund = (it->second).getAbundance(j);
258                                                 abund += thisLookUp[i]->getAbundance(j);
259                                                 
260                                                 (it->second).set(j, abund, grouping);
261                                         }
262                                 }
263                         }
264                 }
265                 
266                 //print new file
267                 for (it = merged.begin(); it != merged.end(); it++) {
268                         out << (it->second).getLabel() << '\t' << it->first << '\t';
269                         (it->second).print(out);
270                 }
271                 
272                 return 0;
273                 
274         }
275         catch(exception& e) {
276                 m->errorOut(e, "MergeGroupsCommand", "process");
277                 exit(1);
278         }
279 }
280 //**********************************************************************************************************************
281
282 int MergeGroupsCommand::processSharedFile(GroupMap*& designMap){
283         try {
284                 
285                 string thisOutputDir = outputDir;
286                 if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
287                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "merge" +  m->getExtension(sharedfile);
288                 outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
289                 
290                 ofstream out;
291                 m->openOutputFile(outputFileName, out);
292                 
293                 InputData input(sharedfile, "sharedfile");
294                 lookup = input.getSharedRAbundVectors();
295                 string lastLabel = lookup[0]->getLabel();
296                 
297                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
298                 set<string> processedLabels;
299                 set<string> userLabels = labels;
300                 
301                 //as long as you are not at the end of the file or done wih the lines you want
302                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
303                         
304                         if (m->control_pressed) {  out.close(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->clearGroups();  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]); } return 0; }
305                         
306                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
307                                 
308                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
309                                 
310                                 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
311                                 process(lookup, out);
312                                 
313                                 processedLabels.insert(lookup[0]->getLabel());
314                                 userLabels.erase(lookup[0]->getLabel());
315                         }
316                         
317                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
318                                 string saveLabel = lookup[0]->getLabel();
319                                 
320                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
321                                 lookup = input.getSharedRAbundVectors(lastLabel);
322                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
323                                 
324                                 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
325                                 process(lookup, out);
326                                 
327                                 processedLabels.insert(lookup[0]->getLabel());
328                                 userLabels.erase(lookup[0]->getLabel());
329                                 
330                                 //restore real lastlabel to save below
331                                 lookup[0]->setLabel(saveLabel);
332                         }
333                         
334                         lastLabel = lookup[0]->getLabel();
335                         //prevent memory leak
336                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
337                         
338                         if (m->control_pressed) {  out.close(); m->clearGroups();   delete designMap;  for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); } return 0; }
339                         
340                         //get next line to process
341                         lookup = input.getSharedRAbundVectors();                                
342                 }
343                 
344                 if (m->control_pressed) { out.close(); m->clearGroups();  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]); }  return 0; }
345                 
346                 //output error messages about any remaining user labels
347                 set<string>::iterator it;
348                 bool needToRun = false;
349                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
350                         m->mothurOut("Your file does not include the label " + *it); 
351                         if (processedLabels.count(lastLabel) != 1) {
352                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
353                                 needToRun = true;
354                         }else {
355                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
356                         }
357                 }
358                 
359                 //run last label if you need to
360                 if (needToRun == true)  {
361                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
362                         lookup = input.getSharedRAbundVectors(lastLabel);
363                         
364                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
365                         
366                         if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
367                         process(lookup, out);
368                         
369                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
370                 }
371                 
372                 out.close();
373                 
374                                 
375                 return 0;
376                 
377         }
378         catch(exception& e) {
379                 m->errorOut(e, "MergeGroupsCommand", "processSharedFile");
380                 exit(1);
381         }
382 }
383 //**********************************************************************************************************************
384
385 int MergeGroupsCommand::processGroupFile(GroupMap*& designMap){
386         try {
387                 
388                 string thisOutputDir = outputDir;
389                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
390                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "merge" +  m->getExtension(groupfile);
391                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
392                 
393                 ofstream out;
394                 m->openOutputFile(outputFileName, out);
395                 
396                 //read groupfile
397                 GroupMap groupMap(groupfile);
398                 groupMap.readMap();
399                 
400                 //fill Groups - checks for "all" and for any typo groups
401                 SharedUtil* util = new SharedUtil();
402                 vector<string> nameGroups = groupMap.getNamesOfGroups();
403                 util->setGroups(Groups, nameGroups);
404                 delete util;
405                 
406                 vector<string> namesOfSeqs = groupMap.getNamesSeqs();
407                 bool error = false;
408                 
409                 for (int i = 0; i < namesOfSeqs.size(); i++) {
410                         
411                         if (m->control_pressed) { break; }
412                         
413                         string thisGroup = groupMap.getGroup(namesOfSeqs[i]);
414                         
415                         //are you in a group the user wants
416                         if (m->inUsersGroups(thisGroup, Groups)) {
417                                 string thisGrouping = designMap->getGroup(thisGroup);
418                                 
419                                 if (thisGrouping == "not found") { m->mothurOut("[ERROR]: " + namesOfSeqs[i] + " is from group " + thisGroup + " which is not in your design file, please correct."); m->mothurOutEndLine();  error = true; }
420                                 else {
421                                         out << namesOfSeqs[i] << '\t' << thisGrouping << endl;
422                                 }
423                         }
424                 }
425                 
426                 if (error) { m->control_pressed = true; }
427
428                 out.close();
429                 
430                 return 0;
431                 
432         }
433         catch(exception& e) {
434                 m->errorOut(e, "MergeGroupsCommand", "processGroupFile");
435                 exit(1);
436         }
437 }
438 //**********************************************************************************************************************
439
440
441