]> git.donarmstrong.com Git - mothur.git/blob - mergegroupscommand.cpp
fixed kendall method in corr.axes
[mothur.git] / mergegroupscommand.cpp
1 /*
2  *  mergegroupscommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 1/24/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mergegroupscommand.h"
11 #include "sharedutilities.h"
12
13 //**********************************************************************************************************************
14 vector<string> MergeGroupsCommand::getValidParameters(){        
15         try {
16                 string Array[] =  {"shared","label","outputdir","design","groups","processors","inputdir"};
17                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "MergeGroupsCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 MergeGroupsCommand::MergeGroupsCommand(){       
27         try {
28                 abort = true;
29                 //initialize outputTypes
30                 vector<string> tempOutNames;
31                 outputTypes["shared"] = tempOutNames;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "MergeGroupsCommand", "MetaStatsCommand");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 vector<string> MergeGroupsCommand::getRequiredParameters(){     
40         try {
41                 string Array[] =  {"design", "shared"};
42                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "MergeGroupsCommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 vector<string> MergeGroupsCommand::getRequiredFiles(){  
52         try {
53                 string Array[] =  {};
54                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
55                 return myArray;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "MergeGroupsCommand", "getRequiredFiles");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63
64 MergeGroupsCommand::MergeGroupsCommand(string option) {
65         try {
66                 globaldata = GlobalData::getInstance();
67                 abort = false;
68                 allLines = 1;
69                 labels.clear();
70                 
71                 //allow user to run help
72                 if(option == "help") { help(); abort = true; }
73                 
74                 else {
75                         //valid paramters for this command
76                         string AlignArray[] =  {"groups","label","outputdir","shared","design","processors","inputdir"};
77                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
78                         
79                         OptionParser parser(option);
80                         map<string,string> parameters = parser.getParameters();
81                         
82                         ValidParameters validParameter;
83                         
84                         //check to make sure all parameters are valid for command
85                         map<string,string>::iterator it;
86                         for (it = parameters.begin(); it != parameters.end(); it++) { 
87                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
88                         }
89                         
90                         //initialize outputTypes
91                         vector<string> tempOutNames;
92                         outputTypes["shared"] = tempOutNames;
93                         
94                         //if the user changes the output directory command factory will send this info to us in the output parameter 
95                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
96                         
97                         //if the user changes the input directory command factory will send this info to us in the output parameter 
98                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
99                         if (inputDir == "not found"){   inputDir = "";          }
100                         else {
101                                 string path;
102                                 it = parameters.find("design");
103                                 //user has given a template file
104                                 if(it != parameters.end()){ 
105                                         path = m->hasPath(it->second);
106                                         //if the user has not given a path then, add inputdir. else leave path alone.
107                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
108                                 }
109                                 
110                                 it = parameters.find("shared");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
116                                 }
117                         }
118                         
119                         //check for required parameters
120                         designfile = validParameter.validFile(parameters, "design", true);
121                         if (designfile == "not open") { abort = true; }
122                         else if (designfile == "not found") {  designfile = "";  m->mothurOut("You must provide a design file."); m->mothurOutEndLine(); abort = true; }        
123                         
124                         //make sure the user has already run the read.otu command
125                         sharedfile = validParameter.validFile(parameters, "shared", true);
126                         if (sharedfile == "not open") { abort = true; }
127                         else if (sharedfile == "not found") {  sharedfile = "";  m->mothurOut("You must provide a shared file."); m->mothurOutEndLine(); abort = true; }        
128                         
129                         //check for optional parameter and set defaults
130                         // ...at some point should added some additional type checking...
131                         label = validParameter.validFile(parameters, "label", false);                   
132                         if (label == "not found") { label = ""; }
133                         else { 
134                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
135                                 else { allLines = 1;  }
136                         }
137                         
138                         groups = validParameter.validFile(parameters, "groups", false);                 
139                         if (groups == "not found") { groups = "all";  }
140                         m->splitAtDash(groups, Groups);
141                         globaldata->Groups = Groups;
142                 }
143                 
144         }
145         catch(exception& e) {
146                 m->errorOut(e, "MergeGroupsCommand", "MergeGroupsCommand");
147                 exit(1);
148         }
149 }
150
151 //**********************************************************************************************************************
152
153 void MergeGroupsCommand::help(){
154         try {
155                 m->mothurOut("The merge.groups command reads a shared file and a design file and merges the groups in the shared file that are in the same grouping in the design file.\n");
156                 m->mothurOut("The merge.groups command outputs a .shared file. \n");
157                 m->mothurOut("The merge.groups command parameters are shared, groups, label and design.  The design and shared parameter are required.\n");
158                 m->mothurOut("The design parameter allows you to assign your groups to sets when you are running metastat. mothur will run all pairwise comparisons of the sets. It is required. \n");
159                 m->mothurOut("The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n");
160                 m->mothurOut("The groups parameter allows you to specify which of the groups in your shared you would like included. The group names are separated by dashes.\n");
161                 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
162                 m->mothurOut("The merge.groups command should be in the following format: merge.groups(design=yourDesignFile, shared=yourSharedFile).\n");
163                 m->mothurOut("Example merge.groups(design=temp.design, groups=A-B-C, shared=temp.shared).\n");
164                 m->mothurOut("The default value for groups is all the groups in your sharedfile, and all labels in your inputfile will be used.\n");
165                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
166                 
167         }
168         catch(exception& e) {
169                 m->errorOut(e, "MergeGroupsCommand", "help");
170                 exit(1);
171         }
172 }
173
174 //**********************************************************************************************************************
175
176 MergeGroupsCommand::~MergeGroupsCommand(){}
177
178 //**********************************************************************************************************************
179
180 int MergeGroupsCommand::execute(){
181         try {
182                 
183                 if (abort == true) { return 0; }
184                 
185                 if (outputDir == "") {  outputDir += m->hasPath(sharedfile);  }
186                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "merge" +  m->getExtension(sharedfile);
187                 outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
188                 
189                 ofstream out;
190                 m->openOutputFile(outputFileName, out);
191                 
192                 designMap = new GroupMap(designfile);
193                 designMap->readDesignMap();
194                 
195                 InputData input(sharedfile, "sharedfile");
196                 lookup = input.getSharedRAbundVectors();
197                 string lastLabel = lookup[0]->getLabel();
198                 
199                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
200                 set<string> processedLabels;
201                 set<string> userLabels = labels;
202                 
203                 //as long as you are not at the end of the file or done wih the lines you want
204                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
205                         
206                         if (m->control_pressed) {  out.close(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } globaldata->Groups.clear();  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0; }
207                         
208                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
209                                 
210                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
211                                 process(lookup, out);
212                                 
213                                 processedLabels.insert(lookup[0]->getLabel());
214                                 userLabels.erase(lookup[0]->getLabel());
215                         }
216                         
217                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
218                                 string saveLabel = lookup[0]->getLabel();
219                                 
220                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
221                                 lookup = input.getSharedRAbundVectors(lastLabel);
222                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
223                                 
224                                 process(lookup, out);
225                                 
226                                 processedLabels.insert(lookup[0]->getLabel());
227                                 userLabels.erase(lookup[0]->getLabel());
228                                 
229                                 //restore real lastlabel to save below
230                                 lookup[0]->setLabel(saveLabel);
231                         }
232                         
233                         lastLabel = lookup[0]->getLabel();
234                         //prevent memory leak
235                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
236                         
237                         if (m->control_pressed) {  out.close(); globaldata->Groups.clear();   delete designMap;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
238                         
239                         //get next line to process
240                         lookup = input.getSharedRAbundVectors();                                
241                 }
242                 
243                 if (m->control_pressed) { out.close(); globaldata->Groups.clear();  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str()); }  return 0; }
244                 
245                 //output error messages about any remaining user labels
246                 set<string>::iterator it;
247                 bool needToRun = false;
248                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
249                         m->mothurOut("Your file does not include the label " + *it); 
250                         if (processedLabels.count(lastLabel) != 1) {
251                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
252                                 needToRun = true;
253                         }else {
254                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
255                         }
256                 }
257                 
258                 //run last label if you need to
259                 if (needToRun == true)  {
260                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
261                         lookup = input.getSharedRAbundVectors(lastLabel);
262                         
263                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
264                         
265                         process(lookup, out);
266                         
267                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
268                 }
269                 
270                 out.close();
271                 //reset groups parameter
272                 globaldata->Groups.clear();  
273                 delete designMap;
274                 
275                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0;}
276                 
277                 m->mothurOutEndLine();
278                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
279                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
280                 m->mothurOutEndLine();
281                 
282                 return 0;
283         }
284         catch(exception& e) {
285                 m->errorOut(e, "MergeGroupsCommand", "execute");
286                 exit(1);
287         }
288 }
289 //**********************************************************************************************************************
290
291 int MergeGroupsCommand::process(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
292         try {
293                 
294                 map<string, SharedRAbundVector> merged;
295                 map<string, SharedRAbundVector>::iterator it;
296                 
297                 for (int i = 0; i < thisLookUp.size(); i++) {
298                         
299                         if (m->control_pressed) { return 0; }
300                         
301                         //what grouping does this group belong to
302                         string grouping = designMap->getGroup(thisLookUp[i]->getGroup());
303                         if (grouping == "not found") { m->mothurOut("[ERROR]: " + thisLookUp[i]->getGroup() + " is not in your design file. Ignoring!"); m->mothurOutEndLine(); grouping = "NOTFOUND"; }
304                         
305                         else {
306                                 //do we already have a member of this grouping?
307                                 it = merged.find(grouping);
308                                 
309                                 if (it == merged.end()) { //nope, so create it
310                                         merged[grouping] = *thisLookUp[i];
311                                         merged[grouping].setGroup(grouping);
312                                 }else { //yes, merge it
313                                         
314                                         for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
315                                                 int abund = (it->second).getAbundance(j);
316                                                 abund += thisLookUp[i]->getAbundance(j);
317                                                 
318                                                 (it->second).set(j, abund, grouping);
319                                         }
320                                 }
321                         }
322                 }
323                 
324                 //print new file
325                 for (it = merged.begin(); it != merged.end(); it++) {
326                         out << (it->second).getLabel() << '\t' << it->first << '\t';
327                         (it->second).print(out);
328                 }
329                 
330                 return 0;
331                 
332         }
333         catch(exception& e) {
334                 m->errorOut(e, "MergeGroupsCommand", "process");
335                 exit(1);
336         }
337 }
338 //**********************************************************************************************************************
339
340
341