]> git.donarmstrong.com Git - mothur.git/blob - rarefactsharedcommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / rarefactsharedcommand.cpp
1 /*
2  *  rarefactsharedcommand.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 1/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "rarefactsharedcommand.h"
11 #include "sharedsobs.h"
12 #include "sharednseqs.h"
13 #include "sharedutilities.h"
14
15 //**********************************************************************************************************************
16 vector<string> RareFactSharedCommand::setParameters(){  
17         try {
18                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
19         CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pdesign);
20                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
21                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
22                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
23                 CommandParameter pcalc("calc", "Multiple", "sharednseqs-sharedobserved", "sharedobserved", "", "", "",true,false); parameters.push_back(pcalc);
24                 CommandParameter pjumble("jumble", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pjumble);
25                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
26         CommandParameter psets("sets", "String", "", "", "", "", "",false,false); parameters.push_back(psets);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29                 
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "RareFactSharedCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string RareFactSharedCommand::getHelpString(){  
41         try {
42                 string helpString = "";
43                 ValidCalculators validCalculator;
44                 helpString += "The collect.shared command parameters are shared, label, freq, calc and groups.  shared is required if there is no current sharedfile. \n";
45                 helpString += "The rarefaction.shared command parameters are shared, design, label, iters, groups, sets, jumble and calc.  shared is required if there is no current sharedfile. \n";
46         helpString += "The design parameter allows you to assign your groups to sets. If provided mothur will run rarefaction.shared on a per set basis. \n";
47         helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
48                 helpString += "The rarefaction command should be in the following format: \n";
49                 helpString += "rarefaction.shared(label=yourLabel, iters=yourIters, calc=yourEstimators, jumble=yourJumble, groups=yourGroups).\n";
50                 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
51                 helpString += "Example rarefaction.shared(label=unique-0.01-0.03,  iters=10000, groups=B-C, jumble=T, calc=sharedobserved).\n";
52                 helpString += "The default values for iters is 1000, freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness.\n";
53                 helpString += "The default value for groups is all the groups in your groupfile, and jumble is true.\n";
54                 helpString += validCalculator.printCalc("sharedrarefaction");
55                 helpString += "The label parameter is used to analyze specific labels in your input.\n";
56                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
57                 helpString += "Note: No spaces between parameter labels (i.e. freq), '=' and parameters (i.e.yourFreq).\n";
58                 return helpString;
59         }
60         catch(exception& e) {
61                 m->errorOut(e, "RareFactSharedCommand", "getHelpString");
62                 exit(1);
63         }
64 }
65
66 //**********************************************************************************************************************
67 RareFactSharedCommand::RareFactSharedCommand(){ 
68         try {
69                 abort = true; calledHelp = true; 
70                 setParameters();
71                 vector<string> tempOutNames;
72                 outputTypes["sharedrarefaction"] = tempOutNames;
73                 outputTypes["sharedr_nseqs"] = tempOutNames;
74         }
75         catch(exception& e) {
76                 m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand");
77                 exit(1);
78         }
79 }
80 //**********************************************************************************************************************
81
82 RareFactSharedCommand::RareFactSharedCommand(string option)  {
83         try {
84                 abort = false; calledHelp = false;   
85                 allLines = 1;
86                                 
87                 //allow user to run help
88                 if(option == "help") {  help(); abort = true; calledHelp = true; }
89                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
90                 
91                 else {
92                         vector<string> myArray = setParameters();
93                         
94                         OptionParser parser(option);
95                         map<string,string> parameters = parser.getParameters();
96                         map<string,string>::iterator it;
97                         
98                         ValidParameters validParameter;
99                         
100                         //check to make sure all parameters are valid for command
101                         for (it = parameters.begin(); it != parameters.end(); it++) { 
102                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
103                         }
104                         
105                         //initialize outputTypes
106                         vector<string> tempOutNames;
107                         outputTypes["sharedrarefaction"] = tempOutNames;
108                         outputTypes["sharedr_nseqs"] = tempOutNames;
109                         
110                         //if the user changes the input directory command factory will send this info to us in the output parameter 
111                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
112                         if (inputDir == "not found"){   inputDir = "";          }
113                         else {
114                                 string path;
115                                 it = parameters.find("shared");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
121                                 }
122                 
123                 it = parameters.find("design");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
129                                 }
130
131                         }
132                         
133                         //get shared file
134                         sharedfile = validParameter.validFile(parameters, "shared", true);
135                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
136                         else if (sharedfile == "not found") { 
137                                 //if there is a current shared file, use it
138                                 sharedfile = m->getSharedFile(); 
139                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
140                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
141                         }else { m->setSharedFile(sharedfile); }
142             
143             designfile = validParameter.validFile(parameters, "design", true);
144                         if (designfile == "not open") { abort = true; designfile = ""; }
145                         else if (designfile == "not found") {   designfile = "";        }
146                         else { m->setDesignFile(designfile); }
147                         
148                         
149                         //if the user changes the output directory command factory will send this info to us in the output parameter 
150                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
151                         
152                         
153                         //check for optional parameter and set defaults
154                         // ...at some point should added some additional type checking...
155                         label = validParameter.validFile(parameters, "label", false);                   
156                         if (label == "not found") { label = ""; }
157                         else { 
158                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
159                                 else { allLines = 1;  }
160                         }
161                         
162                                 
163                         calc = validParameter.validFile(parameters, "calc", false);                     
164                         if (calc == "not found") { calc = "sharedobserved";  }
165                         else { 
166                                  if (calc == "default")  {  calc = "sharedobserved";  }
167                         }
168                         m->splitAtDash(calc, Estimators);
169                         if (m->inUsersGroups("citation", Estimators)) { 
170                                 ValidCalculators validCalc; validCalc.printCitations(Estimators); 
171                                 //remove citation from list of calcs
172                                 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
173                         }
174                         
175                         groups = validParameter.validFile(parameters, "groups", false);                 
176                         if (groups == "not found") { groups = ""; }
177                         else { 
178                                 m->splitAtDash(groups, Groups);
179                         }
180                         m->setGroups(Groups);
181             
182             string sets = validParameter.validFile(parameters, "sets", false);                  
183                         if (sets == "not found") { sets = ""; }
184                         else { 
185                                 m->splitAtDash(sets, Sets);
186                         }
187                         
188                         string temp;
189                         temp = validParameter.validFile(parameters, "freq", false);                     if (temp == "not found") { temp = "100"; }
190                         m->mothurConvert(temp, freq); 
191                         
192                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
193                         m->mothurConvert(temp, nIters); 
194                         
195                         temp = validParameter.validFile(parameters, "jumble", false);                   if (temp == "not found") { temp = "T"; }
196                         if (m->isTrue(temp)) { jumble = true; }
197                         else { jumble = false; }
198                         m->jumble = jumble;
199                                 
200                 }
201
202         }
203         catch(exception& e) {
204                 m->errorOut(e, "RareFactSharedCommand", "RareFactSharedCommand");
205                 exit(1);
206         }
207 }
208 //**********************************************************************************************************************
209
210 int RareFactSharedCommand::execute(){
211         try {
212         
213                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
214         
215         GroupMap designMap;
216         if (designfile == "") { //fake out designMap to run with process
217             process(designMap, "");
218         }else {
219             designMap.readDesignMap(designfile);
220             
221             //fill Sets - checks for "all" and for any typo groups
222                         SharedUtil util;
223                         vector<string> nameSets = designMap.getNamesOfGroups();
224                         util.setGroups(Sets, nameSets);
225             
226             for (int i = 0; i < Sets.size(); i++) {
227                 process(designMap, Sets[i]);
228             }
229         }
230                     
231                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
232                 
233                 m->mothurOutEndLine();
234                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
235                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
236                 m->mothurOutEndLine();
237
238                 return 0;
239         }
240         catch(exception& e) {
241                 m->errorOut(e, "RareFactSharedCommand", "execute");
242                 exit(1);
243         }
244 }
245 //**********************************************************************************************************************
246
247 int RareFactSharedCommand::process(GroupMap& designMap, string thisSet){
248         try {
249         Rarefact* rCurve;
250         vector<Display*> rDisplays;
251         
252         InputData input(sharedfile, "sharedfile");
253                 lookup = input.getSharedRAbundVectors();
254         if (lookup.size() < 2) { 
255                         m->mothurOut("I cannot run the command without at least 2 valid groups."); 
256             for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
257             return 0;
258         }
259         
260         string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(sharedfile));
261         
262         vector<string> newGroups = m->getGroups();
263         if (thisSet != "") {  //make groups only filled with groups from this set so that's all inputdata will read
264             vector<string> thisSets; thisSets.push_back(thisSet);
265             newGroups = designMap.getNamesSeqs(thisSets);
266             fileNameRoot += thisSet + ".";
267         }
268         
269         vector<SharedRAbundVector*> subset;
270         if (thisSet == "") {  subset.clear(); subset = lookup;  }
271         else {//fill subset with this sets groups
272             subset.clear();
273             for (int i = 0; i < lookup.size(); i++) {
274                 if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
275                     subset.push_back(lookup[i]);
276                 }
277             }
278         }
279                 
280                 ValidCalculators validCalculator;
281                 for (int i=0; i<Estimators.size(); i++) {
282                         if (validCalculator.isValidCalculator("sharedrarefaction", Estimators[i]) == true) { 
283                                 if (Estimators[i] == "sharedobserved") { 
284                                         rDisplays.push_back(new RareDisplay(new SharedSobs(), new SharedThreeColumnFile(fileNameRoot+"shared.rarefaction", "")));
285                                         outputNames.push_back(fileNameRoot+"shared.rarefaction"); outputTypes["sharedrarefaction"].push_back(fileNameRoot+"shared.rarefaction");
286                                 }else if (Estimators[i] == "sharednseqs") { 
287                                         rDisplays.push_back(new RareDisplay(new SharedNSeqs(), new SharedThreeColumnFile(fileNameRoot+"shared.r_nseqs", "")));
288                                         outputNames.push_back(fileNameRoot+"shared.r_nseqs"); outputTypes["sharedr_nseqs"].push_back(fileNameRoot+"shared.r_nseqs");
289                                 }
290                         }
291                 }
292                 
293                 //if the users entered no valid calculators don't execute command
294                 if (rDisplays.size() == 0) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
295                 
296                 if (m->control_pressed) { 
297                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
298                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
299                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
300                         return 0;
301                 }
302         
303         
304                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
305         string lastLabel = subset[0]->getLabel();
306                 set<string> processedLabels;
307                 set<string> userLabels = labels;
308         
309                 //as long as you are not at the end of the file or done wih the lines you want
310                 while((subset[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
311                         if (m->control_pressed) { 
312                                 for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
313                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
314                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
315                                 return 0;
316                         }
317                         
318                         if(allLines == 1 || labels.count(subset[0]->getLabel()) == 1){
319                                 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
320                                 rCurve = new Rarefact(subset, rDisplays);
321                                 rCurve->getSharedCurve(freq, nIters);
322                                 delete rCurve;
323                 
324                                 processedLabels.insert(subset[0]->getLabel());
325                                 userLabels.erase(subset[0]->getLabel());
326                         }
327                         
328                         if ((m->anyLabelsToProcess(subset[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
329                 string saveLabel = subset[0]->getLabel();
330                 
331                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
332                 lookup = input.getSharedRAbundVectors(lastLabel);
333                 
334                 if (thisSet == "") {  subset.clear(); subset = lookup;  }
335                 else {//fill subset with this sets groups
336                     subset.clear();
337                     for (int i = 0; i < lookup.size(); i++) {
338                         if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
339                             subset.push_back(lookup[i]);
340                         }
341                     }
342                 }
343
344                 m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
345                 rCurve = new Rarefact(subset, rDisplays);
346                 rCurve->getSharedCurve(freq, nIters);
347                 delete rCurve;
348                 
349                 processedLabels.insert(subset[0]->getLabel());
350                 userLabels.erase(subset[0]->getLabel());
351                 
352                 //restore real lastlabel to save below
353                 subset[0]->setLabel(saveLabel);
354                         }
355             
356                         
357                         lastLabel = subset[0]->getLabel();
358                         
359                         //get next line to process
360                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
361                         lookup = input.getSharedRAbundVectors();
362             
363             if (lookup[0] != NULL) {
364                 if (thisSet == "") {  subset.clear(); subset = lookup;  }
365                 else {//fill subset with this sets groups
366                     subset.clear();
367                     for (int i = 0; i < lookup.size(); i++) {
368                         if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
369                             subset.push_back(lookup[i]);
370                         }
371                     }
372                 }
373             }else {  subset.clear(); subset.push_back(NULL); }
374
375                 }
376                 
377                 if (m->control_pressed) { 
378                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
379                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
380                         return 0;
381                 }
382                 
383                 //output error messages about any remaining user labels
384                 set<string>::iterator it;
385                 bool needToRun = false;
386                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
387                         m->mothurOut("Your file does not include the label " + *it); 
388                         if (processedLabels.count(lastLabel) != 1) {
389                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
390                                 needToRun = true;
391                         }else {
392                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
393                         }
394                 }
395                 
396                 if (m->control_pressed) { 
397                         for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }
398                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        }
399                         return 0;
400                 }
401                 
402                 //run last label if you need to
403                 if (needToRun == true)  {
404                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i]; }  } 
405                         lookup = input.getSharedRAbundVectors(lastLabel);
406             
407             if (thisSet == "") {  subset.clear(); subset = lookup;  }
408             else {//fill subset with this sets groups
409                 subset.clear();
410                 for (int i = 0; i < lookup.size(); i++) {
411                     if (m->inUsersGroups(lookup[i]->getGroup(), newGroups)) {
412                         subset.push_back(lookup[i]);
413                     }
414                 }
415             }
416             
417                         m->mothurOut(subset[0]->getLabel() + '\t' + thisSet); m->mothurOutEndLine();
418                         rCurve = new Rarefact(subset, rDisplays);
419                         rCurve->getSharedCurve(freq, nIters);
420                         delete rCurve;
421                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
422                 }
423                 
424                 for(int i=0;i<rDisplays.size();i++){    delete rDisplays[i];    }       
425
426         
427         return 0;
428     }
429         catch(exception& e) {
430                 m->errorOut(e, "RareFactSharedCommand", "process");
431                 exit(1);
432         }
433 }
434 //**********************************************************************************************************************