]> git.donarmstrong.com Git - mothur.git/blob - normalizesharedcommand.cpp
added pipeline commands which involved change to command factory and command class...
[mothur.git] / normalizesharedcommand.cpp
1 /*
2  *  normalizesharedcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/15/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "normalizesharedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> NormalizeSharedCommand::getValidParameters(){    
14         try {
15                 string Array[] =  {"groups","label","scale","outputdir","inputdir","norm"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "NormalizeSharedCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 NormalizeSharedCommand::NormalizeSharedCommand(){       
26         try {
27                 //initialize outputTypes
28                 vector<string> tempOutNames;
29                 outputTypes["shared"] = tempOutNames;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 vector<string> NormalizeSharedCommand::getRequiredParameters(){ 
38         try {
39                 vector<string> myArray;
40                 return myArray;
41         }
42         catch(exception& e) {
43                 m->errorOut(e, "NormalizeSharedCommand", "getRequiredParameters");
44                 exit(1);
45         }
46 }
47 //**********************************************************************************************************************
48 vector<string> NormalizeSharedCommand::getRequiredFiles(){      
49         try {
50                 string Array[] =  {"shared"};
51                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
52                 return myArray;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "NormalizeSharedCommand", "getRequiredFiles");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60
61 NormalizeSharedCommand::NormalizeSharedCommand(string option) {
62         try {
63                 globaldata = GlobalData::getInstance();
64                 abort = false;
65                 allLines = 1;
66                 labels.clear();
67                 
68                 //allow user to run help
69                 if(option == "help") { help(); abort = true; }
70                 
71                 else {
72                         //valid paramters for this command
73                         string AlignArray[] =  {"groups","label","scale","outputdir","inputdir","norm"};
74                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
75                         
76                         OptionParser parser(option);
77                         map<string,string> parameters = parser.getParameters();
78                         
79                         ValidParameters validParameter;
80                         
81                         //check to make sure all parameters are valid for command
82                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
83                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
84                         }
85                         
86                         //initialize outputTypes
87                         vector<string> tempOutNames;
88                         outputTypes["shared"] = tempOutNames;
89                         
90                         //if the user changes the output directory command factory will send this info to us in the output parameter 
91                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
92                                 outputDir = ""; 
93                                 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it       
94                         }
95                         
96                         //make sure the user has already run the read.otu command
97                         if ((globaldata->getSharedFile() == "")) {
98                                  m->mothurOut("You must read a list and a group, or a shared file before you can use the normalize.shared command."); m->mothurOutEndLine(); abort = true; 
99                         }
100
101                         //check for optional parameter and set defaults
102                         // ...at some point should added some additional type checking...
103                         label = validParameter.validFile(parameters, "label", false);                   
104                         if (label == "not found") { label = ""; }
105                         else { 
106                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
107                                 else { allLines = 1;  }
108                         }
109                         
110                         //if the user has not specified any labels use the ones from read.otu
111                         if (label == "") {  
112                                 allLines = globaldata->allLines; 
113                                 labels = globaldata->labels; 
114                         }
115                         
116                         groups = validParameter.validFile(parameters, "groups", false);                 
117                         if (groups == "not found") { groups = ""; pickedGroups = false; }
118                         else { 
119                                 pickedGroups = true;
120                                 m->splitAtDash(groups, Groups);
121                                 globaldata->Groups = Groups;
122                         }
123                         
124                         method = validParameter.validFile(parameters, "method", false);                         if (method == "not found") { method = "totalgroup"; }
125                         if (method != "totalgroup") {  m->mothurOut(method + " is not a valid scaling option for the normalize.shared command. The only choice is totalgroup. We hope to add more ways to normalize in the future, suggestions are welcome!"); m->mothurOutEndLine(); abort = true; }
126                 
127                         string temp = validParameter.validFile(parameters, "norm", false);                              
128                         if (temp == "not found") {  
129                                 norm = 0;  //once you have read, set norm to smallest group number
130                         }else { 
131                                 convert(temp, norm);
132                                 if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; }
133                         }
134                 }
135
136         }
137         catch(exception& e) {
138                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
139                 exit(1);
140         }
141 }
142
143 //**********************************************************************************************************************
144
145 void NormalizeSharedCommand::help(){
146         try {
147                 m->mothurOut("The normalize.shared command can only be executed after a successful read.otu command of a list and group or shared file.\n");
148                 m->mothurOut("The normalize.shared command parameters are groups, method, norm and label.  No parameters are required.\n");
149                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
150                 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
151                 m->mothurOut("The method parameter allows you to select what method you would like to use to normalize. The only choice is totalgroup. We hope to add more ways to normalize in the future, suggestions are welcome!\n");
152                 m->mothurOut("The norm parameter allows you to number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n");
153                 m->mothurOut("The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n");
154                 m->mothurOut("Example normalize.shared(groups=A-B-C, scale=averagegroup).\n");
155                 m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
156                 m->mothurOut("The normalize.shared command outputs a .norm.shared file.\n");
157                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
158
159         }
160         catch(exception& e) {
161                 m->errorOut(e, "NormalizeSharedCommand", "help");
162                 exit(1);
163         }
164 }
165
166 //**********************************************************************************************************************
167
168 NormalizeSharedCommand::~NormalizeSharedCommand(){}
169
170 //**********************************************************************************************************************
171
172 int NormalizeSharedCommand::execute(){
173         try {
174         
175                 if (abort == true) { return 0; }
176                 
177                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "norm.shared";
178                 ofstream out;
179                 m->openOutputFile(outputFileName, out);
180                 
181                 read = new ReadOTUFile(globaldata->inputFileName);      
182                 read->read(&*globaldata); 
183                 input = globaldata->ginput;
184                 lookup = input->getSharedRAbundVectors();
185                 string lastLabel = lookup[0]->getLabel();
186                 
187                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
188                 set<string> processedLabels;
189                 set<string> userLabels = labels;
190                 
191                 //set norm to smallest group number
192                 if (norm == 0) { 
193                         norm = lookup[0]->getNumSeqs();
194                         for (int i = 1; i < lookup.size(); i++) {
195                                 if (lookup[i]->getNumSeqs() < norm) { norm = lookup[i]->getNumSeqs();  }
196                         }  
197                 }
198                 
199                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
200
201                 //as long as you are not at the end of the file or done wih the lines you want
202                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
203                         
204                         if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } globaldata->Groups.clear(); delete read;  out.close(); remove(outputFileName.c_str()); return 0; }
205         
206                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
207
208                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
209                                 normalize(lookup, out);
210                                 
211                                 processedLabels.insert(lookup[0]->getLabel());
212                                 userLabels.erase(lookup[0]->getLabel());
213                         }
214                         
215                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
216                                 string saveLabel = lookup[0]->getLabel();
217                         
218                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
219                                 lookup = input->getSharedRAbundVectors(lastLabel);
220                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
221                                 
222                                 normalize(lookup, out);
223                                 
224                                 processedLabels.insert(lookup[0]->getLabel());
225                                 userLabels.erase(lookup[0]->getLabel());
226                                 
227                                 //restore real lastlabel to save below
228                                 lookup[0]->setLabel(saveLabel);
229                         }
230                         
231                         lastLabel = lookup[0]->getLabel();
232                         //prevent memory leak
233                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
234                         
235                         if (m->control_pressed) {  outputTypes.clear(); globaldata->Groups.clear(); delete read;  out.close(); remove(outputFileName.c_str()); return 0; }
236
237                         //get next line to process
238                         lookup = input->getSharedRAbundVectors();                               
239                 }
240                 
241                 if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); delete read;  out.close(); remove(outputFileName.c_str());  return 0; }
242
243                 //output error messages about any remaining user labels
244                 set<string>::iterator it;
245                 bool needToRun = false;
246                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
247                         m->mothurOut("Your file does not include the label " + *it); 
248                         if (processedLabels.count(lastLabel) != 1) {
249                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
250                                 needToRun = true;
251                         }else {
252                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
253                         }
254                 }
255         
256                 //run last label if you need to
257                 if (needToRun == true)  {
258                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
259                         lookup = input->getSharedRAbundVectors(lastLabel);
260                         
261                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
262                         
263                         normalize(lookup, out);
264                         
265                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
266                 }
267         
268                 //reset groups parameter
269                 globaldata->Groups.clear();  
270                 delete input; globaldata->ginput = NULL;
271                 delete read;
272                 out.close();
273                 
274                 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;}
275                 
276                 m->mothurOutEndLine();
277                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
278                 m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
279                 m->mothurOutEndLine();
280                 
281                 return 0;
282         }
283         catch(exception& e) {
284                 m->errorOut(e, "NormalizeSharedCommand", "execute");
285                 exit(1);
286         }
287 }
288 //**********************************************************************************************************************
289
290 int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
291         try {
292                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
293
294                 
295                  for (int i = 0; i < thisLookUp.size(); i++) {
296                         //cout << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t' << thisLookUp[i]->getNumBins() << '\t';
297                         
298                         for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
299                         
300                                 if (m->control_pressed) { return 0; }
301                         
302                                 int abund = thisLookUp[i]->getAbundance(j);
303                                 
304                                 float relabund = 0.0;
305                                 int finalNorm = 0;
306                                 
307                                 if (method == "totalgroup") { 
308                                         relabund = abund / (float) thisLookUp[i]->getNumSeqs();
309                                         float newNorm = relabund * norm;
310                                         //round to nearest int
311                                         finalNorm = (int) floor((newNorm + 0.5));
312                                         
313                                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
314                                 
315                                 //cout << finalNorm << '\t';
316                                 thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
317                         }
318                         //cout << endl;
319                  }
320                 
321                  eliminateZeroOTUS(thisLookUp);
322                  
323                   for (int i = 0; i < thisLookUp.size(); i++) {
324                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t' << thisLookUp[i]->getNumBins() << '\t';
325                         thisLookUp[i]->print(out);
326                  }
327                 
328                  return 0;
329         }
330         catch(exception& e) {
331                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
332                 exit(1);
333         }
334 }
335 //**********************************************************************************************************************
336 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
337         try {
338                 
339                 vector<SharedRAbundVector*> newLookup;
340                 for (int i = 0; i < thislookup.size(); i++) {
341                         SharedRAbundVector* temp = new SharedRAbundVector();
342                         temp->setLabel(thislookup[i]->getLabel());
343                         temp->setGroup(thislookup[i]->getGroup());
344                         newLookup.push_back(temp);
345                 }
346                 
347                 //for each bin
348                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
349                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
350                 
351                         //look at each sharedRabund and make sure they are not all zero
352                         bool allZero = true;
353                         for (int j = 0; j < thislookup.size(); j++) {
354                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
355                         }
356                         
357                         //if they are not all zero add this bin
358                         if (!allZero) {
359                                 for (int j = 0; j < thislookup.size(); j++) {
360                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
361                                 }
362                         }
363                 }
364
365                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
366
367                 thislookup = newLookup;
368                 
369                 return 0;
370  
371         }
372         catch(exception& e) {
373                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
374                 exit(1);
375         }
376 }
377 //**********************************************************************************************************************