]> git.donarmstrong.com Git - mothur.git/blob - normalizesharedcommand.cpp
fixes while testing
[mothur.git] / normalizesharedcommand.cpp
1 /*
2  *  normalizesharedcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/15/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "normalizesharedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> NormalizeSharedCommand::getValidParameters(){    
14         try {
15                 string Array[] =  {"groups","label","scale","outputdir","inputdir","norm"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "NormalizeSharedCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 NormalizeSharedCommand::NormalizeSharedCommand(){       
26         try {
27                 abort = true;
28                 //initialize outputTypes
29                 vector<string> tempOutNames;
30                 outputTypes["shared"] = tempOutNames;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 vector<string> NormalizeSharedCommand::getRequiredParameters(){ 
39         try {
40                 vector<string> myArray;
41                 return myArray;
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "NormalizeSharedCommand", "getRequiredParameters");
45                 exit(1);
46         }
47 }
48 //**********************************************************************************************************************
49 vector<string> NormalizeSharedCommand::getRequiredFiles(){      
50         try {
51                 string Array[] =  {"shared"};
52                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
53                 return myArray;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "NormalizeSharedCommand", "getRequiredFiles");
57                 exit(1);
58         }
59 }
60 //**********************************************************************************************************************
61
62 NormalizeSharedCommand::NormalizeSharedCommand(string option) {
63         try {
64                 globaldata = GlobalData::getInstance();
65                 abort = false;
66                 allLines = 1;
67                 labels.clear();
68                 
69                 //allow user to run help
70                 if(option == "help") { help(); abort = true; }
71                 
72                 else {
73                         //valid paramters for this command
74                         string AlignArray[] =  {"groups","label","scale","outputdir","inputdir","norm"};
75                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
76                         
77                         OptionParser parser(option);
78                         map<string,string> parameters = parser.getParameters();
79                         
80                         ValidParameters validParameter;
81                         
82                         //check to make sure all parameters are valid for command
83                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
84                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
85                         }
86                         
87                         //initialize outputTypes
88                         vector<string> tempOutNames;
89                         outputTypes["shared"] = tempOutNames;
90                         
91                         //if the user changes the output directory command factory will send this info to us in the output parameter 
92                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
93                                 outputDir = ""; 
94                                 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it       
95                         }
96                         
97                         //make sure the user has already run the read.otu command
98                         if ((globaldata->getSharedFile() == "")) {
99                                  m->mothurOut("You must read a list and a group, or a shared file before you can use the normalize.shared command."); m->mothurOutEndLine(); abort = true; 
100                         }
101
102                         //check for optional parameter and set defaults
103                         // ...at some point should added some additional type checking...
104                         label = validParameter.validFile(parameters, "label", false);                   
105                         if (label == "not found") { label = ""; }
106                         else { 
107                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
108                                 else { allLines = 1;  }
109                         }
110                         
111                         //if the user has not specified any labels use the ones from read.otu
112                         if (label == "") {  
113                                 allLines = globaldata->allLines; 
114                                 labels = globaldata->labels; 
115                         }
116                         
117                         groups = validParameter.validFile(parameters, "groups", false);                 
118                         if (groups == "not found") { groups = ""; pickedGroups = false; }
119                         else { 
120                                 pickedGroups = true;
121                                 m->splitAtDash(groups, Groups);
122                                 globaldata->Groups = Groups;
123                         }
124                         
125                         method = validParameter.validFile(parameters, "method", false);                         if (method == "not found") { method = "totalgroup"; }
126                         if (method != "totalgroup") {  m->mothurOut(method + " is not a valid scaling option for the normalize.shared command. The only choice is totalgroup. We hope to add more ways to normalize in the future, suggestions are welcome!"); m->mothurOutEndLine(); abort = true; }
127                 
128                         string temp = validParameter.validFile(parameters, "norm", false);                              
129                         if (temp == "not found") {  
130                                 norm = 0;  //once you have read, set norm to smallest group number
131                         }else { 
132                                 convert(temp, norm);
133                                 if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; }
134                         }
135                 }
136
137         }
138         catch(exception& e) {
139                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
140                 exit(1);
141         }
142 }
143
144 //**********************************************************************************************************************
145
146 void NormalizeSharedCommand::help(){
147         try {
148                 m->mothurOut("The normalize.shared command can only be executed after a successful read.otu command of a list and group or shared file.\n");
149                 m->mothurOut("The normalize.shared command parameters are groups, method, norm and label.  No parameters are required.\n");
150                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
151                 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
152                 m->mothurOut("The method parameter allows you to select what method you would like to use to normalize. The only choice is totalgroup. We hope to add more ways to normalize in the future, suggestions are welcome!\n");
153                 m->mothurOut("The norm parameter allows you to number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n");
154                 m->mothurOut("The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n");
155                 m->mothurOut("Example normalize.shared(groups=A-B-C, scale=totalgroup).\n");
156                 m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
157                 m->mothurOut("The normalize.shared command outputs a .norm.shared file.\n");
158                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
159
160         }
161         catch(exception& e) {
162                 m->errorOut(e, "NormalizeSharedCommand", "help");
163                 exit(1);
164         }
165 }
166
167 //**********************************************************************************************************************
168
169 NormalizeSharedCommand::~NormalizeSharedCommand(){}
170
171 //**********************************************************************************************************************
172
173 int NormalizeSharedCommand::execute(){
174         try {
175         
176                 if (abort == true) { return 0; }
177                 
178                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "norm.shared";
179                 ofstream out;
180                 m->openOutputFile(outputFileName, out);
181                 
182                 read = new ReadOTUFile(globaldata->inputFileName);      
183                 read->read(&*globaldata); 
184                 input = globaldata->ginput;
185                 lookup = input->getSharedRAbundVectors();
186                 string lastLabel = lookup[0]->getLabel();
187                 
188                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
189                 set<string> processedLabels;
190                 set<string> userLabels = labels;
191                 
192                 //set norm to smallest group number
193                 if (norm == 0) { 
194                         norm = lookup[0]->getNumSeqs();
195                         for (int i = 1; i < lookup.size(); i++) {
196                                 if (lookup[i]->getNumSeqs() < norm) { norm = lookup[i]->getNumSeqs();  }
197                         }  
198                 }
199                 
200                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
201
202                 //as long as you are not at the end of the file or done wih the lines you want
203                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
204                         
205                         if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } globaldata->Groups.clear(); delete read;  out.close(); remove(outputFileName.c_str()); return 0; }
206         
207                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
208
209                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
210                                 normalize(lookup, out);
211                                 
212                                 processedLabels.insert(lookup[0]->getLabel());
213                                 userLabels.erase(lookup[0]->getLabel());
214                         }
215                         
216                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
217                                 string saveLabel = lookup[0]->getLabel();
218                         
219                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
220                                 lookup = input->getSharedRAbundVectors(lastLabel);
221                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
222                                 
223                                 normalize(lookup, out);
224                                 
225                                 processedLabels.insert(lookup[0]->getLabel());
226                                 userLabels.erase(lookup[0]->getLabel());
227                                 
228                                 //restore real lastlabel to save below
229                                 lookup[0]->setLabel(saveLabel);
230                         }
231                         
232                         lastLabel = lookup[0]->getLabel();
233                         //prevent memory leak
234                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
235                         
236                         if (m->control_pressed) {  outputTypes.clear(); globaldata->Groups.clear(); delete read;  out.close(); remove(outputFileName.c_str()); return 0; }
237
238                         //get next line to process
239                         lookup = input->getSharedRAbundVectors();                               
240                 }
241                 
242                 if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); delete read;  out.close(); remove(outputFileName.c_str());  return 0; }
243
244                 //output error messages about any remaining user labels
245                 set<string>::iterator it;
246                 bool needToRun = false;
247                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
248                         m->mothurOut("Your file does not include the label " + *it); 
249                         if (processedLabels.count(lastLabel) != 1) {
250                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
251                                 needToRun = true;
252                         }else {
253                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
254                         }
255                 }
256         
257                 //run last label if you need to
258                 if (needToRun == true)  {
259                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
260                         lookup = input->getSharedRAbundVectors(lastLabel);
261                         
262                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
263                         
264                         normalize(lookup, out);
265                         
266                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
267                 }
268         
269                 //reset groups parameter
270                 globaldata->Groups.clear();  
271                 delete input; globaldata->ginput = NULL;
272                 delete read;
273                 out.close();
274                 
275                 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;}
276                 
277                 m->mothurOutEndLine();
278                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
279                 m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
280                 m->mothurOutEndLine();
281                 
282                 return 0;
283         }
284         catch(exception& e) {
285                 m->errorOut(e, "NormalizeSharedCommand", "execute");
286                 exit(1);
287         }
288 }
289 //**********************************************************************************************************************
290
291 int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
292         try {
293                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
294
295                 
296                  for (int i = 0; i < thisLookUp.size(); i++) {
297                         //cout << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t' << thisLookUp[i]->getNumBins() << '\t';
298                         
299                         for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
300                         
301                                 if (m->control_pressed) { return 0; }
302                         
303                                 int abund = thisLookUp[i]->getAbundance(j);
304                                 
305                                 float relabund = 0.0;
306                                 int finalNorm = 0;
307                                 
308                                 if (method == "totalgroup") { 
309                                         relabund = abund / (float) thisLookUp[i]->getNumSeqs();
310                                         float newNorm = relabund * norm;
311                                         //round to nearest int
312                                         finalNorm = (int) floor((newNorm + 0.5));
313                                         
314                                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
315                                 
316                                 //cout << finalNorm << '\t';
317                                 thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
318                         }
319                         //cout << endl;
320                  }
321                 
322                  eliminateZeroOTUS(thisLookUp);
323                  
324                   for (int i = 0; i < thisLookUp.size(); i++) {
325                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t' << thisLookUp[i]->getNumBins() << '\t';
326                         thisLookUp[i]->print(out);
327                  }
328                 
329                  return 0;
330         }
331         catch(exception& e) {
332                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
333                 exit(1);
334         }
335 }
336 //**********************************************************************************************************************
337 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
338         try {
339                 
340                 vector<SharedRAbundVector*> newLookup;
341                 for (int i = 0; i < thislookup.size(); i++) {
342                         SharedRAbundVector* temp = new SharedRAbundVector();
343                         temp->setLabel(thislookup[i]->getLabel());
344                         temp->setGroup(thislookup[i]->getGroup());
345                         newLookup.push_back(temp);
346                 }
347                 
348                 //for each bin
349                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
350                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
351                 
352                         //look at each sharedRabund and make sure they are not all zero
353                         bool allZero = true;
354                         for (int j = 0; j < thislookup.size(); j++) {
355                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
356                         }
357                         
358                         //if they are not all zero add this bin
359                         if (!allZero) {
360                                 for (int j = 0; j < thislookup.size(); j++) {
361                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
362                                 }
363                         }
364                 }
365
366                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
367
368                 thislookup = newLookup;
369                 
370                 return 0;
371  
372         }
373         catch(exception& e) {
374                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
375                 exit(1);
376         }
377 }
378 //**********************************************************************************************************************