]> git.donarmstrong.com Git - mothur.git/blob - normalizesharedcommand.cpp
added normalize.shared command
[mothur.git] / normalizesharedcommand.cpp
1 /*
2  *  normalizesharedcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/15/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "normalizesharedcommand.h"
11
12 //**********************************************************************************************************************
13
14 NormalizeSharedCommand::NormalizeSharedCommand(string option) {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 abort = false;
18                 allLines = 1;
19                 labels.clear();
20                 
21                 //allow user to run help
22                 if(option == "help") { help(); abort = true; }
23                 
24                 else {
25                         //valid paramters for this command
26                         string AlignArray[] =  {"groups","label","scale","outputdir","inputdir","norm"};
27                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
28                         
29                         OptionParser parser(option);
30                         map<string,string> parameters = parser.getParameters();
31                         
32                         ValidParameters validParameter;
33                         
34                         //check to make sure all parameters are valid for command
35                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
36                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
37                         }
38                         
39                         //if the user changes the output directory command factory will send this info to us in the output parameter 
40                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
41                                 outputDir = ""; 
42                                 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it       
43                         }
44                         
45                         //make sure the user has already run the read.otu command
46                         if ((globaldata->getSharedFile() == "")) {
47                                  m->mothurOut("You must read a list and a group, or a shared file before you can use the normalize.shared command."); m->mothurOutEndLine(); abort = true; 
48                         }
49
50                         //check for optional parameter and set defaults
51                         // ...at some point should added some additional type checking...
52                         label = validParameter.validFile(parameters, "label", false);                   
53                         if (label == "not found") { label = ""; }
54                         else { 
55                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
56                                 else { allLines = 1;  }
57                         }
58                         
59                         //if the user has not specified any labels use the ones from read.otu
60                         if (label == "") {  
61                                 allLines = globaldata->allLines; 
62                                 labels = globaldata->labels; 
63                         }
64                         
65                         groups = validParameter.validFile(parameters, "groups", false);                 
66                         if (groups == "not found") { groups = ""; pickedGroups = false; }
67                         else { 
68                                 pickedGroups = true;
69                                 m->splitAtDash(groups, Groups);
70                                 globaldata->Groups = Groups;
71                         }
72                         
73                         method = validParameter.validFile(parameters, "method", false);                         if (method == "not found") { method = "totalgroup"; }
74                         if (method != "totalgroup") {  m->mothurOut(method + " is not a valid scaling option for the normalize.shared command. The only choice is totalgroup. We hope to add more ways to normalize in the future, suggestions are welcome!"); m->mothurOutEndLine(); abort = true; }
75                 
76                         string temp = validParameter.validFile(parameters, "norm", false);                              
77                         if (temp == "not found") {  
78                                 norm = 0;  //once you have read, set norm to smallest group number
79                         }else { 
80                                 convert(temp, norm);
81                                 if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; }
82                         }
83                 }
84
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
88                 exit(1);
89         }
90 }
91
92 //**********************************************************************************************************************
93
94 void NormalizeSharedCommand::help(){
95         try {
96                 m->mothurOut("The normalize.shared command can only be executed after a successful read.otu command of a list and group or shared file.\n");
97                 m->mothurOut("The normalize.shared command parameters are groups, method, norm and label.  No parameters are required.\n");
98                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
99                 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
100                 m->mothurOut("The method parameter allows you to select what method you would like to use to normalize. The only choice is totalgroup. We hope to add more ways to normalize in the future, suggestions are welcome!\n");
101                 m->mothurOut("The norm parameter allows you to number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n");
102                 m->mothurOut("The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n");
103                 m->mothurOut("Example normalize.shared(groups=A-B-C, scale=averagegroup).\n");
104                 m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
105                 m->mothurOut("The normalize.shared command outputs a .norm.shared file.\n");
106                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
107
108         }
109         catch(exception& e) {
110                 m->errorOut(e, "NormalizeSharedCommand", "help");
111                 exit(1);
112         }
113 }
114
115 //**********************************************************************************************************************
116
117 NormalizeSharedCommand::~NormalizeSharedCommand(){}
118
119 //**********************************************************************************************************************
120
121 int NormalizeSharedCommand::execute(){
122         try {
123         
124                 if (abort == true) { return 0; }
125                 
126                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "norm.shared";
127                 ofstream out;
128                 m->openOutputFile(outputFileName, out);
129                 
130                 read = new ReadOTUFile(globaldata->inputFileName);      
131                 read->read(&*globaldata); 
132                 input = globaldata->ginput;
133                 lookup = input->getSharedRAbundVectors();
134                 string lastLabel = lookup[0]->getLabel();
135                 
136                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
137                 set<string> processedLabels;
138                 set<string> userLabels = labels;
139                 
140                 //set norm to smallest group number
141                 if (norm == 0) { 
142                         norm = lookup[0]->getNumSeqs();
143                         for (int i = 1; i < lookup.size(); i++) {
144                                 if (lookup[i]->getNumSeqs() < norm) { norm = lookup[i]->getNumSeqs();  }
145                         }  
146                 }
147                 
148                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
149
150                 //as long as you are not at the end of the file or done wih the lines you want
151                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
152                         
153                         if (m->control_pressed) {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } globaldata->Groups.clear(); delete read;  out.close(); remove(outputFileName.c_str()); return 0; }
154         
155                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
156
157                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
158                                 normalize(lookup, out);
159                                 
160                                 processedLabels.insert(lookup[0]->getLabel());
161                                 userLabels.erase(lookup[0]->getLabel());
162                         }
163                         
164                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
165                                 string saveLabel = lookup[0]->getLabel();
166                         
167                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
168                                 lookup = input->getSharedRAbundVectors(lastLabel);
169                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
170                                 
171                                 normalize(lookup, out);
172                                 
173                                 processedLabels.insert(lookup[0]->getLabel());
174                                 userLabels.erase(lookup[0]->getLabel());
175                                 
176                                 //restore real lastlabel to save below
177                                 lookup[0]->setLabel(saveLabel);
178                         }
179                         
180                         lastLabel = lookup[0]->getLabel();
181                         //prevent memory leak
182                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
183                         
184                         if (m->control_pressed) {  globaldata->Groups.clear(); delete read;  out.close(); remove(outputFileName.c_str()); return 0; }
185
186                         //get next line to process
187                         lookup = input->getSharedRAbundVectors();                               
188                 }
189                 
190                 if (m->control_pressed) { globaldata->Groups.clear(); delete read;  out.close(); remove(outputFileName.c_str());  return 0; }
191
192                 //output error messages about any remaining user labels
193                 set<string>::iterator it;
194                 bool needToRun = false;
195                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
196                         m->mothurOut("Your file does not include the label " + *it); 
197                         if (processedLabels.count(lastLabel) != 1) {
198                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
199                                 needToRun = true;
200                         }else {
201                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
202                         }
203                 }
204         
205                 //run last label if you need to
206                 if (needToRun == true)  {
207                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
208                         lookup = input->getSharedRAbundVectors(lastLabel);
209                         
210                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
211                         
212                         normalize(lookup, out);
213                         
214                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
215                 }
216         
217                 //reset groups parameter
218                 globaldata->Groups.clear();  
219                 delete input; globaldata->ginput = NULL;
220                 delete read;
221                 out.close();
222                 
223                 if (m->control_pressed) { remove(outputFileName.c_str()); return 0;}
224                 
225                 m->mothurOutEndLine();
226                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
227                 m->mothurOut(outputFileName); m->mothurOutEndLine();
228                 m->mothurOutEndLine();
229                 
230                 return 0;
231         }
232         catch(exception& e) {
233                 m->errorOut(e, "NormalizeSharedCommand", "execute");
234                 exit(1);
235         }
236 }
237 //**********************************************************************************************************************
238
239 int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
240         try {
241                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
242
243                 
244                  for (int i = 0; i < thisLookUp.size(); i++) {
245                         //cout << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t' << thisLookUp[i]->getNumBins() << '\t';
246                         
247                         for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
248                         
249                                 if (m->control_pressed) { return 0; }
250                         
251                                 int abund = thisLookUp[i]->getAbundance(j);
252                                 
253                                 float relabund = 0.0;
254                                 int finalNorm = 0;
255                                 
256                                 if (method == "totalgroup") { 
257                                         relabund = abund / (float) thisLookUp[i]->getNumSeqs();
258                                         float newNorm = relabund * norm;
259                                         //round to nearest int
260                                         finalNorm = (int) floor((newNorm + 0.5));
261                                         
262                                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
263                                 
264                                 //cout << finalNorm << '\t';
265                                 thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
266                         }
267                         //cout << endl;
268                  }
269                 
270                  eliminateZeroOTUS(thisLookUp);
271                  
272                   for (int i = 0; i < thisLookUp.size(); i++) {
273                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t' << thisLookUp[i]->getNumBins() << '\t';
274                         thisLookUp[i]->print(out);
275                  }
276                 
277                  return 0;
278         }
279         catch(exception& e) {
280                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
281                 exit(1);
282         }
283 }
284 //**********************************************************************************************************************
285 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
286         try {
287                 
288                 vector<SharedRAbundVector*> newLookup;
289                 for (int i = 0; i < thislookup.size(); i++) {
290                         SharedRAbundVector* temp = new SharedRAbundVector();
291                         temp->setLabel(thislookup[i]->getLabel());
292                         temp->setGroup(thislookup[i]->getGroup());
293                         newLookup.push_back(temp);
294                 }
295                 
296                 //for each bin
297                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
298                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
299                 
300                         //look at each sharedRabund and make sure they are not all zero
301                         bool allZero = true;
302                         for (int j = 0; j < thislookup.size(); j++) {
303                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
304                         }
305                         
306                         //if they are not all zero add this bin
307                         if (!allZero) {
308                                 for (int j = 0; j < thislookup.size(); j++) {
309                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
310                                 }
311                         }
312                 }
313
314                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
315
316                 thislookup = newLookup;
317                 
318                 return 0;
319  
320         }
321         catch(exception& e) {
322                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
323                 exit(1);
324         }
325 }
326 //**********************************************************************************************************************