]> git.donarmstrong.com Git - mothur.git/blob - normalizesharedcommand.cpp
added citation function to commands
[mothur.git] / normalizesharedcommand.cpp
1 /*
2  *  normalizesharedcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/15/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "normalizesharedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> NormalizeSharedCommand::setParameters(){ 
14         try {
15                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);    
16                 CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prelabund);
17                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
18                 CommandParameter pmethod("method", "Multiple", "totalgroup-zscore", "totalgroup", "", "", "",false,false); parameters.push_back(pmethod);
19                 CommandParameter pnorm("norm", "Number", "", "0", "", "", "",false,false); parameters.push_back(pnorm);
20                 CommandParameter pmakerelabund("makerelabund", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pmakerelabund);
21                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "NormalizeSharedCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string NormalizeSharedCommand::getHelpString(){ 
36         try {
37                 string helpString = "";
38                 helpString += "The normalize.shared command parameters are shared, relabund, groups, method, norm, makerelabund and label.  shared or relabund is required, unless you have a valid current file.\n";
39                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
40                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
41                 helpString += "The method parameter allows you to select what method you would like to use to normalize. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!\n";
42                 helpString += "The makerelabund parameter allows you to convert a shared file to a relabund file before you normalize. default=f.\n";
43                 helpString += "The norm parameter allows you to number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n";
44                 helpString += "The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n";
45                 helpString += "Example normalize.shared(groups=A-B-C, scale=totalgroup).\n";
46                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
47                 helpString += "The normalize.shared command outputs a .norm.shared file.\n";
48                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "NormalizeSharedCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56
57
58 //**********************************************************************************************************************
59 NormalizeSharedCommand::NormalizeSharedCommand(){       
60         try {
61                 abort = true; calledHelp = true; 
62                 setParameters();
63                 vector<string> tempOutNames;
64                 outputTypes["shared"] = tempOutNames;
65         }
66         catch(exception& e) {
67                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
68                 exit(1);
69         }
70 }
71 //**********************************************************************************************************************
72
73 NormalizeSharedCommand::NormalizeSharedCommand(string option) {
74         try {
75                 abort = false; calledHelp = false;   
76                 allLines = 1;
77                 
78                 //allow user to run help
79                 if(option == "help") { help(); abort = true; calledHelp = true; }
80                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
81                 
82                 else {
83                         vector<string> myArray = setParameters();
84                         
85                         OptionParser parser(option);
86                         map<string,string> parameters = parser.getParameters();
87                         map<string,string>::iterator it;
88                         
89                         ValidParameters validParameter;
90                         
91                         //check to make sure all parameters are valid for command
92                         for (it = parameters.begin(); it != parameters.end(); it++) { 
93                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
94                         }
95                         
96                         //initialize outputTypes
97                         vector<string> tempOutNames;
98                         outputTypes["shared"] = tempOutNames;
99                         
100                         //if the user changes the input directory command factory will send this info to us in the output parameter 
101                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
102                         if (inputDir == "not found"){   inputDir = "";          }
103                         else {
104                                 string path;
105                                 it = parameters.find("shared");
106                                 //user has given a template file
107                                 if(it != parameters.end()){ 
108                                         path = m->hasPath(it->second);
109                                         //if the user has not given a path then, add inputdir. else leave path alone.
110                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
111                                 }
112                                 
113                                 it = parameters.find("relabund");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
119                                 }
120                         }
121                         
122                         sharedfile = validParameter.validFile(parameters, "shared", true);
123                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
124                         else if (sharedfile == "not found") { sharedfile = ""; }
125                         else {  format = "sharedfile"; inputfile = sharedfile; }
126                         
127                         relabundfile = validParameter.validFile(parameters, "relabund", true);
128                         if (relabundfile == "not open") { relabundfile = ""; abort = true; }    
129                         else if (relabundfile == "not found") { relabundfile = ""; }
130                         else {  format = "relabund"; inputfile = relabundfile; }
131                         
132                         
133                         if ((sharedfile == "") && (relabundfile == "")) { 
134                                 //is there are current file available for any of these?
135                                 //give priority to shared, then list, then rabund, then sabund
136                                 //if there is a current shared file, use it
137                                 sharedfile = m->getSharedFile(); 
138                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
139                                 else { 
140                                         relabundfile = m->getRelAbundFile(); 
141                                         if (relabundfile != "") { inputfile = relabundfile; format = "relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
142                                         else { 
143                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund, relabund or shared file."); m->mothurOutEndLine(); 
144                                                 abort = true;
145                                         }
146                                 }
147                         }
148                         
149                         
150                         //if the user changes the output directory command factory will send this info to us in the output parameter 
151                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
152                         
153                         
154
155                         //check for optional parameter and set defaults
156                         // ...at some point should added some additional type checking...
157                         label = validParameter.validFile(parameters, "label", false);                   
158                         if (label == "not found") { label = ""; }
159                         else { 
160                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
161                                 else { allLines = 1;  }
162                         }
163                         
164                         groups = validParameter.validFile(parameters, "groups", false);                 
165                         if (groups == "not found") { groups = ""; pickedGroups = false; }
166                         else { 
167                                 pickedGroups = true;
168                                 m->splitAtDash(groups, Groups);
169                                 m->Groups = Groups;
170                         }
171                         
172                         method = validParameter.validFile(parameters, "method", false);                         if (method == "not found") { method = "totalgroup"; }
173                         if ((method != "totalgroup") && (method != "zscore")) {  m->mothurOut(method + " is not a valid scaling option for the normalize.shared command. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!"); m->mothurOutEndLine(); abort = true; }
174                 
175                         string temp = validParameter.validFile(parameters, "norm", false);                              
176                         if (temp == "not found") {  
177                                 norm = 0;  //once you have read, set norm to smallest group number
178                         }else { 
179                                 convert(temp, norm);
180                                 if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; }
181                         }
182                         
183                         temp = validParameter.validFile(parameters, "makerelabund", false);     if (temp == "") { temp = "f"; }
184                         makeRelabund = m->isTrue(temp);
185                 }
186
187         }
188         catch(exception& e) {
189                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
190                 exit(1);
191         }
192 }
193 //**********************************************************************************************************************
194
195 int NormalizeSharedCommand::execute(){
196         try {
197         
198                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
199                 
200                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "norm.shared";
201                 ofstream out;
202                 m->openOutputFile(outputFileName, out);
203                 
204                 input = new InputData(inputfile, format);
205                 
206                 //you are reading a sharedfile and you do not want to make relabund
207                 if ((format == "sharedfile") && (!makeRelabund)) {
208                         lookup = input->getSharedRAbundVectors();
209                         string lastLabel = lookup[0]->getLabel();
210                         
211                         //look for groups whose numseqs is below norm and remove them, warning the user
212                         if (norm != 0) { 
213                                 m->Groups.clear();
214                                 vector<SharedRAbundVector*> temp;
215                                 for (int i = 0; i < lookup.size(); i++) {
216                                         if (lookup[i]->getNumSeqs() < norm) { 
217                                                 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
218                                                 delete lookup[i];
219                                         }else { 
220                                                 m->Groups.push_back(lookup[i]->getGroup()); 
221                                                 temp.push_back(lookup[i]);
222                                         }
223                                 } 
224                                 lookup = temp;
225                         }
226                         
227                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
228                         set<string> processedLabels;
229                         set<string> userLabels = labels;
230                         
231                         if (method == "totalgroup") {
232                                 //set norm to smallest group number
233                                 if (norm == 0) { 
234                                         norm = lookup[0]->getNumSeqs();
235                                         for (int i = 1; i < lookup.size(); i++) {
236                                                 if (lookup[i]->getNumSeqs() < norm) { norm = lookup[i]->getNumSeqs();  }
237                                         }  
238                                 }
239                                 
240                                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
241                         }
242                         
243                         //as long as you are not at the end of the file or done wih the lines you want
244                         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
245                                 
246                                 if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->Groups.clear();   out.close(); remove(outputFileName.c_str()); return 0; }
247                                 
248                                 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
249                                         
250                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
251                                         normalize(lookup, out);
252                                         
253                                         processedLabels.insert(lookup[0]->getLabel());
254                                         userLabels.erase(lookup[0]->getLabel());
255                                 }
256                                 
257                                 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
258                                         string saveLabel = lookup[0]->getLabel();
259                                         
260                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
261                                         lookup = input->getSharedRAbundVectors(lastLabel);
262                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
263                                         
264                                         normalize(lookup, out);
265                                         
266                                         processedLabels.insert(lookup[0]->getLabel());
267                                         userLabels.erase(lookup[0]->getLabel());
268                                         
269                                         //restore real lastlabel to save below
270                                         lookup[0]->setLabel(saveLabel);
271                                 }
272                                 
273                                 lastLabel = lookup[0]->getLabel();
274                                 //prevent memory leak
275                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
276                                 
277                                 if (m->control_pressed) {  outputTypes.clear(); m->Groups.clear();  out.close(); remove(outputFileName.c_str()); return 0; }
278                                 
279                                 //get next line to process
280                                 lookup = input->getSharedRAbundVectors();                               
281                         }
282                         
283                         if (m->control_pressed) { outputTypes.clear(); m->Groups.clear();  out.close(); remove(outputFileName.c_str());  return 0; }
284                         
285                         //output error messages about any remaining user labels
286                         set<string>::iterator it;
287                         bool needToRun = false;
288                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
289                                 m->mothurOut("Your file does not include the label " + *it); 
290                                 if (processedLabels.count(lastLabel) != 1) {
291                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
292                                         needToRun = true;
293                                 }else {
294                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
295                                 }
296                         }
297                         
298                         //run last label if you need to
299                         if (needToRun == true)  {
300                                 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
301                                 lookup = input->getSharedRAbundVectors(lastLabel);
302                                 
303                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
304                                 
305                                 normalize(lookup, out);
306                                 
307                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
308                         }
309                         
310                 }else{ //relabund values
311                         lookupFloat = input->getSharedRAbundFloatVectors();
312                         string lastLabel = lookupFloat[0]->getLabel();
313                         
314                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
315                         set<string> processedLabels;
316                         set<string> userLabels = labels;
317                         
318                         //look for groups whose numseqs is below norm and remove them, warning the user
319                         if (norm != 0) { 
320                                 m->Groups.clear();
321                                 vector<SharedRAbundFloatVector*> temp;
322                                 for (int i = 0; i < lookupFloat.size(); i++) {
323                                         if (lookupFloat[i]->getNumSeqs() < norm) { 
324                                                 m->mothurOut(lookupFloat[i]->getGroup() + " contains " + toString(lookupFloat[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
325                                                 delete lookupFloat[i];
326                                         }else { 
327                                                 m->Groups.push_back(lookupFloat[i]->getGroup()); 
328                                                 temp.push_back(lookupFloat[i]);
329                                         }
330                                 } 
331                                 lookupFloat = temp;
332                         }
333                         
334                         //set norm to smallest group number
335                         if (method == "totalgroup") {
336                                 if (norm == 0) { 
337                                         norm = lookupFloat[0]->getNumSeqs();
338                                         for (int i = 1; i < lookupFloat.size(); i++) {
339                                                 if (lookupFloat[i]->getNumSeqs() < norm) { norm = lookupFloat[i]->getNumSeqs();  }
340                                         }  
341                                 }
342                                 
343                                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
344                         }
345                         
346                         //as long as you are not at the end of the file or done wih the lines you want
347                         while((lookupFloat[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
348                                 
349                                 if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } m->Groups.clear();   out.close(); remove(outputFileName.c_str()); return 0; }
350                                 
351                                 if(allLines == 1 || labels.count(lookupFloat[0]->getLabel()) == 1){                     
352                                         
353                                         m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
354                                         normalize(lookupFloat, out);
355                                         
356                                         processedLabels.insert(lookupFloat[0]->getLabel());
357                                         userLabels.erase(lookupFloat[0]->getLabel());
358                                 }
359                                 
360                                 if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
361                                         string saveLabel = lookupFloat[0]->getLabel();
362                                         
363                                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }  
364                                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
365                                         m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
366                                         
367                                         normalize(lookupFloat, out);
368                                         
369                                         processedLabels.insert(lookupFloat[0]->getLabel());
370                                         userLabels.erase(lookupFloat[0]->getLabel());
371                                         
372                                         //restore real lastlabel to save below
373                                         lookupFloat[0]->setLabel(saveLabel);
374                                 }
375                                 
376                                 lastLabel = lookupFloat[0]->getLabel();
377                                 //prevent memory leak
378                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i]; lookupFloat[i] = NULL; }
379                                 
380                                 if (m->control_pressed) {  outputTypes.clear(); m->Groups.clear();  out.close(); remove(outputFileName.c_str()); return 0; }
381                                 
382                                 //get next line to process
383                                 lookupFloat = input->getSharedRAbundFloatVectors();                             
384                         }
385                         
386                         if (m->control_pressed) { outputTypes.clear(); m->Groups.clear();  out.close(); remove(outputFileName.c_str());  return 0; }
387                         
388                         //output error messages about any remaining user labels
389                         set<string>::iterator it;
390                         bool needToRun = false;
391                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
392                                 m->mothurOut("Your file does not include the label " + *it); 
393                                 if (processedLabels.count(lastLabel) != 1) {
394                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
395                                         needToRun = true;
396                                 }else {
397                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
398                                 }
399                         }
400                         
401                         //run last label if you need to
402                         if (needToRun == true)  {
403                                 for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } }  
404                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
405                                 
406                                 m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
407                                 
408                                 normalize(lookupFloat, out);
409                                 
410                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
411                         }
412                         
413                 }
414                 //reset groups parameter
415                 m->Groups.clear();  
416                 delete input;
417                 out.close();
418                 
419                 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;}
420                 
421                 m->mothurOutEndLine();
422                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
423                 m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
424                 m->mothurOutEndLine();
425                 
426                 //set shared file as new current sharedfile
427                 string current = "";
428                 itTypes = outputTypes.find("shared");
429                 if (itTypes != outputTypes.end()) {
430                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
431                 }
432                 
433                 return 0;
434         }
435         catch(exception& e) {
436                 m->errorOut(e, "NormalizeSharedCommand", "execute");
437                 exit(1);
438         }
439 }
440 //**********************************************************************************************************************
441
442 int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
443         try {
444                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
445                                 
446                 if (method == "totalgroup") { 
447                                         
448                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
449                                                 
450                                         for (int i = 0; i < thisLookUp.size(); i++) {
451                                                         
452                                                 if (m->control_pressed) { return 0; }
453                                                         
454                                                 int abund = thisLookUp[i]->getAbundance(j);
455                                                         
456                                                 float relabund = abund / (float) thisLookUp[i]->getNumSeqs();
457                                                 float newNorm = relabund * norm;
458                                                 
459                                                 //round to nearest int
460                                                 int finalNorm = (int) floor((newNorm + 0.5));
461                                                 
462                                                 thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
463                                         }
464                                 }
465                                         
466                 }else if (method == "zscore") {
467                         
468                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
469                                 
470                                 if (m->control_pressed) { return 0; }
471                                 
472                                 //calc mean
473                                 float mean = 0.0;
474                                 for (int i = 0; i < thisLookUp.size(); i++) {  mean += thisLookUp[i]->getAbundance(j); }
475                                 mean /= (float) thisLookUp.size();
476                                         
477                                 //calc standard deviation
478                                 float sumSquared = 0.0;
479                                 for (int i = 0; i < thisLookUp.size(); i++) { sumSquared += (((float)thisLookUp[i]->getAbundance(j) - mean) * ((float)thisLookUp[i]->getAbundance(j) - mean)); }
480                                 sumSquared /= (float) thisLookUp.size();
481                                 
482                                 float standardDev = sqrt(sumSquared);
483                                         
484                                 for (int i = 0; i < thisLookUp.size(); i++) {
485                                         int finalNorm = 0;
486                                         if (standardDev != 0) { // stop divide by zero
487                                                 float newNorm = ((float)thisLookUp[i]->getAbundance(j) - mean) / standardDev;
488                                                 //round to nearest int
489                                                 finalNorm = (int) floor((newNorm + 0.5));
490                                         }
491                                         
492                                         thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
493                                 }
494                         }
495                                                 
496                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
497                                 
498                                 
499                                                 
500                 eliminateZeroOTUS(thisLookUp);
501                  
502                 for (int i = 0; i < thisLookUp.size(); i++) {
503                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
504                         thisLookUp[i]->print(out);
505                 }
506                 
507                 return 0;
508         }
509         catch(exception& e) {
510                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
511                 exit(1);
512         }
513 }
514 //**********************************************************************************************************************
515
516 int NormalizeSharedCommand::normalize(vector<SharedRAbundFloatVector*>& thisLookUp, ofstream& out){
517         try {
518                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
519                 
520                 if (method == "totalgroup") { 
521                         
522                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
523                                 
524                                 for (int i = 0; i < thisLookUp.size(); i++) {
525                                         
526                                         if (m->control_pressed) { return 0; }
527                                         
528                                         float abund = thisLookUp[i]->getAbundance(j);
529                                         
530                                         float relabund = abund / (float) thisLookUp[i]->getNumSeqs();
531                                         float newNorm = relabund * norm;
532                                         
533                                         thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
534                                 }
535                         }
536                         
537                 }else if (method == "zscore") {
538                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
539                                 
540                                 if (m->control_pressed) { return 0; }
541                                 
542                                 //calc mean
543                                 float mean = 0.0;
544                                 for (int i = 0; i < thisLookUp.size(); i++) {  mean += thisLookUp[i]->getAbundance(j); }
545                                 mean /= (float) thisLookUp.size();
546                                 
547                                 //calc standard deviation
548                                 float sumSquared = 0.0;
549                                 for (int i = 0; i < thisLookUp.size(); i++) { sumSquared += ((thisLookUp[i]->getAbundance(j) - mean) * (thisLookUp[i]->getAbundance(j) - mean)); }
550                                 sumSquared /= (float) thisLookUp.size();
551                                 
552                                 float standardDev = sqrt(sumSquared);
553                                 
554                                 for (int i = 0; i < thisLookUp.size(); i++) {
555                                         float newNorm = 0.0;
556                                         if (standardDev != 0) { // stop divide by zero
557                                                 newNorm = (thisLookUp[i]->getAbundance(j) - mean) / standardDev;
558                                         }
559                                         thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
560                                 }
561                         }                       
562                         
563                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
564                 
565                 
566                 eliminateZeroOTUS(thisLookUp);
567                 
568                 for (int i = 0; i < thisLookUp.size(); i++) {
569                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
570                         thisLookUp[i]->print(out);
571                 }
572                 
573                 return 0;
574         }
575         catch(exception& e) {
576                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
577                 exit(1);
578         }
579 }
580 //**********************************************************************************************************************
581 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
582         try {
583                 
584                 vector<SharedRAbundVector*> newLookup;
585                 for (int i = 0; i < thislookup.size(); i++) {
586                         SharedRAbundVector* temp = new SharedRAbundVector();
587                         temp->setLabel(thislookup[i]->getLabel());
588                         temp->setGroup(thislookup[i]->getGroup());
589                         newLookup.push_back(temp);
590                 }
591                 
592                 //for each bin
593                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
594                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
595                 
596                         //look at each sharedRabund and make sure they are not all zero
597                         bool allZero = true;
598                         for (int j = 0; j < thislookup.size(); j++) {
599                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
600                         }
601                         
602                         //if they are not all zero add this bin
603                         if (!allZero) {
604                                 for (int j = 0; j < thislookup.size(); j++) {
605                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
606                                 }
607                         }
608                 }
609
610                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
611
612                 thislookup = newLookup;
613                 
614                 return 0;
615  
616         }
617         catch(exception& e) {
618                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
619                 exit(1);
620         }
621 }
622 //**********************************************************************************************************************
623 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
624         try {
625                 
626                 vector<SharedRAbundFloatVector*> newLookup;
627                 for (int i = 0; i < thislookup.size(); i++) {
628                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
629                         temp->setLabel(thislookup[i]->getLabel());
630                         temp->setGroup(thislookup[i]->getGroup());
631                         newLookup.push_back(temp);
632                 }
633                 
634                 //for each bin
635                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
636                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
637                         
638                         //look at each sharedRabund and make sure they are not all zero
639                         bool allZero = true;
640                         for (int j = 0; j < thislookup.size(); j++) {
641                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
642                         }
643                         
644                         //if they are not all zero add this bin
645                         if (!allZero) {
646                                 for (int j = 0; j < thislookup.size(); j++) {
647                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
648                                 }
649                         }
650                 }
651                 
652                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
653                 
654                 thislookup = newLookup;
655                 
656                 return 0;
657                 
658         }
659         catch(exception& e) {
660                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
661                 exit(1);
662         }
663 }
664
665 //**********************************************************************************************************************