]> git.donarmstrong.com Git - mothur.git/blob - normalizesharedcommand.cpp
removed read.dist, read.otu, read.tree and globaldata. added current to defaults...
[mothur.git] / normalizesharedcommand.cpp
1 /*
2  *  normalizesharedcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/15/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "normalizesharedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> NormalizeSharedCommand::setParameters(){ 
14         try {
15                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);    
16                 CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prelabund);
17                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
18                 CommandParameter pmethod("method", "Multiple", "totalgroup-zscore", "totalgroup", "", "", "",false,false); parameters.push_back(pmethod);
19                 CommandParameter pnorm("norm", "Number", "", "0", "", "", "",false,false); parameters.push_back(pnorm);
20                 CommandParameter pmakerelabund("makerelabund", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pmakerelabund);
21                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "NormalizeSharedCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string NormalizeSharedCommand::getHelpString(){ 
36         try {
37                 string helpString = "";
38                 helpString += "The normalize.shared command parameters are shared, relabund, groups, method, norm, makerelabund and label.  shared or relabund is required, unless you have a valid current file.\n";
39                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
40                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
41                 helpString += "The method parameter allows you to select what method you would like to use to normalize. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!\n";
42                 helpString += "The makerelabund parameter allows you to convert a shared file to a relabund file before you normalize. default=f.\n";
43                 helpString += "The norm parameter allows you to number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n";
44                 helpString += "The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n";
45                 helpString += "Example normalize.shared(groups=A-B-C, scale=totalgroup).\n";
46                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
47                 helpString += "The normalize.shared command outputs a .norm.shared file.\n";
48                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n";
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "NormalizeSharedCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56
57
58 //**********************************************************************************************************************
59 NormalizeSharedCommand::NormalizeSharedCommand(){       
60         try {
61                 abort = true; calledHelp = true; 
62                 setParameters();
63                 vector<string> tempOutNames;
64                 outputTypes["shared"] = tempOutNames;
65         }
66         catch(exception& e) {
67                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
68                 exit(1);
69         }
70 }
71 //**********************************************************************************************************************
72
73 NormalizeSharedCommand::NormalizeSharedCommand(string option) {
74         try {
75                 abort = false; calledHelp = false;   
76                 allLines = 1;
77                 
78                 //allow user to run help
79                 if(option == "help") { help(); abort = true; calledHelp = true; }
80                 
81                 else {
82                         vector<string> myArray = setParameters();
83                         
84                         OptionParser parser(option);
85                         map<string,string> parameters = parser.getParameters();
86                         map<string,string>::iterator it;
87                         
88                         ValidParameters validParameter;
89                         
90                         //check to make sure all parameters are valid for command
91                         for (it = parameters.begin(); it != parameters.end(); it++) { 
92                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
93                         }
94                         
95                         //initialize outputTypes
96                         vector<string> tempOutNames;
97                         outputTypes["shared"] = tempOutNames;
98                         
99                         //if the user changes the input directory command factory will send this info to us in the output parameter 
100                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
101                         if (inputDir == "not found"){   inputDir = "";          }
102                         else {
103                                 string path;
104                                 it = parameters.find("shared");
105                                 //user has given a template file
106                                 if(it != parameters.end()){ 
107                                         path = m->hasPath(it->second);
108                                         //if the user has not given a path then, add inputdir. else leave path alone.
109                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
110                                 }
111                                 
112                                 it = parameters.find("relabund");
113                                 //user has given a template file
114                                 if(it != parameters.end()){ 
115                                         path = m->hasPath(it->second);
116                                         //if the user has not given a path then, add inputdir. else leave path alone.
117                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
118                                 }
119                         }
120                         
121                         sharedfile = validParameter.validFile(parameters, "shared", true);
122                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
123                         else if (sharedfile == "not found") { sharedfile = ""; }
124                         else {  format = "sharedfile"; inputfile = sharedfile; }
125                         
126                         relabundfile = validParameter.validFile(parameters, "relabund", true);
127                         if (relabundfile == "not open") { relabundfile = ""; abort = true; }    
128                         else if (relabundfile == "not found") { relabundfile = ""; }
129                         else {  format = "relabund"; inputfile = relabundfile; }
130                         
131                         
132                         if ((sharedfile == "") && (relabundfile == "")) { 
133                                 //is there are current file available for any of these?
134                                 //give priority to shared, then list, then rabund, then sabund
135                                 //if there is a current shared file, use it
136                                 sharedfile = m->getSharedFile(); 
137                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
138                                 else { 
139                                         relabundfile = m->getRelAbundFile(); 
140                                         if (relabundfile != "") { inputfile = relabundfile; format = "relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
141                                         else { 
142                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund, relabund or shared file."); m->mothurOutEndLine(); 
143                                                 abort = true;
144                                         }
145                                 }
146                         }
147                         
148                         
149                         //if the user changes the output directory command factory will send this info to us in the output parameter 
150                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
151                         
152                         
153
154                         //check for optional parameter and set defaults
155                         // ...at some point should added some additional type checking...
156                         label = validParameter.validFile(parameters, "label", false);                   
157                         if (label == "not found") { label = ""; }
158                         else { 
159                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
160                                 else { allLines = 1;  }
161                         }
162                         
163                         groups = validParameter.validFile(parameters, "groups", false);                 
164                         if (groups == "not found") { groups = ""; pickedGroups = false; }
165                         else { 
166                                 pickedGroups = true;
167                                 m->splitAtDash(groups, Groups);
168                                 m->Groups = Groups;
169                         }
170                         
171                         method = validParameter.validFile(parameters, "method", false);                         if (method == "not found") { method = "totalgroup"; }
172                         if ((method != "totalgroup") && (method != "zscore")) {  m->mothurOut(method + " is not a valid scaling option for the normalize.shared command. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!"); m->mothurOutEndLine(); abort = true; }
173                 
174                         string temp = validParameter.validFile(parameters, "norm", false);                              
175                         if (temp == "not found") {  
176                                 norm = 0;  //once you have read, set norm to smallest group number
177                         }else { 
178                                 convert(temp, norm);
179                                 if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; }
180                         }
181                         
182                         temp = validParameter.validFile(parameters, "makerelabund", false);     if (temp == "") { temp = "f"; }
183                         makeRelabund = m->isTrue(temp);
184                 }
185
186         }
187         catch(exception& e) {
188                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
189                 exit(1);
190         }
191 }
192 //**********************************************************************************************************************
193
194 int NormalizeSharedCommand::execute(){
195         try {
196         
197                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
198                 
199                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "norm.shared";
200                 ofstream out;
201                 m->openOutputFile(outputFileName, out);
202                 
203                 input = new InputData(inputfile, format);
204                 
205                 //you are reading a sharedfile and you do not want to make relabund
206                 if ((format == "sharedfile") && (!makeRelabund)) {
207                         lookup = input->getSharedRAbundVectors();
208                         string lastLabel = lookup[0]->getLabel();
209                         
210                         //look for groups whose numseqs is below norm and remove them, warning the user
211                         if (norm != 0) { 
212                                 m->Groups.clear();
213                                 vector<SharedRAbundVector*> temp;
214                                 for (int i = 0; i < lookup.size(); i++) {
215                                         if (lookup[i]->getNumSeqs() < norm) { 
216                                                 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
217                                                 delete lookup[i];
218                                         }else { 
219                                                 m->Groups.push_back(lookup[i]->getGroup()); 
220                                                 temp.push_back(lookup[i]);
221                                         }
222                                 } 
223                                 lookup = temp;
224                         }
225                         
226                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
227                         set<string> processedLabels;
228                         set<string> userLabels = labels;
229                         
230                         if (method == "totalgroup") {
231                                 //set norm to smallest group number
232                                 if (norm == 0) { 
233                                         norm = lookup[0]->getNumSeqs();
234                                         for (int i = 1; i < lookup.size(); i++) {
235                                                 if (lookup[i]->getNumSeqs() < norm) { norm = lookup[i]->getNumSeqs();  }
236                                         }  
237                                 }
238                                 
239                                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
240                         }
241                         
242                         //as long as you are not at the end of the file or done wih the lines you want
243                         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
244                                 
245                                 if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->Groups.clear();   out.close(); remove(outputFileName.c_str()); return 0; }
246                                 
247                                 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
248                                         
249                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
250                                         normalize(lookup, out);
251                                         
252                                         processedLabels.insert(lookup[0]->getLabel());
253                                         userLabels.erase(lookup[0]->getLabel());
254                                 }
255                                 
256                                 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
257                                         string saveLabel = lookup[0]->getLabel();
258                                         
259                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
260                                         lookup = input->getSharedRAbundVectors(lastLabel);
261                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
262                                         
263                                         normalize(lookup, out);
264                                         
265                                         processedLabels.insert(lookup[0]->getLabel());
266                                         userLabels.erase(lookup[0]->getLabel());
267                                         
268                                         //restore real lastlabel to save below
269                                         lookup[0]->setLabel(saveLabel);
270                                 }
271                                 
272                                 lastLabel = lookup[0]->getLabel();
273                                 //prevent memory leak
274                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
275                                 
276                                 if (m->control_pressed) {  outputTypes.clear(); m->Groups.clear();  out.close(); remove(outputFileName.c_str()); return 0; }
277                                 
278                                 //get next line to process
279                                 lookup = input->getSharedRAbundVectors();                               
280                         }
281                         
282                         if (m->control_pressed) { outputTypes.clear(); m->Groups.clear();  out.close(); remove(outputFileName.c_str());  return 0; }
283                         
284                         //output error messages about any remaining user labels
285                         set<string>::iterator it;
286                         bool needToRun = false;
287                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
288                                 m->mothurOut("Your file does not include the label " + *it); 
289                                 if (processedLabels.count(lastLabel) != 1) {
290                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
291                                         needToRun = true;
292                                 }else {
293                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
294                                 }
295                         }
296                         
297                         //run last label if you need to
298                         if (needToRun == true)  {
299                                 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
300                                 lookup = input->getSharedRAbundVectors(lastLabel);
301                                 
302                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
303                                 
304                                 normalize(lookup, out);
305                                 
306                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
307                         }
308                         
309                 }else{ //relabund values
310                         lookupFloat = input->getSharedRAbundFloatVectors();
311                         string lastLabel = lookupFloat[0]->getLabel();
312                         
313                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
314                         set<string> processedLabels;
315                         set<string> userLabels = labels;
316                         
317                         //look for groups whose numseqs is below norm and remove them, warning the user
318                         if (norm != 0) { 
319                                 m->Groups.clear();
320                                 vector<SharedRAbundFloatVector*> temp;
321                                 for (int i = 0; i < lookupFloat.size(); i++) {
322                                         if (lookupFloat[i]->getNumSeqs() < norm) { 
323                                                 m->mothurOut(lookupFloat[i]->getGroup() + " contains " + toString(lookupFloat[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
324                                                 delete lookupFloat[i];
325                                         }else { 
326                                                 m->Groups.push_back(lookupFloat[i]->getGroup()); 
327                                                 temp.push_back(lookupFloat[i]);
328                                         }
329                                 } 
330                                 lookupFloat = temp;
331                         }
332                         
333                         //set norm to smallest group number
334                         if (method == "totalgroup") {
335                                 if (norm == 0) { 
336                                         norm = lookupFloat[0]->getNumSeqs();
337                                         for (int i = 1; i < lookupFloat.size(); i++) {
338                                                 if (lookupFloat[i]->getNumSeqs() < norm) { norm = lookupFloat[i]->getNumSeqs();  }
339                                         }  
340                                 }
341                                 
342                                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
343                         }
344                         
345                         //as long as you are not at the end of the file or done wih the lines you want
346                         while((lookupFloat[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
347                                 
348                                 if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } m->Groups.clear();   out.close(); remove(outputFileName.c_str()); return 0; }
349                                 
350                                 if(allLines == 1 || labels.count(lookupFloat[0]->getLabel()) == 1){                     
351                                         
352                                         m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
353                                         normalize(lookupFloat, out);
354                                         
355                                         processedLabels.insert(lookupFloat[0]->getLabel());
356                                         userLabels.erase(lookupFloat[0]->getLabel());
357                                 }
358                                 
359                                 if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
360                                         string saveLabel = lookupFloat[0]->getLabel();
361                                         
362                                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }  
363                                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
364                                         m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
365                                         
366                                         normalize(lookupFloat, out);
367                                         
368                                         processedLabels.insert(lookupFloat[0]->getLabel());
369                                         userLabels.erase(lookupFloat[0]->getLabel());
370                                         
371                                         //restore real lastlabel to save below
372                                         lookupFloat[0]->setLabel(saveLabel);
373                                 }
374                                 
375                                 lastLabel = lookupFloat[0]->getLabel();
376                                 //prevent memory leak
377                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i]; lookupFloat[i] = NULL; }
378                                 
379                                 if (m->control_pressed) {  outputTypes.clear(); m->Groups.clear();  out.close(); remove(outputFileName.c_str()); return 0; }
380                                 
381                                 //get next line to process
382                                 lookupFloat = input->getSharedRAbundFloatVectors();                             
383                         }
384                         
385                         if (m->control_pressed) { outputTypes.clear(); m->Groups.clear();  out.close(); remove(outputFileName.c_str());  return 0; }
386                         
387                         //output error messages about any remaining user labels
388                         set<string>::iterator it;
389                         bool needToRun = false;
390                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
391                                 m->mothurOut("Your file does not include the label " + *it); 
392                                 if (processedLabels.count(lastLabel) != 1) {
393                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
394                                         needToRun = true;
395                                 }else {
396                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
397                                 }
398                         }
399                         
400                         //run last label if you need to
401                         if (needToRun == true)  {
402                                 for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } }  
403                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
404                                 
405                                 m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
406                                 
407                                 normalize(lookupFloat, out);
408                                 
409                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
410                         }
411                         
412                 }
413                 //reset groups parameter
414                 m->Groups.clear();  
415                 delete input;
416                 out.close();
417                 
418                 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;}
419                 
420                 m->mothurOutEndLine();
421                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
422                 m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
423                 m->mothurOutEndLine();
424                 
425                 //set shared file as new current sharedfile
426                 string current = "";
427                 itTypes = outputTypes.find("shared");
428                 if (itTypes != outputTypes.end()) {
429                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
430                 }
431                 
432                 return 0;
433         }
434         catch(exception& e) {
435                 m->errorOut(e, "NormalizeSharedCommand", "execute");
436                 exit(1);
437         }
438 }
439 //**********************************************************************************************************************
440
441 int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
442         try {
443                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
444                                 
445                 if (method == "totalgroup") { 
446                                         
447                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
448                                                 
449                                         for (int i = 0; i < thisLookUp.size(); i++) {
450                                                         
451                                                 if (m->control_pressed) { return 0; }
452                                                         
453                                                 int abund = thisLookUp[i]->getAbundance(j);
454                                                         
455                                                 float relabund = abund / (float) thisLookUp[i]->getNumSeqs();
456                                                 float newNorm = relabund * norm;
457                                                 
458                                                 //round to nearest int
459                                                 int finalNorm = (int) floor((newNorm + 0.5));
460                                                 
461                                                 thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
462                                         }
463                                 }
464                                         
465                 }else if (method == "zscore") {
466                         
467                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
468                                 
469                                 if (m->control_pressed) { return 0; }
470                                 
471                                 //calc mean
472                                 float mean = 0.0;
473                                 for (int i = 0; i < thisLookUp.size(); i++) {  mean += thisLookUp[i]->getAbundance(j); }
474                                 mean /= (float) thisLookUp.size();
475                                         
476                                 //calc standard deviation
477                                 float sumSquared = 0.0;
478                                 for (int i = 0; i < thisLookUp.size(); i++) { sumSquared += (((float)thisLookUp[i]->getAbundance(j) - mean) * ((float)thisLookUp[i]->getAbundance(j) - mean)); }
479                                 sumSquared /= (float) thisLookUp.size();
480                                 
481                                 float standardDev = sqrt(sumSquared);
482                                         
483                                 for (int i = 0; i < thisLookUp.size(); i++) {
484                                         int finalNorm = 0;
485                                         if (standardDev != 0) { // stop divide by zero
486                                                 float newNorm = ((float)thisLookUp[i]->getAbundance(j) - mean) / standardDev;
487                                                 //round to nearest int
488                                                 finalNorm = (int) floor((newNorm + 0.5));
489                                         }
490                                         
491                                         thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
492                                 }
493                         }
494                                                 
495                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
496                                 
497                                 
498                                                 
499                 eliminateZeroOTUS(thisLookUp);
500                  
501                 for (int i = 0; i < thisLookUp.size(); i++) {
502                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
503                         thisLookUp[i]->print(out);
504                 }
505                 
506                 return 0;
507         }
508         catch(exception& e) {
509                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
510                 exit(1);
511         }
512 }
513 //**********************************************************************************************************************
514
515 int NormalizeSharedCommand::normalize(vector<SharedRAbundFloatVector*>& thisLookUp, ofstream& out){
516         try {
517                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
518                 
519                 if (method == "totalgroup") { 
520                         
521                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
522                                 
523                                 for (int i = 0; i < thisLookUp.size(); i++) {
524                                         
525                                         if (m->control_pressed) { return 0; }
526                                         
527                                         float abund = thisLookUp[i]->getAbundance(j);
528                                         
529                                         float relabund = abund / (float) thisLookUp[i]->getNumSeqs();
530                                         float newNorm = relabund * norm;
531                                         
532                                         thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
533                                 }
534                         }
535                         
536                 }else if (method == "zscore") {
537                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
538                                 
539                                 if (m->control_pressed) { return 0; }
540                                 
541                                 //calc mean
542                                 float mean = 0.0;
543                                 for (int i = 0; i < thisLookUp.size(); i++) {  mean += thisLookUp[i]->getAbundance(j); }
544                                 mean /= (float) thisLookUp.size();
545                                 
546                                 //calc standard deviation
547                                 float sumSquared = 0.0;
548                                 for (int i = 0; i < thisLookUp.size(); i++) { sumSquared += ((thisLookUp[i]->getAbundance(j) - mean) * (thisLookUp[i]->getAbundance(j) - mean)); }
549                                 sumSquared /= (float) thisLookUp.size();
550                                 
551                                 float standardDev = sqrt(sumSquared);
552                                 
553                                 for (int i = 0; i < thisLookUp.size(); i++) {
554                                         float newNorm = 0.0;
555                                         if (standardDev != 0) { // stop divide by zero
556                                                 newNorm = (thisLookUp[i]->getAbundance(j) - mean) / standardDev;
557                                         }
558                                         thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
559                                 }
560                         }                       
561                         
562                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
563                 
564                 
565                 eliminateZeroOTUS(thisLookUp);
566                 
567                 for (int i = 0; i < thisLookUp.size(); i++) {
568                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
569                         thisLookUp[i]->print(out);
570                 }
571                 
572                 return 0;
573         }
574         catch(exception& e) {
575                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
576                 exit(1);
577         }
578 }
579 //**********************************************************************************************************************
580 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
581         try {
582                 
583                 vector<SharedRAbundVector*> newLookup;
584                 for (int i = 0; i < thislookup.size(); i++) {
585                         SharedRAbundVector* temp = new SharedRAbundVector();
586                         temp->setLabel(thislookup[i]->getLabel());
587                         temp->setGroup(thislookup[i]->getGroup());
588                         newLookup.push_back(temp);
589                 }
590                 
591                 //for each bin
592                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
593                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
594                 
595                         //look at each sharedRabund and make sure they are not all zero
596                         bool allZero = true;
597                         for (int j = 0; j < thislookup.size(); j++) {
598                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
599                         }
600                         
601                         //if they are not all zero add this bin
602                         if (!allZero) {
603                                 for (int j = 0; j < thislookup.size(); j++) {
604                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
605                                 }
606                         }
607                 }
608
609                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
610
611                 thislookup = newLookup;
612                 
613                 return 0;
614  
615         }
616         catch(exception& e) {
617                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
618                 exit(1);
619         }
620 }
621 //**********************************************************************************************************************
622 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
623         try {
624                 
625                 vector<SharedRAbundFloatVector*> newLookup;
626                 for (int i = 0; i < thislookup.size(); i++) {
627                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
628                         temp->setLabel(thislookup[i]->getLabel());
629                         temp->setGroup(thislookup[i]->getGroup());
630                         newLookup.push_back(temp);
631                 }
632                 
633                 //for each bin
634                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
635                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
636                         
637                         //look at each sharedRabund and make sure they are not all zero
638                         bool allZero = true;
639                         for (int j = 0; j < thislookup.size(); j++) {
640                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
641                         }
642                         
643                         //if they are not all zero add this bin
644                         if (!allZero) {
645                                 for (int j = 0; j < thislookup.size(); j++) {
646                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
647                                 }
648                         }
649                 }
650                 
651                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
652                 
653                 thislookup = newLookup;
654                 
655                 return 0;
656                 
657         }
658         catch(exception& e) {
659                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
660                 exit(1);
661         }
662 }
663
664 //**********************************************************************************************************************