]> git.donarmstrong.com Git - mothur.git/blob - normalizesharedcommand.cpp
added headers to shared and relabund files
[mothur.git] / normalizesharedcommand.cpp
1 /*
2  *  normalizesharedcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/15/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "normalizesharedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> NormalizeSharedCommand::setParameters(){ 
14         try {
15                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);    
16                 CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prelabund);
17                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
18                 CommandParameter pmethod("method", "Multiple", "totalgroup-zscore", "totalgroup", "", "", "",false,false); parameters.push_back(pmethod);
19                 CommandParameter pnorm("norm", "Number", "", "0", "", "", "",false,false); parameters.push_back(pnorm);
20                 CommandParameter pmakerelabund("makerelabund", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pmakerelabund);
21                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "NormalizeSharedCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string NormalizeSharedCommand::getHelpString(){ 
36         try {
37                 string helpString = "";
38                 helpString += "The normalize.shared command parameters are shared, relabund, groups, method, norm, makerelabund and label.  shared or relabund is required, unless you have a valid current file.\n";
39                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
40                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
41                 helpString += "The method parameter allows you to select what method you would like to use to normalize. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!\n";
42                 helpString += "The makerelabund parameter allows you to convert a shared file to a relabund file before you normalize. default=f.\n";
43                 helpString += "The norm parameter allows you to number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n";
44                 helpString += "The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n";
45                 helpString += "Example normalize.shared(groups=A-B-C, scale=totalgroup).\n";
46                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
47                 helpString += "The normalize.shared command outputs a .norm.shared file.\n";
48                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "NormalizeSharedCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56
57
58 //**********************************************************************************************************************
59 NormalizeSharedCommand::NormalizeSharedCommand(){       
60         try {
61                 abort = true; calledHelp = true; 
62                 setParameters();
63                 vector<string> tempOutNames;
64                 outputTypes["shared"] = tempOutNames;
65         }
66         catch(exception& e) {
67                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
68                 exit(1);
69         }
70 }
71 //**********************************************************************************************************************
72
73 NormalizeSharedCommand::NormalizeSharedCommand(string option) {
74         try {
75                 abort = false; calledHelp = false;   
76                 allLines = 1;
77                 
78                 //allow user to run help
79                 if(option == "help") { help(); abort = true; calledHelp = true; }
80                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
81                 
82                 else {
83                         vector<string> myArray = setParameters();
84                         
85                         OptionParser parser(option);
86                         map<string,string> parameters = parser.getParameters();
87                         map<string,string>::iterator it;
88                         
89                         ValidParameters validParameter;
90                         
91                         //check to make sure all parameters are valid for command
92                         for (it = parameters.begin(); it != parameters.end(); it++) { 
93                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
94                         }
95                         
96                         //initialize outputTypes
97                         vector<string> tempOutNames;
98                         outputTypes["shared"] = tempOutNames;
99                         
100                         //if the user changes the input directory command factory will send this info to us in the output parameter 
101                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
102                         if (inputDir == "not found"){   inputDir = "";          }
103                         else {
104                                 string path;
105                                 it = parameters.find("shared");
106                                 //user has given a template file
107                                 if(it != parameters.end()){ 
108                                         path = m->hasPath(it->second);
109                                         //if the user has not given a path then, add inputdir. else leave path alone.
110                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
111                                 }
112                                 
113                                 it = parameters.find("relabund");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
119                                 }
120                         }
121                         
122                         sharedfile = validParameter.validFile(parameters, "shared", true);
123                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
124                         else if (sharedfile == "not found") { sharedfile = ""; }
125                         else {  format = "sharedfile"; inputfile = sharedfile; }
126                         
127                         relabundfile = validParameter.validFile(parameters, "relabund", true);
128                         if (relabundfile == "not open") { relabundfile = ""; abort = true; }    
129                         else if (relabundfile == "not found") { relabundfile = ""; }
130                         else {  format = "relabund"; inputfile = relabundfile; }
131                         
132                         
133                         if ((sharedfile == "") && (relabundfile == "")) { 
134                                 //is there are current file available for any of these?
135                                 //give priority to shared, then list, then rabund, then sabund
136                                 //if there is a current shared file, use it
137                                 sharedfile = m->getSharedFile(); 
138                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
139                                 else { 
140                                         relabundfile = m->getRelAbundFile(); 
141                                         if (relabundfile != "") { inputfile = relabundfile; format = "relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
142                                         else { 
143                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund, relabund or shared file."); m->mothurOutEndLine(); 
144                                                 abort = true;
145                                         }
146                                 }
147                         }
148                         
149                         
150                         //if the user changes the output directory command factory will send this info to us in the output parameter 
151                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
152                         
153                         
154
155                         //check for optional parameter and set defaults
156                         // ...at some point should added some additional type checking...
157                         label = validParameter.validFile(parameters, "label", false);                   
158                         if (label == "not found") { label = ""; }
159                         else { 
160                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
161                                 else { allLines = 1;  }
162                         }
163                         
164                         groups = validParameter.validFile(parameters, "groups", false);                 
165                         if (groups == "not found") { groups = ""; pickedGroups = false; }
166                         else { 
167                                 pickedGroups = true;
168                                 m->splitAtDash(groups, Groups);
169                                 m->Groups = Groups;
170                         }
171                         
172                         method = validParameter.validFile(parameters, "method", false);                         if (method == "not found") { method = "totalgroup"; }
173                         if ((method != "totalgroup") && (method != "zscore")) {  m->mothurOut(method + " is not a valid scaling option for the normalize.shared command. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!"); m->mothurOutEndLine(); abort = true; }
174                 
175                         string temp = validParameter.validFile(parameters, "norm", false);                              
176                         if (temp == "not found") {  
177                                 norm = 0;  //once you have read, set norm to smallest group number
178                         }else { 
179                                 convert(temp, norm);
180                                 if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; }
181                         }
182                         
183                         temp = validParameter.validFile(parameters, "makerelabund", false);     if (temp == "") { temp = "f"; }
184                         makeRelabund = m->isTrue(temp);
185                 }
186
187         }
188         catch(exception& e) {
189                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
190                 exit(1);
191         }
192 }
193 //**********************************************************************************************************************
194
195 int NormalizeSharedCommand::execute(){
196         try {
197         
198                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
199                 
200                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "norm.shared";
201                 ofstream out;
202                 m->openOutputFile(outputFileName, out);
203                 
204                 input = new InputData(inputfile, format);
205                 
206                 //you are reading a sharedfile and you do not want to make relabund
207                 if ((format == "sharedfile") && (!makeRelabund)) {
208                         lookup = input->getSharedRAbundVectors();
209                         string lastLabel = lookup[0]->getLabel();
210                         
211                         //look for groups whose numseqs is below norm and remove them, warning the user
212                         if (norm != 0) { 
213                                 m->Groups.clear();
214                                 vector<SharedRAbundVector*> temp;
215                                 for (int i = 0; i < lookup.size(); i++) {
216                                         if (lookup[i]->getNumSeqs() < norm) { 
217                                                 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
218                                                 delete lookup[i];
219                                         }else { 
220                                                 m->Groups.push_back(lookup[i]->getGroup()); 
221                                                 temp.push_back(lookup[i]);
222                                         }
223                                 } 
224                                 lookup = temp;
225                         }
226                         
227                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
228                         set<string> processedLabels;
229                         set<string> userLabels = labels;
230                         
231                         if (method == "totalgroup") {
232                                 //set norm to smallest group number
233                                 if (norm == 0) { 
234                                         norm = lookup[0]->getNumSeqs();
235                                         for (int i = 1; i < lookup.size(); i++) {
236                                                 if (lookup[i]->getNumSeqs() < norm) { norm = lookup[i]->getNumSeqs();  }
237                                         }  
238                                 }
239                                 
240                                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
241                         }
242                         
243                         
244                         //as long as you are not at the end of the file or done wih the lines you want
245                         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
246                                 
247                                 if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->Groups.clear();   out.close(); remove(outputFileName.c_str()); return 0; }
248                                 
249                                 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
250                                         
251                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
252                                         if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
253                                         normalize(lookup, out);
254                                         
255                                         processedLabels.insert(lookup[0]->getLabel());
256                                         userLabels.erase(lookup[0]->getLabel());
257                                 }
258                                 
259                                 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
260                                         string saveLabel = lookup[0]->getLabel();
261                                         
262                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
263                                         lookup = input->getSharedRAbundVectors(lastLabel);
264                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
265                                         if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
266                                         normalize(lookup, out);
267                                         
268                                         processedLabels.insert(lookup[0]->getLabel());
269                                         userLabels.erase(lookup[0]->getLabel());
270                                         
271                                         //restore real lastlabel to save below
272                                         lookup[0]->setLabel(saveLabel);
273                                 }
274                                 
275                                 lastLabel = lookup[0]->getLabel();
276                                 //prevent memory leak
277                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
278                                 
279                                 if (m->control_pressed) {  outputTypes.clear(); m->Groups.clear();  out.close(); remove(outputFileName.c_str()); return 0; }
280                                 
281                                 //get next line to process
282                                 lookup = input->getSharedRAbundVectors();                               
283                         }
284                         
285                         if (m->control_pressed) { outputTypes.clear(); m->Groups.clear();  out.close(); remove(outputFileName.c_str());  return 0; }
286                         
287                         //output error messages about any remaining user labels
288                         set<string>::iterator it;
289                         bool needToRun = false;
290                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
291                                 m->mothurOut("Your file does not include the label " + *it); 
292                                 if (processedLabels.count(lastLabel) != 1) {
293                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
294                                         needToRun = true;
295                                 }else {
296                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
297                                 }
298                         }
299                         
300                         //run last label if you need to
301                         if (needToRun == true)  {
302                                 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
303                                 lookup = input->getSharedRAbundVectors(lastLabel);
304                                 
305                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
306                                 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
307                                 normalize(lookup, out);
308                                 
309                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
310                         }
311                         
312                 }else{ //relabund values
313                         lookupFloat = input->getSharedRAbundFloatVectors();
314                         string lastLabel = lookupFloat[0]->getLabel();
315                         
316                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
317                         set<string> processedLabels;
318                         set<string> userLabels = labels;
319                         
320                         //look for groups whose numseqs is below norm and remove them, warning the user
321                         if (norm != 0) { 
322                                 m->Groups.clear();
323                                 vector<SharedRAbundFloatVector*> temp;
324                                 for (int i = 0; i < lookupFloat.size(); i++) {
325                                         if (lookupFloat[i]->getNumSeqs() < norm) { 
326                                                 m->mothurOut(lookupFloat[i]->getGroup() + " contains " + toString(lookupFloat[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
327                                                 delete lookupFloat[i];
328                                         }else { 
329                                                 m->Groups.push_back(lookupFloat[i]->getGroup()); 
330                                                 temp.push_back(lookupFloat[i]);
331                                         }
332                                 } 
333                                 lookupFloat = temp;
334                         }
335                         
336                         //set norm to smallest group number
337                         if (method == "totalgroup") {
338                                 if (norm == 0) { 
339                                         norm = lookupFloat[0]->getNumSeqs();
340                                         for (int i = 1; i < lookupFloat.size(); i++) {
341                                                 if (lookupFloat[i]->getNumSeqs() < norm) { norm = lookupFloat[i]->getNumSeqs();  }
342                                         }  
343                                 }
344                                 
345                                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
346                         }
347                         
348                         //as long as you are not at the end of the file or done wih the lines you want
349                         while((lookupFloat[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
350                                 
351                                 if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } m->Groups.clear();   out.close(); remove(outputFileName.c_str()); return 0; }
352                                 
353                                 if(allLines == 1 || labels.count(lookupFloat[0]->getLabel()) == 1){                     
354                                         
355                                         m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
356                                         
357                                         if (!m->printedHeaders) { lookupFloat[0]->printHeaders(out); }
358                                         normalize(lookupFloat, out);
359                                         
360                                         processedLabels.insert(lookupFloat[0]->getLabel());
361                                         userLabels.erase(lookupFloat[0]->getLabel());
362                                 }
363                                 
364                                 if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
365                                         string saveLabel = lookupFloat[0]->getLabel();
366                                         
367                                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }  
368                                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
369                                         
370                                         m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
371                                         if (!m->printedHeaders) { lookupFloat[0]->printHeaders(out); }
372                                         normalize(lookupFloat, out);
373                                         
374                                         processedLabels.insert(lookupFloat[0]->getLabel());
375                                         userLabels.erase(lookupFloat[0]->getLabel());
376                                         
377                                         //restore real lastlabel to save below
378                                         lookupFloat[0]->setLabel(saveLabel);
379                                 }
380                                 
381                                 lastLabel = lookupFloat[0]->getLabel();
382                                 //prevent memory leak
383                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i]; lookupFloat[i] = NULL; }
384                                 
385                                 if (m->control_pressed) {  outputTypes.clear(); m->Groups.clear();  out.close(); remove(outputFileName.c_str()); return 0; }
386                                 
387                                 //get next line to process
388                                 lookupFloat = input->getSharedRAbundFloatVectors();                             
389                         }
390                         
391                         if (m->control_pressed) { outputTypes.clear(); m->Groups.clear();  out.close(); remove(outputFileName.c_str());  return 0; }
392                         
393                         //output error messages about any remaining user labels
394                         set<string>::iterator it;
395                         bool needToRun = false;
396                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
397                                 m->mothurOut("Your file does not include the label " + *it); 
398                                 if (processedLabels.count(lastLabel) != 1) {
399                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
400                                         needToRun = true;
401                                 }else {
402                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
403                                 }
404                         }
405                         
406                         //run last label if you need to
407                         if (needToRun == true)  {
408                                 for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } }  
409                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
410                                 
411                                 m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
412                                 
413                                 if (!m->printedHeaders) { lookupFloat[0]->printHeaders(out); }
414                                 normalize(lookupFloat, out);
415                                 
416                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
417                         }
418                         
419                 }
420                 //reset groups parameter
421                 m->Groups.clear();  
422                 delete input;
423                 out.close();
424                 
425                 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;}
426                 
427                 m->mothurOutEndLine();
428                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
429                 m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
430                 m->mothurOutEndLine();
431                 
432                 //set shared file as new current sharedfile
433                 string current = "";
434                 itTypes = outputTypes.find("shared");
435                 if (itTypes != outputTypes.end()) {
436                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
437                 }
438                 
439                 return 0;
440         }
441         catch(exception& e) {
442                 m->errorOut(e, "NormalizeSharedCommand", "execute");
443                 exit(1);
444         }
445 }
446 //**********************************************************************************************************************
447
448 int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
449         try {
450                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
451                                 
452                 if (method == "totalgroup") { 
453                                         
454                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
455                                                 
456                                         for (int i = 0; i < thisLookUp.size(); i++) {
457                                                         
458                                                 if (m->control_pressed) { return 0; }
459                                                         
460                                                 int abund = thisLookUp[i]->getAbundance(j);
461                                                         
462                                                 float relabund = abund / (float) thisLookUp[i]->getNumSeqs();
463                                                 float newNorm = relabund * norm;
464                                                 
465                                                 //round to nearest int
466                                                 int finalNorm = (int) floor((newNorm + 0.5));
467                                                 
468                                                 thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
469                                         }
470                                 }
471                                         
472                 }else if (method == "zscore") {
473                         
474                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
475                                 
476                                 if (m->control_pressed) { return 0; }
477                                 
478                                 //calc mean
479                                 float mean = 0.0;
480                                 for (int i = 0; i < thisLookUp.size(); i++) {  mean += thisLookUp[i]->getAbundance(j); }
481                                 mean /= (float) thisLookUp.size();
482                                         
483                                 //calc standard deviation
484                                 float sumSquared = 0.0;
485                                 for (int i = 0; i < thisLookUp.size(); i++) { sumSquared += (((float)thisLookUp[i]->getAbundance(j) - mean) * ((float)thisLookUp[i]->getAbundance(j) - mean)); }
486                                 sumSquared /= (float) thisLookUp.size();
487                                 
488                                 float standardDev = sqrt(sumSquared);
489                                         
490                                 for (int i = 0; i < thisLookUp.size(); i++) {
491                                         int finalNorm = 0;
492                                         if (standardDev != 0) { // stop divide by zero
493                                                 float newNorm = ((float)thisLookUp[i]->getAbundance(j) - mean) / standardDev;
494                                                 //round to nearest int
495                                                 finalNorm = (int) floor((newNorm + 0.5));
496                                         }
497                                         
498                                         thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
499                                 }
500                         }
501                                                 
502                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
503                                 
504                                 
505                                                 
506                 eliminateZeroOTUS(thisLookUp);
507                  
508                 for (int i = 0; i < thisLookUp.size(); i++) {
509                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
510                         thisLookUp[i]->print(out);
511                 }
512                 
513                 return 0;
514         }
515         catch(exception& e) {
516                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
517                 exit(1);
518         }
519 }
520 //**********************************************************************************************************************
521
522 int NormalizeSharedCommand::normalize(vector<SharedRAbundFloatVector*>& thisLookUp, ofstream& out){
523         try {
524                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
525                 
526                 if (method == "totalgroup") { 
527                         
528                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
529                                 
530                                 for (int i = 0; i < thisLookUp.size(); i++) {
531                                         
532                                         if (m->control_pressed) { return 0; }
533                                         
534                                         float abund = thisLookUp[i]->getAbundance(j);
535                                         
536                                         float relabund = abund / (float) thisLookUp[i]->getNumSeqs();
537                                         float newNorm = relabund * norm;
538                                         
539                                         thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
540                                 }
541                         }
542                         
543                 }else if (method == "zscore") {
544                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
545                                 
546                                 if (m->control_pressed) { return 0; }
547                                 
548                                 //calc mean
549                                 float mean = 0.0;
550                                 for (int i = 0; i < thisLookUp.size(); i++) {  mean += thisLookUp[i]->getAbundance(j); }
551                                 mean /= (float) thisLookUp.size();
552                                 
553                                 //calc standard deviation
554                                 float sumSquared = 0.0;
555                                 for (int i = 0; i < thisLookUp.size(); i++) { sumSquared += ((thisLookUp[i]->getAbundance(j) - mean) * (thisLookUp[i]->getAbundance(j) - mean)); }
556                                 sumSquared /= (float) thisLookUp.size();
557                                 
558                                 float standardDev = sqrt(sumSquared);
559                                 
560                                 for (int i = 0; i < thisLookUp.size(); i++) {
561                                         float newNorm = 0.0;
562                                         if (standardDev != 0) { // stop divide by zero
563                                                 newNorm = (thisLookUp[i]->getAbundance(j) - mean) / standardDev;
564                                         }
565                                         thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
566                                 }
567                         }                       
568                         
569                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
570                 
571                 
572                 eliminateZeroOTUS(thisLookUp);
573                 
574                 for (int i = 0; i < thisLookUp.size(); i++) {
575                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
576                         thisLookUp[i]->print(out);
577                 }
578                 
579                 return 0;
580         }
581         catch(exception& e) {
582                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
583                 exit(1);
584         }
585 }
586 //**********************************************************************************************************************
587 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
588         try {
589                 
590                 vector<SharedRAbundVector*> newLookup;
591                 for (int i = 0; i < thislookup.size(); i++) {
592                         SharedRAbundVector* temp = new SharedRAbundVector();
593                         temp->setLabel(thislookup[i]->getLabel());
594                         temp->setGroup(thislookup[i]->getGroup());
595                         newLookup.push_back(temp);
596                 }
597                 
598                 //for each bin
599                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
600                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
601                 
602                         //look at each sharedRabund and make sure they are not all zero
603                         bool allZero = true;
604                         for (int j = 0; j < thislookup.size(); j++) {
605                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
606                         }
607                         
608                         //if they are not all zero add this bin
609                         if (!allZero) {
610                                 for (int j = 0; j < thislookup.size(); j++) {
611                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
612                                 }
613                         }
614                 }
615
616                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
617
618                 thislookup = newLookup;
619                 
620                 return 0;
621  
622         }
623         catch(exception& e) {
624                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
625                 exit(1);
626         }
627 }
628 //**********************************************************************************************************************
629 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
630         try {
631                 
632                 vector<SharedRAbundFloatVector*> newLookup;
633                 for (int i = 0; i < thislookup.size(); i++) {
634                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
635                         temp->setLabel(thislookup[i]->getLabel());
636                         temp->setGroup(thislookup[i]->getGroup());
637                         newLookup.push_back(temp);
638                 }
639                 
640                 //for each bin
641                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
642                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
643                         
644                         //look at each sharedRabund and make sure they are not all zero
645                         bool allZero = true;
646                         for (int j = 0; j < thislookup.size(); j++) {
647                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
648                         }
649                         
650                         //if they are not all zero add this bin
651                         if (!allZero) {
652                                 for (int j = 0; j < thislookup.size(); j++) {
653                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
654                                 }
655                         }
656                 }
657                 
658                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
659                 
660                 thislookup = newLookup;
661                 
662                 return 0;
663                 
664         }
665         catch(exception& e) {
666                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
667                 exit(1);
668         }
669 }
670
671 //**********************************************************************************************************************