]> git.donarmstrong.com Git - mothur.git/blob - normalizesharedcommand.cpp
added shared file type to get.groups and remove.groups
[mothur.git] / normalizesharedcommand.cpp
1 /*
2  *  normalizesharedcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/15/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "normalizesharedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> NormalizeSharedCommand::setParameters(){ 
14         try {
15                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);    
16                 CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prelabund);
17                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
18                 CommandParameter pmethod("method", "Multiple", "totalgroup-zscore", "totalgroup", "", "", "",false,false); parameters.push_back(pmethod);
19                 CommandParameter pnorm("norm", "Number", "", "0", "", "", "",false,false); parameters.push_back(pnorm);
20                 CommandParameter pmakerelabund("makerelabund", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pmakerelabund);
21                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "NormalizeSharedCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string NormalizeSharedCommand::getHelpString(){ 
36         try {
37                 string helpString = "";
38                 helpString += "The normalize.shared command parameters are shared, relabund, groups, method, norm, makerelabund and label.  shared or relabund is required, unless you have a valid current file.\n";
39                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
40                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
41                 helpString += "The method parameter allows you to select what method you would like to use to normalize. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!\n";
42                 helpString += "The makerelabund parameter allows you to convert a shared file to a relabund file before you normalize. default=f.\n";
43                 helpString += "The norm parameter allows you to number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n";
44                 helpString += "The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n";
45                 helpString += "Example normalize.shared(groups=A-B-C, scale=totalgroup).\n";
46                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
47                 helpString += "The normalize.shared command outputs a .norm.shared file.\n";
48                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "NormalizeSharedCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56
57
58 //**********************************************************************************************************************
59 NormalizeSharedCommand::NormalizeSharedCommand(){       
60         try {
61                 abort = true; calledHelp = true; 
62                 setParameters();
63                 vector<string> tempOutNames;
64                 outputTypes["shared"] = tempOutNames;
65         }
66         catch(exception& e) {
67                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
68                 exit(1);
69         }
70 }
71 //**********************************************************************************************************************
72
73 NormalizeSharedCommand::NormalizeSharedCommand(string option) {
74         try {
75                 abort = false; calledHelp = false;   
76                 allLines = 1;
77                 
78                 //allow user to run help
79                 if(option == "help") { help(); abort = true; calledHelp = true; }
80                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
81                 
82                 else {
83                         vector<string> myArray = setParameters();
84                         
85                         OptionParser parser(option);
86                         map<string,string> parameters = parser.getParameters();
87                         map<string,string>::iterator it;
88                         
89                         ValidParameters validParameter;
90                         
91                         //check to make sure all parameters are valid for command
92                         for (it = parameters.begin(); it != parameters.end(); it++) { 
93                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
94                         }
95                         
96                         //initialize outputTypes
97                         vector<string> tempOutNames;
98                         outputTypes["shared"] = tempOutNames;
99                         
100                         //if the user changes the input directory command factory will send this info to us in the output parameter 
101                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
102                         if (inputDir == "not found"){   inputDir = "";          }
103                         else {
104                                 string path;
105                                 it = parameters.find("shared");
106                                 //user has given a template file
107                                 if(it != parameters.end()){ 
108                                         path = m->hasPath(it->second);
109                                         //if the user has not given a path then, add inputdir. else leave path alone.
110                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
111                                 }
112                                 
113                                 it = parameters.find("relabund");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
119                                 }
120                         }
121                         
122                         sharedfile = validParameter.validFile(parameters, "shared", true);
123                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
124                         else if (sharedfile == "not found") { sharedfile = ""; }
125                         else {  format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
126                         
127                         relabundfile = validParameter.validFile(parameters, "relabund", true);
128                         if (relabundfile == "not open") { relabundfile = ""; abort = true; }    
129                         else if (relabundfile == "not found") { relabundfile = ""; }
130                         else {  format = "relabund"; inputfile = relabundfile; m->setRelAbundFile(relabundfile); }
131                         
132                         
133                         if ((sharedfile == "") && (relabundfile == "")) { 
134                                 //is there are current file available for any of these?
135                                 //give priority to shared, then list, then rabund, then sabund
136                                 //if there is a current shared file, use it
137                                 sharedfile = m->getSharedFile(); 
138                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
139                                 else { 
140                                         relabundfile = m->getRelAbundFile(); 
141                                         if (relabundfile != "") { inputfile = relabundfile; format = "relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
142                                         else { 
143                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund, relabund or shared file."); m->mothurOutEndLine(); 
144                                                 abort = true;
145                                         }
146                                 }
147                         }
148                         
149                         
150                         //if the user changes the output directory command factory will send this info to us in the output parameter 
151                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
152                         
153                         
154
155                         //check for optional parameter and set defaults
156                         // ...at some point should added some additional type checking...
157                         label = validParameter.validFile(parameters, "label", false);                   
158                         if (label == "not found") { label = ""; }
159                         else { 
160                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
161                                 else { allLines = 1;  }
162                         }
163                         
164                         groups = validParameter.validFile(parameters, "groups", false);                 
165                         if (groups == "not found") { groups = ""; pickedGroups = false; }
166                         else { 
167                                 pickedGroups = true;
168                                 m->splitAtDash(groups, Groups);
169                                 m->Groups = Groups;
170                         }
171                         
172                         method = validParameter.validFile(parameters, "method", false);                         if (method == "not found") { method = "totalgroup"; }
173                         if ((method != "totalgroup") && (method != "zscore")) {  m->mothurOut(method + " is not a valid scaling option for the normalize.shared command. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!"); m->mothurOutEndLine(); abort = true; }
174                 
175                         string temp = validParameter.validFile(parameters, "norm", false);                              
176                         if (temp == "not found") {  
177                                 norm = 0;  //once you have read, set norm to smallest group number
178                         }else { 
179                                 convert(temp, norm);
180                                 if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; }
181                         }
182                         
183                         temp = validParameter.validFile(parameters, "makerelabund", false);     if (temp == "") { temp = "f"; }
184                         makeRelabund = m->isTrue(temp);
185                 }
186
187         }
188         catch(exception& e) {
189                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
190                 exit(1);
191         }
192 }
193 //**********************************************************************************************************************
194
195 int NormalizeSharedCommand::execute(){
196         try {
197         
198                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
199                 
200                 input = new InputData(inputfile, format);
201                 
202                 //you are reading a sharedfile and you do not want to make relabund
203                 if ((format == "sharedfile") && (!makeRelabund)) {
204                         lookup = input->getSharedRAbundVectors();
205                         string lastLabel = lookup[0]->getLabel();
206                         
207                         //look for groups whose numseqs is below norm and remove them, warning the user
208                         if (norm != 0) { 
209                                 m->Groups.clear();
210                                 vector<SharedRAbundVector*> temp;
211                                 for (int i = 0; i < lookup.size(); i++) {
212                                         if (lookup[i]->getNumSeqs() < norm) { 
213                                                 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
214                                                 delete lookup[i];
215                                         }else { 
216                                                 m->Groups.push_back(lookup[i]->getGroup()); 
217                                                 temp.push_back(lookup[i]);
218                                         }
219                                 } 
220                                 lookup = temp;
221                         }
222                         
223                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
224                         set<string> processedLabels;
225                         set<string> userLabels = labels;
226                         
227                         if (method == "totalgroup") {
228                                 //set norm to smallest group number
229                                 if (norm == 0) { 
230                                         norm = lookup[0]->getNumSeqs();
231                                         for (int i = 1; i < lookup.size(); i++) {
232                                                 if (lookup[i]->getNumSeqs() < norm) { norm = lookup[i]->getNumSeqs();  }
233                                         }  
234                                 }
235                                 
236                                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
237                         }
238                         
239                         
240                         //as long as you are not at the end of the file or done wih the lines you want
241                         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
242                                 
243                                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear();  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->Groups.clear();   return 0; }
244                                 
245                                 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
246                                         
247                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
248                                         normalize(lookup);
249                                         
250                                         processedLabels.insert(lookup[0]->getLabel());
251                                         userLabels.erase(lookup[0]->getLabel());
252                                 }
253                                 
254                                 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
255                                         string saveLabel = lookup[0]->getLabel();
256                                         
257                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
258                                         lookup = input->getSharedRAbundVectors(lastLabel);
259                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
260                                         
261                                         normalize(lookup);
262                                         
263                                         processedLabels.insert(lookup[0]->getLabel());
264                                         userLabels.erase(lookup[0]->getLabel());
265                                         
266                                         //restore real lastlabel to save below
267                                         lookup[0]->setLabel(saveLabel);
268                                 }
269                                 
270                                 lastLabel = lookup[0]->getLabel();
271                                 //prevent memory leak
272                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
273                                 
274                                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->Groups.clear();  return 0; }
275                                 
276                                 //get next line to process
277                                 lookup = input->getSharedRAbundVectors();                               
278                         }
279                         
280                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->Groups.clear();   return 0; }
281                         
282                         //output error messages about any remaining user labels
283                         set<string>::iterator it;
284                         bool needToRun = false;
285                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
286                                 m->mothurOut("Your file does not include the label " + *it); 
287                                 if (processedLabels.count(lastLabel) != 1) {
288                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
289                                         needToRun = true;
290                                 }else {
291                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
292                                 }
293                         }
294                         
295                         //run last label if you need to
296                         if (needToRun == true)  {
297                                 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
298                                 lookup = input->getSharedRAbundVectors(lastLabel);
299                                 
300                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
301                                 
302                                 normalize(lookup);
303                                 
304                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
305                         }
306                         
307                 }else{ //relabund values
308                         lookupFloat = input->getSharedRAbundFloatVectors();
309                         string lastLabel = lookupFloat[0]->getLabel();
310                         
311                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
312                         set<string> processedLabels;
313                         set<string> userLabels = labels;
314                         
315                         //look for groups whose numseqs is below norm and remove them, warning the user
316                         if (norm != 0) { 
317                                 m->Groups.clear();
318                                 vector<SharedRAbundFloatVector*> temp;
319                                 for (int i = 0; i < lookupFloat.size(); i++) {
320                                         if (lookupFloat[i]->getNumSeqs() < norm) { 
321                                                 m->mothurOut(lookupFloat[i]->getGroup() + " contains " + toString(lookupFloat[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
322                                                 delete lookupFloat[i];
323                                         }else { 
324                                                 m->Groups.push_back(lookupFloat[i]->getGroup()); 
325                                                 temp.push_back(lookupFloat[i]);
326                                         }
327                                 } 
328                                 lookupFloat = temp;
329                         }
330                         
331                         //set norm to smallest group number
332                         if (method == "totalgroup") {
333                                 if (norm == 0) { 
334                                         norm = lookupFloat[0]->getNumSeqs();
335                                         for (int i = 1; i < lookupFloat.size(); i++) {
336                                                 if (lookupFloat[i]->getNumSeqs() < norm) { norm = lookupFloat[i]->getNumSeqs();  }
337                                         }  
338                                 }
339                                 
340                                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
341                         }
342                         
343                         //as long as you are not at the end of the file or done wih the lines you want
344                         while((lookupFloat[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
345                                 
346                                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear();  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } m->Groups.clear();  return 0; }
347                                 
348                                 if(allLines == 1 || labels.count(lookupFloat[0]->getLabel()) == 1){                     
349                                         
350                                         m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
351                                         
352                                         normalize(lookupFloat);
353                                         
354                                         processedLabels.insert(lookupFloat[0]->getLabel());
355                                         userLabels.erase(lookupFloat[0]->getLabel());
356                                 }
357                                 
358                                 if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
359                                         string saveLabel = lookupFloat[0]->getLabel();
360                                         
361                                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }  
362                                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
363                                         
364                                         m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
365                 
366                                         normalize(lookupFloat);
367                                         
368                                         processedLabels.insert(lookupFloat[0]->getLabel());
369                                         userLabels.erase(lookupFloat[0]->getLabel());
370                                         
371                                         //restore real lastlabel to save below
372                                         lookupFloat[0]->setLabel(saveLabel);
373                                 }
374                                 
375                                 lastLabel = lookupFloat[0]->getLabel();
376                                 //prevent memory leak
377                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i]; lookupFloat[i] = NULL; }
378                                 
379                                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->Groups.clear();   return 0; }
380                                 
381                                 //get next line to process
382                                 lookupFloat = input->getSharedRAbundFloatVectors();                             
383                         }
384                         
385                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->Groups.clear();   return 0; }
386                         
387                         //output error messages about any remaining user labels
388                         set<string>::iterator it;
389                         bool needToRun = false;
390                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
391                                 m->mothurOut("Your file does not include the label " + *it); 
392                                 if (processedLabels.count(lastLabel) != 1) {
393                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
394                                         needToRun = true;
395                                 }else {
396                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
397                                 }
398                         }
399                         
400                         //run last label if you need to
401                         if (needToRun == true)  {
402                                 for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } }  
403                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
404                                 
405                                 m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
406                                 
407                                 normalize(lookupFloat);
408                                 
409                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
410                         }
411                         
412                 }
413                 //reset groups parameter
414                 m->Groups.clear();  
415                 delete input;
416                 
417                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear(); return 0;}
418                 
419                 m->mothurOutEndLine();
420                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
421                 //m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
422                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
423                 m->mothurOutEndLine();
424                 
425                 //set shared file as new current sharedfile
426                 string current = "";
427                 itTypes = outputTypes.find("shared");
428                 if (itTypes != outputTypes.end()) {
429                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
430                 }
431                 
432                 return 0;
433         }
434         catch(exception& e) {
435                 m->errorOut(e, "NormalizeSharedCommand", "execute");
436                 exit(1);
437         }
438 }
439 //**********************************************************************************************************************
440
441 int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp){
442         try {
443                 //save mothurOut's binLabels to restore for next label
444                 vector<string> saveBinLabels = m->currentBinLabels;
445                 
446                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
447                 
448                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + thisLookUp[0]->getLabel() + ".norm.shared";
449                 ofstream out;
450                 m->openOutputFile(outputFileName, out);
451                 outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
452                                 
453                 if (method == "totalgroup") { 
454                         
455                         //save numSeqs since they will change as the data is normalized
456                         vector<int> sizes;
457                         for (int i = 0; i < thisLookUp.size(); i++) {  sizes.push_back(thisLookUp[i]->getNumSeqs()); }
458                                         
459                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
460                                                 
461                                         for (int i = 0; i < thisLookUp.size(); i++) {
462                                                         
463                                                 if (m->control_pressed) { out.close(); return 0; }
464                                                         
465                                                 int abund = thisLookUp[i]->getAbundance(j);
466                                                         
467                                                 float relabund = abund / (float) sizes[i];
468                                                 float newNorm = relabund * norm;
469                                                 
470                                                 //round to nearest int
471                                                 int finalNorm = (int) floor((newNorm + 0.5));
472                                                 
473                                                 thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
474                                         }
475                                 }
476                                         
477                 }else if (method == "zscore") {
478                         
479                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
480                                 
481                                 if (m->control_pressed) { out.close(); return 0; }
482                                 
483                                 //calc mean
484                                 float mean = 0.0;
485                                 for (int i = 0; i < thisLookUp.size(); i++) {  mean += thisLookUp[i]->getAbundance(j); }
486                                 mean /= (float) thisLookUp.size();
487                                         
488                                 //calc standard deviation
489                                 float sumSquared = 0.0;
490                                 for (int i = 0; i < thisLookUp.size(); i++) { sumSquared += (((float)thisLookUp[i]->getAbundance(j) - mean) * ((float)thisLookUp[i]->getAbundance(j) - mean)); }
491                                 sumSquared /= (float) thisLookUp.size();
492                                 
493                                 float standardDev = sqrt(sumSquared);
494                                         
495                                 for (int i = 0; i < thisLookUp.size(); i++) {
496                                         int finalNorm = 0;
497                                         if (standardDev != 0) { // stop divide by zero
498                                                 float newNorm = ((float)thisLookUp[i]->getAbundance(j) - mean) / standardDev;
499                                                 //round to nearest int
500                                                 finalNorm = (int) floor((newNorm + 0.5));
501                                         }
502                                         
503                                         thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
504                                 }
505                         }
506                                                 
507                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
508                                 
509                                 
510                                                 
511                 eliminateZeroOTUS(thisLookUp);
512                 
513                 thisLookUp[0]->printHeaders(out); 
514                  
515                 for (int i = 0; i < thisLookUp.size(); i++) {
516                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
517                         thisLookUp[i]->print(out);
518                 }
519                 
520                 out.close();
521                 
522                 m->currentBinLabels = saveBinLabels;
523                 
524                 return 0;
525         }
526         catch(exception& e) {
527                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
528                 exit(1);
529         }
530 }
531 //**********************************************************************************************************************
532
533 int NormalizeSharedCommand::normalize(vector<SharedRAbundFloatVector*>& thisLookUp){
534         try {
535                 
536                 //save mothurOut's binLabels to restore for next label
537                 vector<string> saveBinLabels = m->currentBinLabels;
538                 
539                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + thisLookUp[0]->getLabel() + ".norm.shared";
540                 ofstream out;
541                 m->openOutputFile(outputFileName, out);
542                 outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
543                 
544                 
545                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
546                 
547                 if (method == "totalgroup") { 
548                         
549                         //save numSeqs since they will change as the data is normalized
550                         vector<float> sizes;
551                         for (int i = 0; i < thisLookUp.size(); i++) {  sizes.push_back(thisLookUp[i]->getNumSeqs()); }
552                         
553                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
554                                 
555                                 for (int i = 0; i < thisLookUp.size(); i++) {
556                                         
557                                         if (m->control_pressed) { out.close(); return 0; }
558                                         
559                                         float abund = thisLookUp[i]->getAbundance(j);
560                                         
561                                         float relabund = abund / (float) sizes[i];
562                                         float newNorm = relabund * norm;
563                                         
564                                         thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
565                                 }
566                         }
567                         
568                 }else if (method == "zscore") {
569                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
570                                 
571                                 if (m->control_pressed) { out.close(); return 0; }
572                                 
573                                 //calc mean
574                                 float mean = 0.0;
575                                 for (int i = 0; i < thisLookUp.size(); i++) {  mean += thisLookUp[i]->getAbundance(j); }
576                                 mean /= (float) thisLookUp.size();
577                                 
578                                 //calc standard deviation
579                                 float sumSquared = 0.0;
580                                 for (int i = 0; i < thisLookUp.size(); i++) { sumSquared += ((thisLookUp[i]->getAbundance(j) - mean) * (thisLookUp[i]->getAbundance(j) - mean)); }
581                                 sumSquared /= (float) thisLookUp.size();
582                                 
583                                 float standardDev = sqrt(sumSquared);
584                                 
585                                 for (int i = 0; i < thisLookUp.size(); i++) {
586                                         float newNorm = 0.0;
587                                         if (standardDev != 0) { // stop divide by zero
588                                                 newNorm = (thisLookUp[i]->getAbundance(j) - mean) / standardDev;
589                                         }
590                                         thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
591                                 }
592                         }                       
593                         
594                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
595                 
596                 
597                 eliminateZeroOTUS(thisLookUp);
598                 
599                 thisLookUp[0]->printHeaders(out); 
600                 
601                 for (int i = 0; i < thisLookUp.size(); i++) {
602                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
603                         thisLookUp[i]->print(out);
604                 }
605                 
606                 out.close();
607                 
608                 m->currentBinLabels = saveBinLabels;
609                 
610                 return 0;
611         }
612         catch(exception& e) {
613                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
614                 exit(1);
615         }
616 }
617 //**********************************************************************************************************************
618 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
619         try {
620                 
621                 vector<SharedRAbundVector*> newLookup;
622                 for (int i = 0; i < thislookup.size(); i++) {
623                         SharedRAbundVector* temp = new SharedRAbundVector();
624                         temp->setLabel(thislookup[i]->getLabel());
625                         temp->setGroup(thislookup[i]->getGroup());
626                         newLookup.push_back(temp);
627                 }
628                 
629                 //for each bin
630                 vector<string> newBinLabels;
631                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
632                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
633                 
634                         //look at each sharedRabund and make sure they are not all zero
635                         bool allZero = true;
636                         for (int j = 0; j < thislookup.size(); j++) {
637                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
638                         }
639                         
640                         //if they are not all zero add this bin
641                         if (!allZero) {
642                                 for (int j = 0; j < thislookup.size(); j++) {
643                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
644                                 }
645                                 //if there is a bin label use it otherwise make one
646                                 string binLabel = "Otu" + toString(i+1);
647                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
648                                 
649                                 newBinLabels.push_back(binLabel);
650                         }
651                 }
652
653                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
654
655                 thislookup = newLookup;
656                 m->currentBinLabels = newBinLabels;
657                 
658                 return 0;
659  
660         }
661         catch(exception& e) {
662                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
663                 exit(1);
664         }
665 }
666 //**********************************************************************************************************************
667 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
668         try {
669                 
670                 vector<SharedRAbundFloatVector*> newLookup;
671                 for (int i = 0; i < thislookup.size(); i++) {
672                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
673                         temp->setLabel(thislookup[i]->getLabel());
674                         temp->setGroup(thislookup[i]->getGroup());
675                         newLookup.push_back(temp);
676                 }
677                 
678                 //for each bin
679                 vector<string> newBinLabels;
680                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
681                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
682                         
683                         //look at each sharedRabund and make sure they are not all zero
684                         bool allZero = true;
685                         for (int j = 0; j < thislookup.size(); j++) {
686                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
687                         }
688                         
689                         //if they are not all zero add this bin
690                         if (!allZero) {
691                                 for (int j = 0; j < thislookup.size(); j++) {
692                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
693                                 }
694                                 //if there is a bin label use it otherwise make one
695                                 string binLabel = "Otu" + toString(i+1);
696                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
697                                 
698                                 newBinLabels.push_back(binLabel);
699                         }
700                 }
701                 
702                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
703                 
704                 thislookup = newLookup;
705                 m->currentBinLabels = newBinLabels;
706                 
707                 return 0;
708                 
709         }
710         catch(exception& e) {
711                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
712                 exit(1);
713         }
714 }
715
716 //**********************************************************************************************************************