]> git.donarmstrong.com Git - mothur.git/blob - normalizesharedcommand.cpp
1.23.0
[mothur.git] / normalizesharedcommand.cpp
1 /*
2  *  normalizesharedcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/15/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "normalizesharedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> NormalizeSharedCommand::setParameters(){ 
14         try {
15                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);    
16                 CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prelabund);
17                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
18                 CommandParameter pmethod("method", "Multiple", "totalgroup-zscore", "totalgroup", "", "", "",false,false); parameters.push_back(pmethod);
19                 CommandParameter pnorm("norm", "Number", "", "0", "", "", "",false,false); parameters.push_back(pnorm);
20                 CommandParameter pmakerelabund("makerelabund", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pmakerelabund);
21                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "NormalizeSharedCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string NormalizeSharedCommand::getHelpString(){ 
36         try {
37                 string helpString = "";
38                 helpString += "The normalize.shared command parameters are shared, relabund, groups, method, norm, makerelabund and label.  shared or relabund is required, unless you have a valid current file.\n";
39                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
40                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
41                 helpString += "The method parameter allows you to select what method you would like to use to normalize. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!\n";
42                 helpString += "The makerelabund parameter allows you to convert a shared file to a relabund file before you normalize. default=f.\n";
43                 helpString += "The norm parameter allows you to number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n";
44                 helpString += "The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n";
45                 helpString += "Example normalize.shared(groups=A-B-C, scale=totalgroup).\n";
46                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
47                 helpString += "The normalize.shared command outputs a .norm.shared file.\n";
48                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "NormalizeSharedCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56
57
58 //**********************************************************************************************************************
59 NormalizeSharedCommand::NormalizeSharedCommand(){       
60         try {
61                 abort = true; calledHelp = true; 
62                 setParameters();
63                 vector<string> tempOutNames;
64                 outputTypes["shared"] = tempOutNames;
65         }
66         catch(exception& e) {
67                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
68                 exit(1);
69         }
70 }
71 //**********************************************************************************************************************
72
73 NormalizeSharedCommand::NormalizeSharedCommand(string option) {
74         try {
75                 abort = false; calledHelp = false;   
76                 allLines = 1;
77                 
78                 //allow user to run help
79                 if(option == "help") { help(); abort = true; calledHelp = true; }
80                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
81                 
82                 else {
83                         vector<string> myArray = setParameters();
84                         
85                         OptionParser parser(option);
86                         map<string,string> parameters = parser.getParameters();
87                         map<string,string>::iterator it;
88                         
89                         ValidParameters validParameter;
90                         
91                         //check to make sure all parameters are valid for command
92                         for (it = parameters.begin(); it != parameters.end(); it++) { 
93                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
94                         }
95                         
96                         //initialize outputTypes
97                         vector<string> tempOutNames;
98                         outputTypes["shared"] = tempOutNames;
99                         
100                         //if the user changes the input directory command factory will send this info to us in the output parameter 
101                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
102                         if (inputDir == "not found"){   inputDir = "";          }
103                         else {
104                                 string path;
105                                 it = parameters.find("shared");
106                                 //user has given a template file
107                                 if(it != parameters.end()){ 
108                                         path = m->hasPath(it->second);
109                                         //if the user has not given a path then, add inputdir. else leave path alone.
110                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
111                                 }
112                                 
113                                 it = parameters.find("relabund");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
119                                 }
120                         }
121                         
122                         sharedfile = validParameter.validFile(parameters, "shared", true);
123                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
124                         else if (sharedfile == "not found") { sharedfile = ""; }
125                         else {  format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
126                         
127                         relabundfile = validParameter.validFile(parameters, "relabund", true);
128                         if (relabundfile == "not open") { relabundfile = ""; abort = true; }    
129                         else if (relabundfile == "not found") { relabundfile = ""; }
130                         else {  format = "relabund"; inputfile = relabundfile; m->setRelAbundFile(relabundfile); }
131                         
132                         
133                         if ((sharedfile == "") && (relabundfile == "")) { 
134                                 //is there are current file available for any of these?
135                                 //give priority to shared, then list, then rabund, then sabund
136                                 //if there is a current shared file, use it
137                                 sharedfile = m->getSharedFile(); 
138                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
139                                 else { 
140                                         relabundfile = m->getRelAbundFile(); 
141                                         if (relabundfile != "") { inputfile = relabundfile; format = "relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
142                                         else { 
143                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund, relabund or shared file."); m->mothurOutEndLine(); 
144                                                 abort = true;
145                                         }
146                                 }
147                         }
148                         
149                         
150                         //if the user changes the output directory command factory will send this info to us in the output parameter 
151                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
152                         
153                         
154
155                         //check for optional parameter and set defaults
156                         // ...at some point should added some additional type checking...
157                         label = validParameter.validFile(parameters, "label", false);                   
158                         if (label == "not found") { label = ""; }
159                         else { 
160                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
161                                 else { allLines = 1;  }
162                         }
163                         
164                         groups = validParameter.validFile(parameters, "groups", false);                 
165                         if (groups == "not found") { groups = ""; pickedGroups = false; }
166                         else { 
167                                 pickedGroups = true;
168                                 m->splitAtDash(groups, Groups);
169                                 m->setGroups(Groups);
170                         }
171                         
172                         method = validParameter.validFile(parameters, "method", false);                         if (method == "not found") { method = "totalgroup"; }
173                         if ((method != "totalgroup") && (method != "zscore")) {  m->mothurOut(method + " is not a valid scaling option for the normalize.shared command. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!"); m->mothurOutEndLine(); abort = true; }
174                 
175                         string temp = validParameter.validFile(parameters, "norm", false);                              
176                         if (temp == "not found") {  
177                                 norm = 0;  //once you have read, set norm to smallest group number
178                         }else { 
179                                 m->mothurConvert(temp, norm);
180                                 if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; }
181                         }
182                         
183                         temp = validParameter.validFile(parameters, "makerelabund", false);     if (temp == "") { temp = "f"; }
184                         makeRelabund = m->isTrue(temp);
185                 }
186
187         }
188         catch(exception& e) {
189                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
190                 exit(1);
191         }
192 }
193 //**********************************************************************************************************************
194
195 int NormalizeSharedCommand::execute(){
196         try {
197         
198                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
199                 
200                 input = new InputData(inputfile, format);
201                 
202                 //you are reading a sharedfile and you do not want to make relabund
203                 if ((format == "sharedfile") && (!makeRelabund)) {
204                         lookup = input->getSharedRAbundVectors();
205                         string lastLabel = lookup[0]->getLabel();
206                         
207                         //look for groups whose numseqs is below norm and remove them, warning the user
208                         if (norm != 0) { 
209                                 m->clearGroups();
210                                 vector<string> mGroups;
211                                 vector<SharedRAbundVector*> temp;
212                                 for (int i = 0; i < lookup.size(); i++) {
213                                         if (lookup[i]->getNumSeqs() < norm) { 
214                                                 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
215                                                 delete lookup[i];
216                                         }else { 
217                                                 mGroups.push_back(lookup[i]->getGroup()); 
218                                                 temp.push_back(lookup[i]);
219                                         }
220                                 } 
221                                 lookup = temp;
222                                 m->setGroups(mGroups);
223                         }
224                         
225                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
226                         set<string> processedLabels;
227                         set<string> userLabels = labels;
228                         
229                         if (method == "totalgroup") {
230                                 //set norm to smallest group number
231                                 if (norm == 0) { 
232                                         norm = lookup[0]->getNumSeqs();
233                                         for (int i = 1; i < lookup.size(); i++) {
234                                                 if (lookup[i]->getNumSeqs() < norm) { norm = lookup[i]->getNumSeqs();  }
235                                         }  
236                                 }
237                                 
238                                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
239                         }
240                         
241                         
242                         //as long as you are not at the end of the file or done wih the lines you want
243                         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
244                                 
245                                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear();  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->clearGroups();   return 0; }
246                                 
247                                 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
248                                         
249                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
250                                         normalize(lookup);
251                                         
252                                         processedLabels.insert(lookup[0]->getLabel());
253                                         userLabels.erase(lookup[0]->getLabel());
254                                 }
255                                 
256                                 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
257                                         string saveLabel = lookup[0]->getLabel();
258                                         
259                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
260                                         lookup = input->getSharedRAbundVectors(lastLabel);
261                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
262                                         
263                                         normalize(lookup);
264                                         
265                                         processedLabels.insert(lookup[0]->getLabel());
266                                         userLabels.erase(lookup[0]->getLabel());
267                                         
268                                         //restore real lastlabel to save below
269                                         lookup[0]->setLabel(saveLabel);
270                                 }
271                                 
272                                 lastLabel = lookup[0]->getLabel();
273                                 //prevent memory leak
274                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
275                                 
276                                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->clearGroups();  return 0; }
277                                 
278                                 //get next line to process
279                                 lookup = input->getSharedRAbundVectors();                               
280                         }
281                         
282                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->clearGroups();   return 0; }
283                         
284                         //output error messages about any remaining user labels
285                         set<string>::iterator it;
286                         bool needToRun = false;
287                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
288                                 m->mothurOut("Your file does not include the label " + *it); 
289                                 if (processedLabels.count(lastLabel) != 1) {
290                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
291                                         needToRun = true;
292                                 }else {
293                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
294                                 }
295                         }
296                         
297                         //run last label if you need to
298                         if (needToRun == true)  {
299                                 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
300                                 lookup = input->getSharedRAbundVectors(lastLabel);
301                                 
302                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
303                                 
304                                 normalize(lookup);
305                                 
306                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
307                         }
308                         
309                 }else{ //relabund values
310                         lookupFloat = input->getSharedRAbundFloatVectors();
311                         string lastLabel = lookupFloat[0]->getLabel();
312                         
313                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
314                         set<string> processedLabels;
315                         set<string> userLabels = labels;
316                         
317                         //look for groups whose numseqs is below norm and remove them, warning the user
318                         if (norm != 0) { 
319                                 m->clearGroups();
320                                 vector<string> mGroups;
321                                 vector<SharedRAbundFloatVector*> temp;
322                                 for (int i = 0; i < lookupFloat.size(); i++) {
323                                         if (lookupFloat[i]->getNumSeqs() < norm) { 
324                                                 m->mothurOut(lookupFloat[i]->getGroup() + " contains " + toString(lookupFloat[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
325                                                 delete lookupFloat[i];
326                                         }else { 
327                                                 mGroups.push_back(lookupFloat[i]->getGroup()); 
328                                                 temp.push_back(lookupFloat[i]);
329                                         }
330                                 } 
331                                 lookupFloat = temp;
332                                 m->setGroups(mGroups);
333                         }
334                         
335                         //set norm to smallest group number
336                         if (method == "totalgroup") {
337                                 if (norm == 0) { 
338                                         norm = lookupFloat[0]->getNumSeqs();
339                                         for (int i = 1; i < lookupFloat.size(); i++) {
340                                                 if (lookupFloat[i]->getNumSeqs() < norm) { norm = lookupFloat[i]->getNumSeqs();  }
341                                         }  
342                                 }
343                                 
344                                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
345                         }
346                         
347                         //as long as you are not at the end of the file or done wih the lines you want
348                         while((lookupFloat[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
349                                 
350                                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear();  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } m->clearGroups();  return 0; }
351                                 
352                                 if(allLines == 1 || labels.count(lookupFloat[0]->getLabel()) == 1){                     
353                                         
354                                         m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
355                                         
356                                         normalize(lookupFloat);
357                                         
358                                         processedLabels.insert(lookupFloat[0]->getLabel());
359                                         userLabels.erase(lookupFloat[0]->getLabel());
360                                 }
361                                 
362                                 if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
363                                         string saveLabel = lookupFloat[0]->getLabel();
364                                         
365                                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }  
366                                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
367                                         
368                                         m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
369                 
370                                         normalize(lookupFloat);
371                                         
372                                         processedLabels.insert(lookupFloat[0]->getLabel());
373                                         userLabels.erase(lookupFloat[0]->getLabel());
374                                         
375                                         //restore real lastlabel to save below
376                                         lookupFloat[0]->setLabel(saveLabel);
377                                 }
378                                 
379                                 lastLabel = lookupFloat[0]->getLabel();
380                                 //prevent memory leak
381                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i]; lookupFloat[i] = NULL; }
382                                 
383                                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->clearGroups();   return 0; }
384                                 
385                                 //get next line to process
386                                 lookupFloat = input->getSharedRAbundFloatVectors();                             
387                         }
388                         
389                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->clearGroups();   return 0; }
390                         
391                         //output error messages about any remaining user labels
392                         set<string>::iterator it;
393                         bool needToRun = false;
394                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
395                                 m->mothurOut("Your file does not include the label " + *it); 
396                                 if (processedLabels.count(lastLabel) != 1) {
397                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
398                                         needToRun = true;
399                                 }else {
400                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
401                                 }
402                         }
403                         
404                         //run last label if you need to
405                         if (needToRun == true)  {
406                                 for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } }  
407                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
408                                 
409                                 m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
410                                 
411                                 normalize(lookupFloat);
412                                 
413                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
414                         }
415                         
416                 }
417                 //reset groups parameter
418                 m->clearGroups();  
419                 delete input;
420                 
421                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear(); return 0;}
422                 
423                 m->mothurOutEndLine();
424                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
425                 //m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
426                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
427                 m->mothurOutEndLine();
428                 
429                 //set shared file as new current sharedfile
430                 string current = "";
431                 itTypes = outputTypes.find("shared");
432                 if (itTypes != outputTypes.end()) {
433                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
434                 }
435                 
436                 return 0;
437         }
438         catch(exception& e) {
439                 m->errorOut(e, "NormalizeSharedCommand", "execute");
440                 exit(1);
441         }
442 }
443 //**********************************************************************************************************************
444
445 int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp){
446         try {
447                 //save mothurOut's binLabels to restore for next label
448                 vector<string> saveBinLabels = m->currentBinLabels;
449                 
450                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
451                 
452                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + thisLookUp[0]->getLabel() + ".norm.shared";
453                 ofstream out;
454                 m->openOutputFile(outputFileName, out);
455                 outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
456                                 
457                 if (method == "totalgroup") { 
458                         
459                         //save numSeqs since they will change as the data is normalized
460                         vector<int> sizes;
461                         for (int i = 0; i < thisLookUp.size(); i++) {  sizes.push_back(thisLookUp[i]->getNumSeqs()); }
462                                         
463                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
464                                                 
465                                         for (int i = 0; i < thisLookUp.size(); i++) {
466                                                         
467                                                 if (m->control_pressed) { out.close(); return 0; }
468                                                         
469                                                 int abund = thisLookUp[i]->getAbundance(j);
470                                                         
471                                                 float relabund = abund / (float) sizes[i];
472                                                 float newNorm = relabund * norm;
473                                                 
474                                                 //round to nearest int
475                                                 int finalNorm = (int) floor((newNorm + 0.5));
476                                                 
477                                                 thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
478                                         }
479                                 }
480                                         
481                 }else if (method == "zscore") {
482                         
483                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
484                                 
485                                 if (m->control_pressed) { out.close(); return 0; }
486                                 
487                                 //calc mean
488                                 float mean = 0.0;
489                                 for (int i = 0; i < thisLookUp.size(); i++) {  mean += thisLookUp[i]->getAbundance(j); }
490                                 mean /= (float) thisLookUp.size();
491                                         
492                                 //calc standard deviation
493                                 float sumSquared = 0.0;
494                                 for (int i = 0; i < thisLookUp.size(); i++) { sumSquared += (((float)thisLookUp[i]->getAbundance(j) - mean) * ((float)thisLookUp[i]->getAbundance(j) - mean)); }
495                                 sumSquared /= (float) thisLookUp.size();
496                                 
497                                 float standardDev = sqrt(sumSquared);
498                                         
499                                 for (int i = 0; i < thisLookUp.size(); i++) {
500                                         int finalNorm = 0;
501                                         if (standardDev != 0) { // stop divide by zero
502                                                 float newNorm = ((float)thisLookUp[i]->getAbundance(j) - mean) / standardDev;
503                                                 //round to nearest int
504                                                 finalNorm = (int) floor((newNorm + 0.5));
505                                         }
506                                         
507                                         thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
508                                 }
509                         }
510                                                 
511                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
512                                 
513                                 
514                                                 
515                 eliminateZeroOTUS(thisLookUp);
516                 
517                 thisLookUp[0]->printHeaders(out); 
518                  
519                 for (int i = 0; i < thisLookUp.size(); i++) {
520                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
521                         thisLookUp[i]->print(out);
522                 }
523                 
524                 out.close();
525                 
526                 m->currentBinLabels = saveBinLabels;
527                 
528                 return 0;
529         }
530         catch(exception& e) {
531                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
532                 exit(1);
533         }
534 }
535 //**********************************************************************************************************************
536
537 int NormalizeSharedCommand::normalize(vector<SharedRAbundFloatVector*>& thisLookUp){
538         try {
539                 
540                 //save mothurOut's binLabels to restore for next label
541                 vector<string> saveBinLabels = m->currentBinLabels;
542                 
543                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + thisLookUp[0]->getLabel() + ".norm.shared";
544                 ofstream out;
545                 m->openOutputFile(outputFileName, out);
546                 outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
547                 
548                 
549                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
550                 
551                 if (method == "totalgroup") { 
552                         
553                         //save numSeqs since they will change as the data is normalized
554                         vector<float> sizes;
555                         for (int i = 0; i < thisLookUp.size(); i++) {  sizes.push_back(thisLookUp[i]->getNumSeqs()); }
556                         
557                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
558                                 
559                                 for (int i = 0; i < thisLookUp.size(); i++) {
560                                         
561                                         if (m->control_pressed) { out.close(); return 0; }
562                                         
563                                         float abund = thisLookUp[i]->getAbundance(j);
564                                         
565                                         float relabund = abund / (float) sizes[i];
566                                         float newNorm = relabund * norm;
567                                         
568                                         thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
569                                 }
570                         }
571                         
572                 }else if (method == "zscore") {
573                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
574                                 
575                                 if (m->control_pressed) { out.close(); return 0; }
576                                 
577                                 //calc mean
578                                 float mean = 0.0;
579                                 for (int i = 0; i < thisLookUp.size(); i++) {  mean += thisLookUp[i]->getAbundance(j); }
580                                 mean /= (float) thisLookUp.size();
581                                 
582                                 //calc standard deviation
583                                 float sumSquared = 0.0;
584                                 for (int i = 0; i < thisLookUp.size(); i++) { sumSquared += ((thisLookUp[i]->getAbundance(j) - mean) * (thisLookUp[i]->getAbundance(j) - mean)); }
585                                 sumSquared /= (float) thisLookUp.size();
586                                 
587                                 float standardDev = sqrt(sumSquared);
588                                 
589                                 for (int i = 0; i < thisLookUp.size(); i++) {
590                                         float newNorm = 0.0;
591                                         if (standardDev != 0) { // stop divide by zero
592                                                 newNorm = (thisLookUp[i]->getAbundance(j) - mean) / standardDev;
593                                         }
594                                         thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
595                                 }
596                         }                       
597                         
598                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
599                 
600                 
601                 eliminateZeroOTUS(thisLookUp);
602                 
603                 thisLookUp[0]->printHeaders(out); 
604                 
605                 for (int i = 0; i < thisLookUp.size(); i++) {
606                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
607                         thisLookUp[i]->print(out);
608                 }
609                 
610                 out.close();
611                 
612                 m->currentBinLabels = saveBinLabels;
613                 
614                 return 0;
615         }
616         catch(exception& e) {
617                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
618                 exit(1);
619         }
620 }
621 //**********************************************************************************************************************
622 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
623         try {
624                 
625                 vector<SharedRAbundVector*> newLookup;
626                 for (int i = 0; i < thislookup.size(); i++) {
627                         SharedRAbundVector* temp = new SharedRAbundVector();
628                         temp->setLabel(thislookup[i]->getLabel());
629                         temp->setGroup(thislookup[i]->getGroup());
630                         newLookup.push_back(temp);
631                 }
632                 
633                 //for each bin
634                 vector<string> newBinLabels;
635                 string snumBins = toString(thislookup[0]->getNumBins());
636                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
637                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
638                 
639                         //look at each sharedRabund and make sure they are not all zero
640                         bool allZero = true;
641                         for (int j = 0; j < thislookup.size(); j++) {
642                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
643                         }
644                         
645                         //if they are not all zero add this bin
646                         if (!allZero) {
647                                 for (int j = 0; j < thislookup.size(); j++) {
648                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
649                                 }
650                                 //if there is a bin label use it otherwise make one
651                                 string binLabel = "Otu";
652                                 string sbinNumber = toString(i+1);
653                                 if (sbinNumber.length() < snumBins.length()) { 
654                                         int diff = snumBins.length() - sbinNumber.length();
655                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
656                                 }
657                                 binLabel += sbinNumber; 
658                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
659                                 
660                                 newBinLabels.push_back(binLabel);
661                         }
662                 }
663
664                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
665
666                 thislookup = newLookup;
667                 m->currentBinLabels = newBinLabels;
668                 
669                 return 0;
670  
671         }
672         catch(exception& e) {
673                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
674                 exit(1);
675         }
676 }
677 //**********************************************************************************************************************
678 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
679         try {
680                 
681                 vector<SharedRAbundFloatVector*> newLookup;
682                 for (int i = 0; i < thislookup.size(); i++) {
683                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
684                         temp->setLabel(thislookup[i]->getLabel());
685                         temp->setGroup(thislookup[i]->getGroup());
686                         newLookup.push_back(temp);
687                 }
688                 
689                 //for each bin
690                 vector<string> newBinLabels;
691                 string snumBins = toString(thislookup[0]->getNumBins());
692                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
693                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
694                         
695                         //look at each sharedRabund and make sure they are not all zero
696                         bool allZero = true;
697                         for (int j = 0; j < thislookup.size(); j++) {
698                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
699                         }
700                         
701                         //if they are not all zero add this bin
702                         if (!allZero) {
703                                 for (int j = 0; j < thislookup.size(); j++) {
704                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
705                                 }
706                                 //if there is a bin label use it otherwise make one
707                                 string binLabel = "Otu";
708                                 string sbinNumber = toString(i+1);
709                                 if (sbinNumber.length() < snumBins.length()) { 
710                                         int diff = snumBins.length() - sbinNumber.length();
711                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
712                                 }
713                                 binLabel += sbinNumber; 
714                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
715                                 
716                                 newBinLabels.push_back(binLabel);
717                         }
718                 }
719                 
720                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
721                 
722                 thislookup = newLookup;
723                 m->currentBinLabels = newBinLabels;
724                 
725                 return 0;
726                 
727         }
728         catch(exception& e) {
729                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
730                 exit(1);
731         }
732 }
733
734 //**********************************************************************************************************************