]> git.donarmstrong.com Git - mothur.git/blob - normalizesharedcommand.cpp
rewrote metastats command in c++, added mothurRemove function to handle ~ error....
[mothur.git] / normalizesharedcommand.cpp
1 /*
2  *  normalizesharedcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/15/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "normalizesharedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> NormalizeSharedCommand::setParameters(){ 
14         try {
15                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);    
16                 CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prelabund);
17                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
18                 CommandParameter pmethod("method", "Multiple", "totalgroup-zscore", "totalgroup", "", "", "",false,false); parameters.push_back(pmethod);
19                 CommandParameter pnorm("norm", "Number", "", "0", "", "", "",false,false); parameters.push_back(pnorm);
20                 CommandParameter pmakerelabund("makerelabund", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pmakerelabund);
21                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "NormalizeSharedCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string NormalizeSharedCommand::getHelpString(){ 
36         try {
37                 string helpString = "";
38                 helpString += "The normalize.shared command parameters are shared, relabund, groups, method, norm, makerelabund and label.  shared or relabund is required, unless you have a valid current file.\n";
39                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
40                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
41                 helpString += "The method parameter allows you to select what method you would like to use to normalize. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!\n";
42                 helpString += "The makerelabund parameter allows you to convert a shared file to a relabund file before you normalize. default=f.\n";
43                 helpString += "The norm parameter allows you to number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n";
44                 helpString += "The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n";
45                 helpString += "Example normalize.shared(groups=A-B-C, scale=totalgroup).\n";
46                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
47                 helpString += "The normalize.shared command outputs a .norm.shared file.\n";
48                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "NormalizeSharedCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56
57
58 //**********************************************************************************************************************
59 NormalizeSharedCommand::NormalizeSharedCommand(){       
60         try {
61                 abort = true; calledHelp = true; 
62                 setParameters();
63                 vector<string> tempOutNames;
64                 outputTypes["shared"] = tempOutNames;
65         }
66         catch(exception& e) {
67                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
68                 exit(1);
69         }
70 }
71 //**********************************************************************************************************************
72
73 NormalizeSharedCommand::NormalizeSharedCommand(string option) {
74         try {
75                 abort = false; calledHelp = false;   
76                 allLines = 1;
77                 
78                 //allow user to run help
79                 if(option == "help") { help(); abort = true; calledHelp = true; }
80                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
81                 
82                 else {
83                         vector<string> myArray = setParameters();
84                         
85                         OptionParser parser(option);
86                         map<string,string> parameters = parser.getParameters();
87                         map<string,string>::iterator it;
88                         
89                         ValidParameters validParameter;
90                         
91                         //check to make sure all parameters are valid for command
92                         for (it = parameters.begin(); it != parameters.end(); it++) { 
93                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
94                         }
95                         
96                         //initialize outputTypes
97                         vector<string> tempOutNames;
98                         outputTypes["shared"] = tempOutNames;
99                         
100                         //if the user changes the input directory command factory will send this info to us in the output parameter 
101                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
102                         if (inputDir == "not found"){   inputDir = "";          }
103                         else {
104                                 string path;
105                                 it = parameters.find("shared");
106                                 //user has given a template file
107                                 if(it != parameters.end()){ 
108                                         path = m->hasPath(it->second);
109                                         //if the user has not given a path then, add inputdir. else leave path alone.
110                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
111                                 }
112                                 
113                                 it = parameters.find("relabund");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
119                                 }
120                         }
121                         
122                         sharedfile = validParameter.validFile(parameters, "shared", true);
123                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
124                         else if (sharedfile == "not found") { sharedfile = ""; }
125                         else {  format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
126                         
127                         relabundfile = validParameter.validFile(parameters, "relabund", true);
128                         if (relabundfile == "not open") { relabundfile = ""; abort = true; }    
129                         else if (relabundfile == "not found") { relabundfile = ""; }
130                         else {  format = "relabund"; inputfile = relabundfile; m->setRelAbundFile(relabundfile); }
131                         
132                         
133                         if ((sharedfile == "") && (relabundfile == "")) { 
134                                 //is there are current file available for any of these?
135                                 //give priority to shared, then list, then rabund, then sabund
136                                 //if there is a current shared file, use it
137                                 sharedfile = m->getSharedFile(); 
138                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
139                                 else { 
140                                         relabundfile = m->getRelAbundFile(); 
141                                         if (relabundfile != "") { inputfile = relabundfile; format = "relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
142                                         else { 
143                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund, relabund or shared file."); m->mothurOutEndLine(); 
144                                                 abort = true;
145                                         }
146                                 }
147                         }
148                         
149                         
150                         //if the user changes the output directory command factory will send this info to us in the output parameter 
151                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
152                         
153                         
154
155                         //check for optional parameter and set defaults
156                         // ...at some point should added some additional type checking...
157                         label = validParameter.validFile(parameters, "label", false);                   
158                         if (label == "not found") { label = ""; }
159                         else { 
160                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
161                                 else { allLines = 1;  }
162                         }
163                         
164                         groups = validParameter.validFile(parameters, "groups", false);                 
165                         if (groups == "not found") { groups = ""; pickedGroups = false; }
166                         else { 
167                                 pickedGroups = true;
168                                 m->splitAtDash(groups, Groups);
169                                 m->Groups = Groups;
170                         }
171                         
172                         method = validParameter.validFile(parameters, "method", false);                         if (method == "not found") { method = "totalgroup"; }
173                         if ((method != "totalgroup") && (method != "zscore")) {  m->mothurOut(method + " is not a valid scaling option for the normalize.shared command. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!"); m->mothurOutEndLine(); abort = true; }
174                 
175                         string temp = validParameter.validFile(parameters, "norm", false);                              
176                         if (temp == "not found") {  
177                                 norm = 0;  //once you have read, set norm to smallest group number
178                         }else { 
179                                 convert(temp, norm);
180                                 if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; }
181                         }
182                         
183                         temp = validParameter.validFile(parameters, "makerelabund", false);     if (temp == "") { temp = "f"; }
184                         makeRelabund = m->isTrue(temp);
185                 }
186
187         }
188         catch(exception& e) {
189                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
190                 exit(1);
191         }
192 }
193 //**********************************************************************************************************************
194
195 int NormalizeSharedCommand::execute(){
196         try {
197         
198                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
199                 
200                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "norm.shared";
201                 ofstream out;
202                 m->openOutputFile(outputFileName, out);
203                 
204                 input = new InputData(inputfile, format);
205                 
206                 //you are reading a sharedfile and you do not want to make relabund
207                 if ((format == "sharedfile") && (!makeRelabund)) {
208                         lookup = input->getSharedRAbundVectors();
209                         string lastLabel = lookup[0]->getLabel();
210                         
211                         //look for groups whose numseqs is below norm and remove them, warning the user
212                         if (norm != 0) { 
213                                 m->Groups.clear();
214                                 vector<SharedRAbundVector*> temp;
215                                 for (int i = 0; i < lookup.size(); i++) {
216                                         if (lookup[i]->getNumSeqs() < norm) { 
217                                                 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
218                                                 delete lookup[i];
219                                         }else { 
220                                                 m->Groups.push_back(lookup[i]->getGroup()); 
221                                                 temp.push_back(lookup[i]);
222                                         }
223                                 } 
224                                 lookup = temp;
225                         }
226                         
227                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
228                         set<string> processedLabels;
229                         set<string> userLabels = labels;
230                         
231                         if (method == "totalgroup") {
232                                 //set norm to smallest group number
233                                 if (norm == 0) { 
234                                         norm = lookup[0]->getNumSeqs();
235                                         for (int i = 1; i < lookup.size(); i++) {
236                                                 if (lookup[i]->getNumSeqs() < norm) { norm = lookup[i]->getNumSeqs();  }
237                                         }  
238                                 }
239                                 
240                                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
241                         }
242                         
243                         
244                         //as long as you are not at the end of the file or done wih the lines you want
245                         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
246                                 
247                                 if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->Groups.clear();   out.close(); m->mothurRemove(outputFileName); return 0; }
248                                 
249                                 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
250                                         
251                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
252                                         if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
253                                         normalize(lookup, out);
254                                         
255                                         processedLabels.insert(lookup[0]->getLabel());
256                                         userLabels.erase(lookup[0]->getLabel());
257                                 }
258                                 
259                                 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
260                                         string saveLabel = lookup[0]->getLabel();
261                                         
262                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
263                                         lookup = input->getSharedRAbundVectors(lastLabel);
264                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
265                                         if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
266                                         normalize(lookup, out);
267                                         
268                                         processedLabels.insert(lookup[0]->getLabel());
269                                         userLabels.erase(lookup[0]->getLabel());
270                                         
271                                         //restore real lastlabel to save below
272                                         lookup[0]->setLabel(saveLabel);
273                                 }
274                                 
275                                 lastLabel = lookup[0]->getLabel();
276                                 //prevent memory leak
277                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
278                                 
279                                 if (m->control_pressed) {  outputTypes.clear(); m->Groups.clear();  out.close(); m->mothurRemove(outputFileName); return 0; }
280                                 
281                                 //get next line to process
282                                 lookup = input->getSharedRAbundVectors();                               
283                         }
284                         
285                         if (m->control_pressed) { outputTypes.clear(); m->Groups.clear();  out.close(); m->mothurRemove(outputFileName);  return 0; }
286                         
287                         //output error messages about any remaining user labels
288                         set<string>::iterator it;
289                         bool needToRun = false;
290                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
291                                 m->mothurOut("Your file does not include the label " + *it); 
292                                 if (processedLabels.count(lastLabel) != 1) {
293                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
294                                         needToRun = true;
295                                 }else {
296                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
297                                 }
298                         }
299                         
300                         //run last label if you need to
301                         if (needToRun == true)  {
302                                 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
303                                 lookup = input->getSharedRAbundVectors(lastLabel);
304                                 
305                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
306                                 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
307                                 normalize(lookup, out);
308                                 
309                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
310                         }
311                         
312                 }else{ //relabund values
313                         lookupFloat = input->getSharedRAbundFloatVectors();
314                         string lastLabel = lookupFloat[0]->getLabel();
315                         
316                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
317                         set<string> processedLabels;
318                         set<string> userLabels = labels;
319                         
320                         //look for groups whose numseqs is below norm and remove them, warning the user
321                         if (norm != 0) { 
322                                 m->Groups.clear();
323                                 vector<SharedRAbundFloatVector*> temp;
324                                 for (int i = 0; i < lookupFloat.size(); i++) {
325                                         if (lookupFloat[i]->getNumSeqs() < norm) { 
326                                                 m->mothurOut(lookupFloat[i]->getGroup() + " contains " + toString(lookupFloat[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
327                                                 delete lookupFloat[i];
328                                         }else { 
329                                                 m->Groups.push_back(lookupFloat[i]->getGroup()); 
330                                                 temp.push_back(lookupFloat[i]);
331                                         }
332                                 } 
333                                 lookupFloat = temp;
334                         }
335                         
336                         //set norm to smallest group number
337                         if (method == "totalgroup") {
338                                 if (norm == 0) { 
339                                         norm = lookupFloat[0]->getNumSeqs();
340                                         for (int i = 1; i < lookupFloat.size(); i++) {
341                                                 if (lookupFloat[i]->getNumSeqs() < norm) { norm = lookupFloat[i]->getNumSeqs();  }
342                                         }  
343                                 }
344                                 
345                                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
346                         }
347                         
348                         //as long as you are not at the end of the file or done wih the lines you want
349                         while((lookupFloat[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
350                                 
351                                 if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } m->Groups.clear();   out.close(); m->mothurRemove(outputFileName); return 0; }
352                                 
353                                 if(allLines == 1 || labels.count(lookupFloat[0]->getLabel()) == 1){                     
354                                         
355                                         m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
356                                         
357                                         if (!m->printedHeaders) { lookupFloat[0]->printHeaders(out); }
358                                         normalize(lookupFloat, out);
359                                         
360                                         processedLabels.insert(lookupFloat[0]->getLabel());
361                                         userLabels.erase(lookupFloat[0]->getLabel());
362                                 }
363                                 
364                                 if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
365                                         string saveLabel = lookupFloat[0]->getLabel();
366                                         
367                                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }  
368                                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
369                                         
370                                         m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
371                                         if (!m->printedHeaders) { lookupFloat[0]->printHeaders(out); }
372                                         normalize(lookupFloat, out);
373                                         
374                                         processedLabels.insert(lookupFloat[0]->getLabel());
375                                         userLabels.erase(lookupFloat[0]->getLabel());
376                                         
377                                         //restore real lastlabel to save below
378                                         lookupFloat[0]->setLabel(saveLabel);
379                                 }
380                                 
381                                 lastLabel = lookupFloat[0]->getLabel();
382                                 //prevent memory leak
383                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i]; lookupFloat[i] = NULL; }
384                                 
385                                 if (m->control_pressed) {  outputTypes.clear(); m->Groups.clear();  out.close(); m->mothurRemove(outputFileName); return 0; }
386                                 
387                                 //get next line to process
388                                 lookupFloat = input->getSharedRAbundFloatVectors();                             
389                         }
390                         
391                         if (m->control_pressed) { outputTypes.clear(); m->Groups.clear();  out.close(); m->mothurRemove(outputFileName);  return 0; }
392                         
393                         //output error messages about any remaining user labels
394                         set<string>::iterator it;
395                         bool needToRun = false;
396                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
397                                 m->mothurOut("Your file does not include the label " + *it); 
398                                 if (processedLabels.count(lastLabel) != 1) {
399                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
400                                         needToRun = true;
401                                 }else {
402                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
403                                 }
404                         }
405                         
406                         //run last label if you need to
407                         if (needToRun == true)  {
408                                 for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } }  
409                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
410                                 
411                                 m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
412                                 
413                                 if (!m->printedHeaders) { lookupFloat[0]->printHeaders(out); }
414                                 normalize(lookupFloat, out);
415                                 
416                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
417                         }
418                         
419                 }
420                 //reset groups parameter
421                 m->Groups.clear();  
422                 delete input;
423                 out.close();
424                 
425                 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); return 0;}
426                 
427                 m->mothurOutEndLine();
428                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
429                 m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
430                 m->mothurOutEndLine();
431                 
432                 //set shared file as new current sharedfile
433                 string current = "";
434                 itTypes = outputTypes.find("shared");
435                 if (itTypes != outputTypes.end()) {
436                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
437                 }
438                 
439                 return 0;
440         }
441         catch(exception& e) {
442                 m->errorOut(e, "NormalizeSharedCommand", "execute");
443                 exit(1);
444         }
445 }
446 //**********************************************************************************************************************
447
448 int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
449         try {
450                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
451                                 
452                 if (method == "totalgroup") { 
453                         
454                         //save numSeqs since they will change as the data is normalized
455                         vector<int> sizes;
456                         for (int i = 0; i < thisLookUp.size(); i++) {  sizes.push_back(thisLookUp[i]->getNumSeqs()); }
457                                         
458                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
459                                                 
460                                         for (int i = 0; i < thisLookUp.size(); i++) {
461                                                         
462                                                 if (m->control_pressed) { return 0; }
463                                                         
464                                                 int abund = thisLookUp[i]->getAbundance(j);
465                                                         
466                                                 float relabund = abund / (float) sizes[i];
467                                                 float newNorm = relabund * norm;
468                                                 
469                                                 //round to nearest int
470                                                 int finalNorm = (int) floor((newNorm + 0.5));
471                                                 
472                                                 thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
473                                         }
474                                 }
475                                         
476                 }else if (method == "zscore") {
477                         
478                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
479                                 
480                                 if (m->control_pressed) { return 0; }
481                                 
482                                 //calc mean
483                                 float mean = 0.0;
484                                 for (int i = 0; i < thisLookUp.size(); i++) {  mean += thisLookUp[i]->getAbundance(j); }
485                                 mean /= (float) thisLookUp.size();
486                                         
487                                 //calc standard deviation
488                                 float sumSquared = 0.0;
489                                 for (int i = 0; i < thisLookUp.size(); i++) { sumSquared += (((float)thisLookUp[i]->getAbundance(j) - mean) * ((float)thisLookUp[i]->getAbundance(j) - mean)); }
490                                 sumSquared /= (float) thisLookUp.size();
491                                 
492                                 float standardDev = sqrt(sumSquared);
493                                         
494                                 for (int i = 0; i < thisLookUp.size(); i++) {
495                                         int finalNorm = 0;
496                                         if (standardDev != 0) { // stop divide by zero
497                                                 float newNorm = ((float)thisLookUp[i]->getAbundance(j) - mean) / standardDev;
498                                                 //round to nearest int
499                                                 finalNorm = (int) floor((newNorm + 0.5));
500                                         }
501                                         
502                                         thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
503                                 }
504                         }
505                                                 
506                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
507                                 
508                                 
509                                                 
510                 eliminateZeroOTUS(thisLookUp);
511                  
512                 for (int i = 0; i < thisLookUp.size(); i++) {
513                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
514                         thisLookUp[i]->print(out);
515                 }
516                 
517                 return 0;
518         }
519         catch(exception& e) {
520                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
521                 exit(1);
522         }
523 }
524 //**********************************************************************************************************************
525
526 int NormalizeSharedCommand::normalize(vector<SharedRAbundFloatVector*>& thisLookUp, ofstream& out){
527         try {
528                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
529                 
530                 if (method == "totalgroup") { 
531                         
532                         //save numSeqs since they will change as the data is normalized
533                         vector<float> sizes;
534                         for (int i = 0; i < thisLookUp.size(); i++) {  sizes.push_back(thisLookUp[i]->getNumSeqs()); }
535                         
536                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
537                                 
538                                 for (int i = 0; i < thisLookUp.size(); i++) {
539                                         
540                                         if (m->control_pressed) { return 0; }
541                                         
542                                         float abund = thisLookUp[i]->getAbundance(j);
543                                         
544                                         float relabund = abund / (float) sizes[i];
545                                         float newNorm = relabund * norm;
546                                         
547                                         thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
548                                 }
549                         }
550                         
551                 }else if (method == "zscore") {
552                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
553                                 
554                                 if (m->control_pressed) { return 0; }
555                                 
556                                 //calc mean
557                                 float mean = 0.0;
558                                 for (int i = 0; i < thisLookUp.size(); i++) {  mean += thisLookUp[i]->getAbundance(j); }
559                                 mean /= (float) thisLookUp.size();
560                                 
561                                 //calc standard deviation
562                                 float sumSquared = 0.0;
563                                 for (int i = 0; i < thisLookUp.size(); i++) { sumSquared += ((thisLookUp[i]->getAbundance(j) - mean) * (thisLookUp[i]->getAbundance(j) - mean)); }
564                                 sumSquared /= (float) thisLookUp.size();
565                                 
566                                 float standardDev = sqrt(sumSquared);
567                                 
568                                 for (int i = 0; i < thisLookUp.size(); i++) {
569                                         float newNorm = 0.0;
570                                         if (standardDev != 0) { // stop divide by zero
571                                                 newNorm = (thisLookUp[i]->getAbundance(j) - mean) / standardDev;
572                                         }
573                                         thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
574                                 }
575                         }                       
576                         
577                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
578                 
579                 
580                 eliminateZeroOTUS(thisLookUp);
581                 
582                 for (int i = 0; i < thisLookUp.size(); i++) {
583                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
584                         thisLookUp[i]->print(out);
585                 }
586                 
587                 return 0;
588         }
589         catch(exception& e) {
590                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
591                 exit(1);
592         }
593 }
594 //**********************************************************************************************************************
595 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
596         try {
597                 
598                 vector<SharedRAbundVector*> newLookup;
599                 for (int i = 0; i < thislookup.size(); i++) {
600                         SharedRAbundVector* temp = new SharedRAbundVector();
601                         temp->setLabel(thislookup[i]->getLabel());
602                         temp->setGroup(thislookup[i]->getGroup());
603                         newLookup.push_back(temp);
604                 }
605                 
606                 //for each bin
607                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
608                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
609                 
610                         //look at each sharedRabund and make sure they are not all zero
611                         bool allZero = true;
612                         for (int j = 0; j < thislookup.size(); j++) {
613                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
614                         }
615                         
616                         //if they are not all zero add this bin
617                         if (!allZero) {
618                                 for (int j = 0; j < thislookup.size(); j++) {
619                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
620                                 }
621                         }
622                 }
623
624                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
625
626                 thislookup = newLookup;
627                 
628                 return 0;
629  
630         }
631         catch(exception& e) {
632                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
633                 exit(1);
634         }
635 }
636 //**********************************************************************************************************************
637 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
638         try {
639                 
640                 vector<SharedRAbundFloatVector*> newLookup;
641                 for (int i = 0; i < thislookup.size(); i++) {
642                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
643                         temp->setLabel(thislookup[i]->getLabel());
644                         temp->setGroup(thislookup[i]->getGroup());
645                         newLookup.push_back(temp);
646                 }
647                 
648                 //for each bin
649                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
650                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
651                         
652                         //look at each sharedRabund and make sure they are not all zero
653                         bool allZero = true;
654                         for (int j = 0; j < thislookup.size(); j++) {
655                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
656                         }
657                         
658                         //if they are not all zero add this bin
659                         if (!allZero) {
660                                 for (int j = 0; j < thislookup.size(); j++) {
661                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
662                                 }
663                         }
664                 }
665                 
666                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
667                 
668                 thislookup = newLookup;
669                 
670                 return 0;
671                 
672         }
673         catch(exception& e) {
674                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
675                 exit(1);
676         }
677 }
678
679 //**********************************************************************************************************************