]> git.donarmstrong.com Git - mothur.git/blob - normalizesharedcommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / normalizesharedcommand.cpp
1 /*
2  *  normalizesharedcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/15/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "normalizesharedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> NormalizeSharedCommand::setParameters(){ 
14         try {
15                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);    
16                 CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prelabund);
17                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
18                 CommandParameter pmethod("method", "Multiple", "totalgroup-zscore", "totalgroup", "", "", "",false,false); parameters.push_back(pmethod);
19                 CommandParameter pnorm("norm", "Number", "", "0", "", "", "",false,false); parameters.push_back(pnorm);
20                 CommandParameter pmakerelabund("makerelabund", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pmakerelabund);
21                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "NormalizeSharedCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string NormalizeSharedCommand::getHelpString(){ 
36         try {
37                 string helpString = "";
38                 helpString += "The normalize.shared command parameters are shared, relabund, groups, method, norm, makerelabund and label.  shared or relabund is required, unless you have a valid current file.\n";
39                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
40                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
41                 helpString += "The method parameter allows you to select what method you would like to use to normalize. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!\n";
42                 helpString += "The makerelabund parameter allows you to convert a shared file to a relabund file before you normalize. default=f.\n";
43                 helpString += "The norm parameter allows you to number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n";
44                 helpString += "The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n";
45                 helpString += "Example normalize.shared(groups=A-B-C, scale=totalgroup).\n";
46                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
47                 helpString += "The normalize.shared command outputs a .norm.shared file.\n";
48                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "NormalizeSharedCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56
57 //**********************************************************************************************************************
58 string NormalizeSharedCommand::getOutputFileNameTag(string type, string inputName=""){  
59         try {
60         string outputFileName = "";
61                 map<string, vector<string> >::iterator it;
62         
63         //is this a type this command creates
64         it = outputTypes.find(type);
65         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
66         else {
67             if (type == "shared") {  outputFileName =  "norm.shared"; }
68             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
69         }
70         return outputFileName;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "NormalizeSharedCommand", "getOutputFileNameTag");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78 NormalizeSharedCommand::NormalizeSharedCommand(){       
79         try {
80                 abort = true; calledHelp = true; 
81                 setParameters();
82                 vector<string> tempOutNames;
83                 outputTypes["shared"] = tempOutNames;
84         }
85         catch(exception& e) {
86                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
87                 exit(1);
88         }
89 }
90 //**********************************************************************************************************************
91
92 NormalizeSharedCommand::NormalizeSharedCommand(string option) {
93         try {
94                 abort = false; calledHelp = false;   
95                 allLines = 1;
96                 
97                 //allow user to run help
98                 if(option == "help") { help(); abort = true; calledHelp = true; }
99                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
100                 
101                 else {
102                         vector<string> myArray = setParameters();
103                         
104                         OptionParser parser(option);
105                         map<string,string> parameters = parser.getParameters();
106                         map<string,string>::iterator it;
107                         
108                         ValidParameters validParameter;
109                         
110                         //check to make sure all parameters are valid for command
111                         for (it = parameters.begin(); it != parameters.end(); it++) { 
112                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
113                         }
114                         
115                         //initialize outputTypes
116                         vector<string> tempOutNames;
117                         outputTypes["shared"] = tempOutNames;
118                         
119                         //if the user changes the input directory command factory will send this info to us in the output parameter 
120                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
121                         if (inputDir == "not found"){   inputDir = "";          }
122                         else {
123                                 string path;
124                                 it = parameters.find("shared");
125                                 //user has given a template file
126                                 if(it != parameters.end()){ 
127                                         path = m->hasPath(it->second);
128                                         //if the user has not given a path then, add inputdir. else leave path alone.
129                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
130                                 }
131                                 
132                                 it = parameters.find("relabund");
133                                 //user has given a template file
134                                 if(it != parameters.end()){ 
135                                         path = m->hasPath(it->second);
136                                         //if the user has not given a path then, add inputdir. else leave path alone.
137                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
138                                 }
139                         }
140                         
141                         sharedfile = validParameter.validFile(parameters, "shared", true);
142                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
143                         else if (sharedfile == "not found") { sharedfile = ""; }
144                         else {  format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
145                         
146                         relabundfile = validParameter.validFile(parameters, "relabund", true);
147                         if (relabundfile == "not open") { relabundfile = ""; abort = true; }    
148                         else if (relabundfile == "not found") { relabundfile = ""; }
149                         else {  format = "relabund"; inputfile = relabundfile; m->setRelAbundFile(relabundfile); }
150                         
151                         
152                         if ((sharedfile == "") && (relabundfile == "")) { 
153                                 //is there are current file available for any of these?
154                                 //give priority to shared, then list, then rabund, then sabund
155                                 //if there is a current shared file, use it
156                                 sharedfile = m->getSharedFile(); 
157                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
158                                 else { 
159                                         relabundfile = m->getRelAbundFile(); 
160                                         if (relabundfile != "") { inputfile = relabundfile; format = "relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
161                                         else { 
162                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund, relabund or shared file."); m->mothurOutEndLine(); 
163                                                 abort = true;
164                                         }
165                                 }
166                         }
167                         
168                         
169                         //if the user changes the output directory command factory will send this info to us in the output parameter 
170                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
171                         
172                         
173
174                         //check for optional parameter and set defaults
175                         // ...at some point should added some additional type checking...
176                         label = validParameter.validFile(parameters, "label", false);                   
177                         if (label == "not found") { label = ""; }
178                         else { 
179                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
180                                 else { allLines = 1;  }
181                         }
182                         
183                         groups = validParameter.validFile(parameters, "groups", false);                 
184                         if (groups == "not found") { groups = ""; pickedGroups = false; }
185                         else { 
186                                 pickedGroups = true;
187                                 m->splitAtDash(groups, Groups);
188                                 m->setGroups(Groups);
189                         }
190                         
191                         method = validParameter.validFile(parameters, "method", false);                         if (method == "not found") { method = "totalgroup"; }
192                         if ((method != "totalgroup") && (method != "zscore")) {  m->mothurOut(method + " is not a valid scaling option for the normalize.shared command. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!"); m->mothurOutEndLine(); abort = true; }
193                 
194                         string temp = validParameter.validFile(parameters, "norm", false);                              
195                         if (temp == "not found") {  
196                                 norm = 0;  //once you have read, set norm to smallest group number
197                         }else { 
198                                 m->mothurConvert(temp, norm);
199                                 if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; }
200                         }
201                         
202                         temp = validParameter.validFile(parameters, "makerelabund", false);     if (temp == "") { temp = "f"; }
203                         makeRelabund = m->isTrue(temp);
204                 }
205
206         }
207         catch(exception& e) {
208                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
209                 exit(1);
210         }
211 }
212 //**********************************************************************************************************************
213
214 int NormalizeSharedCommand::execute(){
215         try {
216         
217                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
218                 
219                 input = new InputData(inputfile, format);
220                 
221                 //you are reading a sharedfile and you do not want to make relabund
222                 if ((format == "sharedfile") && (!makeRelabund)) {
223                         lookup = input->getSharedRAbundVectors();
224                         string lastLabel = lookup[0]->getLabel();
225                         
226                         //look for groups whose numseqs is below norm and remove them, warning the user
227                         if (norm != 0) { 
228                                 m->clearGroups();
229                                 vector<string> mGroups;
230                                 vector<SharedRAbundVector*> temp;
231                                 for (int i = 0; i < lookup.size(); i++) {
232                                         if (lookup[i]->getNumSeqs() < norm) { 
233                                                 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
234                                                 delete lookup[i];
235                                         }else { 
236                                                 mGroups.push_back(lookup[i]->getGroup()); 
237                                                 temp.push_back(lookup[i]);
238                                         }
239                                 } 
240                                 lookup = temp;
241                                 m->setGroups(mGroups);
242                         }
243                         
244                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
245                         set<string> processedLabels;
246                         set<string> userLabels = labels;
247                         
248                         if (method == "totalgroup") {
249                                 //set norm to smallest group number
250                                 if (norm == 0) { 
251                                         norm = lookup[0]->getNumSeqs();
252                                         for (int i = 1; i < lookup.size(); i++) {
253                                                 if (lookup[i]->getNumSeqs() < norm) { norm = lookup[i]->getNumSeqs();  }
254                                         }  
255                                 }
256                                 
257                                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
258                         }
259                         
260                         
261                         //as long as you are not at the end of the file or done wih the lines you want
262                         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
263                                 
264                                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear();  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->clearGroups();   return 0; }
265                                 
266                                 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
267                                         
268                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
269                                         normalize(lookup);
270                                         
271                                         processedLabels.insert(lookup[0]->getLabel());
272                                         userLabels.erase(lookup[0]->getLabel());
273                                 }
274                                 
275                                 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
276                                         string saveLabel = lookup[0]->getLabel();
277                                         
278                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
279                                         lookup = input->getSharedRAbundVectors(lastLabel);
280                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
281                                         
282                                         normalize(lookup);
283                                         
284                                         processedLabels.insert(lookup[0]->getLabel());
285                                         userLabels.erase(lookup[0]->getLabel());
286                                         
287                                         //restore real lastlabel to save below
288                                         lookup[0]->setLabel(saveLabel);
289                                 }
290                                 
291                                 lastLabel = lookup[0]->getLabel();
292                                 //prevent memory leak
293                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
294                                 
295                                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->clearGroups();  return 0; }
296                                 
297                                 //get next line to process
298                                 lookup = input->getSharedRAbundVectors();                               
299                         }
300                         
301                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->clearGroups();   return 0; }
302                         
303                         //output error messages about any remaining user labels
304                         set<string>::iterator it;
305                         bool needToRun = false;
306                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
307                                 m->mothurOut("Your file does not include the label " + *it); 
308                                 if (processedLabels.count(lastLabel) != 1) {
309                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
310                                         needToRun = true;
311                                 }else {
312                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
313                                 }
314                         }
315                         
316                         //run last label if you need to
317                         if (needToRun == true)  {
318                                 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
319                                 lookup = input->getSharedRAbundVectors(lastLabel);
320                                 
321                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
322                                 
323                                 normalize(lookup);
324                                 
325                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
326                         }
327                         
328                 }else{ //relabund values
329                         lookupFloat = input->getSharedRAbundFloatVectors();
330                         string lastLabel = lookupFloat[0]->getLabel();
331                         
332                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
333                         set<string> processedLabels;
334                         set<string> userLabels = labels;
335                         
336                         //look for groups whose numseqs is below norm and remove them, warning the user
337                         if (norm != 0) { 
338                                 m->clearGroups();
339                                 vector<string> mGroups;
340                                 vector<SharedRAbundFloatVector*> temp;
341                                 for (int i = 0; i < lookupFloat.size(); i++) {
342                                         if (lookupFloat[i]->getNumSeqs() < norm) { 
343                                                 m->mothurOut(lookupFloat[i]->getGroup() + " contains " + toString(lookupFloat[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
344                                                 delete lookupFloat[i];
345                                         }else { 
346                                                 mGroups.push_back(lookupFloat[i]->getGroup()); 
347                                                 temp.push_back(lookupFloat[i]);
348                                         }
349                                 } 
350                                 lookupFloat = temp;
351                                 m->setGroups(mGroups);
352                         }
353                         
354                         //set norm to smallest group number
355                         if (method == "totalgroup") {
356                                 if (norm == 0) { 
357                                         norm = lookupFloat[0]->getNumSeqs();
358                                         for (int i = 1; i < lookupFloat.size(); i++) {
359                                                 if (lookupFloat[i]->getNumSeqs() < norm) { norm = lookupFloat[i]->getNumSeqs();  }
360                                         }  
361                                 }
362                                 
363                                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
364                         }
365                         
366                         //as long as you are not at the end of the file or done wih the lines you want
367                         while((lookupFloat[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
368                                 
369                                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear();  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } m->clearGroups();  return 0; }
370                                 
371                                 if(allLines == 1 || labels.count(lookupFloat[0]->getLabel()) == 1){                     
372                                         
373                                         m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
374                                         
375                                         normalize(lookupFloat);
376                                         
377                                         processedLabels.insert(lookupFloat[0]->getLabel());
378                                         userLabels.erase(lookupFloat[0]->getLabel());
379                                 }
380                                 
381                                 if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
382                                         string saveLabel = lookupFloat[0]->getLabel();
383                                         
384                                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }  
385                                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
386                                         
387                                         m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
388                 
389                                         normalize(lookupFloat);
390                                         
391                                         processedLabels.insert(lookupFloat[0]->getLabel());
392                                         userLabels.erase(lookupFloat[0]->getLabel());
393                                         
394                                         //restore real lastlabel to save below
395                                         lookupFloat[0]->setLabel(saveLabel);
396                                 }
397                                 
398                                 lastLabel = lookupFloat[0]->getLabel();
399                                 //prevent memory leak
400                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i]; lookupFloat[i] = NULL; }
401                                 
402                                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->clearGroups();   return 0; }
403                                 
404                                 //get next line to process
405                                 lookupFloat = input->getSharedRAbundFloatVectors();                             
406                         }
407                         
408                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->clearGroups();   return 0; }
409                         
410                         //output error messages about any remaining user labels
411                         set<string>::iterator it;
412                         bool needToRun = false;
413                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
414                                 m->mothurOut("Your file does not include the label " + *it); 
415                                 if (processedLabels.count(lastLabel) != 1) {
416                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
417                                         needToRun = true;
418                                 }else {
419                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
420                                 }
421                         }
422                         
423                         //run last label if you need to
424                         if (needToRun == true)  {
425                                 for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } }  
426                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
427                                 
428                                 m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
429                                 
430                                 normalize(lookupFloat);
431                                 
432                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
433                         }
434                         
435                 }
436                 //reset groups parameter
437                 m->clearGroups();  
438                 delete input;
439                 
440                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear(); return 0;}
441                 
442                 m->mothurOutEndLine();
443                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
444                 //m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
445                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
446                 m->mothurOutEndLine();
447                 
448                 //set shared file as new current sharedfile
449                 string current = "";
450                 itTypes = outputTypes.find("shared");
451                 if (itTypes != outputTypes.end()) {
452                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
453                 }
454                 
455                 return 0;
456         }
457         catch(exception& e) {
458                 m->errorOut(e, "NormalizeSharedCommand", "execute");
459                 exit(1);
460         }
461 }
462 //**********************************************************************************************************************
463
464 int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp){
465         try {
466                 //save mothurOut's binLabels to restore for next label
467                 vector<string> saveBinLabels = m->currentBinLabels;
468                 
469                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
470                 
471                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + thisLookUp[0]->getLabel() + "." + getOutputFileNameTag("shared");
472                 ofstream out;
473                 m->openOutputFile(outputFileName, out);
474                 outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
475                                 
476                 if (method == "totalgroup") { 
477                         
478                         //save numSeqs since they will change as the data is normalized
479                         vector<int> sizes;
480                         for (int i = 0; i < thisLookUp.size(); i++) {  sizes.push_back(thisLookUp[i]->getNumSeqs()); }
481                                         
482                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
483                                                 
484                                         for (int i = 0; i < thisLookUp.size(); i++) {
485                                                         
486                                                 if (m->control_pressed) { out.close(); return 0; }
487                                                         
488                                                 int abund = thisLookUp[i]->getAbundance(j);
489                                                         
490                                                 float relabund = abund / (float) sizes[i];
491                                                 float newNorm = relabund * norm;
492                                                 
493                                                 //round to nearest int
494                                                 int finalNorm = (int) floor((newNorm + 0.5));
495                                                 
496                                                 thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
497                                         }
498                                 }
499                                         
500                 }else if (method == "zscore") {
501                         
502                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
503                                 
504                                 if (m->control_pressed) { out.close(); return 0; }
505                                 
506                                 //calc mean
507                                 float mean = 0.0;
508                                 for (int i = 0; i < thisLookUp.size(); i++) {  mean += thisLookUp[i]->getAbundance(j); }
509                                 mean /= (float) thisLookUp.size();
510                                         
511                                 //calc standard deviation
512                                 float sumSquared = 0.0;
513                                 for (int i = 0; i < thisLookUp.size(); i++) { sumSquared += (((float)thisLookUp[i]->getAbundance(j) - mean) * ((float)thisLookUp[i]->getAbundance(j) - mean)); }
514                                 sumSquared /= (float) thisLookUp.size();
515                                 
516                                 float standardDev = sqrt(sumSquared);
517                                         
518                                 for (int i = 0; i < thisLookUp.size(); i++) {
519                                         int finalNorm = 0;
520                                         if (standardDev != 0) { // stop divide by zero
521                                                 float newNorm = ((float)thisLookUp[i]->getAbundance(j) - mean) / standardDev;
522                                                 //round to nearest int
523                                                 finalNorm = (int) floor((newNorm + 0.5));
524                                         }
525                                         
526                                         thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
527                                 }
528                         }
529                                                 
530                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
531                                 
532                                 
533                                                 
534                 eliminateZeroOTUS(thisLookUp);
535                 
536                 thisLookUp[0]->printHeaders(out); 
537                  
538                 for (int i = 0; i < thisLookUp.size(); i++) {
539                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
540                         thisLookUp[i]->print(out);
541                 }
542                 
543                 out.close();
544                 
545                 m->currentBinLabels = saveBinLabels;
546                 
547                 return 0;
548         }
549         catch(exception& e) {
550                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
551                 exit(1);
552         }
553 }
554 //**********************************************************************************************************************
555
556 int NormalizeSharedCommand::normalize(vector<SharedRAbundFloatVector*>& thisLookUp){
557         try {
558                 
559                 //save mothurOut's binLabels to restore for next label
560                 vector<string> saveBinLabels = m->currentBinLabels;
561                 
562                 string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + thisLookUp[0]->getLabel() + "." + getOutputFileNameTag("shared");
563                 ofstream out;
564                 m->openOutputFile(outputFileName, out);
565                 outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
566                 
567                 
568                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
569                 
570                 if (method == "totalgroup") { 
571                         
572                         //save numSeqs since they will change as the data is normalized
573                         vector<float> sizes;
574                         for (int i = 0; i < thisLookUp.size(); i++) {  sizes.push_back(thisLookUp[i]->getNumSeqs()); }
575                         
576                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
577                                 
578                                 for (int i = 0; i < thisLookUp.size(); i++) {
579                                         
580                                         if (m->control_pressed) { out.close(); return 0; }
581                                         
582                                         float abund = thisLookUp[i]->getAbundance(j);
583                                         
584                                         float relabund = abund / (float) sizes[i];
585                                         float newNorm = relabund * norm;
586                                         
587                                         thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
588                                 }
589                         }
590                         
591                 }else if (method == "zscore") {
592                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
593                                 
594                                 if (m->control_pressed) { out.close(); return 0; }
595                                 
596                                 //calc mean
597                                 float mean = 0.0;
598                                 for (int i = 0; i < thisLookUp.size(); i++) {  mean += thisLookUp[i]->getAbundance(j); }
599                                 mean /= (float) thisLookUp.size();
600                                 
601                                 //calc standard deviation
602                                 float sumSquared = 0.0;
603                                 for (int i = 0; i < thisLookUp.size(); i++) { sumSquared += ((thisLookUp[i]->getAbundance(j) - mean) * (thisLookUp[i]->getAbundance(j) - mean)); }
604                                 sumSquared /= (float) thisLookUp.size();
605                                 
606                                 float standardDev = sqrt(sumSquared);
607                                 
608                                 for (int i = 0; i < thisLookUp.size(); i++) {
609                                         float newNorm = 0.0;
610                                         if (standardDev != 0) { // stop divide by zero
611                                                 newNorm = (thisLookUp[i]->getAbundance(j) - mean) / standardDev;
612                                         }
613                                         thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
614                                 }
615                         }                       
616                         
617                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
618                 
619                 
620                 eliminateZeroOTUS(thisLookUp);
621                 
622                 thisLookUp[0]->printHeaders(out); 
623                 
624                 for (int i = 0; i < thisLookUp.size(); i++) {
625                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
626                         thisLookUp[i]->print(out);
627                 }
628                 
629                 out.close();
630                 
631                 m->currentBinLabels = saveBinLabels;
632                 
633                 return 0;
634         }
635         catch(exception& e) {
636                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
637                 exit(1);
638         }
639 }
640 //**********************************************************************************************************************
641 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
642         try {
643                 
644                 vector<SharedRAbundVector*> newLookup;
645                 for (int i = 0; i < thislookup.size(); i++) {
646                         SharedRAbundVector* temp = new SharedRAbundVector();
647                         temp->setLabel(thislookup[i]->getLabel());
648                         temp->setGroup(thislookup[i]->getGroup());
649                         newLookup.push_back(temp);
650                 }
651                 
652                 //for each bin
653                 vector<string> newBinLabels;
654                 string snumBins = toString(thislookup[0]->getNumBins());
655                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
656                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
657                 
658                         //look at each sharedRabund and make sure they are not all zero
659                         bool allZero = true;
660                         for (int j = 0; j < thislookup.size(); j++) {
661                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
662                         }
663                         
664                         //if they are not all zero add this bin
665                         if (!allZero) {
666                                 for (int j = 0; j < thislookup.size(); j++) {
667                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
668                                 }
669                                 //if there is a bin label use it otherwise make one
670                                 string binLabel = "Otu";
671                                 string sbinNumber = toString(i+1);
672                                 if (sbinNumber.length() < snumBins.length()) { 
673                                         int diff = snumBins.length() - sbinNumber.length();
674                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
675                                 }
676                                 binLabel += sbinNumber; 
677                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
678                                 
679                                 newBinLabels.push_back(binLabel);
680                         }
681                 }
682
683                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
684
685                 thislookup = newLookup;
686                 m->currentBinLabels = newBinLabels;
687                 
688                 return 0;
689  
690         }
691         catch(exception& e) {
692                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
693                 exit(1);
694         }
695 }
696 //**********************************************************************************************************************
697 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
698         try {
699                 
700                 vector<SharedRAbundFloatVector*> newLookup;
701                 for (int i = 0; i < thislookup.size(); i++) {
702                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
703                         temp->setLabel(thislookup[i]->getLabel());
704                         temp->setGroup(thislookup[i]->getGroup());
705                         newLookup.push_back(temp);
706                 }
707                 
708                 //for each bin
709                 vector<string> newBinLabels;
710                 string snumBins = toString(thislookup[0]->getNumBins());
711                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
712                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
713                         
714                         //look at each sharedRabund and make sure they are not all zero
715                         bool allZero = true;
716                         for (int j = 0; j < thislookup.size(); j++) {
717                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
718                         }
719                         
720                         //if they are not all zero add this bin
721                         if (!allZero) {
722                                 for (int j = 0; j < thislookup.size(); j++) {
723                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
724                                 }
725                                 //if there is a bin label use it otherwise make one
726                                 string binLabel = "Otu";
727                                 string sbinNumber = toString(i+1);
728                                 if (sbinNumber.length() < snumBins.length()) { 
729                                         int diff = snumBins.length() - sbinNumber.length();
730                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
731                                 }
732                                 binLabel += sbinNumber; 
733                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
734                                 
735                                 newBinLabels.push_back(binLabel);
736                         }
737                 }
738                 
739                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
740                 
741                 thislookup = newLookup;
742                 m->currentBinLabels = newBinLabels;
743                 
744                 return 0;
745                 
746         }
747         catch(exception& e) {
748                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
749                 exit(1);
750         }
751 }
752
753 //**********************************************************************************************************************