]> git.donarmstrong.com Git - mothur.git/blob - normalizesharedcommand.cpp
changes while testing
[mothur.git] / normalizesharedcommand.cpp
1 /*
2  *  normalizesharedcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/15/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "normalizesharedcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> NormalizeSharedCommand::setParameters(){ 
14         try {
15                 CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","shared",false,false,true); parameters.push_back(pshared);      
16                 CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none","shared",false,false,true); parameters.push_back(prelabund);
17                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
18                 CommandParameter pmethod("method", "Multiple", "totalgroup-zscore", "totalgroup", "", "", "","",false,false,true); parameters.push_back(pmethod);
19                 CommandParameter pnorm("norm", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pnorm);
20                 CommandParameter pmakerelabund("makerelabund", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pmakerelabund);
21                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "NormalizeSharedCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string NormalizeSharedCommand::getHelpString(){ 
36         try {
37                 string helpString = "";
38                 helpString += "The normalize.shared command parameters are shared, relabund, groups, method, norm, makerelabund and label.  shared or relabund is required, unless you have a valid current file.\n";
39                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
40                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
41                 helpString += "The method parameter allows you to select what method you would like to use to normalize. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!\n";
42                 helpString += "The makerelabund parameter allows you to convert a shared file to a relabund file before you normalize. default=f.\n";
43                 helpString += "The norm parameter allows you to number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n";
44                 helpString += "The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n";
45                 helpString += "Example normalize.shared(groups=A-B-C, scale=totalgroup).\n";
46                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
47                 helpString += "The normalize.shared command outputs a .norm.shared file.\n";
48                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "NormalizeSharedCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56 //**********************************************************************************************************************
57 string NormalizeSharedCommand::getOutputPattern(string type) {
58     try {
59         string pattern = "";
60         
61         if (type == "shared") {  pattern = "[filename],[distance],norm.shared"; } 
62         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
63         
64         return pattern;
65     }
66     catch(exception& e) {
67         m->errorOut(e, "NormalizeSharedCommand", "getOutputPattern");
68         exit(1);
69     }
70 }
71 //**********************************************************************************************************************
72 NormalizeSharedCommand::NormalizeSharedCommand(){       
73         try {
74                 abort = true; calledHelp = true; 
75                 setParameters();
76                 vector<string> tempOutNames;
77                 outputTypes["shared"] = tempOutNames;
78         }
79         catch(exception& e) {
80                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
81                 exit(1);
82         }
83 }
84 //**********************************************************************************************************************
85
86 NormalizeSharedCommand::NormalizeSharedCommand(string option) {
87         try {
88                 abort = false; calledHelp = false;   
89                 allLines = 1;
90                 
91                 //allow user to run help
92                 if(option == "help") { help(); abort = true; calledHelp = true; }
93                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
94                 
95                 else {
96                         vector<string> myArray = setParameters();
97                         
98                         OptionParser parser(option);
99                         map<string,string> parameters = parser.getParameters();
100                         map<string,string>::iterator it;
101                         
102                         ValidParameters validParameter;
103                         
104                         //check to make sure all parameters are valid for command
105                         for (it = parameters.begin(); it != parameters.end(); it++) { 
106                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
107                         }
108                         
109                         //initialize outputTypes
110                         vector<string> tempOutNames;
111                         outputTypes["shared"] = tempOutNames;
112                         
113                         //if the user changes the input directory command factory will send this info to us in the output parameter 
114                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
115                         if (inputDir == "not found"){   inputDir = "";          }
116                         else {
117                                 string path;
118                                 it = parameters.find("shared");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
124                                 }
125                                 
126                                 it = parameters.find("relabund");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
132                                 }
133                         }
134                         
135                         sharedfile = validParameter.validFile(parameters, "shared", true);
136                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
137                         else if (sharedfile == "not found") { sharedfile = ""; }
138                         else {  format = "sharedfile"; inputfile = sharedfile; m->setSharedFile(sharedfile); }
139                         
140                         relabundfile = validParameter.validFile(parameters, "relabund", true);
141                         if (relabundfile == "not open") { relabundfile = ""; abort = true; }    
142                         else if (relabundfile == "not found") { relabundfile = ""; }
143                         else {  format = "relabund"; inputfile = relabundfile; m->setRelAbundFile(relabundfile); }
144                         
145                         
146                         if ((sharedfile == "") && (relabundfile == "")) { 
147                                 //is there are current file available for any of these?
148                                 //give priority to shared, then list, then rabund, then sabund
149                                 //if there is a current shared file, use it
150                                 sharedfile = m->getSharedFile(); 
151                                 if (sharedfile != "") { inputfile = sharedfile; format = "sharedfile"; m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
152                                 else { 
153                                         relabundfile = m->getRelAbundFile(); 
154                                         if (relabundfile != "") { inputfile = relabundfile; format = "relabund"; m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
155                                         else { 
156                                                 m->mothurOut("No valid current files. You must provide a list, sabund, rabund, relabund or shared file."); m->mothurOutEndLine(); 
157                                                 abort = true;
158                                         }
159                                 }
160                         }
161                         
162                         
163                         //if the user changes the output directory command factory will send this info to us in the output parameter 
164                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(inputfile);              }
165                         
166                         
167
168                         //check for optional parameter and set defaults
169                         // ...at some point should added some additional type checking...
170                         label = validParameter.validFile(parameters, "label", false);                   
171                         if (label == "not found") { label = ""; }
172                         else { 
173                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
174                                 else { allLines = 1;  }
175                         }
176                         
177                         groups = validParameter.validFile(parameters, "groups", false);                 
178                         if (groups == "not found") { groups = ""; pickedGroups = false; }
179                         else { 
180                                 pickedGroups = true;
181                                 m->splitAtDash(groups, Groups);
182                                 m->setGroups(Groups);
183                         }
184                         
185                         method = validParameter.validFile(parameters, "method", false);                         if (method == "not found") { method = "totalgroup"; }
186                         if ((method != "totalgroup") && (method != "zscore")) {  m->mothurOut(method + " is not a valid scaling option for the normalize.shared command. The options are totalgroup and zscore. We hope to add more ways to normalize in the future, suggestions are welcome!"); m->mothurOutEndLine(); abort = true; }
187                 
188                         string temp = validParameter.validFile(parameters, "norm", false);                              
189                         if (temp == "not found") {  
190                                 norm = 0;  //once you have read, set norm to smallest group number
191                         }else { 
192                                 m->mothurConvert(temp, norm);
193                                 if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; }
194                         }
195                         
196                         temp = validParameter.validFile(parameters, "makerelabund", false);     if (temp == "") { temp = "f"; }
197                         makeRelabund = m->isTrue(temp);
198                 }
199
200         }
201         catch(exception& e) {
202                 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
203                 exit(1);
204         }
205 }
206 //**********************************************************************************************************************
207
208 int NormalizeSharedCommand::execute(){
209         try {
210         
211                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
212                 
213                 input = new InputData(inputfile, format);
214                 
215                 //you are reading a sharedfile and you do not want to make relabund
216                 if ((format == "sharedfile") && (!makeRelabund)) {
217                         lookup = input->getSharedRAbundVectors();
218                         string lastLabel = lookup[0]->getLabel();
219                         
220                         //look for groups whose numseqs is below norm and remove them, warning the user
221                         if (norm != 0) { 
222                                 m->clearGroups();
223                                 vector<string> mGroups;
224                                 vector<SharedRAbundVector*> temp;
225                                 for (int i = 0; i < lookup.size(); i++) {
226                                         if (lookup[i]->getNumSeqs() < norm) { 
227                                                 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
228                                                 delete lookup[i];
229                                         }else { 
230                                                 mGroups.push_back(lookup[i]->getGroup()); 
231                                                 temp.push_back(lookup[i]);
232                                         }
233                                 } 
234                                 lookup = temp;
235                                 m->setGroups(mGroups);
236                         }
237                         
238                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
239                         set<string> processedLabels;
240                         set<string> userLabels = labels;
241                         
242                         if (method == "totalgroup") {
243                                 //set norm to smallest group number
244                                 if (norm == 0) { 
245                                         norm = lookup[0]->getNumSeqs();
246                                         for (int i = 1; i < lookup.size(); i++) {
247                                                 if (lookup[i]->getNumSeqs() < norm) { norm = lookup[i]->getNumSeqs();  }
248                                         }  
249                                 }
250                                 
251                                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
252                         }
253                         
254                         
255                         //as long as you are not at the end of the file or done wih the lines you want
256                         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
257                                 
258                                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear();  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->clearGroups();   return 0; }
259                                 
260                                 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
261                                         
262                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
263                                         normalize(lookup);
264                                         
265                                         processedLabels.insert(lookup[0]->getLabel());
266                                         userLabels.erase(lookup[0]->getLabel());
267                                 }
268                                 
269                                 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
270                                         string saveLabel = lookup[0]->getLabel();
271                                         
272                                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
273                                         lookup = input->getSharedRAbundVectors(lastLabel);
274                                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
275                                         
276                                         normalize(lookup);
277                                         
278                                         processedLabels.insert(lookup[0]->getLabel());
279                                         userLabels.erase(lookup[0]->getLabel());
280                                         
281                                         //restore real lastlabel to save below
282                                         lookup[0]->setLabel(saveLabel);
283                                 }
284                                 
285                                 lastLabel = lookup[0]->getLabel();
286                                 //prevent memory leak
287                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
288                                 
289                                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->clearGroups();  return 0; }
290                                 
291                                 //get next line to process
292                                 lookup = input->getSharedRAbundVectors();                               
293                         }
294                         
295                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->clearGroups();   return 0; }
296                         
297                         //output error messages about any remaining user labels
298                         set<string>::iterator it;
299                         bool needToRun = false;
300                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
301                                 m->mothurOut("Your file does not include the label " + *it); 
302                                 if (processedLabels.count(lastLabel) != 1) {
303                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
304                                         needToRun = true;
305                                 }else {
306                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
307                                 }
308                         }
309                         
310                         //run last label if you need to
311                         if (needToRun == true)  {
312                                 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
313                                 lookup = input->getSharedRAbundVectors(lastLabel);
314                                 
315                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
316                                 
317                                 normalize(lookup);
318                                 
319                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
320                         }
321                         
322                 }else{ //relabund values
323                         lookupFloat = input->getSharedRAbundFloatVectors();
324                         string lastLabel = lookupFloat[0]->getLabel();
325                         
326                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
327                         set<string> processedLabels;
328                         set<string> userLabels = labels;
329                         
330                         //look for groups whose numseqs is below norm and remove them, warning the user
331                         if (norm != 0) { 
332                                 m->clearGroups();
333                                 vector<string> mGroups;
334                                 vector<SharedRAbundFloatVector*> temp;
335                                 for (int i = 0; i < lookupFloat.size(); i++) {
336                                         if (lookupFloat[i]->getNumSeqs() < norm) { 
337                                                 m->mothurOut(lookupFloat[i]->getGroup() + " contains " + toString(lookupFloat[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
338                                                 delete lookupFloat[i];
339                                         }else { 
340                                                 mGroups.push_back(lookupFloat[i]->getGroup()); 
341                                                 temp.push_back(lookupFloat[i]);
342                                         }
343                                 } 
344                                 lookupFloat = temp;
345                                 m->setGroups(mGroups);
346                         }
347                         
348                         //set norm to smallest group number
349                         if (method == "totalgroup") {
350                                 if (norm == 0) { 
351                                         norm = lookupFloat[0]->getNumSeqs();
352                                         for (int i = 1; i < lookupFloat.size(); i++) {
353                                                 if (lookupFloat[i]->getNumSeqs() < norm) { norm = lookupFloat[i]->getNumSeqs();  }
354                                         }  
355                                 }
356                                 
357                                 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
358                         }
359                         
360                         //as long as you are not at the end of the file or done wih the lines you want
361                         while((lookupFloat[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
362                                 
363                                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear();  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } m->clearGroups();  return 0; }
364                                 
365                                 if(allLines == 1 || labels.count(lookupFloat[0]->getLabel()) == 1){                     
366                                         
367                                         m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
368                                         
369                                         normalize(lookupFloat);
370                                         
371                                         processedLabels.insert(lookupFloat[0]->getLabel());
372                                         userLabels.erase(lookupFloat[0]->getLabel());
373                                 }
374                                 
375                                 if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
376                                         string saveLabel = lookupFloat[0]->getLabel();
377                                         
378                                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }  
379                                         lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
380                                         
381                                         m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
382                 
383                                         normalize(lookupFloat);
384                                         
385                                         processedLabels.insert(lookupFloat[0]->getLabel());
386                                         userLabels.erase(lookupFloat[0]->getLabel());
387                                         
388                                         //restore real lastlabel to save below
389                                         lookupFloat[0]->setLabel(saveLabel);
390                                 }
391                                 
392                                 lastLabel = lookupFloat[0]->getLabel();
393                                 //prevent memory leak
394                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i]; lookupFloat[i] = NULL; }
395                                 
396                                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->clearGroups();   return 0; }
397                                 
398                                 //get next line to process
399                                 lookupFloat = input->getSharedRAbundFloatVectors();                             
400                         }
401                         
402                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->clearGroups();   return 0; }
403                         
404                         //output error messages about any remaining user labels
405                         set<string>::iterator it;
406                         bool needToRun = false;
407                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
408                                 m->mothurOut("Your file does not include the label " + *it); 
409                                 if (processedLabels.count(lastLabel) != 1) {
410                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
411                                         needToRun = true;
412                                 }else {
413                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
414                                 }
415                         }
416                         
417                         //run last label if you need to
418                         if (needToRun == true)  {
419                                 for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } }  
420                                 lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
421                                 
422                                 m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
423                                 
424                                 normalize(lookupFloat);
425                                 
426                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
427                         }
428                         
429                 }
430                 //reset groups parameter
431                 m->clearGroups();  
432                 delete input;
433                 
434                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear(); return 0;}
435                 
436                 m->mothurOutEndLine();
437                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
438                 //m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
439                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
440                 m->mothurOutEndLine();
441                 
442                 //set shared file as new current sharedfile
443                 string current = "";
444                 itTypes = outputTypes.find("shared");
445                 if (itTypes != outputTypes.end()) {
446                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
447                 }
448                 
449                 return 0;
450         }
451         catch(exception& e) {
452                 m->errorOut(e, "NormalizeSharedCommand", "execute");
453                 exit(1);
454         }
455 }
456 //**********************************************************************************************************************
457
458 int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp){
459         try {
460                 //save mothurOut's binLabels to restore for next label
461                 vector<string> saveBinLabels = m->currentBinLabels;
462                 
463                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
464                 
465         map<string, string> variables; 
466         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
467         variables["[distance]"] = thisLookUp[0]->getLabel();
468                 string outputFileName = getOutputFileName("shared",variables);
469         
470                 ofstream out;
471                 m->openOutputFile(outputFileName, out);
472                 outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
473                                 
474                 if (method == "totalgroup") { 
475                         
476                         //save numSeqs since they will change as the data is normalized
477                         vector<int> sizes;
478                         for (int i = 0; i < thisLookUp.size(); i++) {  sizes.push_back(thisLookUp[i]->getNumSeqs()); }
479                                         
480                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
481                                                 
482                                         for (int i = 0; i < thisLookUp.size(); i++) {
483                                                         
484                                                 if (m->control_pressed) { out.close(); return 0; }
485                                                         
486                                                 int abund = thisLookUp[i]->getAbundance(j);
487                                                         
488                                                 float relabund = abund / (float) sizes[i];
489                                                 float newNorm = relabund * norm;
490                                                 
491                                                 //round to nearest int
492                                                 int finalNorm = (int) floor((newNorm + 0.5));
493                                                 
494                                                 thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
495                                         }
496                                 }
497                                         
498                 }else if (method == "zscore") {
499                         
500                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
501                                 
502                                 if (m->control_pressed) { out.close(); return 0; }
503                                 
504                                 //calc mean
505                                 float mean = 0.0;
506                                 for (int i = 0; i < thisLookUp.size(); i++) {  mean += thisLookUp[i]->getAbundance(j); }
507                                 mean /= (float) thisLookUp.size();
508                                         
509                                 //calc standard deviation
510                                 float sumSquared = 0.0;
511                                 for (int i = 0; i < thisLookUp.size(); i++) { sumSquared += (((float)thisLookUp[i]->getAbundance(j) - mean) * ((float)thisLookUp[i]->getAbundance(j) - mean)); }
512                                 sumSquared /= (float) thisLookUp.size();
513                                 
514                                 float standardDev = sqrt(sumSquared);
515                                         
516                                 for (int i = 0; i < thisLookUp.size(); i++) {
517                                         int finalNorm = 0;
518                                         if (standardDev != 0) { // stop divide by zero
519                                                 float newNorm = ((float)thisLookUp[i]->getAbundance(j) - mean) / standardDev;
520                                                 //round to nearest int
521                                                 finalNorm = (int) floor((newNorm + 0.5));
522                                         }
523                                         
524                                         thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
525                                 }
526                         }
527                                                 
528                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
529                                 
530                                 
531                                                 
532                 eliminateZeroOTUS(thisLookUp);
533                 
534                 thisLookUp[0]->printHeaders(out); 
535                  
536                 for (int i = 0; i < thisLookUp.size(); i++) {
537                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
538                         thisLookUp[i]->print(out);
539                 }
540                 
541                 out.close();
542                 
543                 m->currentBinLabels = saveBinLabels;
544                 
545                 return 0;
546         }
547         catch(exception& e) {
548                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
549                 exit(1);
550         }
551 }
552 //**********************************************************************************************************************
553
554 int NormalizeSharedCommand::normalize(vector<SharedRAbundFloatVector*>& thisLookUp){
555         try {
556                 
557                 //save mothurOut's binLabels to restore for next label
558                 vector<string> saveBinLabels = m->currentBinLabels;
559                 
560         map<string, string> variables; 
561         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
562         variables["[distance]"] = thisLookUp[0]->getLabel();
563                 string outputFileName = getOutputFileName("shared",variables);
564                 ofstream out;
565                 m->openOutputFile(outputFileName, out);
566                 outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
567                 
568                 
569                 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
570                 
571                 if (method == "totalgroup") { 
572                         
573                         //save numSeqs since they will change as the data is normalized
574                         vector<float> sizes;
575                         for (int i = 0; i < thisLookUp.size(); i++) {  sizes.push_back(thisLookUp[i]->getNumSeqs()); }
576                         
577                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
578                                 
579                                 for (int i = 0; i < thisLookUp.size(); i++) {
580                                         
581                                         if (m->control_pressed) { out.close(); return 0; }
582                                         
583                                         float abund = thisLookUp[i]->getAbundance(j);
584                                         
585                                         float relabund = abund / (float) sizes[i];
586                                         float newNorm = relabund * norm;
587                                         
588                                         thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
589                                 }
590                         }
591                         
592                 }else if (method == "zscore") {
593                         for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
594                                 
595                                 if (m->control_pressed) { out.close(); return 0; }
596                                 
597                                 //calc mean
598                                 float mean = 0.0;
599                                 for (int i = 0; i < thisLookUp.size(); i++) {  mean += thisLookUp[i]->getAbundance(j); }
600                                 mean /= (float) thisLookUp.size();
601                                 
602                                 //calc standard deviation
603                                 float sumSquared = 0.0;
604                                 for (int i = 0; i < thisLookUp.size(); i++) { sumSquared += ((thisLookUp[i]->getAbundance(j) - mean) * (thisLookUp[i]->getAbundance(j) - mean)); }
605                                 sumSquared /= (float) thisLookUp.size();
606                                 
607                                 float standardDev = sqrt(sumSquared);
608                                 
609                                 for (int i = 0; i < thisLookUp.size(); i++) {
610                                         float newNorm = 0.0;
611                                         if (standardDev != 0) { // stop divide by zero
612                                                 newNorm = (thisLookUp[i]->getAbundance(j) - mean) / standardDev;
613                                         }
614                                         thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
615                                 }
616                         }                       
617                         
618                 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
619                 
620                 
621                 eliminateZeroOTUS(thisLookUp);
622                 
623                 thisLookUp[0]->printHeaders(out); 
624                 
625                 for (int i = 0; i < thisLookUp.size(); i++) {
626                         out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
627                         thisLookUp[i]->print(out);
628                 }
629                 
630                 out.close();
631                 
632                 m->currentBinLabels = saveBinLabels;
633                 
634                 return 0;
635         }
636         catch(exception& e) {
637                 m->errorOut(e, "NormalizeSharedCommand", "normalize");
638                 exit(1);
639         }
640 }
641 //**********************************************************************************************************************
642 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
643         try {
644                 
645                 vector<SharedRAbundVector*> newLookup;
646                 for (int i = 0; i < thislookup.size(); i++) {
647                         SharedRAbundVector* temp = new SharedRAbundVector();
648                         temp->setLabel(thislookup[i]->getLabel());
649                         temp->setGroup(thislookup[i]->getGroup());
650                         newLookup.push_back(temp);
651                 }
652                 
653                 //for each bin
654                 vector<string> newBinLabels;
655                 string snumBins = toString(thislookup[0]->getNumBins());
656                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
657                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
658                 
659                         //look at each sharedRabund and make sure they are not all zero
660                         bool allZero = true;
661                         for (int j = 0; j < thislookup.size(); j++) {
662                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
663                         }
664                         
665                         //if they are not all zero add this bin
666                         if (!allZero) {
667                                 for (int j = 0; j < thislookup.size(); j++) {
668                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
669                                 }
670                                 //if there is a bin label use it otherwise make one
671                                 string binLabel = "Otu";
672                                 string sbinNumber = toString(i+1);
673                                 if (sbinNumber.length() < snumBins.length()) { 
674                                         int diff = snumBins.length() - sbinNumber.length();
675                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
676                                 }
677                                 binLabel += sbinNumber; 
678                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
679                                 
680                                 newBinLabels.push_back(binLabel);
681                         }
682                 }
683
684                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
685
686                 thislookup = newLookup;
687                 m->currentBinLabels = newBinLabels;
688                 
689                 return 0;
690  
691         }
692         catch(exception& e) {
693                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
694                 exit(1);
695         }
696 }
697 //**********************************************************************************************************************
698 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thislookup) {
699         try {
700                 
701                 vector<SharedRAbundFloatVector*> newLookup;
702                 for (int i = 0; i < thislookup.size(); i++) {
703                         SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
704                         temp->setLabel(thislookup[i]->getLabel());
705                         temp->setGroup(thislookup[i]->getGroup());
706                         newLookup.push_back(temp);
707                 }
708                 
709                 //for each bin
710                 vector<string> newBinLabels;
711                 string snumBins = toString(thislookup[0]->getNumBins());
712                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
713                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
714                         
715                         //look at each sharedRabund and make sure they are not all zero
716                         bool allZero = true;
717                         for (int j = 0; j < thislookup.size(); j++) {
718                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
719                         }
720                         
721                         //if they are not all zero add this bin
722                         if (!allZero) {
723                                 for (int j = 0; j < thislookup.size(); j++) {
724                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
725                                 }
726                                 //if there is a bin label use it otherwise make one
727                                 string binLabel = "Otu";
728                                 string sbinNumber = toString(i+1);
729                                 if (sbinNumber.length() < snumBins.length()) { 
730                                         int diff = snumBins.length() - sbinNumber.length();
731                                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
732                                 }
733                                 binLabel += sbinNumber; 
734                                 if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
735                                 
736                                 newBinLabels.push_back(binLabel);
737                         }
738                 }
739                 
740                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
741                 
742                 thislookup = newLookup;
743                 m->currentBinLabels = newBinLabels;
744                 
745                 return 0;
746                 
747         }
748         catch(exception& e) {
749                 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
750                 exit(1);
751         }
752 }
753
754 //**********************************************************************************************************************