2 * normalizesharedcommand.cpp
5 * Created by westcott on 9/15/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "normalizesharedcommand.h"
12 //**********************************************************************************************************************
13 vector<string> NormalizeSharedCommand::getValidParameters(){
15 string Array[] = {"groups","label","scale","outputdir","inputdir","norm"};
16 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
20 m->errorOut(e, "NormalizeSharedCommand", "getValidParameters");
24 //**********************************************************************************************************************
25 NormalizeSharedCommand::NormalizeSharedCommand(){
27 //initialize outputTypes
28 vector<string> tempOutNames;
29 outputTypes["shared"] = tempOutNames;
32 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
36 //**********************************************************************************************************************
37 vector<string> NormalizeSharedCommand::getRequiredParameters(){
39 vector<string> myArray;
43 m->errorOut(e, "NormalizeSharedCommand", "getRequiredParameters");
47 //**********************************************************************************************************************
48 vector<string> NormalizeSharedCommand::getRequiredFiles(){
50 string Array[] = {"shared"};
51 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
55 m->errorOut(e, "NormalizeSharedCommand", "getRequiredFiles");
59 //**********************************************************************************************************************
61 NormalizeSharedCommand::NormalizeSharedCommand(string option) {
63 globaldata = GlobalData::getInstance();
68 //allow user to run help
69 if(option == "help") { help(); abort = true; }
72 //valid paramters for this command
73 string AlignArray[] = {"groups","label","scale","outputdir","inputdir","norm"};
74 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
76 OptionParser parser(option);
77 map<string,string> parameters = parser.getParameters();
79 ValidParameters validParameter;
81 //check to make sure all parameters are valid for command
82 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
83 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
86 //initialize outputTypes
87 vector<string> tempOutNames;
88 outputTypes["shared"] = tempOutNames;
90 //if the user changes the output directory command factory will send this info to us in the output parameter
91 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
93 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it
96 //make sure the user has already run the read.otu command
97 if ((globaldata->getSharedFile() == "")) {
98 m->mothurOut("You must read a list and a group, or a shared file before you can use the normalize.shared command."); m->mothurOutEndLine(); abort = true;
101 //check for optional parameter and set defaults
102 // ...at some point should added some additional type checking...
103 label = validParameter.validFile(parameters, "label", false);
104 if (label == "not found") { label = ""; }
106 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
107 else { allLines = 1; }
110 //if the user has not specified any labels use the ones from read.otu
112 allLines = globaldata->allLines;
113 labels = globaldata->labels;
116 groups = validParameter.validFile(parameters, "groups", false);
117 if (groups == "not found") { groups = ""; pickedGroups = false; }
120 m->splitAtDash(groups, Groups);
121 globaldata->Groups = Groups;
124 method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "totalgroup"; }
125 if (method != "totalgroup") { m->mothurOut(method + " is not a valid scaling option for the normalize.shared command. The only choice is totalgroup. We hope to add more ways to normalize in the future, suggestions are welcome!"); m->mothurOutEndLine(); abort = true; }
127 string temp = validParameter.validFile(parameters, "norm", false);
128 if (temp == "not found") {
129 norm = 0; //once you have read, set norm to smallest group number
132 if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; }
137 catch(exception& e) {
138 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
143 //**********************************************************************************************************************
145 void NormalizeSharedCommand::help(){
147 m->mothurOut("The normalize.shared command can only be executed after a successful read.otu command of a list and group or shared file.\n");
148 m->mothurOut("The normalize.shared command parameters are groups, method, norm and label. No parameters are required.\n");
149 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
150 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
151 m->mothurOut("The method parameter allows you to select what method you would like to use to normalize. The only choice is totalgroup. We hope to add more ways to normalize in the future, suggestions are welcome!\n");
152 m->mothurOut("The norm parameter allows you to number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n");
153 m->mothurOut("The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n");
154 m->mothurOut("Example normalize.shared(groups=A-B-C, scale=averagegroup).\n");
155 m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
156 m->mothurOut("The normalize.shared command outputs a .norm.shared file.\n");
157 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
160 catch(exception& e) {
161 m->errorOut(e, "NormalizeSharedCommand", "help");
166 //**********************************************************************************************************************
168 NormalizeSharedCommand::~NormalizeSharedCommand(){}
170 //**********************************************************************************************************************
172 int NormalizeSharedCommand::execute(){
175 if (abort == true) { return 0; }
177 string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "norm.shared";
179 m->openOutputFile(outputFileName, out);
181 read = new ReadOTUFile(globaldata->inputFileName);
182 read->read(&*globaldata);
183 input = globaldata->ginput;
184 lookup = input->getSharedRAbundVectors();
185 string lastLabel = lookup[0]->getLabel();
187 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
188 set<string> processedLabels;
189 set<string> userLabels = labels;
191 //set norm to smallest group number
193 norm = lookup[0]->getNumSeqs();
194 for (int i = 1; i < lookup.size(); i++) {
195 if (lookup[i]->getNumSeqs() < norm) { norm = lookup[i]->getNumSeqs(); }
199 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
201 //as long as you are not at the end of the file or done wih the lines you want
202 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
204 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); delete read; out.close(); remove(outputFileName.c_str()); return 0; }
206 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
208 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
209 normalize(lookup, out);
211 processedLabels.insert(lookup[0]->getLabel());
212 userLabels.erase(lookup[0]->getLabel());
215 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
216 string saveLabel = lookup[0]->getLabel();
218 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
219 lookup = input->getSharedRAbundVectors(lastLabel);
220 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
222 normalize(lookup, out);
224 processedLabels.insert(lookup[0]->getLabel());
225 userLabels.erase(lookup[0]->getLabel());
227 //restore real lastlabel to save below
228 lookup[0]->setLabel(saveLabel);
231 lastLabel = lookup[0]->getLabel();
232 //prevent memory leak
233 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
235 if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); delete read; out.close(); remove(outputFileName.c_str()); return 0; }
237 //get next line to process
238 lookup = input->getSharedRAbundVectors();
241 if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); delete read; out.close(); remove(outputFileName.c_str()); return 0; }
243 //output error messages about any remaining user labels
244 set<string>::iterator it;
245 bool needToRun = false;
246 for (it = userLabels.begin(); it != userLabels.end(); it++) {
247 m->mothurOut("Your file does not include the label " + *it);
248 if (processedLabels.count(lastLabel) != 1) {
249 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
252 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
256 //run last label if you need to
257 if (needToRun == true) {
258 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
259 lookup = input->getSharedRAbundVectors(lastLabel);
261 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
263 normalize(lookup, out);
265 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
268 //reset groups parameter
269 globaldata->Groups.clear();
270 delete input; globaldata->ginput = NULL;
274 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;}
276 m->mothurOutEndLine();
277 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
278 m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
279 m->mothurOutEndLine();
283 catch(exception& e) {
284 m->errorOut(e, "NormalizeSharedCommand", "execute");
288 //**********************************************************************************************************************
290 int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
292 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
295 for (int i = 0; i < thisLookUp.size(); i++) {
296 //cout << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t' << thisLookUp[i]->getNumBins() << '\t';
298 for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
300 if (m->control_pressed) { return 0; }
302 int abund = thisLookUp[i]->getAbundance(j);
304 float relabund = 0.0;
307 if (method == "totalgroup") {
308 relabund = abund / (float) thisLookUp[i]->getNumSeqs();
309 float newNorm = relabund * norm;
310 //round to nearest int
311 finalNorm = (int) floor((newNorm + 0.5));
313 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
315 //cout << finalNorm << '\t';
316 thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
321 eliminateZeroOTUS(thisLookUp);
323 for (int i = 0; i < thisLookUp.size(); i++) {
324 out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t' << thisLookUp[i]->getNumBins() << '\t';
325 thisLookUp[i]->print(out);
330 catch(exception& e) {
331 m->errorOut(e, "NormalizeSharedCommand", "normalize");
335 //**********************************************************************************************************************
336 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
339 vector<SharedRAbundVector*> newLookup;
340 for (int i = 0; i < thislookup.size(); i++) {
341 SharedRAbundVector* temp = new SharedRAbundVector();
342 temp->setLabel(thislookup[i]->getLabel());
343 temp->setGroup(thislookup[i]->getGroup());
344 newLookup.push_back(temp);
348 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
349 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
351 //look at each sharedRabund and make sure they are not all zero
353 for (int j = 0; j < thislookup.size(); j++) {
354 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
357 //if they are not all zero add this bin
359 for (int j = 0; j < thislookup.size(); j++) {
360 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
365 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
367 thislookup = newLookup;
372 catch(exception& e) {
373 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
377 //**********************************************************************************************************************