2 * normalizesharedcommand.cpp
5 * Created by westcott on 9/15/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "normalizesharedcommand.h"
12 //**********************************************************************************************************************
13 vector<string> NormalizeSharedCommand::getValidParameters(){
15 string Array[] = {"groups","label","method","outputdir","inputdir","norm"};
16 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
20 m->errorOut(e, "NormalizeSharedCommand", "getValidParameters");
24 //**********************************************************************************************************************
25 NormalizeSharedCommand::NormalizeSharedCommand(){
28 //initialize outputTypes
29 vector<string> tempOutNames;
30 outputTypes["shared"] = tempOutNames;
33 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
37 //**********************************************************************************************************************
38 vector<string> NormalizeSharedCommand::getRequiredParameters(){
40 vector<string> myArray;
44 m->errorOut(e, "NormalizeSharedCommand", "getRequiredParameters");
48 //**********************************************************************************************************************
49 vector<string> NormalizeSharedCommand::getRequiredFiles(){
51 string Array[] = {"shared"};
52 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
56 m->errorOut(e, "NormalizeSharedCommand", "getRequiredFiles");
60 //**********************************************************************************************************************
62 NormalizeSharedCommand::NormalizeSharedCommand(string option) {
64 globaldata = GlobalData::getInstance();
69 //allow user to run help
70 if(option == "help") { help(); abort = true; }
73 //valid paramters for this command
74 string AlignArray[] = {"groups","label","method","outputdir","inputdir","norm"};
75 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
77 OptionParser parser(option);
78 map<string,string> parameters = parser.getParameters();
80 ValidParameters validParameter;
82 //check to make sure all parameters are valid for command
83 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
84 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
87 //initialize outputTypes
88 vector<string> tempOutNames;
89 outputTypes["shared"] = tempOutNames;
91 //if the user changes the output directory command factory will send this info to us in the output parameter
92 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
94 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it
97 //make sure the user has already run the read.otu command
98 if ((globaldata->getSharedFile() == "")) {
99 m->mothurOut("You must read a list and a group, or a shared file before you can use the normalize.shared command."); m->mothurOutEndLine(); abort = true;
102 //check for optional parameter and set defaults
103 // ...at some point should added some additional type checking...
104 label = validParameter.validFile(parameters, "label", false);
105 if (label == "not found") { label = ""; }
107 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
108 else { allLines = 1; }
111 //if the user has not specified any labels use the ones from read.otu
113 allLines = globaldata->allLines;
114 labels = globaldata->labels;
117 groups = validParameter.validFile(parameters, "groups", false);
118 if (groups == "not found") { groups = ""; pickedGroups = false; }
121 m->splitAtDash(groups, Groups);
122 globaldata->Groups = Groups;
125 method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "totalgroup"; }
126 if (method != "totalgroup") { m->mothurOut(method + " is not a valid scaling option for the normalize.shared command. The only choice is totalgroup. We hope to add more ways to normalize in the future, suggestions are welcome!"); m->mothurOutEndLine(); abort = true; }
128 string temp = validParameter.validFile(parameters, "norm", false);
129 if (temp == "not found") {
130 norm = 0; //once you have read, set norm to smallest group number
133 if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; }
138 catch(exception& e) {
139 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
144 //**********************************************************************************************************************
146 void NormalizeSharedCommand::help(){
148 m->mothurOut("The normalize.shared command can only be executed after a successful read.otu command of a list and group or shared file.\n");
149 m->mothurOut("The normalize.shared command parameters are groups, method, norm and label. No parameters are required.\n");
150 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
151 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
152 m->mothurOut("The method parameter allows you to select what method you would like to use to normalize. The only choice is totalgroup. We hope to add more ways to normalize in the future, suggestions are welcome!\n");
153 m->mothurOut("The norm parameter allows you to number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n");
154 m->mothurOut("The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n");
155 m->mothurOut("Example normalize.shared(groups=A-B-C, scale=totalgroup).\n");
156 m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
157 m->mothurOut("The normalize.shared command outputs a .norm.shared file.\n");
158 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
161 catch(exception& e) {
162 m->errorOut(e, "NormalizeSharedCommand", "help");
167 //**********************************************************************************************************************
169 NormalizeSharedCommand::~NormalizeSharedCommand(){}
171 //**********************************************************************************************************************
173 int NormalizeSharedCommand::execute(){
176 if (abort == true) { return 0; }
178 string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "norm.shared";
180 m->openOutputFile(outputFileName, out);
182 read = new ReadOTUFile(globaldata->inputFileName);
183 read->read(&*globaldata);
184 input = globaldata->ginput;
185 lookup = input->getSharedRAbundVectors();
186 string lastLabel = lookup[0]->getLabel();
188 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
189 set<string> processedLabels;
190 set<string> userLabels = labels;
192 //set norm to smallest group number
194 norm = lookup[0]->getNumSeqs();
195 for (int i = 1; i < lookup.size(); i++) {
196 if (lookup[i]->getNumSeqs() < norm) { norm = lookup[i]->getNumSeqs(); }
200 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
202 //as long as you are not at the end of the file or done wih the lines you want
203 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
205 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); delete read; out.close(); remove(outputFileName.c_str()); return 0; }
207 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
209 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
210 normalize(lookup, out);
212 processedLabels.insert(lookup[0]->getLabel());
213 userLabels.erase(lookup[0]->getLabel());
216 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
217 string saveLabel = lookup[0]->getLabel();
219 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
220 lookup = input->getSharedRAbundVectors(lastLabel);
221 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
223 normalize(lookup, out);
225 processedLabels.insert(lookup[0]->getLabel());
226 userLabels.erase(lookup[0]->getLabel());
228 //restore real lastlabel to save below
229 lookup[0]->setLabel(saveLabel);
232 lastLabel = lookup[0]->getLabel();
233 //prevent memory leak
234 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
236 if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); delete read; out.close(); remove(outputFileName.c_str()); return 0; }
238 //get next line to process
239 lookup = input->getSharedRAbundVectors();
242 if (m->control_pressed) { outputTypes.clear(); globaldata->Groups.clear(); delete read; out.close(); remove(outputFileName.c_str()); return 0; }
244 //output error messages about any remaining user labels
245 set<string>::iterator it;
246 bool needToRun = false;
247 for (it = userLabels.begin(); it != userLabels.end(); it++) {
248 m->mothurOut("Your file does not include the label " + *it);
249 if (processedLabels.count(lastLabel) != 1) {
250 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
253 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
257 //run last label if you need to
258 if (needToRun == true) {
259 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
260 lookup = input->getSharedRAbundVectors(lastLabel);
262 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
264 normalize(lookup, out);
266 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
269 //reset groups parameter
270 globaldata->Groups.clear();
271 delete input; globaldata->ginput = NULL;
275 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;}
277 m->mothurOutEndLine();
278 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
279 m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
280 m->mothurOutEndLine();
284 catch(exception& e) {
285 m->errorOut(e, "NormalizeSharedCommand", "execute");
289 //**********************************************************************************************************************
291 int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
293 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
296 for (int i = 0; i < thisLookUp.size(); i++) {
297 //cout << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t' << thisLookUp[i]->getNumBins() << '\t';
299 for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
301 if (m->control_pressed) { return 0; }
303 int abund = thisLookUp[i]->getAbundance(j);
305 float relabund = 0.0;
308 if (method == "totalgroup") {
309 relabund = abund / (float) thisLookUp[i]->getNumSeqs();
310 float newNorm = relabund * norm;
311 //round to nearest int
312 finalNorm = (int) floor((newNorm + 0.5));
313 //cout << thisLookUp[i]->getGroup() << '\t' << abund << '\t' << relabund << '\t' << norm << '\t' << newNorm << '\t' << finalNorm << endl;
315 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
317 //cout << finalNorm << '\t';
318 thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
323 eliminateZeroOTUS(thisLookUp);
325 for (int i = 0; i < thisLookUp.size(); i++) {
326 out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
327 thisLookUp[i]->print(out);
332 catch(exception& e) {
333 m->errorOut(e, "NormalizeSharedCommand", "normalize");
337 //**********************************************************************************************************************
338 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
341 vector<SharedRAbundVector*> newLookup;
342 for (int i = 0; i < thislookup.size(); i++) {
343 SharedRAbundVector* temp = new SharedRAbundVector();
344 temp->setLabel(thislookup[i]->getLabel());
345 temp->setGroup(thislookup[i]->getGroup());
346 newLookup.push_back(temp);
350 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
351 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
353 //look at each sharedRabund and make sure they are not all zero
355 for (int j = 0; j < thislookup.size(); j++) {
356 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
359 //if they are not all zero add this bin
361 for (int j = 0; j < thislookup.size(); j++) {
362 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
367 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
369 thislookup = newLookup;
374 catch(exception& e) {
375 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
379 //**********************************************************************************************************************