2 * normalizesharedcommand.cpp
5 * Created by westcott on 9/15/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "normalizesharedcommand.h"
12 //**********************************************************************************************************************
14 NormalizeSharedCommand::NormalizeSharedCommand(string option) {
16 globaldata = GlobalData::getInstance();
21 //allow user to run help
22 if(option == "help") { help(); abort = true; }
25 //valid paramters for this command
26 string AlignArray[] = {"groups","label","scale","outputdir","inputdir","norm"};
27 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
29 OptionParser parser(option);
30 map<string,string> parameters = parser.getParameters();
32 ValidParameters validParameter;
34 //check to make sure all parameters are valid for command
35 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
36 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
39 //if the user changes the output directory command factory will send this info to us in the output parameter
40 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
42 outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it
45 //make sure the user has already run the read.otu command
46 if ((globaldata->getSharedFile() == "")) {
47 m->mothurOut("You must read a list and a group, or a shared file before you can use the normalize.shared command."); m->mothurOutEndLine(); abort = true;
50 //check for optional parameter and set defaults
51 // ...at some point should added some additional type checking...
52 label = validParameter.validFile(parameters, "label", false);
53 if (label == "not found") { label = ""; }
55 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
56 else { allLines = 1; }
59 //if the user has not specified any labels use the ones from read.otu
61 allLines = globaldata->allLines;
62 labels = globaldata->labels;
65 groups = validParameter.validFile(parameters, "groups", false);
66 if (groups == "not found") { groups = ""; pickedGroups = false; }
69 m->splitAtDash(groups, Groups);
70 globaldata->Groups = Groups;
73 method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "totalgroup"; }
74 if (method != "totalgroup") { m->mothurOut(method + " is not a valid scaling option for the normalize.shared command. The only choice is totalgroup. We hope to add more ways to normalize in the future, suggestions are welcome!"); m->mothurOutEndLine(); abort = true; }
76 string temp = validParameter.validFile(parameters, "norm", false);
77 if (temp == "not found") {
78 norm = 0; //once you have read, set norm to smallest group number
81 if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; }
87 m->errorOut(e, "NormalizeSharedCommand", "NormalizeSharedCommand");
92 //**********************************************************************************************************************
94 void NormalizeSharedCommand::help(){
96 m->mothurOut("The normalize.shared command can only be executed after a successful read.otu command of a list and group or shared file.\n");
97 m->mothurOut("The normalize.shared command parameters are groups, method, norm and label. No parameters are required.\n");
98 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
99 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
100 m->mothurOut("The method parameter allows you to select what method you would like to use to normalize. The only choice is totalgroup. We hope to add more ways to normalize in the future, suggestions are welcome!\n");
101 m->mothurOut("The norm parameter allows you to number you would like to normalize to. By default this is set to the number of sequences in your smallest group.\n");
102 m->mothurOut("The normalize.shared command should be in the following format: normalize.shared(groups=yourGroups, label=yourLabels).\n");
103 m->mothurOut("Example normalize.shared(groups=A-B-C, scale=averagegroup).\n");
104 m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
105 m->mothurOut("The normalize.shared command outputs a .norm.shared file.\n");
106 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
109 catch(exception& e) {
110 m->errorOut(e, "NormalizeSharedCommand", "help");
115 //**********************************************************************************************************************
117 NormalizeSharedCommand::~NormalizeSharedCommand(){}
119 //**********************************************************************************************************************
121 int NormalizeSharedCommand::execute(){
124 if (abort == true) { return 0; }
126 string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "norm.shared";
128 m->openOutputFile(outputFileName, out);
130 read = new ReadOTUFile(globaldata->inputFileName);
131 read->read(&*globaldata);
132 input = globaldata->ginput;
133 lookup = input->getSharedRAbundVectors();
134 string lastLabel = lookup[0]->getLabel();
136 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
137 set<string> processedLabels;
138 set<string> userLabels = labels;
140 //set norm to smallest group number
142 norm = lookup[0]->getNumSeqs();
143 for (int i = 1; i < lookup.size(); i++) {
144 if (lookup[i]->getNumSeqs() < norm) { norm = lookup[i]->getNumSeqs(); }
148 m->mothurOut("Normalizing to " + toString(norm) + "."); m->mothurOutEndLine();
150 //as long as you are not at the end of the file or done wih the lines you want
151 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
153 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } globaldata->Groups.clear(); delete read; out.close(); remove(outputFileName.c_str()); return 0; }
155 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
157 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
158 normalize(lookup, out);
160 processedLabels.insert(lookup[0]->getLabel());
161 userLabels.erase(lookup[0]->getLabel());
164 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
165 string saveLabel = lookup[0]->getLabel();
167 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
168 lookup = input->getSharedRAbundVectors(lastLabel);
169 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
171 normalize(lookup, out);
173 processedLabels.insert(lookup[0]->getLabel());
174 userLabels.erase(lookup[0]->getLabel());
176 //restore real lastlabel to save below
177 lookup[0]->setLabel(saveLabel);
180 lastLabel = lookup[0]->getLabel();
181 //prevent memory leak
182 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
184 if (m->control_pressed) { globaldata->Groups.clear(); delete read; out.close(); remove(outputFileName.c_str()); return 0; }
186 //get next line to process
187 lookup = input->getSharedRAbundVectors();
190 if (m->control_pressed) { globaldata->Groups.clear(); delete read; out.close(); remove(outputFileName.c_str()); return 0; }
192 //output error messages about any remaining user labels
193 set<string>::iterator it;
194 bool needToRun = false;
195 for (it = userLabels.begin(); it != userLabels.end(); it++) {
196 m->mothurOut("Your file does not include the label " + *it);
197 if (processedLabels.count(lastLabel) != 1) {
198 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
201 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
205 //run last label if you need to
206 if (needToRun == true) {
207 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
208 lookup = input->getSharedRAbundVectors(lastLabel);
210 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
212 normalize(lookup, out);
214 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
217 //reset groups parameter
218 globaldata->Groups.clear();
219 delete input; globaldata->ginput = NULL;
223 if (m->control_pressed) { remove(outputFileName.c_str()); return 0;}
225 m->mothurOutEndLine();
226 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
227 m->mothurOut(outputFileName); m->mothurOutEndLine();
228 m->mothurOutEndLine();
232 catch(exception& e) {
233 m->errorOut(e, "NormalizeSharedCommand", "execute");
237 //**********************************************************************************************************************
239 int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
241 if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
244 for (int i = 0; i < thisLookUp.size(); i++) {
245 //cout << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t' << thisLookUp[i]->getNumBins() << '\t';
247 for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
249 if (m->control_pressed) { return 0; }
251 int abund = thisLookUp[i]->getAbundance(j);
253 float relabund = 0.0;
256 if (method == "totalgroup") {
257 relabund = abund / (float) thisLookUp[i]->getNumSeqs();
258 float newNorm = relabund * norm;
259 //round to nearest int
260 finalNorm = (int) floor((newNorm + 0.5));
262 }else{ m->mothurOut(method + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
264 //cout << finalNorm << '\t';
265 thisLookUp[i]->set(j, finalNorm, thisLookUp[i]->getGroup());
270 eliminateZeroOTUS(thisLookUp);
272 for (int i = 0; i < thisLookUp.size(); i++) {
273 out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t' << thisLookUp[i]->getNumBins() << '\t';
274 thisLookUp[i]->print(out);
279 catch(exception& e) {
280 m->errorOut(e, "NormalizeSharedCommand", "normalize");
284 //**********************************************************************************************************************
285 int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
288 vector<SharedRAbundVector*> newLookup;
289 for (int i = 0; i < thislookup.size(); i++) {
290 SharedRAbundVector* temp = new SharedRAbundVector();
291 temp->setLabel(thislookup[i]->getLabel());
292 temp->setGroup(thislookup[i]->getGroup());
293 newLookup.push_back(temp);
297 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
298 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
300 //look at each sharedRabund and make sure they are not all zero
302 for (int j = 0; j < thislookup.size(); j++) {
303 if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
306 //if they are not all zero add this bin
308 for (int j = 0; j < thislookup.size(); j++) {
309 newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
314 for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
316 thislookup = newLookup;
321 catch(exception& e) {
322 m->errorOut(e, "NormalizeSharedCommand", "eliminateZeroOTUS");
326 //**********************************************************************************************************************