2 * removerarecommand.cpp
5 * Created by westcott on 1/21/11.
6 * Copyright 2011 Schloss Lab. All rights reserved.
10 #include "removerarecommand.h"
11 #include "sequence.hpp"
13 #include "sharedutilities.h"
14 #include "inputdata.h"
16 //**********************************************************************************************************************
17 vector<string> RemoveRareCommand::setParameters(){
19 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plist);
20 CommandParameter prabund("rabund", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(prabund);
21 CommandParameter psabund("sabund", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(psabund);
22 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pshared);
23 CommandParameter pcount("count", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pcount);
24 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
25 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
26 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
27 CommandParameter pnseqs("nseqs", "Number", "", "0", "", "", "",false,true); parameters.push_back(pnseqs);
28 CommandParameter pbygroup("bygroup", "Boolean", "", "f", "", "", "",false,true); parameters.push_back(pbygroup);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "RemoveRareCommand", "setParameters");
41 //**********************************************************************************************************************
42 string RemoveRareCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The remove.rare command parameters are list, rabund, sabund, shared, group, count, label, groups, bygroup and nseqs.\n";
46 helpString += "The remove.rare command reads one of the following file types: list, rabund, sabund or shared file. It outputs a new file after removing the rare otus.\n";
47 helpString += "The groups parameter allows you to specify which of the groups you would like analyzed. Default=all. You may separate group names with dashes.\n";
48 helpString += "The label parameter is used to analyze specific labels in your input. default=all. You may separate label names with dashes.\n";
49 helpString += "The bygroup parameter is only valid with the shared file. default=f, meaning remove any OTU that has nseqs or fewer sequences across all groups.\n";
50 helpString += "bygroups=T means remove any OTU that has nseqs or fewer sequences in each group (if groupA has 1 sequence and group B has 100 sequences in OTUZ and nseqs=1, then set the groupA count for OTUZ to 0 and keep groupB's count at 100.) \n";
51 helpString += "The nseqs parameter allows you to set the cutoff for an otu to be deemed rare. It is required.\n";
52 helpString += "The remove.rare command should be in the following format: remove.rare(shared=yourSharedFile, nseqs=yourRareCutoff).\n";
53 helpString += "Example remove.rare(shared=amazon.fn.shared, nseqs=2).\n";
54 helpString += "Note: No spaces between parameter labels (i.e. shared), '=' and parameters (i.e.yourSharedFile).\n";
58 m->errorOut(e, "RemoveRareCommand", "getHelpString");
62 //**********************************************************************************************************************
63 string RemoveRareCommand::getOutputFileNameTag(string type, string inputName=""){
65 string outputFileName = "";
66 map<string, vector<string> >::iterator it;
68 //is this a type this command creates
69 it = outputTypes.find(type);
70 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
72 if (type == "rabund") { outputFileName = "pick" + m->getExtension(inputName); }
73 else if (type == "sabund") { outputFileName = "pick" + m->getExtension(inputName); }
74 else if (type == "shared") { outputFileName = "pick" + m->getExtension(inputName); }
75 else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
76 else if (type == "count") { outputFileName = "pick" + m->getExtension(inputName); }
77 else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
78 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
80 return outputFileName;
83 m->errorOut(e, "RemoveRareCommand", "getOutputFileNameTag");
88 //**********************************************************************************************************************
89 RemoveRareCommand::RemoveRareCommand(){
91 abort = true; calledHelp = true;
93 vector<string> tempOutNames;
94 outputTypes["rabund"] = tempOutNames;
95 outputTypes["sabund"] = tempOutNames;
96 outputTypes["list"] = tempOutNames;
97 outputTypes["group"] = tempOutNames;
98 outputTypes["count"] = tempOutNames;
99 outputTypes["shared"] = tempOutNames;
101 catch(exception& e) {
102 m->errorOut(e, "RemoveRareCommand", "RemoveRareCommand");
106 //**********************************************************************************************************************
107 RemoveRareCommand::RemoveRareCommand(string option) {
109 abort = false; calledHelp = false;
112 //allow user to run help
113 if(option == "help") { help(); abort = true; calledHelp = true; }
114 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
117 vector<string> myArray = setParameters();
119 OptionParser parser(option);
120 map<string,string> parameters = parser.getParameters();
122 ValidParameters validParameter;
123 map<string,string>::iterator it;
125 //check to make sure all parameters are valid for command
126 for (it = parameters.begin(); it != parameters.end(); it++) {
127 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
130 //initialize outputTypes
131 vector<string> tempOutNames;
132 outputTypes["rabund"] = tempOutNames;
133 outputTypes["sabund"] = tempOutNames;
134 outputTypes["list"] = tempOutNames;
135 outputTypes["group"] = tempOutNames;
136 outputTypes["shared"] = tempOutNames;
137 outputTypes["count"] = tempOutNames;
139 //if the user changes the output directory command factory will send this info to us in the output parameter
140 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
142 //if the user changes the input directory command factory will send this info to us in the output parameter
143 string inputDir = validParameter.validFile(parameters, "inputdir", false);
144 if (inputDir == "not found"){ inputDir = ""; }
147 it = parameters.find("list");
148 //user has given a template file
149 if(it != parameters.end()){
150 path = m->hasPath(it->second);
151 //if the user has not given a path then, add inputdir. else leave path alone.
152 if (path == "") { parameters["list"] = inputDir + it->second; }
155 it = parameters.find("group");
156 //user has given a template file
157 if(it != parameters.end()){
158 path = m->hasPath(it->second);
159 //if the user has not given a path then, add inputdir. else leave path alone.
160 if (path == "") { parameters["group"] = inputDir + it->second; }
163 it = parameters.find("sabund");
164 //user has given a template file
165 if(it != parameters.end()){
166 path = m->hasPath(it->second);
167 //if the user has not given a path then, add inputdir. else leave path alone.
168 if (path == "") { parameters["sabund"] = inputDir + it->second; }
171 it = parameters.find("rabund");
172 //user has given a template file
173 if(it != parameters.end()){
174 path = m->hasPath(it->second);
175 //if the user has not given a path then, add inputdir. else leave path alone.
176 if (path == "") { parameters["rabund"] = inputDir + it->second; }
179 it = parameters.find("shared");
180 //user has given a template file
181 if(it != parameters.end()){
182 path = m->hasPath(it->second);
183 //if the user has not given a path then, add inputdir. else leave path alone.
184 if (path == "") { parameters["shared"] = inputDir + it->second; }
187 it = parameters.find("count");
188 //user has given a template file
189 if(it != parameters.end()){
190 path = m->hasPath(it->second);
191 //if the user has not given a path then, add inputdir. else leave path alone.
192 if (path == "") { parameters["count"] = inputDir + it->second; }
197 //check for file parameters
198 listfile = validParameter.validFile(parameters, "list", true);
199 if (listfile == "not open") { abort = true; }
200 else if (listfile == "not found") { listfile = ""; }
201 else { m->setListFile(listfile); }
203 sabundfile = validParameter.validFile(parameters, "sabund", true);
204 if (sabundfile == "not open") { abort = true; }
205 else if (sabundfile == "not found") { sabundfile = ""; }
206 else { m->setSabundFile(sabundfile); }
208 rabundfile = validParameter.validFile(parameters, "rabund", true);
209 if (rabundfile == "not open") { abort = true; }
210 else if (rabundfile == "not found") { rabundfile = ""; }
211 else { m->setRabundFile(rabundfile); }
213 groupfile = validParameter.validFile(parameters, "group", true);
214 if (groupfile == "not open") { groupfile = ""; abort = true; }
215 else if (groupfile == "not found") { groupfile = ""; }
216 else { m->setGroupFile(groupfile); }
218 sharedfile = validParameter.validFile(parameters, "shared", true);
219 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
220 else if (sharedfile == "not found") { sharedfile = ""; }
221 else { m->setSharedFile(sharedfile); }
223 countfile = validParameter.validFile(parameters, "count", true);
224 if (countfile == "not open") { countfile = ""; abort = true; }
225 else if (countfile == "not found") { countfile = ""; }
226 else { m->setCountTableFile(countfile); }
228 if ((groupfile != "") && (countfile != "")) {
229 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
232 if ((sharedfile == "") && (listfile == "") && (rabundfile == "") && (sabundfile == "")) {
233 //is there are current file available for any of these?
234 //give priority to shared, then list, then rabund, then sabund
235 //if there is a current shared file, use it
236 sharedfile = m->getSharedFile();
237 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
239 listfile = m->getListFile();
240 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
242 rabundfile = m->getRabundFile();
243 if (rabundfile != "") { m->mothurOut("Using " + rabundfile + " as input file for the rabund parameter."); m->mothurOutEndLine(); }
245 sabundfile = m->getSabundFile();
246 if (sabundfile != "") { m->mothurOut("Using " + sabundfile + " as input file for the sabund parameter."); m->mothurOutEndLine(); }
248 m->mothurOut("No valid current files. You must provide a list, sabund, rabund or shared file."); m->mothurOutEndLine();
256 groups = validParameter.validFile(parameters, "groups", false);
257 if (groups == "not found") { groups = "all"; }
258 m->splitAtDash(groups, Groups);
260 label = validParameter.validFile(parameters, "label", false);
261 if (label == "not found") { label = ""; }
263 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
264 else { allLines = 1; }
267 string temp = validParameter.validFile(parameters, "nseqs", false);
268 if (temp == "not found") { m->mothurOut("nseqs is a required parameter."); m->mothurOutEndLine(); abort = true; }
269 else { m->mothurConvert(temp, nseqs); }
271 temp = validParameter.validFile(parameters, "bygroup", false); if (temp == "not found") { temp = "f"; }
272 byGroup = m->isTrue(temp);
274 if (byGroup && (sharedfile == "")) { m->mothurOut("The byGroup parameter is only valid with a shared file."); m->mothurOutEndLine(); }
276 if (((groupfile != "") || (countfile != "")) && (listfile == "")) { m->mothurOut("A group or count file is only valid with a list file."); m->mothurOutEndLine(); groupfile = ""; countfile = ""; }
280 catch(exception& e) {
281 m->errorOut(e, "RemoveRareCommand", "RemoveRareCommand");
285 //**********************************************************************************************************************
287 int RemoveRareCommand::execute(){
290 if (abort == true) { if (calledHelp) { return 0; } return 2; }
292 if (m->control_pressed) { return 0; }
294 //read through the correct file and output lines you want to keep
295 if (sabundfile != "") { processSabund(); }
296 if (rabundfile != "") { processRabund(); }
297 if (listfile != "") { processList(); }
298 if (sharedfile != "") { processShared(); }
300 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
302 if (outputNames.size() != 0) {
303 m->mothurOutEndLine();
304 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
305 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
306 m->mothurOutEndLine();
308 //set rabund file as new current rabundfile
310 itTypes = outputTypes.find("rabund");
311 if (itTypes != outputTypes.end()) {
312 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
315 itTypes = outputTypes.find("sabund");
316 if (itTypes != outputTypes.end()) {
317 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
320 itTypes = outputTypes.find("group");
321 if (itTypes != outputTypes.end()) {
322 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
325 itTypes = outputTypes.find("list");
326 if (itTypes != outputTypes.end()) {
327 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
330 itTypes = outputTypes.find("shared");
331 if (itTypes != outputTypes.end()) {
332 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
335 itTypes = outputTypes.find("count");
336 if (itTypes != outputTypes.end()) {
337 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
344 catch(exception& e) {
345 m->errorOut(e, "RemoveRareCommand", "execute");
350 //**********************************************************************************************************************
351 int RemoveRareCommand::processList(){
353 string thisOutputDir = outputDir;
354 if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
355 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
356 string outputGroupFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
357 string outputCountFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
359 ofstream out, outGroup;
360 m->openOutputFile(outputFileName, out);
362 bool wroteSomething = false;
364 //you must provide a label because the names in the listfile need to be consistent
365 string thisLabel = "";
366 if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); }
367 else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + "."); m->mothurOutEndLine(); thisLabel = *labels.begin(); }
368 else { thisLabel = *labels.begin(); }
370 InputData input(listfile, "list");
371 ListVector* list = input.getListVector();
373 //get first one or the one we want
374 if (thisLabel != "") {
375 //use smart distancing
376 set<string> userLabels; userLabels.insert(thisLabel);
377 set<string> processedLabels;
378 string lastLabel = list->getLabel();
379 while((list != NULL) && (userLabels.size() != 0)) {
380 if(userLabels.count(list->getLabel()) == 1){
381 processedLabels.insert(list->getLabel());
382 userLabels.erase(list->getLabel());
386 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
387 processedLabels.insert(list->getLabel());
388 userLabels.erase(list->getLabel());
390 list = input.getListVector(lastLabel);
393 lastLabel = list->getLabel();
395 list = input.getListVector();
397 if (userLabels.size() != 0) {
398 m->mothurOut("Your file does not include the label " + thisLabel + ". I will use " + lastLabel + "."); m->mothurOutEndLine();
399 list = input.getListVector(lastLabel);
403 //if groupfile is given then use it
406 if (groupfile != "") {
407 groupMap = new GroupMap(groupfile); groupMap->readMap();
409 vector<string> namesGroups = groupMap->getNamesOfGroups();
410 util.setGroups(Groups, namesGroups);
411 m->openOutputFile(outputGroupFileName, outGroup);
412 }else if (countfile != "") {
413 ct.readTable(countfile);
414 if (ct.hasGroupInfo()) {
415 vector<string> namesGroups = ct.getNamesOfGroups();
417 util.setGroups(Groups, namesGroups);
423 //make a new list vector
425 newList.setLabel(list->getLabel());
428 for (int i = 0; i < list->getNumBins(); i++) {
429 if (m->control_pressed) { if (groupfile != "") { delete groupMap; outGroup.close(); m->mothurRemove(outputGroupFileName); } out.close(); m->mothurRemove(outputFileName); return 0; }
431 //parse out names that are in accnos file
432 string binnames = list->get(i);
433 vector<string> names;
434 string saveBinNames = binnames;
435 m->splitAtComma(binnames, names);
436 int binsize = names.size();
438 vector<string> newGroupFile;
439 if (groupfile != "") {
440 vector<string> newNames;
442 for(int k = 0; k < names.size(); k++) {
443 string group = groupMap->getGroup(names[k]);
445 if (m->inUsersGroups(group, Groups)) {
446 newGroupFile.push_back(names[k] + "\t" + group);
448 newNames.push_back(names[k]);
449 saveBinNames += names[k] + ",";
452 names = newNames; binsize = names.size();
453 saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
454 }else if (countfile != "") {
457 for(int k = 0; k < names.size(); k++) {
458 if (ct.hasGroupInfo()) {
459 vector<string> thisSeqsGroups = ct.getGroups(names[k]);
461 int thisSeqsCount = 0;
462 for (int n = 0; n < thisSeqsGroups.size(); n++) {
463 if (m->inUsersGroups(thisSeqsGroups[n], Groups)) {
464 thisSeqsCount += ct.getGroupCount(names[k], thisSeqsGroups[n]);
467 binsize += thisSeqsCount;
468 //if you don't have any seqs from the groups the user wants, then remove you.
469 if (thisSeqsCount == 0) { newGroupFile.push_back(names[k]); }
470 else { saveBinNames += names[k] + ","; }
472 binsize += ct.getNumSeqs(names[k]);
473 saveBinNames += names[k] + ",";
476 saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
479 if (binsize > nseqs) { //keep bin
480 newList.push_back(saveBinNames);
481 if (groupfile != "") { for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; } }
482 else if (countfile != "") { for(int k = 0; k < newGroupFile.size(); k++) { ct.remove(newGroupFile[k]); } }
483 }else { if (countfile != "") { for(int k = 0; k < names.size(); k++) { ct.remove(names[k]); } } }
486 //print new listvector
487 if (newList.getNumBins() != 0) {
488 wroteSomething = true;
494 if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); }
495 if (countfile != "") {
496 if (ct.hasGroupInfo()) {
497 vector<string> allGroups = ct.getNamesOfGroups();
498 for (int i = 0; i < allGroups.size(); i++) {
499 if (!m->inUsersGroups(allGroups[i], Groups)) { ct.removeGroup(allGroups[i]); }
503 ct.printTable(outputCountFileName);
504 outputTypes["count"].push_back(outputCountFileName); outputNames.push_back(outputCountFileName);
507 if (wroteSomething == false) { m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine(); }
508 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
512 catch(exception& e) {
513 m->errorOut(e, "RemoveRareCommand", "processList");
517 //**********************************************************************************************************************
518 int RemoveRareCommand::processSabund(){
520 string thisOutputDir = outputDir;
521 if (outputDir == "") { thisOutputDir += m->hasPath(sabundfile); }
522 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sabundfile)) + getOutputFileNameTag("sabund", sabundfile);
523 outputTypes["sabund"].push_back(outputFileName); outputNames.push_back(outputFileName);
526 m->openOutputFile(outputFileName, out);
528 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
529 InputData input(sabundfile, "sabund");
530 SAbundVector* sabund = input.getSAbundVector();
531 string lastLabel = sabund->getLabel();
532 set<string> processedLabels;
533 set<string> userLabels = labels;
535 while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
537 if (m->control_pressed) { delete sabund; out.close(); return 0; }
539 if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
541 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
542 processedLabels.insert(sabund->getLabel());
543 userLabels.erase(sabund->getLabel());
545 if (sabund->getMaxRank() > nseqs) {
546 for(int i = 1; i <=nseqs; i++) { sabund->set(i, 0); }
547 }else { sabund->clear(); }
549 if (sabund->getNumBins() > 0) { sabund->print(out); }
552 if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
553 string saveLabel = sabund->getLabel();
556 sabund = input.getSAbundVector(lastLabel);
558 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
559 processedLabels.insert(sabund->getLabel());
560 userLabels.erase(sabund->getLabel());
562 if (sabund->getMaxRank() > nseqs) {
563 for(int i = 1; i <=nseqs; i++) { sabund->set(i, 0); }
564 }else { sabund->clear(); }
566 if (sabund->getNumBins() > 0) { sabund->print(out); }
568 //restore real lastlabel to save below
569 sabund->setLabel(saveLabel);
572 lastLabel = sabund->getLabel();
575 sabund = input.getSAbundVector();
578 if (m->control_pressed) { out.close(); return 0; }
580 //output error messages about any remaining user labels
581 set<string>::iterator it;
582 bool needToRun = false;
583 for (it = userLabels.begin(); it != userLabels.end(); it++) {
584 m->mothurOut("Your file does not include the label " + *it);
585 if (processedLabels.count(lastLabel) != 1) {
586 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
589 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
593 //run last label if you need to
594 if (needToRun == true) {
595 if (sabund != NULL) { delete sabund; }
596 sabund = input.getSAbundVector(lastLabel);
598 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
600 if (sabund->getMaxRank() > nseqs) {
601 for(int i = 1; i <=nseqs; i++) { sabund->set(i, 0); }
602 }else { sabund->clear(); }
604 if (sabund->getNumBins() > 0) { sabund->print(out); }
611 catch(exception& e) {
612 m->errorOut(e, "RemoveRareCommand", "processSabund");
616 //**********************************************************************************************************************
617 int RemoveRareCommand::processRabund(){
619 string thisOutputDir = outputDir;
620 if (outputDir == "") { thisOutputDir += m->hasPath(rabundfile); }
621 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(rabundfile)) + getOutputFileNameTag("rabund", rabundfile);
622 outputTypes["rabund"].push_back(outputFileName); outputNames.push_back(outputFileName);
625 m->openOutputFile(outputFileName, out);
627 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
628 InputData input(rabundfile, "rabund");
629 RAbundVector* rabund = input.getRAbundVector();
630 string lastLabel = rabund->getLabel();
631 set<string> processedLabels;
632 set<string> userLabels = labels;
634 while((rabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
636 if (m->control_pressed) { delete rabund; out.close(); return 0; }
638 if(allLines == 1 || labels.count(rabund->getLabel()) == 1){
640 m->mothurOut(rabund->getLabel()); m->mothurOutEndLine();
641 processedLabels.insert(rabund->getLabel());
642 userLabels.erase(rabund->getLabel());
644 RAbundVector newRabund; newRabund.setLabel(rabund->getLabel());
645 for (int i = 0; i < rabund->getNumBins(); i++) {
646 if (rabund->get(i) > nseqs) {
647 newRabund.push_back(rabund->get(i));
650 if (newRabund.getNumBins() > 0) { newRabund.print(out); }
653 if ((m->anyLabelsToProcess(rabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
654 string saveLabel = rabund->getLabel();
657 rabund = input.getRAbundVector(lastLabel);
659 m->mothurOut(rabund->getLabel()); m->mothurOutEndLine();
660 processedLabels.insert(rabund->getLabel());
661 userLabels.erase(rabund->getLabel());
663 RAbundVector newRabund; newRabund.setLabel(rabund->getLabel());
664 for (int i = 0; i < rabund->getNumBins(); i++) {
665 if (rabund->get(i) > nseqs) {
666 newRabund.push_back(rabund->get(i));
669 if (newRabund.getNumBins() > 0) { newRabund.print(out); }
671 //restore real lastlabel to save below
672 rabund->setLabel(saveLabel);
675 lastLabel = rabund->getLabel();
678 rabund = input.getRAbundVector();
681 if (m->control_pressed) { out.close(); return 0; }
683 //output error messages about any remaining user labels
684 set<string>::iterator it;
685 bool needToRun = false;
686 for (it = userLabels.begin(); it != userLabels.end(); it++) {
687 m->mothurOut("Your file does not include the label " + *it);
688 if (processedLabels.count(lastLabel) != 1) {
689 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
692 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
696 //run last label if you need to
697 if (needToRun == true) {
698 if (rabund != NULL) { delete rabund; }
699 rabund = input.getRAbundVector(lastLabel);
701 m->mothurOut(rabund->getLabel()); m->mothurOutEndLine();
703 RAbundVector newRabund; newRabund.setLabel(rabund->getLabel());
704 for (int i = 0; i < rabund->getNumBins(); i++) {
705 if (rabund->get(i) > nseqs) {
706 newRabund.push_back(rabund->get(i));
709 if (newRabund.getNumBins() > 0) { newRabund.print(out); }
716 catch(exception& e) {
717 m->errorOut(e, "RemoveRareCommand", "processRabund");
721 //**********************************************************************************************************************
722 int RemoveRareCommand::processShared(){
724 m->setGroups(Groups);
726 string thisOutputDir = outputDir;
727 if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); }
728 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + getOutputFileNameTag("shared", sharedfile);
729 outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
732 m->openOutputFile(outputFileName, out);
734 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
735 InputData input(sharedfile, "sharedfile");
736 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
737 string lastLabel = lookup[0]->getLabel();
738 set<string> processedLabels;
739 set<string> userLabels = labels;
741 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
743 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } out.close(); return 0; }
745 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
747 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
748 processedLabels.insert(lookup[0]->getLabel());
749 userLabels.erase(lookup[0]->getLabel());
751 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
752 processLookup(lookup, out);
755 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
756 string saveLabel = lookup[0]->getLabel();
758 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
759 lookup = input.getSharedRAbundVectors(lastLabel);
761 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
762 processedLabels.insert(lookup[0]->getLabel());
763 userLabels.erase(lookup[0]->getLabel());
765 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
766 processLookup(lookup, out);
768 //restore real lastlabel to save below
769 lookup[0]->setLabel(saveLabel);
772 lastLabel = lookup[0]->getLabel();
774 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
775 lookup = input.getSharedRAbundVectors();
778 if (m->control_pressed) { out.close(); return 0; }
780 //output error messages about any remaining user labels
781 set<string>::iterator it;
782 bool needToRun = false;
783 for (it = userLabels.begin(); it != userLabels.end(); it++) {
784 m->mothurOut("Your file does not include the label " + *it);
785 if (processedLabels.count(lastLabel) != 1) {
786 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
789 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
793 //run last label if you need to
794 if (needToRun == true) {
795 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
796 lookup = input.getSharedRAbundVectors(lastLabel);
798 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
800 if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
801 processLookup(lookup, out);
803 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
808 catch(exception& e) {
809 m->errorOut(e, "RemoveRareCommand", "processSabund");
813 //**********************************************************************************************************************
814 int RemoveRareCommand::processLookup(vector<SharedRAbundVector*>& lookup, ofstream& out){
817 vector<SharedRAbundVector> newRabunds; newRabunds.resize(lookup.size());
818 for (int i = 0; i < lookup.size(); i++) {
819 newRabunds[i].setGroup(lookup[i]->getGroup());
820 newRabunds[i].setLabel(lookup[i]->getLabel());
826 for (int i = 0; i < lookup[0]->getNumBins(); i++) {
829 if (m->control_pressed) { return 0; }
832 for (int j = 0; j < lookup.size(); j++) {
835 if (lookup[j]->getAbundance(i) > nseqs) {
836 newRabunds[j].push_back(lookup[j]->getAbundance(i), newRabunds[j].getGroup());
839 newRabunds[j].push_back(0, newRabunds[j].getGroup());
843 //eliminates zero otus
844 if (allZero) { for (int j = 0; j < newRabunds.size(); j++) { newRabunds[j].pop_back(); } }
848 for (int i = 0; i < lookup[0]->getNumBins(); i++) {
850 if (m->control_pressed) { return 0; }
853 //get total otu abundance
854 for (int j = 0; j < lookup.size(); j++) {
855 newRabunds[j].push_back(lookup[j]->getAbundance(i), newRabunds[j].getGroup());
856 totalAbund += lookup[j]->getAbundance(i);
859 //eliminates otus below rare cutoff
860 if (totalAbund <= nseqs) { for (int j = 0; j < newRabunds.size(); j++) { newRabunds[j].pop_back(); } }
864 //do we have any otus above the rare cutoff
865 if (newRabunds[0].getNumBins() != 0) {
866 for (int j = 0; j < newRabunds.size(); j++) {
867 out << newRabunds[j].getLabel() << '\t' << newRabunds[j].getGroup() << '\t';
868 newRabunds[j].print(out);
874 catch(exception& e) {
875 m->errorOut(e, "RemoveRareCommand", "processLookup");
879 //**********************************************************************************************************************