2 * removelineagecommand.cpp
5 * Created by westcott on 9/24/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "removelineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14 #include "inputdata.h"
16 //**********************************************************************************************************************
17 vector<string> RemoveLineageCommand::setParameters(){
19 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
20 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
21 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
22 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup);
23 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
24 CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT", "none","shared",false,false, true); parameters.push_back(pshared);
25 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "tax", "FNGLT", "none","taxonomy",false,false, true); parameters.push_back(ptaxonomy);
26 CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "tax", "FNGLT", "none","constaxonomy",false,false, true); parameters.push_back(pconstaxonomy);
27 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport);
28 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
29 CommandParameter ptaxon("taxon", "String", "", "", "", "", "","",false,true,true); parameters.push_back(ptaxon);
30 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups);
31 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
32 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
34 vector<string> myArray;
35 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
39 m->errorOut(e, "RemoveLineageCommand", "setParameters");
43 //**********************************************************************************************************************
44 string RemoveLineageCommand::getHelpString(){
46 string helpString = "";
47 helpString += "The remove.lineage command reads a taxonomy or constaxonomy file and any of the following file types: fasta, name, group, count, list, shared or alignreport file. The constaxonomy can only be used with a shared or list file.\n";
48 helpString += "It outputs a file containing only the sequences or OTUS from the taxonomy file that are not from the taxon you requested to be removed.\n";
49 helpString += "The remove.lineage command parameters are taxon, fasta, name, group, count, list, shared, taxonomy, alignreport, label and dups. You must provide taxonomy or constaxonomy unless you have a valid current taxonomy file.\n";
50 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
51 helpString += "The taxon parameter allows you to select the taxons you would like to remove, and is required.\n";
52 helpString += "You may enter your taxons with confidence scores, doing so will remove only those sequences that belong to the taxonomy and whose cofidence scores fall below the scores you give.\n";
53 helpString += "If they belong to the taxonomy and have confidences above those you provide the sequence will not be removed.\n";
54 helpString += "The label parameter is used to analyze specific labels in your input. \n";
55 helpString += "The remove.lineage command should be in the following format: remove.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n";
56 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n";
57 helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n";
58 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n";
59 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
63 m->errorOut(e, "RemoveLineageCommand", "getHelpString");
67 //**********************************************************************************************************************
68 string RemoveLineageCommand::getOutputPattern(string type) {
72 if (type == "fasta") { pattern = "[filename],pick,[extension]"; }
73 else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; }
74 else if (type == "constaxonomy") { pattern = "[filename],pick,[extension]"; }
75 else if (type == "name") { pattern = "[filename],pick,[extension]"; }
76 else if (type == "group") { pattern = "[filename],pick,[extension]"; }
77 else if (type == "count") { pattern = "[filename],pick,[extension]"; }
78 else if (type == "list") { pattern = "[filename],pick,[extension]-[filename],[distance],pick,[extension]"; }
79 else if (type == "shared") { pattern = "[filename],[distance],pick,[extension]"; }
80 else if (type == "alignreport") { pattern = "[filename],pick.align.report"; }
81 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
86 m->errorOut(e, "RemoveLineageCommand", "getOutputPattern");
91 //**********************************************************************************************************************
92 RemoveLineageCommand::RemoveLineageCommand(){
94 abort = true; calledHelp = true;
96 vector<string> tempOutNames;
97 outputTypes["fasta"] = tempOutNames;
98 outputTypes["taxonomy"] = tempOutNames;
99 outputTypes["name"] = tempOutNames;
100 outputTypes["group"] = tempOutNames;
101 outputTypes["alignreport"] = tempOutNames;
102 outputTypes["list"] = tempOutNames;
103 outputTypes["count"] = tempOutNames;
104 outputTypes["constaxonomy"] = tempOutNames;
105 outputTypes["shared"] = tempOutNames;
108 catch(exception& e) {
109 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
113 //**********************************************************************************************************************
114 RemoveLineageCommand::RemoveLineageCommand(string option) {
116 abort = false; calledHelp = false;
118 //allow user to run help
119 if(option == "help") { help(); abort = true; calledHelp = true; }
120 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
123 vector<string> myArray = setParameters();
125 OptionParser parser(option);
126 map<string,string> parameters = parser.getParameters();
128 ValidParameters validParameter;
129 map<string,string>::iterator it;
131 //check to make sure all parameters are valid for command
132 for (it = parameters.begin(); it != parameters.end(); it++) {
133 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
136 //initialize outputTypes
137 vector<string> tempOutNames;
138 outputTypes["fasta"] = tempOutNames;
139 outputTypes["taxonomy"] = tempOutNames;
140 outputTypes["name"] = tempOutNames;
141 outputTypes["group"] = tempOutNames;
142 outputTypes["alignreport"] = tempOutNames;
143 outputTypes["list"] = tempOutNames;
144 outputTypes["count"] = tempOutNames;
145 outputTypes["constaxonomy"] = tempOutNames;
146 outputTypes["shared"] = tempOutNames;
149 //if the user changes the output directory command factory will send this info to us in the output parameter
150 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
152 //if the user changes the input directory command factory will send this info to us in the output parameter
153 string inputDir = validParameter.validFile(parameters, "inputdir", false);
154 if (inputDir == "not found"){ inputDir = ""; }
157 it = parameters.find("alignreport");
158 //user has given a template file
159 if(it != parameters.end()){
160 path = m->hasPath(it->second);
161 //if the user has not given a path then, add inputdir. else leave path alone.
162 if (path == "") { parameters["alignreport"] = inputDir + it->second; }
165 it = parameters.find("fasta");
166 //user has given a template file
167 if(it != parameters.end()){
168 path = m->hasPath(it->second);
169 //if the user has not given a path then, add inputdir. else leave path alone.
170 if (path == "") { parameters["fasta"] = inputDir + it->second; }
173 it = parameters.find("list");
174 //user has given a template file
175 if(it != parameters.end()){
176 path = m->hasPath(it->second);
177 //if the user has not given a path then, add inputdir. else leave path alone.
178 if (path == "") { parameters["list"] = inputDir + it->second; }
181 it = parameters.find("name");
182 //user has given a template file
183 if(it != parameters.end()){
184 path = m->hasPath(it->second);
185 //if the user has not given a path then, add inputdir. else leave path alone.
186 if (path == "") { parameters["name"] = inputDir + it->second; }
189 it = parameters.find("group");
190 //user has given a template file
191 if(it != parameters.end()){
192 path = m->hasPath(it->second);
193 //if the user has not given a path then, add inputdir. else leave path alone.
194 if (path == "") { parameters["group"] = inputDir + it->second; }
197 it = parameters.find("taxonomy");
198 //user has given a template file
199 if(it != parameters.end()){
200 path = m->hasPath(it->second);
201 //if the user has not given a path then, add inputdir. else leave path alone.
202 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
205 it = parameters.find("count");
206 //user has given a template file
207 if(it != parameters.end()){
208 path = m->hasPath(it->second);
209 //if the user has not given a path then, add inputdir. else leave path alone.
210 if (path == "") { parameters["count"] = inputDir + it->second; }
213 it = parameters.find("constaxonomy");
214 //user has given a template file
215 if(it != parameters.end()){
216 path = m->hasPath(it->second);
217 //if the user has not given a path then, add inputdir. else leave path alone.
218 if (path == "") { parameters["constaxonomy"] = inputDir + it->second; }
221 it = parameters.find("shared");
222 //user has given a template file
223 if(it != parameters.end()){
224 path = m->hasPath(it->second);
225 //if the user has not given a path then, add inputdir. else leave path alone.
226 if (path == "") { parameters["shared"] = inputDir + it->second; }
231 //check for required parameters
232 fastafile = validParameter.validFile(parameters, "fasta", true);
233 if (fastafile == "not open") { fastafile = ""; abort = true; }
234 else if (fastafile == "not found") { fastafile = ""; }
235 else { m->setFastaFile(fastafile); }
237 namefile = validParameter.validFile(parameters, "name", true);
238 if (namefile == "not open") { namefile = ""; abort = true; }
239 else if (namefile == "not found") { namefile = ""; }
240 else { m->setNameFile(namefile); }
242 groupfile = validParameter.validFile(parameters, "group", true);
243 if (groupfile == "not open") { abort = true; }
244 else if (groupfile == "not found") { groupfile = ""; }
245 else { m->setGroupFile(groupfile); }
247 alignfile = validParameter.validFile(parameters, "alignreport", true);
248 if (alignfile == "not open") { abort = true; }
249 else if (alignfile == "not found") { alignfile = ""; }
251 listfile = validParameter.validFile(parameters, "list", true);
252 if (listfile == "not open") { abort = true; }
253 else if (listfile == "not found") { listfile = ""; }
254 else { m->setListFile(listfile); }
256 taxfile = validParameter.validFile(parameters, "taxonomy", true);
257 if (taxfile == "not open") { taxfile = ""; abort = true; }
258 else if (taxfile == "not found") { taxfile = ""; }
259 else { m->setTaxonomyFile(taxfile); }
261 sharedfile = validParameter.validFile(parameters, "shared", true);
262 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
263 else if (sharedfile == "not found") { sharedfile = ""; }
264 else { m->setSharedFile(sharedfile); }
267 constaxonomy = validParameter.validFile(parameters, "constaxonomy", true);
268 if (constaxonomy == "not open") { constaxonomy = ""; abort = true; }
269 else if (constaxonomy == "not found") { constaxonomy = ""; }
271 if ((constaxonomy == "") && (taxfile == "")) {
272 taxfile = m->getTaxonomyFile();
273 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
275 m->mothurOut("You have no current taxonomy file and did not provide a constaxonomy file. The taxonomy or constaxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
279 countfile = validParameter.validFile(parameters, "count", true);
280 if (countfile == "not open") { countfile = ""; abort = true; }
281 else if (countfile == "not found") { countfile = ""; }
282 else { m->setCountTableFile(countfile); }
284 if ((namefile != "") && (countfile != "")) {
285 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
288 if ((groupfile != "") && (countfile != "")) {
289 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
292 string usedDups = "true";
293 string temp = validParameter.validFile(parameters, "dups", false);
294 if (temp == "not found") {
295 if (namefile != "") { temp = "true"; }
296 else { temp = "false"; usedDups = ""; }
298 dups = m->isTrue(temp);
300 taxons = validParameter.validFile(parameters, "taxon", false);
301 if (taxons == "not found") { taxons = ""; m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine(); abort = true; }
304 if (taxons[0] == '\'') { taxons = taxons.substr(1); }
305 if (taxons[(taxons.length()-1)] == '\'') { taxons = taxons.substr(0, (taxons.length()-1)); }
307 m->splitAtChar(taxons, listOfTaxons, '-');
309 if ((fastafile == "") && (constaxonomy == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, constaxonomy, shared or listfile."); m->mothurOutEndLine(); abort = true; }
311 if ((constaxonomy != "") && ((fastafile != "") || (namefile != "") || (groupfile != "") || (alignfile != "") || (taxfile != "") || (countfile != ""))) {
312 m->mothurOut("[ERROR]: can only use constaxonomy file with a list or shared file, aborting.\n"); abort = true;
315 if ((constaxonomy != "") && (taxfile != "")) {
316 m->mothurOut("[ERROR]: Choose only one: taxonomy or constaxonomy, aborting.\n"); abort = true;
319 if ((sharedfile != "") && (taxfile != "")) {
320 m->mothurOut("[ERROR]: sharedfile can only be used with constaxonomy file, aborting.\n"); abort = true;
323 if ((sharedfile != "") || (listfile != "")) {
324 label = validParameter.validFile(parameters, "label", false);
325 if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
329 if ((usedDups != "") && (namefile == "")) { m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine(); abort = true; }
331 if (countfile == "") {
332 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
333 vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
334 parser.getNameFile(files);
341 catch(exception& e) {
342 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
346 //**********************************************************************************************************************
348 int RemoveLineageCommand::execute(){
351 if (abort == true) { if (calledHelp) { return 0; } return 2; }
353 if (m->control_pressed) { return 0; }
355 if (countfile != "") {
356 if ((fastafile != "") || (listfile != "") || (taxfile != "")) {
357 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
361 //read through the correct file and output lines you want to keep
363 readTax(); //fills the set of names to get
364 if (namefile != "") { readName(); }
365 if (fastafile != "") { readFasta(); }
366 if (countfile != "") { readCount(); }
367 if (groupfile != "") { readGroup(); }
368 if (alignfile != "") { readAlign(); }
369 if (listfile != "") { readList(); }
373 if (listfile != "") { readConsList(); }
374 if (sharedfile != "") { readShared(); }
378 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
380 if (outputNames.size() != 0) {
381 m->mothurOutEndLine();
382 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
383 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
384 m->mothurOutEndLine();
386 //set fasta file as new current fastafile
388 itTypes = outputTypes.find("fasta");
389 if (itTypes != outputTypes.end()) {
390 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
393 itTypes = outputTypes.find("name");
394 if (itTypes != outputTypes.end()) {
395 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
398 itTypes = outputTypes.find("group");
399 if (itTypes != outputTypes.end()) {
400 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
403 itTypes = outputTypes.find("list");
404 if (itTypes != outputTypes.end()) {
405 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
408 itTypes = outputTypes.find("taxonomy");
409 if (itTypes != outputTypes.end()) {
410 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
413 itTypes = outputTypes.find("count");
414 if (itTypes != outputTypes.end()) {
415 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
422 catch(exception& e) {
423 m->errorOut(e, "RemoveLineageCommand", "execute");
428 //**********************************************************************************************************************
429 int RemoveLineageCommand::readFasta(){
431 string thisOutputDir = outputDir;
432 if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
433 map<string, string> variables;
434 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
435 variables["[extension]"] = m->getExtension(fastafile);
436 string outputFileName = getOutputFileName("fasta", variables);
438 m->openOutputFile(outputFileName, out);
441 m->openInputFile(fastafile, in);
444 bool wroteSomething = false;
447 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
449 Sequence currSeq(in);
450 name = currSeq.getName();
453 //if this name is in the accnos file
454 if (names.count(name) == 0) {
455 wroteSomething = true;
457 currSeq.printSequence(out);
465 if (wroteSomething == false) { m->mothurOut("Your fasta file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
466 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
471 catch(exception& e) {
472 m->errorOut(e, "RemoveLineageCommand", "readFasta");
476 //**********************************************************************************************************************
477 int RemoveLineageCommand::readList(){
479 string thisOutputDir = outputDir;
480 if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
481 map<string, string> variables;
482 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
483 variables["[extension]"] = m->getExtension(listfile);
484 string outputFileName = getOutputFileName("list", variables);
486 m->openOutputFile(outputFileName, out);
489 m->openInputFile(listfile, in);
491 bool wroteSomething = false;
494 //read in list vector
497 //make a new list vector
499 newList.setLabel(list.getLabel());
502 for (int i = 0; i < list.getNumBins(); i++) {
503 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
505 //parse out names that are in accnos file
506 string binnames = list.get(i);
507 vector<string> bnames;
508 m->splitAtComma(binnames, bnames);
510 string newNames = "";
511 for (int j = 0; j < bnames.size(); j++) {
512 string name = bnames[j];
513 //if that name is in the .accnos file, add it
514 if (names.count(name) == 0) { newNames += name + ","; }
517 //if there are names in this bin add to new list
518 if (newNames != "") {
519 newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
520 newList.push_back(newNames);
524 //print new listvector
525 if (newList.getNumBins() != 0) {
526 wroteSomething = true;
535 if (wroteSomething == false) { m->mothurOut("Your list file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
536 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
541 catch(exception& e) {
542 m->errorOut(e, "RemoveLineageCommand", "readList");
546 //**********************************************************************************************************************
547 int RemoveLineageCommand::readName(){
549 string thisOutputDir = outputDir;
550 if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
551 map<string, string> variables;
552 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
553 variables["[extension]"] = m->getExtension(namefile);
554 string outputFileName = getOutputFileName("name", variables);
556 m->openOutputFile(outputFileName, out);
559 m->openInputFile(namefile, in);
560 string name, firstCol, secondCol;
562 bool wroteSomething = false;
565 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
570 vector<string> parsedNames;
571 m->splitAtComma(secondCol, parsedNames);
573 vector<string> validSecond; validSecond.clear();
574 for (int i = 0; i < parsedNames.size(); i++) {
575 if (names.count(parsedNames[i]) == 0) {
576 validSecond.push_back(parsedNames[i]);
580 if ((dups) && (validSecond.size() != parsedNames.size())) { //if dups is true and we want to get rid of anyone, get rid of everyone
581 for (int i = 0; i < parsedNames.size(); i++) { names.insert(parsedNames[i]); }
583 //if the name in the first column is in the set then print it and any other names in second column also in set
584 if (names.count(firstCol) == 0) {
586 wroteSomething = true;
588 out << firstCol << '\t';
590 //you know you have at least one valid second since first column is valid
591 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
592 out << validSecond[validSecond.size()-1] << endl;
594 //make first name in set you come to first column and then add the remaining names to second column
597 //you want part of this row
598 if (validSecond.size() != 0) {
600 wroteSomething = true;
602 out << validSecond[0] << '\t';
604 //you know you have at least one valid second since first column is valid
605 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
606 out << validSecond[validSecond.size()-1] << endl;
615 if (wroteSomething == false) { m->mothurOut("Your name file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
616 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
620 catch(exception& e) {
621 m->errorOut(e, "RemoveLineageCommand", "readName");
625 //**********************************************************************************************************************
626 int RemoveLineageCommand::readCount(){
628 string thisOutputDir = outputDir;
629 if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
630 map<string, string> variables;
631 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
632 variables["[extension]"] = m->getExtension(countfile);
633 string outputFileName = getOutputFileName("count", variables);
637 m->openOutputFile(outputFileName, out);
640 m->openInputFile(countfile, in);
642 bool wroteSomething = false;
644 string headers = m->getline(in); m->gobble(in);
645 out << headers << endl;
647 string name, rest; int thisTotal;
650 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
652 in >> name; m->gobble(in);
653 in >> thisTotal; m->gobble(in);
654 rest = m->getline(in); m->gobble(in);
655 if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
657 if (names.count(name) == 0) {
658 out << name << '\t' << thisTotal << '\t' << rest << endl;
659 wroteSomething = true;
665 //check for groups that have been eliminated
667 if (ct.testGroups(outputFileName)) {
668 ct.readTable(outputFileName, true);
669 ct.printTable(outputFileName);
672 if (wroteSomething == false) { m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
673 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
677 catch(exception& e) {
678 m->errorOut(e, "RemoveLineageCommand", "readCount");
682 //**********************************************************************************************************************
683 int RemoveLineageCommand::readConsList(){
687 if (m->control_pressed) { delete list; return 0;}
690 newList.setLabel(list->getLabel());
691 int removedCount = 0;
692 bool wroteSomething = false;
693 string snumBins = toString(list->getNumBins());
695 for (int i = 0; i < list->getNumBins(); i++) {
697 if (m->control_pressed) { delete list; return 0;}
699 //create a label for this otu
700 string otuLabel = "Otu";
701 string sbinNumber = toString(i+1);
702 if (sbinNumber.length() < snumBins.length()) {
703 int diff = snumBins.length() - sbinNumber.length();
704 for (int h = 0; h < diff; h++) { otuLabel += "0"; }
706 otuLabel += sbinNumber;
708 if (names.count(otuLabel) == 0) {
709 newList.push_back(list->get(i));
710 }else { removedCount++; }
713 string thisOutputDir = outputDir;
714 if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
715 map<string, string> variables;
716 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
717 variables["[extension]"] = m->getExtension(listfile);
718 variables["[distance]"] = list->getLabel();
719 string outputFileName = getOutputFileName("list", variables);
721 m->openOutputFile(outputFileName, out);
724 //print new listvector
725 if (newList.getNumBins() != 0) {
726 wroteSomething = true;
731 if (wroteSomething == false) { m->mothurOut("Your file only contains OTUs from " + taxons + "."); m->mothurOutEndLine(); }
732 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
734 m->mothurOut("Removed " + toString(removedCount) + " OTUs from your list file."); m->mothurOutEndLine();
739 catch(exception& e) {
740 m->errorOut(e, "RemoveLineageCommand", "readConsList");
744 //**********************************************************************************************************************
745 int RemoveLineageCommand::getListVector(){
747 InputData input(listfile, "list");
748 list = input.getListVector();
749 string lastLabel = list->getLabel();
751 if (label == "") { label = lastLabel; return 0; }
753 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
754 set<string> labels; labels.insert(label);
755 set<string> processedLabels;
756 set<string> userLabels = labels;
758 //as long as you are not at the end of the file or done wih the lines you want
759 while((list != NULL) && (userLabels.size() != 0)) {
760 if (m->control_pressed) { return 0; }
762 if(labels.count(list->getLabel()) == 1){
763 processedLabels.insert(list->getLabel());
764 userLabels.erase(list->getLabel());
768 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
769 string saveLabel = list->getLabel();
772 list = input.getListVector(lastLabel);
774 processedLabels.insert(list->getLabel());
775 userLabels.erase(list->getLabel());
777 //restore real lastlabel to save below
778 list->setLabel(saveLabel);
782 lastLabel = list->getLabel();
784 //get next line to process
785 //prevent memory leak
787 list = input.getListVector();
791 if (m->control_pressed) { return 0; }
793 //output error messages about any remaining user labels
794 set<string>::iterator it;
795 bool needToRun = false;
796 for (it = userLabels.begin(); it != userLabels.end(); it++) {
797 m->mothurOut("Your file does not include the label " + *it);
798 if (processedLabels.count(lastLabel) != 1) {
799 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
802 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
806 //run last label if you need to
807 if (needToRun == true) {
809 list = input.getListVector(lastLabel);
814 catch(exception& e) {
815 m->errorOut(e, "RemoveLineageCommand", "getListVector");
820 //**********************************************************************************************************************
821 int RemoveLineageCommand::readShared(){
826 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
828 vector<string> newLabels;
830 //create new "filtered" lookup
831 vector<SharedRAbundVector*> newLookup;
832 for (int i = 0; i < lookup.size(); i++) {
833 SharedRAbundVector* temp = new SharedRAbundVector();
834 temp->setLabel(lookup[i]->getLabel());
835 temp->setGroup(lookup[i]->getGroup());
836 newLookup.push_back(temp);
839 bool wroteSomething = false;
841 for (int i = 0; i < lookup[0]->getNumBins(); i++) {
843 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; }
845 //is this otu on the list
846 if (names.count(m->currentBinLabels[i]) == 0) {
847 wroteSomething = true;
848 newLabels.push_back(m->currentBinLabels[i]);
849 for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup
850 newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup());
852 }else { numRemoved++; }
855 string thisOutputDir = outputDir;
856 if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); }
857 map<string, string> variables;
858 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile));
859 variables["[extension]"] = m->getExtension(sharedfile);
860 variables["[distance]"] = lookup[0]->getLabel();
861 string outputFileName = getOutputFileName("shared", variables);
863 m->openOutputFile(outputFileName, out);
864 outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
866 for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
868 m->currentBinLabels = newLabels;
870 newLookup[0]->printHeaders(out);
872 for (int i = 0; i < newLookup.size(); i++) {
873 out << newLookup[i]->getLabel() << '\t' << newLookup[i]->getGroup() << '\t';
874 newLookup[i]->print(out);
878 for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; }
880 if (wroteSomething == false) { m->mothurOut("Your file only contains OTUs from " + taxons + "."); m->mothurOutEndLine(); }
882 m->mothurOut("Removed " + toString(numRemoved) + " OTUs from your shared file."); m->mothurOutEndLine();
886 catch(exception& e) {
887 m->errorOut(e, "RemoveLineageCommand", "readShared");
891 //**********************************************************************************************************************
892 int RemoveLineageCommand::getShared(){
894 InputData input(sharedfile, "sharedfile");
895 lookup = input.getSharedRAbundVectors();
896 string lastLabel = lookup[0]->getLabel();
898 if (label == "") { label = lastLabel; return 0; }
900 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
901 set<string> labels; labels.insert(label);
902 set<string> processedLabels;
903 set<string> userLabels = labels;
905 //as long as you are not at the end of the file or done wih the lines you want
906 while((lookup[0] != NULL) && (userLabels.size() != 0)) {
907 if (m->control_pressed) { return 0; }
909 if(labels.count(lookup[0]->getLabel()) == 1){
910 processedLabels.insert(lookup[0]->getLabel());
911 userLabels.erase(lookup[0]->getLabel());
915 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
916 string saveLabel = lookup[0]->getLabel();
918 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
919 lookup = input.getSharedRAbundVectors(lastLabel);
921 processedLabels.insert(lookup[0]->getLabel());
922 userLabels.erase(lookup[0]->getLabel());
924 //restore real lastlabel to save below
925 lookup[0]->setLabel(saveLabel);
929 lastLabel = lookup[0]->getLabel();
931 //get next line to process
932 //prevent memory leak
933 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
934 lookup = input.getSharedRAbundVectors();
938 if (m->control_pressed) { return 0; }
940 //output error messages about any remaining user labels
941 set<string>::iterator it;
942 bool needToRun = false;
943 for (it = userLabels.begin(); it != userLabels.end(); it++) {
944 m->mothurOut("Your file does not include the label " + *it);
945 if (processedLabels.count(lastLabel) != 1) {
946 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
949 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
953 //run last label if you need to
954 if (needToRun == true) {
955 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
956 lookup = input.getSharedRAbundVectors(lastLabel);
961 catch(exception& e) {
962 m->errorOut(e, "RemoveLineageCommand", "getShared");
968 //**********************************************************************************************************************
969 int RemoveLineageCommand::readGroup(){
971 string thisOutputDir = outputDir;
972 if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
973 map<string, string> variables;
974 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
975 variables["[extension]"] = m->getExtension(groupfile);
976 string outputFileName = getOutputFileName("group", variables);
979 m->openOutputFile(outputFileName, out);
982 m->openInputFile(groupfile, in);
985 bool wroteSomething = false;
988 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
990 in >> name; //read from first column
991 in >> group; //read from second column
993 //if this name is in the accnos file
994 if (names.count(name) == 0) {
995 wroteSomething = true;
996 out << name << '\t' << group << endl;
1004 if (wroteSomething == false) { m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
1005 outputNames.push_back(outputFileName); outputTypes["group"].push_back(outputFileName);
1009 catch(exception& e) {
1010 m->errorOut(e, "RemoveLineageCommand", "readGroup");
1014 //**********************************************************************************************************************
1015 int RemoveLineageCommand::readTax(){
1017 string thisOutputDir = outputDir;
1018 if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
1019 map<string, string> variables;
1020 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
1021 variables["[extension]"] = m->getExtension(taxfile);
1022 string outputFileName = getOutputFileName("taxonomy", variables);
1024 m->openOutputFile(outputFileName, out);
1027 m->openInputFile(taxfile, in);
1030 bool wroteSomething = false;
1032 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
1033 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
1034 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
1036 for (int i = 0; i < listOfTaxons.size(); i++) {
1037 noConfidenceTaxons[i] = listOfTaxons[i];
1038 int hasConPos = listOfTaxons[i].find_first_of('(');
1039 if (hasConPos != string::npos) {
1040 taxonsHasConfidence[i] = true;
1041 searchTaxons[i] = getTaxons(listOfTaxons[i]);
1042 noConfidenceTaxons[i] = listOfTaxons[i];
1043 m->removeConfidences(noConfidenceTaxons[i]);
1050 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
1052 in >> name; //read from first column
1053 in >> tax; //read from second column
1055 bool remove = false;
1057 string noQuotesTax = m->removeQuotes(tax);
1059 for (int j = 0; j < listOfTaxons.size(); j++) {
1060 string newtax = noQuotesTax;
1062 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
1063 if (!taxonsHasConfidence[j]) {
1065 int hasConfidences = noQuotesTax.find_first_of('(');
1066 if (hasConfidences != string::npos) {
1067 newtax = noQuotesTax;
1068 m->removeConfidences(newtax);
1071 int pos = newtax.find(noConfidenceTaxons[j]);
1073 if (pos == string::npos) {
1074 //wroteSomething = true;
1075 //out << name << '\t' << tax << endl;
1076 }else{ //this sequence contains the taxon the user wants to remove
1081 }else{//if taxons has them and you don't them remove taxons
1082 int hasConfidences = noQuotesTax.find_first_of('(');
1083 if (hasConfidences == string::npos) {
1085 int pos = newtax.find(noConfidenceTaxons[j]);
1087 if (pos == string::npos) {
1088 //wroteSomething = true;
1089 //out << name << '\t' << tax << endl;
1090 }else{ //this sequence contains the taxon the user wants to remove
1094 }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
1095 //first remove confidences from both and see if the taxonomy exists
1097 string noNewTax = noQuotesTax;
1098 int hasConfidences = noQuotesTax.find_first_of('(');
1099 if (hasConfidences != string::npos) {
1100 noNewTax = noQuotesTax;
1101 m->removeConfidences(noNewTax);
1104 int pos = noNewTax.find(noConfidenceTaxons[j]);
1106 if (pos != string::npos) { //if yes, then are the confidences okay
1108 vector< map<string, float> > usersTaxon = getTaxons(newtax);
1110 //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
1111 //we want to "line them up", so we will find the the index where the searchstring starts
1113 for (int i = 0; i < usersTaxon.size(); i++) {
1115 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) {
1118 bool goodspot = true;
1119 //is this really the start, or are we dealing with a taxon of the same name?
1120 while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
1121 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
1125 if (goodspot) { break; }
1129 for (int i = 0; i < searchTaxons[j].size(); i++) {
1131 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
1132 if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
1142 //passed the test so remove you
1147 //wroteSomething = true;
1148 //out << name << '\t' << tax << endl;
1151 //wroteSomething = true;
1152 //out << name << '\t' << tax << endl;
1159 if (!remove) { wroteSomething = true; out << name << '\t' << tax << endl; }
1165 if (!wroteSomething) { m->mothurOut("Your taxonomy file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
1166 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
1171 catch(exception& e) {
1172 m->errorOut(e, "RemoveLineageCommand", "readTax");
1176 //**********************************************************************************************************************
1177 int RemoveLineageCommand::readConsTax(){
1179 string thisOutputDir = outputDir;
1180 if (outputDir == "") { thisOutputDir += m->hasPath(constaxonomy); }
1181 map<string, string> variables;
1182 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(constaxonomy));
1183 variables["[extension]"] = m->getExtension(constaxonomy);
1184 string outputFileName = getOutputFileName("constaxonomy", variables);
1186 m->openOutputFile(outputFileName, out);
1189 m->openInputFile(constaxonomy, in);
1190 string otuLabel, tax;
1192 bool wroteSomething = false;
1195 string headers = m->getline(in);
1196 out << headers << endl;
1198 //bool wroteSomething = false;
1199 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
1200 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
1201 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
1203 for (int i = 0; i < listOfTaxons.size(); i++) {
1204 noConfidenceTaxons[i] = listOfTaxons[i];
1205 int hasConPos = listOfTaxons[i].find_first_of('(');
1206 if (hasConPos != string::npos) {
1207 taxonsHasConfidence[i] = true;
1208 searchTaxons[i] = getTaxons(listOfTaxons[i]);
1209 noConfidenceTaxons[i] = listOfTaxons[i];
1210 m->removeConfidences(noConfidenceTaxons[i]);
1217 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
1219 in >> otuLabel; m->gobble(in);
1220 in >> numReps; m->gobble(in);
1221 in >> tax; m->gobble(in);
1223 bool remove = false;
1225 string noQuotesTax = m->removeQuotes(tax);
1227 for (int j = 0; j < listOfTaxons.size(); j++) {
1228 string newtax = noQuotesTax;
1230 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
1231 if (!taxonsHasConfidence[j]) {
1233 int hasConfidences = noQuotesTax.find_first_of('(');
1234 if (hasConfidences != string::npos) {
1235 newtax = noQuotesTax;
1236 m->removeConfidences(newtax);
1239 int pos = newtax.find(noConfidenceTaxons[j]);
1241 if (pos == string::npos) {
1242 //wroteSomething = true;
1243 //out << name << '\t' << tax << endl;
1244 }else{ //this sequence contains the taxon the user wants to remove
1245 names.insert(otuLabel);
1249 }else{//if taxons has them and you don't them remove taxons
1250 int hasConfidences = noQuotesTax.find_first_of('(');
1251 if (hasConfidences == string::npos) {
1253 int pos = newtax.find(noConfidenceTaxons[j]);
1255 if (pos == string::npos) {
1256 //wroteSomething = true;
1257 //out << name << '\t' << tax << endl;
1258 }else{ //this sequence contains the taxon the user wants to remove
1259 names.insert(otuLabel);
1262 }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
1263 //first remove confidences from both and see if the taxonomy exists
1265 string noNewTax = noQuotesTax;
1266 int hasConfidences = noQuotesTax.find_first_of('(');
1267 if (hasConfidences != string::npos) {
1268 noNewTax = noQuotesTax;
1269 m->removeConfidences(noNewTax);
1272 int pos = noNewTax.find(noConfidenceTaxons[j]);
1274 if (pos != string::npos) { //if yes, then are the confidences okay
1276 vector< map<string, float> > usersTaxon = getTaxons(newtax);
1278 //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
1279 //we want to "line them up", so we will find the the index where the searchstring starts
1281 for (int i = 0; i < usersTaxon.size(); i++) {
1283 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) {
1286 bool goodspot = true;
1287 //is this really the start, or are we dealing with a taxon of the same name?
1288 while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
1289 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
1293 if (goodspot) { break; }
1297 for (int i = 0; i < searchTaxons[j].size(); i++) {
1299 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
1300 if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
1310 //passed the test so remove you
1312 names.insert(otuLabel);
1315 //wroteSomething = true;
1316 //out << name << '\t' << tax << endl;
1319 //wroteSomething = true;
1320 //out << name << '\t' << tax << endl;
1327 if (!remove) { wroteSomething = true; out << otuLabel << '\t' << numReps << '\t' << tax << endl; }
1333 if (names.size() == 0) { m->mothurOut("Your constaxonomy file contains OTUs only from " + taxons + "."); m->mothurOutEndLine(); }
1334 outputNames.push_back(outputFileName); outputTypes["constaxonomy"].push_back(outputFileName);
1339 catch(exception& e) {
1340 m->errorOut(e, "RemoveLineageCommand", "readConsTax");
1344 /**************************************************************************************************/
1345 vector< map<string, float> > RemoveLineageCommand::getTaxons(string tax) {
1348 vector< map<string, float> > t;
1350 int taxLength = tax.length();
1351 for(int i=0;i<taxLength;i++){
1354 int openParen = taxon.find_last_of('(');
1355 int closeParen = taxon.find_last_of(')');
1357 string newtaxon, confidence;
1358 if ((openParen != string::npos) && (closeParen != string::npos)) {
1359 string confidenceScore = taxon.substr(openParen+1, (closeParen-(openParen+1)));
1360 if (m->isNumeric1(confidenceScore)) { //its a confidence
1361 newtaxon = taxon.substr(0, openParen); //rip off confidence
1362 confidence = taxon.substr((openParen+1), (closeParen-openParen-1));
1363 }else { //its part of the taxon
1372 convert(confidence, con);
1374 map<string, float> temp;
1375 temp[newtaxon] = con;
1386 catch(exception& e) {
1387 m->errorOut(e, "RemoveLineageCommand", "getTaxons");
1391 //**********************************************************************************************************************
1392 //alignreport file has a column header line then all other lines contain 16 columns. we just want the first column since that contains the name
1393 int RemoveLineageCommand::readAlign(){
1395 string thisOutputDir = outputDir;
1396 if (outputDir == "") { thisOutputDir += m->hasPath(alignfile); }
1397 map<string, string> variables;
1398 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(alignfile));
1399 variables["[extension]"] = m->getExtension(alignfile);
1400 string outputFileName = getOutputFileName("alignreport", variables);
1403 m->openOutputFile(outputFileName, out);
1406 m->openInputFile(alignfile, in);
1409 bool wroteSomething = false;
1411 //read column headers
1412 for (int i = 0; i < 16; i++) {
1413 if (!in.eof()) { in >> junk; out << junk << '\t'; }
1419 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
1421 in >> name; //read from first column
1423 //if this name is in the accnos file
1424 if (names.count(name) == 0) {
1425 wroteSomething = true;
1427 out << name << '\t';
1430 for (int i = 0; i < 15; i++) {
1431 if (!in.eof()) { in >> junk; out << junk << '\t'; }
1436 }else {//still read just don't do anything with it
1439 for (int i = 0; i < 15; i++) {
1440 if (!in.eof()) { in >> junk; }
1450 if (wroteSomething == false) { m->mothurOut("Your align file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
1451 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
1456 catch(exception& e) {
1457 m->errorOut(e, "RemoveLineageCommand", "readAlign");
1461 //**********************************************************************************************************************