2 * removelineagecommand.cpp
5 * Created by westcott on 9/24/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "removelineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
15 //**********************************************************************************************************************
16 vector<string> RemoveLineageCommand::setParameters(){
18 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
19 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
20 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
21 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
22 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
23 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(ptaxonomy);
24 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
25 CommandParameter ptaxon("taxon", "String", "", "", "", "", "",false,true); parameters.push_back(ptaxon);
26 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "RemoveLineageCommand", "setParameters");
39 //**********************************************************************************************************************
40 string RemoveLineageCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The remove.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, count, list or alignreport file.\n";
44 helpString += "It outputs a file containing only the sequences from the taxonomy file that are not from the taxon you requested to be removed.\n";
45 helpString += "The remove.lineage command parameters are taxon, fasta, name, group, list, taxonomy, count, alignreport and dups. You must provide taxonomy unless you have a valid current taxonomy file.\n";
46 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
47 helpString += "The taxon parameter allows you to select the taxons you would like to remove, and is required.\n";
48 helpString += "You may enter your taxons with confidence scores, doing so will remove only those sequences that belong to the taxonomy and whose cofidence scores fall below the scores you give.\n";
49 helpString += "If they belong to the taxonomy and have confidences above those you provide the sequence will not be removed.\n";
50 helpString += "The remove.lineage command should be in the following format: remove.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n";
51 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n";
52 helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n";
53 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n";
54 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
58 m->errorOut(e, "RemoveLineageCommand", "getHelpString");
62 //**********************************************************************************************************************
63 string RemoveLineageCommand::getOutputFileNameTag(string type, string inputName=""){
65 string outputFileName = "";
66 map<string, vector<string> >::iterator it;
68 //is this a type this command creates
69 it = outputTypes.find(type);
70 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
72 if (type == "fasta") { outputFileName = "pick" + m->getExtension(inputName); }
73 else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); }
74 else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
75 else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
76 else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
77 else if (type == "count") { outputFileName = "pick.count_table"; }
78 else if (type == "alignreport") { outputFileName = "pick.align.report"; }
79 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
81 return outputFileName;
84 m->errorOut(e, "RemoveLineageCommand", "getOutputFileNameTag");
88 //**********************************************************************************************************************
89 RemoveLineageCommand::RemoveLineageCommand(){
91 abort = true; calledHelp = true;
93 vector<string> tempOutNames;
94 outputTypes["fasta"] = tempOutNames;
95 outputTypes["taxonomy"] = tempOutNames;
96 outputTypes["name"] = tempOutNames;
97 outputTypes["group"] = tempOutNames;
98 outputTypes["alignreport"] = tempOutNames;
99 outputTypes["list"] = tempOutNames;
100 outputTypes["count"] = tempOutNames;
102 catch(exception& e) {
103 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
107 //**********************************************************************************************************************
108 RemoveLineageCommand::RemoveLineageCommand(string option) {
110 abort = false; calledHelp = false;
112 //allow user to run help
113 if(option == "help") { help(); abort = true; calledHelp = true; }
114 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
117 vector<string> myArray = setParameters();
119 OptionParser parser(option);
120 map<string,string> parameters = parser.getParameters();
122 ValidParameters validParameter;
123 map<string,string>::iterator it;
125 //check to make sure all parameters are valid for command
126 for (it = parameters.begin(); it != parameters.end(); it++) {
127 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
130 //initialize outputTypes
131 vector<string> tempOutNames;
132 outputTypes["fasta"] = tempOutNames;
133 outputTypes["taxonomy"] = tempOutNames;
134 outputTypes["name"] = tempOutNames;
135 outputTypes["group"] = tempOutNames;
136 outputTypes["alignreport"] = tempOutNames;
137 outputTypes["list"] = tempOutNames;
138 outputTypes["count"] = tempOutNames;
140 //if the user changes the output directory command factory will send this info to us in the output parameter
141 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
143 //if the user changes the input directory command factory will send this info to us in the output parameter
144 string inputDir = validParameter.validFile(parameters, "inputdir", false);
145 if (inputDir == "not found"){ inputDir = ""; }
148 it = parameters.find("alignreport");
149 //user has given a template file
150 if(it != parameters.end()){
151 path = m->hasPath(it->second);
152 //if the user has not given a path then, add inputdir. else leave path alone.
153 if (path == "") { parameters["alignreport"] = inputDir + it->second; }
156 it = parameters.find("fasta");
157 //user has given a template file
158 if(it != parameters.end()){
159 path = m->hasPath(it->second);
160 //if the user has not given a path then, add inputdir. else leave path alone.
161 if (path == "") { parameters["fasta"] = inputDir + it->second; }
164 it = parameters.find("list");
165 //user has given a template file
166 if(it != parameters.end()){
167 path = m->hasPath(it->second);
168 //if the user has not given a path then, add inputdir. else leave path alone.
169 if (path == "") { parameters["list"] = inputDir + it->second; }
172 it = parameters.find("name");
173 //user has given a template file
174 if(it != parameters.end()){
175 path = m->hasPath(it->second);
176 //if the user has not given a path then, add inputdir. else leave path alone.
177 if (path == "") { parameters["name"] = inputDir + it->second; }
180 it = parameters.find("group");
181 //user has given a template file
182 if(it != parameters.end()){
183 path = m->hasPath(it->second);
184 //if the user has not given a path then, add inputdir. else leave path alone.
185 if (path == "") { parameters["group"] = inputDir + it->second; }
188 it = parameters.find("taxonomy");
189 //user has given a template file
190 if(it != parameters.end()){
191 path = m->hasPath(it->second);
192 //if the user has not given a path then, add inputdir. else leave path alone.
193 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
196 it = parameters.find("count");
197 //user has given a template file
198 if(it != parameters.end()){
199 path = m->hasPath(it->second);
200 //if the user has not given a path then, add inputdir. else leave path alone.
201 if (path == "") { parameters["count"] = inputDir + it->second; }
206 //check for required parameters
207 fastafile = validParameter.validFile(parameters, "fasta", true);
208 if (fastafile == "not open") { fastafile = ""; abort = true; }
209 else if (fastafile == "not found") { fastafile = ""; }
210 else { m->setFastaFile(fastafile); }
212 namefile = validParameter.validFile(parameters, "name", true);
213 if (namefile == "not open") { namefile = ""; abort = true; }
214 else if (namefile == "not found") { namefile = ""; }
215 else { m->setNameFile(namefile); }
217 groupfile = validParameter.validFile(parameters, "group", true);
218 if (groupfile == "not open") { abort = true; }
219 else if (groupfile == "not found") { groupfile = ""; }
220 else { m->setGroupFile(groupfile); }
222 alignfile = validParameter.validFile(parameters, "alignreport", true);
223 if (alignfile == "not open") { abort = true; }
224 else if (alignfile == "not found") { alignfile = ""; }
226 listfile = validParameter.validFile(parameters, "list", true);
227 if (listfile == "not open") { abort = true; }
228 else if (listfile == "not found") { listfile = ""; }
229 else { m->setListFile(listfile); }
231 taxfile = validParameter.validFile(parameters, "taxonomy", true);
232 if (taxfile == "not open") { taxfile = ""; abort = true; }
233 else if (taxfile == "not found") {
234 taxfile = m->getTaxonomyFile();
235 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
236 else { m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
237 }else { m->setTaxonomyFile(taxfile); }
239 countfile = validParameter.validFile(parameters, "count", true);
240 if (countfile == "not open") { countfile = ""; abort = true; }
241 else if (countfile == "not found") { countfile = ""; }
242 else { m->setCountTableFile(countfile); }
244 if ((namefile != "") && (countfile != "")) {
245 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
248 if ((groupfile != "") && (countfile != "")) {
249 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
252 string usedDups = "true";
253 string temp = validParameter.validFile(parameters, "dups", false);
254 if (temp == "not found") {
255 if (namefile != "") { temp = "true"; }
256 else { temp = "false"; usedDups = ""; }
258 dups = m->isTrue(temp);
260 taxons = validParameter.validFile(parameters, "taxon", false);
261 if (taxons == "not found") { taxons = ""; m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine(); abort = true; }
264 if (taxons[0] == '\'') { taxons = taxons.substr(1); }
265 if (taxons[(taxons.length()-1)] == '\'') { taxons = taxons.substr(0, (taxons.length()-1)); }
267 m->splitAtChar(taxons, listOfTaxons, '-');
269 if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
271 if ((usedDups != "") && (namefile == "")) { m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine(); abort = true; }
273 if (countfile == "") {
274 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
275 vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
276 parser.getNameFile(files);
283 catch(exception& e) {
284 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
288 //**********************************************************************************************************************
290 int RemoveLineageCommand::execute(){
293 if (abort == true) { if (calledHelp) { return 0; } return 2; }
295 if (m->control_pressed) { return 0; }
297 if (countfile != "") {
298 if ((fastafile != "") || (listfile != "") || (taxfile != "")) {
299 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
303 //read through the correct file and output lines you want to keep
304 if (taxfile != "") { readTax(); } //fills the set of names to remove
305 if (namefile != "") { readName(); }
306 if (fastafile != "") { readFasta(); }
307 if (groupfile != "") { readGroup(); }
308 if (alignfile != "") { readAlign(); }
309 if (listfile != "") { readList(); }
310 if (countfile != "") { readCount(); }
313 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
315 if (outputNames.size() != 0) {
316 m->mothurOutEndLine();
317 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
318 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
319 m->mothurOutEndLine();
321 //set fasta file as new current fastafile
323 itTypes = outputTypes.find("fasta");
324 if (itTypes != outputTypes.end()) {
325 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
328 itTypes = outputTypes.find("name");
329 if (itTypes != outputTypes.end()) {
330 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
333 itTypes = outputTypes.find("group");
334 if (itTypes != outputTypes.end()) {
335 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
338 itTypes = outputTypes.find("list");
339 if (itTypes != outputTypes.end()) {
340 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
343 itTypes = outputTypes.find("taxonomy");
344 if (itTypes != outputTypes.end()) {
345 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
348 itTypes = outputTypes.find("count");
349 if (itTypes != outputTypes.end()) {
350 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
357 catch(exception& e) {
358 m->errorOut(e, "RemoveLineageCommand", "execute");
363 //**********************************************************************************************************************
364 int RemoveLineageCommand::readFasta(){
366 string thisOutputDir = outputDir;
367 if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
368 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
370 m->openOutputFile(outputFileName, out);
373 m->openInputFile(fastafile, in);
376 bool wroteSomething = false;
379 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
381 Sequence currSeq(in);
382 name = currSeq.getName();
385 //if this name is in the accnos file
386 if (names.count(name) == 0) {
387 wroteSomething = true;
389 currSeq.printSequence(out);
397 if (wroteSomething == false) { m->mothurOut("Your fasta file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
398 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
403 catch(exception& e) {
404 m->errorOut(e, "RemoveLineageCommand", "readFasta");
408 //**********************************************************************************************************************
409 int RemoveLineageCommand::readList(){
411 string thisOutputDir = outputDir;
412 if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
413 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
415 m->openOutputFile(outputFileName, out);
418 m->openInputFile(listfile, in);
420 bool wroteSomething = false;
423 //read in list vector
426 //make a new list vector
428 newList.setLabel(list.getLabel());
431 for (int i = 0; i < list.getNumBins(); i++) {
432 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
434 //parse out names that are in accnos file
435 string binnames = list.get(i);
437 string newNames = "";
438 while (binnames.find_first_of(',') != -1) {
439 string name = binnames.substr(0,binnames.find_first_of(','));
440 binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
442 //if that name is in the .accnos file, add it
443 if (names.count(name) == 0) { newNames += name + ","; }
447 if (names.count(binnames) == 0) { newNames += binnames + ","; }
449 //if there are names in this bin add to new list
450 if (newNames != "") {
451 newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
452 newList.push_back(newNames);
456 //print new listvector
457 if (newList.getNumBins() != 0) {
458 wroteSomething = true;
467 if (wroteSomething == false) { m->mothurOut("Your list file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
468 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
473 catch(exception& e) {
474 m->errorOut(e, "RemoveLineageCommand", "readList");
478 //**********************************************************************************************************************
479 int RemoveLineageCommand::readName(){
481 string thisOutputDir = outputDir;
482 if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
483 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
485 m->openOutputFile(outputFileName, out);
488 m->openInputFile(namefile, in);
489 string name, firstCol, secondCol;
491 bool wroteSomething = false;
494 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
499 vector<string> parsedNames;
500 m->splitAtComma(secondCol, parsedNames);
502 vector<string> validSecond; validSecond.clear();
503 for (int i = 0; i < parsedNames.size(); i++) {
504 if (names.count(parsedNames[i]) == 0) {
505 validSecond.push_back(parsedNames[i]);
509 if ((dups) && (validSecond.size() != parsedNames.size())) { //if dups is true and we want to get rid of anyone, get rid of everyone
510 for (int i = 0; i < parsedNames.size(); i++) { names.insert(parsedNames[i]); }
512 //if the name in the first column is in the set then print it and any other names in second column also in set
513 if (names.count(firstCol) == 0) {
515 wroteSomething = true;
517 out << firstCol << '\t';
519 //you know you have at least one valid second since first column is valid
520 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
521 out << validSecond[validSecond.size()-1] << endl;
523 //make first name in set you come to first column and then add the remaining names to second column
526 //you want part of this row
527 if (validSecond.size() != 0) {
529 wroteSomething = true;
531 out << validSecond[0] << '\t';
533 //you know you have at least one valid second since first column is valid
534 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
535 out << validSecond[validSecond.size()-1] << endl;
544 if (wroteSomething == false) { m->mothurOut("Your name file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
545 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
549 catch(exception& e) {
550 m->errorOut(e, "RemoveLineageCommand", "readName");
554 //**********************************************************************************************************************
555 int RemoveLineageCommand::readCount(){
557 string thisOutputDir = outputDir;
558 if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
559 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
562 m->openOutputFile(outputFileName, out);
565 m->openInputFile(countfile, in);
567 bool wroteSomething = false;
569 string headers = m->getline(in); m->gobble(in);
570 out << headers << endl;
572 string name, rest; int thisTotal;
575 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
577 in >> name; m->gobble(in);
578 in >> thisTotal; m->gobble(in);
579 rest = m->getline(in); m->gobble(in);
580 if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
582 if (names.count(name) == 0) {
583 out << name << '\t' << thisTotal << '\t' << rest << endl;
584 wroteSomething = true;
590 //check for groups that have been eliminated
592 if (ct.testGroups(outputFileName)) {
593 ct.readTable(outputFileName);
594 ct.printTable(outputFileName);
597 if (wroteSomething == false) { m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
598 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
602 catch(exception& e) {
603 m->errorOut(e, "RemoveLineageCommand", "readCount");
607 //**********************************************************************************************************************
608 int RemoveLineageCommand::readGroup(){
610 string thisOutputDir = outputDir;
611 if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
612 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
614 m->openOutputFile(outputFileName, out);
617 m->openInputFile(groupfile, in);
620 bool wroteSomething = false;
623 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
625 in >> name; //read from first column
626 in >> group; //read from second column
628 //if this name is in the accnos file
629 if (names.count(name) == 0) {
630 wroteSomething = true;
631 out << name << '\t' << group << endl;
639 if (wroteSomething == false) { m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
640 outputNames.push_back(outputFileName); outputTypes["group"].push_back(outputFileName);
644 catch(exception& e) {
645 m->errorOut(e, "RemoveLineageCommand", "readGroup");
649 //**********************************************************************************************************************
650 int RemoveLineageCommand::readTax(){
652 string thisOutputDir = outputDir;
653 if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
654 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
656 m->openOutputFile(outputFileName, out);
659 m->openInputFile(taxfile, in);
662 bool wroteSomething = false;
664 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
665 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
666 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
668 for (int i = 0; i < listOfTaxons.size(); i++) {
669 noConfidenceTaxons[i] = listOfTaxons[i];
670 int hasConPos = listOfTaxons[i].find_first_of('(');
671 if (hasConPos != string::npos) {
672 taxonsHasConfidence[i] = true;
673 searchTaxons[i] = getTaxons(listOfTaxons[i]);
674 noConfidenceTaxons[i] = listOfTaxons[i];
675 m->removeConfidences(noConfidenceTaxons[i]);
682 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
684 in >> name; //read from first column
685 in >> tax; //read from second column
689 string noQuotesTax = m->removeQuotes(tax);
691 for (int j = 0; j < listOfTaxons.size(); j++) {
692 string newtax = noQuotesTax;
694 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
695 if (!taxonsHasConfidence[j]) {
697 int hasConfidences = noQuotesTax.find_first_of('(');
698 if (hasConfidences != string::npos) {
699 newtax = noQuotesTax;
700 m->removeConfidences(newtax);
703 int pos = newtax.find(noConfidenceTaxons[j]);
705 if (pos == string::npos) {
706 //wroteSomething = true;
707 //out << name << '\t' << tax << endl;
708 }else{ //this sequence contains the taxon the user wants to remove
713 }else{//if taxons has them and you don't them remove taxons
714 int hasConfidences = noQuotesTax.find_first_of('(');
715 if (hasConfidences == string::npos) {
717 int pos = newtax.find(noConfidenceTaxons[j]);
719 if (pos == string::npos) {
720 //wroteSomething = true;
721 //out << name << '\t' << tax << endl;
722 }else{ //this sequence contains the taxon the user wants to remove
726 }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
727 //first remove confidences from both and see if the taxonomy exists
729 string noNewTax = noQuotesTax;
730 int hasConfidences = noQuotesTax.find_first_of('(');
731 if (hasConfidences != string::npos) {
732 noNewTax = noQuotesTax;
733 m->removeConfidences(noNewTax);
736 int pos = noNewTax.find(noConfidenceTaxons[j]);
738 if (pos != string::npos) { //if yes, then are the confidences okay
740 vector< map<string, float> > usersTaxon = getTaxons(newtax);
742 //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
743 //we want to "line them up", so we will find the the index where the searchstring starts
745 for (int i = 0; i < usersTaxon.size(); i++) {
747 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) {
750 bool goodspot = true;
751 //is this really the start, or are we dealing with a taxon of the same name?
752 while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
753 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
757 if (goodspot) { break; }
761 for (int i = 0; i < searchTaxons[j].size(); i++) {
763 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
764 if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
774 //passed the test so remove you
779 //wroteSomething = true;
780 //out << name << '\t' << tax << endl;
783 //wroteSomething = true;
784 //out << name << '\t' << tax << endl;
791 if (!remove) { wroteSomething = true; out << name << '\t' << tax << endl; }
797 if (!wroteSomething) { m->mothurOut("Your taxonomy file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
798 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
803 catch(exception& e) {
804 m->errorOut(e, "RemoveLineageCommand", "readTax");
808 /**************************************************************************************************/
809 vector< map<string, float> > RemoveLineageCommand::getTaxons(string tax) {
812 vector< map<string, float> > t;
814 int taxLength = tax.length();
815 for(int i=0;i<taxLength;i++){
818 int openParen = taxon.find_first_of('(');
819 int closeParen = taxon.find_last_of(')');
821 string newtaxon, confidence;
822 if ((openParen != string::npos) && (closeParen != string::npos)) {
823 newtaxon = taxon.substr(0, openParen); //rip off confidence
824 confidence = taxon.substr((openParen+1), (closeParen-openParen-1));
830 convert(confidence, con);
832 map<string, float> temp;
833 temp[newtaxon] = con;
845 catch(exception& e) {
846 m->errorOut(e, "RemoveLineageCommand", "getTaxons");
850 //**********************************************************************************************************************
851 //alignreport file has a column header line then all other lines contain 16 columns. we just want the first column since that contains the name
852 int RemoveLineageCommand::readAlign(){
854 string thisOutputDir = outputDir;
855 if (outputDir == "") { thisOutputDir += m->hasPath(alignfile); }
856 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + getOutputFileNameTag("alignreport");
859 m->openOutputFile(outputFileName, out);
862 m->openInputFile(alignfile, in);
865 bool wroteSomething = false;
867 //read column headers
868 for (int i = 0; i < 16; i++) {
869 if (!in.eof()) { in >> junk; out << junk << '\t'; }
875 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
877 in >> name; //read from first column
879 //if this name is in the accnos file
880 if (names.count(name) == 0) {
881 wroteSomething = true;
886 for (int i = 0; i < 15; i++) {
887 if (!in.eof()) { in >> junk; out << junk << '\t'; }
892 }else {//still read just don't do anything with it
895 for (int i = 0; i < 15; i++) {
896 if (!in.eof()) { in >> junk; }
906 if (wroteSomething == false) { m->mothurOut("Your align file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
907 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
912 catch(exception& e) {
913 m->errorOut(e, "RemoveLineageCommand", "readAlign");
917 //**********************************************************************************************************************