2 * removelineagecommand.cpp
5 * Created by westcott on 9/24/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "removelineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
15 //**********************************************************************************************************************
16 vector<string> RemoveLineageCommand::setParameters(){
18 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
19 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
20 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
21 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup);
22 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
23 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none","taxonomy",false,true,true); parameters.push_back(ptaxonomy);
24 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport);
25 CommandParameter ptaxon("taxon", "String", "", "", "", "", "","",false,true,true); parameters.push_back(ptaxon);
26 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "RemoveLineageCommand", "setParameters");
39 //**********************************************************************************************************************
40 string RemoveLineageCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The remove.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, count, list or alignreport file.\n";
44 helpString += "It outputs a file containing only the sequences from the taxonomy file that are not from the taxon you requested to be removed.\n";
45 helpString += "The remove.lineage command parameters are taxon, fasta, name, group, list, taxonomy, count, alignreport and dups. You must provide taxonomy unless you have a valid current taxonomy file.\n";
46 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
47 helpString += "The taxon parameter allows you to select the taxons you would like to remove, and is required.\n";
48 helpString += "You may enter your taxons with confidence scores, doing so will remove only those sequences that belong to the taxonomy and whose cofidence scores fall below the scores you give.\n";
49 helpString += "If they belong to the taxonomy and have confidences above those you provide the sequence will not be removed.\n";
50 helpString += "The remove.lineage command should be in the following format: remove.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n";
51 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n";
52 helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n";
53 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n";
54 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
58 m->errorOut(e, "RemoveLineageCommand", "getHelpString");
62 //**********************************************************************************************************************
63 string RemoveLineageCommand::getOutputPattern(string type) {
67 if (type == "fasta") { pattern = "[filename],pick,[extension]"; }
68 else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; }
69 else if (type == "name") { pattern = "[filename],pick,[extension]"; }
70 else if (type == "group") { pattern = "[filename],pick,[extension]"; }
71 else if (type == "count") { pattern = "[filename],pick,[extension]"; }
72 else if (type == "list") { pattern = "[filename],pick,[extension]"; }
73 else if (type == "alignreport") { pattern = "[filename],pick.align.report"; }
74 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
79 m->errorOut(e, "RemoveLineageCommand", "getOutputPattern");
84 //**********************************************************************************************************************
85 RemoveLineageCommand::RemoveLineageCommand(){
87 abort = true; calledHelp = true;
89 vector<string> tempOutNames;
90 outputTypes["fasta"] = tempOutNames;
91 outputTypes["taxonomy"] = tempOutNames;
92 outputTypes["name"] = tempOutNames;
93 outputTypes["group"] = tempOutNames;
94 outputTypes["alignreport"] = tempOutNames;
95 outputTypes["list"] = tempOutNames;
96 outputTypes["count"] = tempOutNames;
99 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
103 //**********************************************************************************************************************
104 RemoveLineageCommand::RemoveLineageCommand(string option) {
106 abort = false; calledHelp = false;
108 //allow user to run help
109 if(option == "help") { help(); abort = true; calledHelp = true; }
110 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
113 vector<string> myArray = setParameters();
115 OptionParser parser(option);
116 map<string,string> parameters = parser.getParameters();
118 ValidParameters validParameter;
119 map<string,string>::iterator it;
121 //check to make sure all parameters are valid for command
122 for (it = parameters.begin(); it != parameters.end(); it++) {
123 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
126 //initialize outputTypes
127 vector<string> tempOutNames;
128 outputTypes["fasta"] = tempOutNames;
129 outputTypes["taxonomy"] = tempOutNames;
130 outputTypes["name"] = tempOutNames;
131 outputTypes["group"] = tempOutNames;
132 outputTypes["alignreport"] = tempOutNames;
133 outputTypes["list"] = tempOutNames;
134 outputTypes["count"] = tempOutNames;
136 //if the user changes the output directory command factory will send this info to us in the output parameter
137 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
139 //if the user changes the input directory command factory will send this info to us in the output parameter
140 string inputDir = validParameter.validFile(parameters, "inputdir", false);
141 if (inputDir == "not found"){ inputDir = ""; }
144 it = parameters.find("alignreport");
145 //user has given a template file
146 if(it != parameters.end()){
147 path = m->hasPath(it->second);
148 //if the user has not given a path then, add inputdir. else leave path alone.
149 if (path == "") { parameters["alignreport"] = inputDir + it->second; }
152 it = parameters.find("fasta");
153 //user has given a template file
154 if(it != parameters.end()){
155 path = m->hasPath(it->second);
156 //if the user has not given a path then, add inputdir. else leave path alone.
157 if (path == "") { parameters["fasta"] = inputDir + it->second; }
160 it = parameters.find("list");
161 //user has given a template file
162 if(it != parameters.end()){
163 path = m->hasPath(it->second);
164 //if the user has not given a path then, add inputdir. else leave path alone.
165 if (path == "") { parameters["list"] = inputDir + it->second; }
168 it = parameters.find("name");
169 //user has given a template file
170 if(it != parameters.end()){
171 path = m->hasPath(it->second);
172 //if the user has not given a path then, add inputdir. else leave path alone.
173 if (path == "") { parameters["name"] = inputDir + it->second; }
176 it = parameters.find("group");
177 //user has given a template file
178 if(it != parameters.end()){
179 path = m->hasPath(it->second);
180 //if the user has not given a path then, add inputdir. else leave path alone.
181 if (path == "") { parameters["group"] = inputDir + it->second; }
184 it = parameters.find("taxonomy");
185 //user has given a template file
186 if(it != parameters.end()){
187 path = m->hasPath(it->second);
188 //if the user has not given a path then, add inputdir. else leave path alone.
189 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
192 it = parameters.find("count");
193 //user has given a template file
194 if(it != parameters.end()){
195 path = m->hasPath(it->second);
196 //if the user has not given a path then, add inputdir. else leave path alone.
197 if (path == "") { parameters["count"] = inputDir + it->second; }
202 //check for required parameters
203 fastafile = validParameter.validFile(parameters, "fasta", true);
204 if (fastafile == "not open") { fastafile = ""; abort = true; }
205 else if (fastafile == "not found") { fastafile = ""; }
206 else { m->setFastaFile(fastafile); }
208 namefile = validParameter.validFile(parameters, "name", true);
209 if (namefile == "not open") { namefile = ""; abort = true; }
210 else if (namefile == "not found") { namefile = ""; }
211 else { m->setNameFile(namefile); }
213 groupfile = validParameter.validFile(parameters, "group", true);
214 if (groupfile == "not open") { abort = true; }
215 else if (groupfile == "not found") { groupfile = ""; }
216 else { m->setGroupFile(groupfile); }
218 alignfile = validParameter.validFile(parameters, "alignreport", true);
219 if (alignfile == "not open") { abort = true; }
220 else if (alignfile == "not found") { alignfile = ""; }
222 listfile = validParameter.validFile(parameters, "list", true);
223 if (listfile == "not open") { abort = true; }
224 else if (listfile == "not found") { listfile = ""; }
225 else { m->setListFile(listfile); }
227 taxfile = validParameter.validFile(parameters, "taxonomy", true);
228 if (taxfile == "not open") { taxfile = ""; abort = true; }
229 else if (taxfile == "not found") {
230 taxfile = m->getTaxonomyFile();
231 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
232 else { m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
233 }else { m->setTaxonomyFile(taxfile); }
235 countfile = validParameter.validFile(parameters, "count", true);
236 if (countfile == "not open") { countfile = ""; abort = true; }
237 else if (countfile == "not found") { countfile = ""; }
238 else { m->setCountTableFile(countfile); }
240 if ((namefile != "") && (countfile != "")) {
241 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
244 if ((groupfile != "") && (countfile != "")) {
245 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
248 string usedDups = "true";
249 string temp = validParameter.validFile(parameters, "dups", false);
250 if (temp == "not found") {
251 if (namefile != "") { temp = "true"; }
252 else { temp = "false"; usedDups = ""; }
254 dups = m->isTrue(temp);
256 taxons = validParameter.validFile(parameters, "taxon", false);
257 if (taxons == "not found") { taxons = ""; m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine(); abort = true; }
260 if (taxons[0] == '\'') { taxons = taxons.substr(1); }
261 if (taxons[(taxons.length()-1)] == '\'') { taxons = taxons.substr(0, (taxons.length()-1)); }
263 m->splitAtChar(taxons, listOfTaxons, '-');
265 if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
267 if ((usedDups != "") && (namefile == "")) { m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine(); abort = true; }
269 if (countfile == "") {
270 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
271 vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
272 parser.getNameFile(files);
279 catch(exception& e) {
280 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
284 //**********************************************************************************************************************
286 int RemoveLineageCommand::execute(){
289 if (abort == true) { if (calledHelp) { return 0; } return 2; }
291 if (m->control_pressed) { return 0; }
293 if (countfile != "") {
294 if ((fastafile != "") || (listfile != "") || (taxfile != "")) {
295 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
299 //read through the correct file and output lines you want to keep
300 if (taxfile != "") { readTax(); } //fills the set of names to remove
301 if (namefile != "") { readName(); }
302 if (fastafile != "") { readFasta(); }
303 if (groupfile != "") { readGroup(); }
304 if (alignfile != "") { readAlign(); }
305 if (listfile != "") { readList(); }
306 if (countfile != "") { readCount(); }
309 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
311 if (outputNames.size() != 0) {
312 m->mothurOutEndLine();
313 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
314 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
315 m->mothurOutEndLine();
317 //set fasta file as new current fastafile
319 itTypes = outputTypes.find("fasta");
320 if (itTypes != outputTypes.end()) {
321 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
324 itTypes = outputTypes.find("name");
325 if (itTypes != outputTypes.end()) {
326 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
329 itTypes = outputTypes.find("group");
330 if (itTypes != outputTypes.end()) {
331 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
334 itTypes = outputTypes.find("list");
335 if (itTypes != outputTypes.end()) {
336 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
339 itTypes = outputTypes.find("taxonomy");
340 if (itTypes != outputTypes.end()) {
341 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
344 itTypes = outputTypes.find("count");
345 if (itTypes != outputTypes.end()) {
346 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
353 catch(exception& e) {
354 m->errorOut(e, "RemoveLineageCommand", "execute");
359 //**********************************************************************************************************************
360 int RemoveLineageCommand::readFasta(){
362 string thisOutputDir = outputDir;
363 if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
364 map<string, string> variables;
365 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
366 variables["[extension]"] = m->getExtension(fastafile);
367 string outputFileName = getOutputFileName("fasta", variables);
369 m->openOutputFile(outputFileName, out);
372 m->openInputFile(fastafile, in);
375 bool wroteSomething = false;
378 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
380 Sequence currSeq(in);
381 name = currSeq.getName();
384 //if this name is in the accnos file
385 if (names.count(name) == 0) {
386 wroteSomething = true;
388 currSeq.printSequence(out);
396 if (wroteSomething == false) { m->mothurOut("Your fasta file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
397 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
402 catch(exception& e) {
403 m->errorOut(e, "RemoveLineageCommand", "readFasta");
407 //**********************************************************************************************************************
408 int RemoveLineageCommand::readList(){
410 string thisOutputDir = outputDir;
411 if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
412 map<string, string> variables;
413 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
414 variables["[extension]"] = m->getExtension(listfile);
415 string outputFileName = getOutputFileName("list", variables);
417 m->openOutputFile(outputFileName, out);
420 m->openInputFile(listfile, in);
422 bool wroteSomething = false;
425 //read in list vector
428 //make a new list vector
430 newList.setLabel(list.getLabel());
433 for (int i = 0; i < list.getNumBins(); i++) {
434 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
436 //parse out names that are in accnos file
437 string binnames = list.get(i);
439 string newNames = "";
440 while (binnames.find_first_of(',') != -1) {
441 string name = binnames.substr(0,binnames.find_first_of(','));
442 binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
444 //if that name is in the .accnos file, add it
445 if (names.count(name) == 0) { newNames += name + ","; }
449 if (names.count(binnames) == 0) { newNames += binnames + ","; }
451 //if there are names in this bin add to new list
452 if (newNames != "") {
453 newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
454 newList.push_back(newNames);
458 //print new listvector
459 if (newList.getNumBins() != 0) {
460 wroteSomething = true;
469 if (wroteSomething == false) { m->mothurOut("Your list file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
470 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
475 catch(exception& e) {
476 m->errorOut(e, "RemoveLineageCommand", "readList");
480 //**********************************************************************************************************************
481 int RemoveLineageCommand::readName(){
483 string thisOutputDir = outputDir;
484 if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
485 map<string, string> variables;
486 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
487 variables["[extension]"] = m->getExtension(namefile);
488 string outputFileName = getOutputFileName("name", variables);
490 m->openOutputFile(outputFileName, out);
493 m->openInputFile(namefile, in);
494 string name, firstCol, secondCol;
496 bool wroteSomething = false;
499 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
504 vector<string> parsedNames;
505 m->splitAtComma(secondCol, parsedNames);
507 vector<string> validSecond; validSecond.clear();
508 for (int i = 0; i < parsedNames.size(); i++) {
509 if (names.count(parsedNames[i]) == 0) {
510 validSecond.push_back(parsedNames[i]);
514 if ((dups) && (validSecond.size() != parsedNames.size())) { //if dups is true and we want to get rid of anyone, get rid of everyone
515 for (int i = 0; i < parsedNames.size(); i++) { names.insert(parsedNames[i]); }
517 //if the name in the first column is in the set then print it and any other names in second column also in set
518 if (names.count(firstCol) == 0) {
520 wroteSomething = true;
522 out << firstCol << '\t';
524 //you know you have at least one valid second since first column is valid
525 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
526 out << validSecond[validSecond.size()-1] << endl;
528 //make first name in set you come to first column and then add the remaining names to second column
531 //you want part of this row
532 if (validSecond.size() != 0) {
534 wroteSomething = true;
536 out << validSecond[0] << '\t';
538 //you know you have at least one valid second since first column is valid
539 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
540 out << validSecond[validSecond.size()-1] << endl;
549 if (wroteSomething == false) { m->mothurOut("Your name file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
550 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
554 catch(exception& e) {
555 m->errorOut(e, "RemoveLineageCommand", "readName");
559 //**********************************************************************************************************************
560 int RemoveLineageCommand::readCount(){
562 string thisOutputDir = outputDir;
563 if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
564 map<string, string> variables;
565 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
566 variables["[extension]"] = m->getExtension(countfile);
567 string outputFileName = getOutputFileName("count", variables);
571 m->openOutputFile(outputFileName, out);
574 m->openInputFile(countfile, in);
576 bool wroteSomething = false;
578 string headers = m->getline(in); m->gobble(in);
579 out << headers << endl;
581 string name, rest; int thisTotal;
584 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
586 in >> name; m->gobble(in);
587 in >> thisTotal; m->gobble(in);
588 rest = m->getline(in); m->gobble(in);
589 if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
591 if (names.count(name) == 0) {
592 out << name << '\t' << thisTotal << '\t' << rest << endl;
593 wroteSomething = true;
599 //check for groups that have been eliminated
601 if (ct.testGroups(outputFileName)) {
602 ct.readTable(outputFileName);
603 ct.printTable(outputFileName);
606 if (wroteSomething == false) { m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
607 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
611 catch(exception& e) {
612 m->errorOut(e, "RemoveLineageCommand", "readCount");
616 //**********************************************************************************************************************
617 int RemoveLineageCommand::readGroup(){
619 string thisOutputDir = outputDir;
620 if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
621 map<string, string> variables;
622 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
623 variables["[extension]"] = m->getExtension(groupfile);
624 string outputFileName = getOutputFileName("group", variables);
627 m->openOutputFile(outputFileName, out);
630 m->openInputFile(groupfile, in);
633 bool wroteSomething = false;
636 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
638 in >> name; //read from first column
639 in >> group; //read from second column
641 //if this name is in the accnos file
642 if (names.count(name) == 0) {
643 wroteSomething = true;
644 out << name << '\t' << group << endl;
652 if (wroteSomething == false) { m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
653 outputNames.push_back(outputFileName); outputTypes["group"].push_back(outputFileName);
657 catch(exception& e) {
658 m->errorOut(e, "RemoveLineageCommand", "readGroup");
662 //**********************************************************************************************************************
663 int RemoveLineageCommand::readTax(){
665 string thisOutputDir = outputDir;
666 if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
667 map<string, string> variables;
668 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
669 variables["[extension]"] = m->getExtension(taxfile);
670 string outputFileName = getOutputFileName("taxonomy", variables);
672 m->openOutputFile(outputFileName, out);
675 m->openInputFile(taxfile, in);
678 bool wroteSomething = false;
680 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
681 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
682 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
684 for (int i = 0; i < listOfTaxons.size(); i++) {
685 noConfidenceTaxons[i] = listOfTaxons[i];
686 int hasConPos = listOfTaxons[i].find_first_of('(');
687 if (hasConPos != string::npos) {
688 taxonsHasConfidence[i] = true;
689 searchTaxons[i] = getTaxons(listOfTaxons[i]);
690 noConfidenceTaxons[i] = listOfTaxons[i];
691 m->removeConfidences(noConfidenceTaxons[i]);
698 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
700 in >> name; //read from first column
701 in >> tax; //read from second column
705 string noQuotesTax = m->removeQuotes(tax);
707 for (int j = 0; j < listOfTaxons.size(); j++) {
708 string newtax = noQuotesTax;
710 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
711 if (!taxonsHasConfidence[j]) {
713 int hasConfidences = noQuotesTax.find_first_of('(');
714 if (hasConfidences != string::npos) {
715 newtax = noQuotesTax;
716 m->removeConfidences(newtax);
719 int pos = newtax.find(noConfidenceTaxons[j]);
721 if (pos == string::npos) {
722 //wroteSomething = true;
723 //out << name << '\t' << tax << endl;
724 }else{ //this sequence contains the taxon the user wants to remove
729 }else{//if taxons has them and you don't them remove taxons
730 int hasConfidences = noQuotesTax.find_first_of('(');
731 if (hasConfidences == string::npos) {
733 int pos = newtax.find(noConfidenceTaxons[j]);
735 if (pos == string::npos) {
736 //wroteSomething = true;
737 //out << name << '\t' << tax << endl;
738 }else{ //this sequence contains the taxon the user wants to remove
742 }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
743 //first remove confidences from both and see if the taxonomy exists
745 string noNewTax = noQuotesTax;
746 int hasConfidences = noQuotesTax.find_first_of('(');
747 if (hasConfidences != string::npos) {
748 noNewTax = noQuotesTax;
749 m->removeConfidences(noNewTax);
752 int pos = noNewTax.find(noConfidenceTaxons[j]);
754 if (pos != string::npos) { //if yes, then are the confidences okay
756 vector< map<string, float> > usersTaxon = getTaxons(newtax);
758 //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
759 //we want to "line them up", so we will find the the index where the searchstring starts
761 for (int i = 0; i < usersTaxon.size(); i++) {
763 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) {
766 bool goodspot = true;
767 //is this really the start, or are we dealing with a taxon of the same name?
768 while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
769 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
773 if (goodspot) { break; }
777 for (int i = 0; i < searchTaxons[j].size(); i++) {
779 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
780 if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
790 //passed the test so remove you
795 //wroteSomething = true;
796 //out << name << '\t' << tax << endl;
799 //wroteSomething = true;
800 //out << name << '\t' << tax << endl;
807 if (!remove) { wroteSomething = true; out << name << '\t' << tax << endl; }
813 if (!wroteSomething) { m->mothurOut("Your taxonomy file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
814 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
819 catch(exception& e) {
820 m->errorOut(e, "RemoveLineageCommand", "readTax");
824 /**************************************************************************************************/
825 vector< map<string, float> > RemoveLineageCommand::getTaxons(string tax) {
828 vector< map<string, float> > t;
830 int taxLength = tax.length();
831 for(int i=0;i<taxLength;i++){
834 int openParen = taxon.find_last_of('(');
835 int closeParen = taxon.find_last_of(')');
837 string newtaxon, confidence;
838 if ((openParen != string::npos) && (closeParen != string::npos)) {
839 string confidenceScore = taxon.substr(openParen+1, (closeParen-(openParen+1)));
840 if (m->isNumeric1(confidenceScore)) { //its a confidence
841 newtaxon = taxon.substr(0, openParen); //rip off confidence
842 confidence = taxon.substr((openParen+1), (closeParen-openParen-1));
843 }else { //its part of the taxon
852 convert(confidence, con);
854 map<string, float> temp;
855 temp[newtaxon] = con;
866 catch(exception& e) {
867 m->errorOut(e, "RemoveLineageCommand", "getTaxons");
871 //**********************************************************************************************************************
872 //alignreport file has a column header line then all other lines contain 16 columns. we just want the first column since that contains the name
873 int RemoveLineageCommand::readAlign(){
875 string thisOutputDir = outputDir;
876 if (outputDir == "") { thisOutputDir += m->hasPath(alignfile); }
877 map<string, string> variables;
878 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(alignfile));
879 variables["[extension]"] = m->getExtension(alignfile);
880 string outputFileName = getOutputFileName("alignreport", variables);
883 m->openOutputFile(outputFileName, out);
886 m->openInputFile(alignfile, in);
889 bool wroteSomething = false;
891 //read column headers
892 for (int i = 0; i < 16; i++) {
893 if (!in.eof()) { in >> junk; out << junk << '\t'; }
899 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
901 in >> name; //read from first column
903 //if this name is in the accnos file
904 if (names.count(name) == 0) {
905 wroteSomething = true;
910 for (int i = 0; i < 15; i++) {
911 if (!in.eof()) { in >> junk; out << junk << '\t'; }
916 }else {//still read just don't do anything with it
919 for (int i = 0; i < 15; i++) {
920 if (!in.eof()) { in >> junk; }
930 if (wroteSomething == false) { m->mothurOut("Your align file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); }
931 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
936 catch(exception& e) {
937 m->errorOut(e, "RemoveLineageCommand", "readAlign");
941 //**********************************************************************************************************************