2 * getlineagecommand.cpp
5 * Created by westcott on 9/24/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "getlineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
14 //**********************************************************************************************************************
15 vector<string> GetLineageCommand::setParameters(){
17 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
18 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
19 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
20 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
21 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
22 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(ptaxonomy);
23 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
24 CommandParameter ptaxon("taxon", "String", "", "", "", "", "",false,true); parameters.push_back(ptaxon);
25 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29 vector<string> myArray;
30 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
34 m->errorOut(e, "GetLineageCommand", "setParameters");
38 //**********************************************************************************************************************
39 string GetLineageCommand::getHelpString(){
41 string helpString = "";
42 helpString += "The get.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, count, list or alignreport file.\n";
43 helpString += "It outputs a file containing only the sequences from the taxonomy file that are from the taxon requested.\n";
44 helpString += "The get.lineage command parameters are taxon, fasta, name, group, count, list, taxonomy, alignreport and dups. You must provide taxonomy unless you have a valid current taxonomy file.\n";
45 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
46 helpString += "The taxon parameter allows you to select the taxons you would like to get and is required.\n";
47 helpString += "You may enter your taxons with confidence scores, doing so will get only those sequences that belong to the taxonomy and whose cofidence scores is above the scores you give.\n";
48 helpString += "If they belong to the taxonomy and have confidences below those you provide the sequence will not be selected.\n";
49 helpString += "The get.lineage command should be in the following format: get.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n";
50 helpString += "Example get.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n";
51 helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n";
52 helpString += "Example get.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n";
53 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
57 m->errorOut(e, "GetLineageCommand", "getHelpString");
61 //**********************************************************************************************************************
62 string GetLineageCommand::getOutputFileNameTag(string type, string inputName=""){
64 string outputFileName = "";
65 map<string, vector<string> >::iterator it;
67 //is this a type this command creates
68 it = outputTypes.find(type);
69 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
71 if (type == "fasta") { outputFileName = "pick" + m->getExtension(inputName); }
72 else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); }
73 else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
74 else if (type == "count") { outputFileName = "pick.count.table"; }
75 else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
76 else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
77 else if (type == "alignreport") { outputFileName = "pick.align.report"; }
78 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
80 return outputFileName;
83 m->errorOut(e, "GetLineageCommand", "getOutputFileNameTag");
87 //**********************************************************************************************************************
88 GetLineageCommand::GetLineageCommand(){
90 abort = true; calledHelp = true;
92 vector<string> tempOutNames;
93 outputTypes["fasta"] = tempOutNames;
94 outputTypes["taxonomy"] = tempOutNames;
95 outputTypes["name"] = tempOutNames;
96 outputTypes["group"] = tempOutNames;
97 outputTypes["alignreport"] = tempOutNames;
98 outputTypes["list"] = tempOutNames;
99 outputTypes["count"] = tempOutNames;
101 catch(exception& e) {
102 m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
106 //**********************************************************************************************************************
107 GetLineageCommand::GetLineageCommand(string option) {
109 abort = false; calledHelp = false;
111 //allow user to run help
112 if(option == "help") { help(); abort = true; calledHelp = true; }
113 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
116 vector<string> myArray = setParameters();
118 OptionParser parser(option);
119 map<string,string> parameters = parser.getParameters();
121 ValidParameters validParameter;
122 map<string,string>::iterator it;
124 //check to make sure all parameters are valid for command
125 for (it = parameters.begin(); it != parameters.end(); it++) {
126 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
129 //initialize outputTypes
130 vector<string> tempOutNames;
131 outputTypes["fasta"] = tempOutNames;
132 outputTypes["taxonomy"] = tempOutNames;
133 outputTypes["name"] = tempOutNames;
134 outputTypes["group"] = tempOutNames;
135 outputTypes["alignreport"] = tempOutNames;
136 outputTypes["list"] = tempOutNames;
137 outputTypes["count"] = tempOutNames;
139 //if the user changes the output directory command factory will send this info to us in the output parameter
140 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
142 //if the user changes the input directory command factory will send this info to us in the output parameter
143 string inputDir = validParameter.validFile(parameters, "inputdir", false);
144 if (inputDir == "not found"){ inputDir = ""; }
147 it = parameters.find("alignreport");
148 //user has given a template file
149 if(it != parameters.end()){
150 path = m->hasPath(it->second);
151 //if the user has not given a path then, add inputdir. else leave path alone.
152 if (path == "") { parameters["alignreport"] = inputDir + it->second; }
155 it = parameters.find("fasta");
156 //user has given a template file
157 if(it != parameters.end()){
158 path = m->hasPath(it->second);
159 //if the user has not given a path then, add inputdir. else leave path alone.
160 if (path == "") { parameters["fasta"] = inputDir + it->second; }
163 it = parameters.find("list");
164 //user has given a template file
165 if(it != parameters.end()){
166 path = m->hasPath(it->second);
167 //if the user has not given a path then, add inputdir. else leave path alone.
168 if (path == "") { parameters["list"] = inputDir + it->second; }
171 it = parameters.find("name");
172 //user has given a template file
173 if(it != parameters.end()){
174 path = m->hasPath(it->second);
175 //if the user has not given a path then, add inputdir. else leave path alone.
176 if (path == "") { parameters["name"] = inputDir + it->second; }
179 it = parameters.find("group");
180 //user has given a template file
181 if(it != parameters.end()){
182 path = m->hasPath(it->second);
183 //if the user has not given a path then, add inputdir. else leave path alone.
184 if (path == "") { parameters["group"] = inputDir + it->second; }
187 it = parameters.find("taxonomy");
188 //user has given a template file
189 if(it != parameters.end()){
190 path = m->hasPath(it->second);
191 //if the user has not given a path then, add inputdir. else leave path alone.
192 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
195 it = parameters.find("count");
196 //user has given a template file
197 if(it != parameters.end()){
198 path = m->hasPath(it->second);
199 //if the user has not given a path then, add inputdir. else leave path alone.
200 if (path == "") { parameters["count"] = inputDir + it->second; }
205 //check for required parameters
206 fastafile = validParameter.validFile(parameters, "fasta", true);
207 if (fastafile == "not open") { fastafile = ""; abort = true; }
208 else if (fastafile == "not found") { fastafile = ""; }
209 else { m->setFastaFile(fastafile); }
211 namefile = validParameter.validFile(parameters, "name", true);
212 if (namefile == "not open") { namefile = ""; abort = true; }
213 else if (namefile == "not found") { namefile = ""; }
214 else { m->setNameFile(namefile); }
216 groupfile = validParameter.validFile(parameters, "group", true);
217 if (groupfile == "not open") { abort = true; }
218 else if (groupfile == "not found") { groupfile = ""; }
219 else { m->setGroupFile(groupfile); }
221 alignfile = validParameter.validFile(parameters, "alignreport", true);
222 if (alignfile == "not open") { abort = true; }
223 else if (alignfile == "not found") { alignfile = ""; }
225 listfile = validParameter.validFile(parameters, "list", true);
226 if (listfile == "not open") { abort = true; }
227 else if (listfile == "not found") { listfile = ""; }
228 else { m->setListFile(listfile); }
230 taxfile = validParameter.validFile(parameters, "taxonomy", true);
231 if (taxfile == "not open") { taxfile = ""; abort = true; }
232 else if (taxfile == "not found") {
233 taxfile = m->getTaxonomyFile();
234 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
235 else { m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
236 }else { m->setTaxonomyFile(taxfile); }
238 string usedDups = "true";
239 string temp = validParameter.validFile(parameters, "dups", false);
240 if (temp == "not found") {
241 if (namefile != "") { temp = "true"; }
242 else { temp = "false"; usedDups = ""; }
244 dups = m->isTrue(temp);
246 countfile = validParameter.validFile(parameters, "count", true);
247 if (countfile == "not open") { countfile = ""; abort = true; }
248 else if (countfile == "not found") { countfile = ""; }
249 else { m->setCountTableFile(countfile); }
251 if ((namefile != "") && (countfile != "")) {
252 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
255 if ((groupfile != "") && (countfile != "")) {
256 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
259 taxons = validParameter.validFile(parameters, "taxon", false);
260 if (taxons == "not found") { taxons = ""; m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine(); abort = true; }
263 if (taxons[0] == '\'') { taxons = taxons.substr(1); }
264 if (taxons[(taxons.length()-1)] == '\'') { taxons = taxons.substr(0, (taxons.length()-1)); }
266 m->splitAtChar(taxons, listOfTaxons, '-');
268 if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
270 if (countfile == "") {
271 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
272 vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
273 parser.getNameFile(files);
279 catch(exception& e) {
280 m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
284 //**********************************************************************************************************************
286 int GetLineageCommand::execute(){
289 if (abort == true) { if (calledHelp) { return 0; } return 2; }
291 if (m->control_pressed) { return 0; }
293 if (countfile != "") {
294 if ((fastafile != "") || (listfile != "") || (taxfile != "")) {
295 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
299 //read through the correct file and output lines you want to keep
300 if (taxfile != "") { readTax(); } //fills the set of names to get
301 if (namefile != "") { readName(); }
302 if (fastafile != "") { readFasta(); }
303 if (countfile != "") { readCount(); }
304 if (groupfile != "") { readGroup(); }
305 if (alignfile != "") { readAlign(); }
306 if (listfile != "") { readList(); }
309 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
311 if (outputNames.size() != 0) {
312 m->mothurOutEndLine();
313 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
314 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
315 m->mothurOutEndLine();
317 //set fasta file as new current fastafile
319 itTypes = outputTypes.find("fasta");
320 if (itTypes != outputTypes.end()) {
321 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
324 itTypes = outputTypes.find("name");
325 if (itTypes != outputTypes.end()) {
326 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
329 itTypes = outputTypes.find("group");
330 if (itTypes != outputTypes.end()) {
331 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
334 itTypes = outputTypes.find("list");
335 if (itTypes != outputTypes.end()) {
336 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
339 itTypes = outputTypes.find("taxonomy");
340 if (itTypes != outputTypes.end()) {
341 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
344 itTypes = outputTypes.find("count");
345 if (itTypes != outputTypes.end()) {
346 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
353 catch(exception& e) {
354 m->errorOut(e, "GetLineageCommand", "execute");
359 //**********************************************************************************************************************
360 int GetLineageCommand::readFasta(){
362 string thisOutputDir = outputDir;
363 if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
364 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
366 m->openOutputFile(outputFileName, out);
370 m->openInputFile(fastafile, in);
373 bool wroteSomething = false;
377 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
379 Sequence currSeq(in);
380 name = currSeq.getName();
383 //if this name is in the accnos file
384 if (names.count(name) != 0) {
385 wroteSomething = true;
387 currSeq.printSequence(out);
395 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
396 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
401 catch(exception& e) {
402 m->errorOut(e, "GetLineageCommand", "readFasta");
406 //**********************************************************************************************************************
407 int GetLineageCommand::readCount(){
409 string thisOutputDir = outputDir;
410 if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
411 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
414 m->openOutputFile(outputFileName, out);
417 m->openInputFile(countfile, in);
419 bool wroteSomething = false;
421 string headers = m->getline(in); m->gobble(in);
422 out << headers << endl;
424 string name, rest; int thisTotal;
427 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
429 in >> name; m->gobble(in);
430 in >> thisTotal; m->gobble(in);
431 rest = m->getline(in); m->gobble(in);
432 if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
434 if (names.count(name) != 0) {
435 out << name << '\t' << thisTotal << '\t' << rest << endl;
436 wroteSomething = true;
442 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
443 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
447 catch(exception& e) {
448 m->errorOut(e, "GetLineageCommand", "readCount");
452 //**********************************************************************************************************************
453 int GetLineageCommand::readList(){
455 string thisOutputDir = outputDir;
456 if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
457 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
459 m->openOutputFile(outputFileName, out);
462 m->openInputFile(listfile, in);
464 bool wroteSomething = false;
468 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
470 //read in list vector
473 //make a new list vector
475 newList.setLabel(list.getLabel());
478 for (int i = 0; i < list.getNumBins(); i++) {
480 //parse out names that are in accnos file
481 string binnames = list.get(i);
483 string newNames = "";
484 while (binnames.find_first_of(',') != -1) {
485 string name = binnames.substr(0,binnames.find_first_of(','));
486 binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
488 //if that name is in the .accnos file, add it
489 if (names.count(name) != 0) { newNames += name + ","; }
493 if (names.count(binnames) != 0) { newNames += binnames + ","; }
495 //if there are names in this bin add to new list
496 if (newNames != "") {
497 newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
498 newList.push_back(newNames);
502 //print new listvector
503 if (newList.getNumBins() != 0) {
504 wroteSomething = true;
513 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
514 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
519 catch(exception& e) {
520 m->errorOut(e, "GetLineageCommand", "readList");
524 //**********************************************************************************************************************
525 int GetLineageCommand::readName(){
527 string thisOutputDir = outputDir;
528 if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
529 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
531 m->openOutputFile(outputFileName, out);
535 m->openInputFile(namefile, in);
536 string name, firstCol, secondCol;
538 bool wroteSomething = false;
543 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
549 if (dups) { hold = secondCol; }
551 vector<string> parsedNames;
552 m->splitAtComma(secondCol, parsedNames);
554 vector<string> validSecond;
555 for (int i = 0; i < parsedNames.size(); i++) {
556 if (names.count(parsedNames[i]) != 0) {
557 validSecond.push_back(parsedNames[i]);
561 if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
562 for (int i = 0; i < parsedNames.size(); i++) { names.insert(parsedNames[i]); }
563 out << firstCol << '\t' << hold << endl;
564 wroteSomething = true;
566 //if the name in the first column is in the set then print it and any other names in second column also in set
567 if (names.count(firstCol) != 0) {
569 wroteSomething = true;
571 out << firstCol << '\t';
573 //you know you have at least one valid second since first column is valid
574 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
575 out << validSecond[validSecond.size()-1] << endl;
578 //make first name in set you come to first column and then add the remaining names to second column
580 //you want part of this row
581 if (validSecond.size() != 0) {
583 wroteSomething = true;
585 out << validSecond[0] << '\t';
587 //you know you have at least one valid second since first column is valid
588 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
589 out << validSecond[validSecond.size()-1] << endl;
598 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
599 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
604 catch(exception& e) {
605 m->errorOut(e, "GetLineageCommand", "readName");
610 //**********************************************************************************************************************
611 int GetLineageCommand::readGroup(){
613 string thisOutputDir = outputDir;
614 if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
615 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
617 m->openOutputFile(outputFileName, out);
621 m->openInputFile(groupfile, in);
624 bool wroteSomething = false;
628 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
631 in >> name; //read from first column
632 in >> group; //read from second column
634 //if this name is in the accnos file
635 if (names.count(name) != 0) {
636 wroteSomething = true;
638 out << name << '\t' << group << endl;
646 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
647 outputNames.push_back(outputFileName); outputTypes["group"].push_back(outputFileName);
652 catch(exception& e) {
653 m->errorOut(e, "GetLineageCommand", "readGroup");
657 //**********************************************************************************************************************
658 int GetLineageCommand::readTax(){
660 string thisOutputDir = outputDir;
661 if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
662 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
664 m->openOutputFile(outputFileName, out);
667 m->openInputFile(taxfile, in);
670 //bool wroteSomething = false;
671 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
672 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
673 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
675 for (int i = 0; i < listOfTaxons.size(); i++) {
676 noConfidenceTaxons[i] = listOfTaxons[i];
677 int hasConPos = listOfTaxons[i].find_first_of('(');
678 if (hasConPos != string::npos) {
679 taxonsHasConfidence[i] = true;
680 searchTaxons[i] = getTaxons(listOfTaxons[i]);
681 noConfidenceTaxons[i] = listOfTaxons[i];
682 m->removeConfidences(noConfidenceTaxons[i]);
689 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
691 in >> name; //read from first column
692 in >> tax; //read from second column
694 for (int j = 0; j < listOfTaxons.size(); j++) {
698 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
699 if (!taxonsHasConfidence[j]) {
700 int hasConfidences = tax.find_first_of('(');
701 if (hasConfidences != string::npos) {
703 m->removeConfidences(newtax);
706 int pos = newtax.find(noConfidenceTaxons[j]);
708 if (pos != string::npos) { //this sequence contains the taxon the user wants
710 out << name << '\t' << tax << endl;
711 //since you belong to at least one of the taxons we want you are included so no need to search for other
714 }else{//if listOfTaxons[i] has them and you don't them remove taxons
715 int hasConfidences = tax.find_first_of('(');
716 if (hasConfidences == string::npos) {
718 int pos = newtax.find(noConfidenceTaxons[j]);
720 if (pos != string::npos) { //this sequence contains the taxon the user wants
722 out << name << '\t' << tax << endl;
723 //since you belong to at least one of the taxons we want you are included so no need to search for other
726 }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
727 //first remove confidences from both and see if the taxonomy exists
729 string noNewTax = tax;
730 int hasConfidences = tax.find_first_of('(');
731 if (hasConfidences != string::npos) {
733 m->removeConfidences(noNewTax);
736 int pos = noNewTax.find(noConfidenceTaxons[j]);
738 if (pos != string::npos) { //if yes, then are the confidences okay
741 vector< map<string, float> > usersTaxon = getTaxons(newtax);
743 //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
744 //we want to "line them up", so we will find the the index where the searchstring starts
746 for (int i = 0; i < usersTaxon.size(); i++) {
748 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) {
751 bool goodspot = true;
752 //is this really the start, or are we dealing with a taxon of the same name?
753 while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
754 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
758 if (goodspot) { break; }
762 for (int i = 0; i < searchTaxons[j].size(); i++) {
764 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
765 if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
775 //passed the test so add you
778 out << name << '\t' << tax << endl;
792 if (names.size() == 0) { m->mothurOut("Your taxonomy file does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
793 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
798 catch(exception& e) {
799 m->errorOut(e, "GetLineageCommand", "readTax");
803 /**************************************************************************************************/
804 vector< map<string, float> > GetLineageCommand::getTaxons(string tax) {
807 vector< map<string, float> > t;
809 int taxLength = tax.length();
810 for(int i=0;i<taxLength;i++){
813 int openParen = taxon.find_first_of('(');
814 int closeParen = taxon.find_last_of(')');
816 string newtaxon, confidence;
817 if ((openParen != string::npos) && (closeParen != string::npos)) {
818 newtaxon = taxon.substr(0, openParen); //rip off confidence
819 confidence = taxon.substr((openParen+1), (closeParen-openParen-1));
825 convert(confidence, con);
827 map<string, float> temp;
828 temp[newtaxon] = con;
840 catch(exception& e) {
841 m->errorOut(e, "GetLineageCommand", "getTaxons");
845 //**********************************************************************************************************************
846 //alignreport file has a column header line then all other lines contain 16 columns. we just want the first column since that contains the name
847 int GetLineageCommand::readAlign(){
849 string thisOutputDir = outputDir;
850 if (outputDir == "") { thisOutputDir += m->hasPath(alignfile); }
851 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + getOutputFileNameTag("alignreport");
853 m->openOutputFile(outputFileName, out);
857 m->openInputFile(alignfile, in);
860 bool wroteSomething = false;
862 //read column headers
863 for (int i = 0; i < 16; i++) {
864 if (!in.eof()) { in >> junk; out << junk << '\t'; }
871 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
874 in >> name; //read from first column
876 //if this name is in the accnos file
877 if (names.count(name) != 0) {
878 wroteSomething = true;
883 for (int i = 0; i < 15; i++) {
884 if (!in.eof()) { in >> junk; out << junk << '\t'; }
889 }else {//still read just don't do anything with it
891 for (int i = 0; i < 15; i++) {
892 if (!in.eof()) { in >> junk; }
902 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
903 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
908 catch(exception& e) {
909 m->errorOut(e, "GetLineageCommand", "readAlign");
913 //**********************************************************************************************************************