2 * getlineagecommand.cpp
5 * Created by westcott on 9/24/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "getlineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14 #include "inputdata.h"
16 //**********************************************************************************************************************
17 vector<string> GetLineageCommand::setParameters(){
19 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false, true); parameters.push_back(pfasta);
20 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false, true); parameters.push_back(pname);
21 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false, true); parameters.push_back(pcount);
22 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false, true); parameters.push_back(pgroup);
23 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false, true); parameters.push_back(plist);
24 CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT", "none","shared",false,false, true); parameters.push_back(pshared);
25 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "tax", "FNGLT", "none","taxonomy",false,false, true); parameters.push_back(ptaxonomy);
26 CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "tax", "FNGLT", "none","constaxonomy",false,false, true); parameters.push_back(pconstaxonomy);
27 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport);
28 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
29 CommandParameter ptaxon("taxon", "String", "", "", "", "", "","",false,true, true); parameters.push_back(ptaxon);
30 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups);
31 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
32 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
34 vector<string> myArray;
35 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
39 m->errorOut(e, "GetLineageCommand", "setParameters");
43 //**********************************************************************************************************************
44 string GetLineageCommand::getHelpString(){
46 string helpString = "";
47 helpString += "The get.lineage command reads a taxonomy or constaxonomy file and any of the following file types: fasta, name, group, count, list, shared or alignreport file. The constaxonomy can only be used with a shared or list file.\n";
48 helpString += "It outputs a file containing only the sequences from the taxonomy file that are from the taxon requested.\n";
49 helpString += "The get.lineage command parameters are taxon, fasta, name, group, count, list, shared, taxonomy, alignreport, label and dups. You must provide taxonomy or constaxonomy unless you have a valid current taxonomy file.\n";
50 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
51 helpString += "The taxon parameter allows you to select the taxons you would like to get and is required.\n";
52 helpString += "You may enter your taxons with confidence scores, doing so will get only those sequences that belong to the taxonomy and whose cofidence scores is above the scores you give.\n";
53 helpString += "If they belong to the taxonomy and have confidences below those you provide the sequence will not be selected.\n";
54 helpString += "The label parameter is used to analyze specific labels in your input. \n";
55 helpString += "The get.lineage command should be in the following format: get.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n";
56 helpString += "Example get.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n";
57 helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n";
58 helpString += "Example get.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n";
59 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
63 m->errorOut(e, "GetLineageCommand", "getHelpString");
67 //**********************************************************************************************************************
68 string GetLineageCommand::getOutputPattern(string type) {
72 if (type == "fasta") { pattern = "[filename],pick,[extension]"; }
73 else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; }
74 else if (type == "constaxonomy") { pattern = "[filename],pick,[extension]"; }
75 else if (type == "name") { pattern = "[filename],pick,[extension]"; }
76 else if (type == "group") { pattern = "[filename],pick,[extension]"; }
77 else if (type == "count") { pattern = "[filename],pick,[extension]"; }
78 else if (type == "list") { pattern = "[filename],pick,[extension]-[filename],[distance],pick,[extension]"; }
79 else if (type == "shared") { pattern = "[filename],[distance],pick,[extension]"; }
80 else if (type == "alignreport") { pattern = "[filename],pick.align.report"; }
81 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
86 m->errorOut(e, "GetLineageCommand", "getOutputPattern");
90 //**********************************************************************************************************************
91 GetLineageCommand::GetLineageCommand(){
93 abort = true; calledHelp = true;
95 vector<string> tempOutNames;
96 outputTypes["fasta"] = tempOutNames;
97 outputTypes["taxonomy"] = tempOutNames;
98 outputTypes["name"] = tempOutNames;
99 outputTypes["group"] = tempOutNames;
100 outputTypes["alignreport"] = tempOutNames;
101 outputTypes["list"] = tempOutNames;
102 outputTypes["count"] = tempOutNames;
103 outputTypes["constaxonomy"] = tempOutNames;
104 outputTypes["shared"] = tempOutNames;
106 catch(exception& e) {
107 m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
111 //**********************************************************************************************************************
112 GetLineageCommand::GetLineageCommand(string option) {
114 abort = false; calledHelp = false;
116 //allow user to run help
117 if(option == "help") { help(); abort = true; calledHelp = true; }
118 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
121 vector<string> myArray = setParameters();
123 OptionParser parser(option);
124 map<string,string> parameters = parser.getParameters();
126 ValidParameters validParameter;
127 map<string,string>::iterator it;
129 //check to make sure all parameters are valid for command
130 for (it = parameters.begin(); it != parameters.end(); it++) {
131 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
134 //initialize outputTypes
135 vector<string> tempOutNames;
136 outputTypes["fasta"] = tempOutNames;
137 outputTypes["taxonomy"] = tempOutNames;
138 outputTypes["name"] = tempOutNames;
139 outputTypes["group"] = tempOutNames;
140 outputTypes["alignreport"] = tempOutNames;
141 outputTypes["list"] = tempOutNames;
142 outputTypes["count"] = tempOutNames;
143 outputTypes["constaxonomy"] = tempOutNames;
144 outputTypes["shared"] = tempOutNames;
146 //if the user changes the output directory command factory will send this info to us in the output parameter
147 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
149 //if the user changes the input directory command factory will send this info to us in the output parameter
150 string inputDir = validParameter.validFile(parameters, "inputdir", false);
151 if (inputDir == "not found"){ inputDir = ""; }
154 it = parameters.find("alignreport");
155 //user has given a template file
156 if(it != parameters.end()){
157 path = m->hasPath(it->second);
158 //if the user has not given a path then, add inputdir. else leave path alone.
159 if (path == "") { parameters["alignreport"] = inputDir + it->second; }
162 it = parameters.find("fasta");
163 //user has given a template file
164 if(it != parameters.end()){
165 path = m->hasPath(it->second);
166 //if the user has not given a path then, add inputdir. else leave path alone.
167 if (path == "") { parameters["fasta"] = inputDir + it->second; }
170 it = parameters.find("list");
171 //user has given a template file
172 if(it != parameters.end()){
173 path = m->hasPath(it->second);
174 //if the user has not given a path then, add inputdir. else leave path alone.
175 if (path == "") { parameters["list"] = inputDir + it->second; }
178 it = parameters.find("name");
179 //user has given a template file
180 if(it != parameters.end()){
181 path = m->hasPath(it->second);
182 //if the user has not given a path then, add inputdir. else leave path alone.
183 if (path == "") { parameters["name"] = inputDir + it->second; }
186 it = parameters.find("group");
187 //user has given a template file
188 if(it != parameters.end()){
189 path = m->hasPath(it->second);
190 //if the user has not given a path then, add inputdir. else leave path alone.
191 if (path == "") { parameters["group"] = inputDir + it->second; }
194 it = parameters.find("taxonomy");
195 //user has given a template file
196 if(it != parameters.end()){
197 path = m->hasPath(it->second);
198 //if the user has not given a path then, add inputdir. else leave path alone.
199 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
202 it = parameters.find("count");
203 //user has given a template file
204 if(it != parameters.end()){
205 path = m->hasPath(it->second);
206 //if the user has not given a path then, add inputdir. else leave path alone.
207 if (path == "") { parameters["count"] = inputDir + it->second; }
210 it = parameters.find("constaxonomy");
211 //user has given a template file
212 if(it != parameters.end()){
213 path = m->hasPath(it->second);
214 //if the user has not given a path then, add inputdir. else leave path alone.
215 if (path == "") { parameters["constaxonomy"] = inputDir + it->second; }
218 it = parameters.find("shared");
219 //user has given a template file
220 if(it != parameters.end()){
221 path = m->hasPath(it->second);
222 //if the user has not given a path then, add inputdir. else leave path alone.
223 if (path == "") { parameters["shared"] = inputDir + it->second; }
228 //check for required parameters
229 fastafile = validParameter.validFile(parameters, "fasta", true);
230 if (fastafile == "not open") { fastafile = ""; abort = true; }
231 else if (fastafile == "not found") { fastafile = ""; }
232 else { m->setFastaFile(fastafile); }
234 namefile = validParameter.validFile(parameters, "name", true);
235 if (namefile == "not open") { namefile = ""; abort = true; }
236 else if (namefile == "not found") { namefile = ""; }
237 else { m->setNameFile(namefile); }
239 groupfile = validParameter.validFile(parameters, "group", true);
240 if (groupfile == "not open") { abort = true; }
241 else if (groupfile == "not found") { groupfile = ""; }
242 else { m->setGroupFile(groupfile); }
244 alignfile = validParameter.validFile(parameters, "alignreport", true);
245 if (alignfile == "not open") { abort = true; }
246 else if (alignfile == "not found") { alignfile = ""; }
248 listfile = validParameter.validFile(parameters, "list", true);
249 if (listfile == "not open") { abort = true; }
250 else if (listfile == "not found") { listfile = ""; }
251 else { m->setListFile(listfile); }
253 taxfile = validParameter.validFile(parameters, "taxonomy", true);
254 if (taxfile == "not open") { taxfile = ""; abort = true; }
255 else if (taxfile == "not found") { taxfile = ""; }
256 else { m->setTaxonomyFile(taxfile); }
258 sharedfile = validParameter.validFile(parameters, "shared", true);
259 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
260 else if (sharedfile == "not found") { sharedfile = ""; }
261 else { m->setSharedFile(sharedfile); }
264 constaxonomy = validParameter.validFile(parameters, "constaxonomy", true);
265 if (constaxonomy == "not open") { constaxonomy = ""; abort = true; }
266 else if (constaxonomy == "not found") { constaxonomy = ""; }
268 if ((constaxonomy == "") && (taxfile == "")) {
269 taxfile = m->getTaxonomyFile();
270 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
272 m->mothurOut("You have no current taxonomy file and did not provide a constaxonomy file. The taxonomy or constaxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
275 string usedDups = "true";
276 string temp = validParameter.validFile(parameters, "dups", false);
277 if (temp == "not found") {
278 if (namefile != "") { temp = "true"; }
279 else { temp = "false"; usedDups = ""; }
281 dups = m->isTrue(temp);
283 countfile = validParameter.validFile(parameters, "count", true);
284 if (countfile == "not open") { countfile = ""; abort = true; }
285 else if (countfile == "not found") { countfile = ""; }
286 else { m->setCountTableFile(countfile); }
288 if ((namefile != "") && (countfile != "")) {
289 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
292 if ((groupfile != "") && (countfile != "")) {
293 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
296 taxons = validParameter.validFile(parameters, "taxon", false);
297 if (taxons == "not found") { taxons = ""; m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine(); abort = true; }
300 if (taxons[0] == '\'') { taxons = taxons.substr(1); }
301 if (taxons[(taxons.length()-1)] == '\'') { taxons = taxons.substr(0, (taxons.length()-1)); }
303 m->splitAtChar(taxons, listOfTaxons, '-');
305 if ((fastafile == "") && (constaxonomy == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, constaxonomy, shared or listfile."); m->mothurOutEndLine(); abort = true; }
307 if ((constaxonomy != "") && ((fastafile != "") || (namefile != "") || (groupfile != "") || (alignfile != "") || (taxfile != "") || (countfile != ""))) {
308 m->mothurOut("[ERROR]: can only use constaxonomy file with a list or shared file, aborting.\n"); abort = true;
311 if ((constaxonomy != "") && (taxfile != "")) {
312 m->mothurOut("[ERROR]: Choose only one: taxonomy or constaxonomy, aborting.\n"); abort = true;
315 if ((sharedfile != "") && (taxfile != "")) {
316 m->mothurOut("[ERROR]: sharedfile can only be used with constaxonomy file, aborting.\n"); abort = true;
319 if ((sharedfile != "") || (listfile != "")) {
320 label = validParameter.validFile(parameters, "label", false);
321 if (label == "not found") { label = ""; m->mothurOut("[WARNING]: You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); }
324 if (countfile == "") {
325 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
326 vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
327 parser.getNameFile(files);
333 catch(exception& e) {
334 m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
338 //**********************************************************************************************************************
340 int GetLineageCommand::execute(){
343 if (abort == true) { if (calledHelp) { return 0; } return 2; }
345 if (m->control_pressed) { return 0; }
347 if (countfile != "") {
348 if ((fastafile != "") || (listfile != "") || (taxfile != "")) {
349 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
353 //read through the correct file and output lines you want to keep
355 readTax(); //fills the set of names to get
356 if (namefile != "") { readName(); }
357 if (fastafile != "") { readFasta(); }
358 if (countfile != "") { readCount(); }
359 if (groupfile != "") { readGroup(); }
360 if (alignfile != "") { readAlign(); }
361 if (listfile != "") { readList(); }
365 if (listfile != "") { readConsList(); }
366 if (sharedfile != "") { readShared(); }
370 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
372 if (outputNames.size() != 0) {
373 m->mothurOutEndLine();
374 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
375 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
376 m->mothurOutEndLine();
378 //set fasta file as new current fastafile
380 itTypes = outputTypes.find("fasta");
381 if (itTypes != outputTypes.end()) {
382 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
385 itTypes = outputTypes.find("name");
386 if (itTypes != outputTypes.end()) {
387 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
390 itTypes = outputTypes.find("group");
391 if (itTypes != outputTypes.end()) {
392 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
395 itTypes = outputTypes.find("list");
396 if (itTypes != outputTypes.end()) {
397 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
400 itTypes = outputTypes.find("shared");
401 if (itTypes != outputTypes.end()) {
402 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
405 itTypes = outputTypes.find("taxonomy");
406 if (itTypes != outputTypes.end()) {
407 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
410 itTypes = outputTypes.find("count");
411 if (itTypes != outputTypes.end()) {
412 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
419 catch(exception& e) {
420 m->errorOut(e, "GetLineageCommand", "execute");
425 //**********************************************************************************************************************
426 int GetLineageCommand::readFasta(){
428 string thisOutputDir = outputDir;
429 if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
430 map<string, string> variables;
431 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
432 variables["[extension]"] = m->getExtension(fastafile);
433 string outputFileName = getOutputFileName("fasta", variables);
435 m->openOutputFile(outputFileName, out);
439 m->openInputFile(fastafile, in);
442 bool wroteSomething = false;
446 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
448 Sequence currSeq(in);
449 name = currSeq.getName();
452 //if this name is in the accnos file
453 if (names.count(name) != 0) {
454 wroteSomething = true;
456 currSeq.printSequence(out);
464 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
465 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
470 catch(exception& e) {
471 m->errorOut(e, "GetLineageCommand", "readFasta");
475 //**********************************************************************************************************************
476 int GetLineageCommand::readCount(){
478 string thisOutputDir = outputDir;
479 if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
480 map<string, string> variables;
481 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
482 variables["[extension]"] = m->getExtension(countfile);
483 string outputFileName = getOutputFileName("count", variables);
486 m->openOutputFile(outputFileName, out);
489 m->openInputFile(countfile, in);
491 bool wroteSomething = false;
493 string headers = m->getline(in); m->gobble(in);
494 out << headers << endl;
496 string name, rest; int thisTotal;
499 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
501 in >> name; m->gobble(in);
502 in >> thisTotal; m->gobble(in);
503 rest = m->getline(in); m->gobble(in);
504 if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
506 if (names.count(name) != 0) {
507 out << name << '\t' << thisTotal << '\t' << rest << endl;
508 wroteSomething = true;
514 //check for groups that have been eliminated
516 if (ct.testGroups(outputFileName)) {
517 ct.readTable(outputFileName, true);
518 ct.printTable(outputFileName);
522 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
523 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
527 catch(exception& e) {
528 m->errorOut(e, "GetLineageCommand", "readCount");
532 //**********************************************************************************************************************
533 int GetLineageCommand::readList(){
535 string thisOutputDir = outputDir;
536 if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
537 map<string, string> variables;
538 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
539 variables["[extension]"] = m->getExtension(listfile);
540 string outputFileName = getOutputFileName("list", variables);
542 m->openOutputFile(outputFileName, out);
545 m->openInputFile(listfile, in);
547 bool wroteSomething = false;
551 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
553 //read in list vector
556 //make a new list vector
558 newList.setLabel(list.getLabel());
561 for (int i = 0; i < list.getNumBins(); i++) {
563 //parse out names that are in accnos file
564 string binnames = list.get(i);
565 vector<string> bnames;
566 m->splitAtComma(binnames, bnames);
568 string newNames = "";
569 for (int j = 0; j < bnames.size(); j++) {
570 string name = bnames[j];
571 //if that name is in the .accnos file, add it
572 if (names.count(name) != 0) { newNames += name + ","; }
576 //if there are names in this bin add to new list
577 if (newNames != "") {
578 newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
579 newList.push_back(newNames);
583 //print new listvector
584 if (newList.getNumBins() != 0) {
585 wroteSomething = true;
594 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
595 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
600 catch(exception& e) {
601 m->errorOut(e, "GetLineageCommand", "readList");
605 //**********************************************************************************************************************
606 int GetLineageCommand::readConsList(){
610 if (m->control_pressed) { delete list; return 0;}
613 newList.setLabel(list->getLabel());
614 int selectedCount = 0;
615 bool wroteSomething = false;
616 string snumBins = toString(list->getNumBins());
618 for (int i = 0; i < list->getNumBins(); i++) {
620 if (m->control_pressed) { delete list; return 0;}
622 //create a label for this otu
623 string otuLabel = "Otu";
624 string sbinNumber = toString(i+1);
625 if (sbinNumber.length() < snumBins.length()) {
626 int diff = snumBins.length() - sbinNumber.length();
627 for (int h = 0; h < diff; h++) { otuLabel += "0"; }
629 otuLabel += sbinNumber;
631 if (names.count(otuLabel) != 0) {
633 newList.push_back(list->get(i));
637 string thisOutputDir = outputDir;
638 if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
639 map<string, string> variables;
640 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
641 variables["[extension]"] = m->getExtension(listfile);
642 variables["[distance]"] = list->getLabel();
643 string outputFileName = getOutputFileName("list", variables);
645 m->openOutputFile(outputFileName, out);
648 //print new listvector
649 if (newList.getNumBins() != 0) {
650 wroteSomething = true;
655 if (wroteSomething == false) { m->mothurOut("Your file does not contain OTUs from " + taxons + "."); m->mothurOutEndLine(); }
656 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
658 m->mothurOut("Selected " + toString(selectedCount) + " OTUs from your list file."); m->mothurOutEndLine();
663 catch(exception& e) {
664 m->errorOut(e, "GetLineageCommand", "readConsList");
668 //**********************************************************************************************************************
669 int GetLineageCommand::getListVector(){
671 InputData input(listfile, "list");
672 list = input.getListVector();
673 string lastLabel = list->getLabel();
675 if (label == "") { label = lastLabel; return 0; }
677 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
678 set<string> labels; labels.insert(label);
679 set<string> processedLabels;
680 set<string> userLabels = labels;
682 //as long as you are not at the end of the file or done wih the lines you want
683 while((list != NULL) && (userLabels.size() != 0)) {
684 if (m->control_pressed) { return 0; }
686 if(labels.count(list->getLabel()) == 1){
687 processedLabels.insert(list->getLabel());
688 userLabels.erase(list->getLabel());
692 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
693 string saveLabel = list->getLabel();
696 list = input.getListVector(lastLabel);
698 processedLabels.insert(list->getLabel());
699 userLabels.erase(list->getLabel());
701 //restore real lastlabel to save below
702 list->setLabel(saveLabel);
706 lastLabel = list->getLabel();
708 //get next line to process
709 //prevent memory leak
711 list = input.getListVector();
715 if (m->control_pressed) { return 0; }
717 //output error messages about any remaining user labels
718 set<string>::iterator it;
719 bool needToRun = false;
720 for (it = userLabels.begin(); it != userLabels.end(); it++) {
721 m->mothurOut("Your file does not include the label " + *it);
722 if (processedLabels.count(lastLabel) != 1) {
723 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
726 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
730 //run last label if you need to
731 if (needToRun == true) {
733 list = input.getListVector(lastLabel);
738 catch(exception& e) {
739 m->errorOut(e, "GetLineageCommand", "getListVector");
744 //**********************************************************************************************************************
745 int GetLineageCommand::readShared(){
750 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
752 vector<string> newLabels;
754 //create new "filtered" lookup
755 vector<SharedRAbundVector*> newLookup;
756 for (int i = 0; i < lookup.size(); i++) {
757 SharedRAbundVector* temp = new SharedRAbundVector();
758 temp->setLabel(lookup[i]->getLabel());
759 temp->setGroup(lookup[i]->getGroup());
760 newLookup.push_back(temp);
763 bool wroteSomething = false;
765 for (int i = 0; i < lookup[0]->getNumBins(); i++) {
767 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; }
769 //is this otu on the list
770 if (names.count(m->currentBinLabels[i]) != 0) {
771 numSelected++; wroteSomething = true;
772 newLabels.push_back(m->currentBinLabels[i]);
773 for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup
774 newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup());
779 string thisOutputDir = outputDir;
780 if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); }
781 map<string, string> variables;
782 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile));
783 variables["[extension]"] = m->getExtension(sharedfile);
784 variables["[distance]"] = lookup[0]->getLabel();
785 string outputFileName = getOutputFileName("shared", variables);
787 m->openOutputFile(outputFileName, out);
788 outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
790 for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
792 m->currentBinLabels = newLabels;
794 newLookup[0]->printHeaders(out);
796 for (int i = 0; i < newLookup.size(); i++) {
797 out << newLookup[i]->getLabel() << '\t' << newLookup[i]->getGroup() << '\t';
798 newLookup[i]->print(out);
802 for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; }
804 if (wroteSomething == false) { m->mothurOut("Your file does not contain OTUs from " + taxons + "."); m->mothurOutEndLine(); }
806 m->mothurOut("Selected " + toString(numSelected) + " OTUs from your shared file."); m->mothurOutEndLine();
810 catch(exception& e) {
811 m->errorOut(e, "GetLineageCommand", "readShared");
815 //**********************************************************************************************************************
816 int GetLineageCommand::getShared(){
818 InputData input(sharedfile, "sharedfile");
819 lookup = input.getSharedRAbundVectors();
820 string lastLabel = lookup[0]->getLabel();
822 if (label == "") { label = lastLabel; return 0; }
824 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
825 set<string> labels; labels.insert(label);
826 set<string> processedLabels;
827 set<string> userLabels = labels;
829 //as long as you are not at the end of the file or done wih the lines you want
830 while((lookup[0] != NULL) && (userLabels.size() != 0)) {
831 if (m->control_pressed) { return 0; }
833 if(labels.count(lookup[0]->getLabel()) == 1){
834 processedLabels.insert(lookup[0]->getLabel());
835 userLabels.erase(lookup[0]->getLabel());
839 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
840 string saveLabel = lookup[0]->getLabel();
842 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
843 lookup = input.getSharedRAbundVectors(lastLabel);
845 processedLabels.insert(lookup[0]->getLabel());
846 userLabels.erase(lookup[0]->getLabel());
848 //restore real lastlabel to save below
849 lookup[0]->setLabel(saveLabel);
853 lastLabel = lookup[0]->getLabel();
855 //get next line to process
856 //prevent memory leak
857 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
858 lookup = input.getSharedRAbundVectors();
862 if (m->control_pressed) { return 0; }
864 //output error messages about any remaining user labels
865 set<string>::iterator it;
866 bool needToRun = false;
867 for (it = userLabels.begin(); it != userLabels.end(); it++) {
868 m->mothurOut("Your file does not include the label " + *it);
869 if (processedLabels.count(lastLabel) != 1) {
870 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
873 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
877 //run last label if you need to
878 if (needToRun == true) {
879 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
880 lookup = input.getSharedRAbundVectors(lastLabel);
885 catch(exception& e) {
886 m->errorOut(e, "GetLineageCommand", "getShared");
891 //**********************************************************************************************************************
892 int GetLineageCommand::readName(){
894 string thisOutputDir = outputDir;
895 if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
896 map<string, string> variables;
897 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
898 variables["[extension]"] = m->getExtension(namefile);
899 string outputFileName = getOutputFileName("name", variables);
901 m->openOutputFile(outputFileName, out);
905 m->openInputFile(namefile, in);
906 string name, firstCol, secondCol;
908 bool wroteSomething = false;
913 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
919 if (dups) { hold = secondCol; }
921 vector<string> parsedNames;
922 m->splitAtComma(secondCol, parsedNames);
924 vector<string> validSecond;
925 for (int i = 0; i < parsedNames.size(); i++) {
926 if (names.count(parsedNames[i]) != 0) {
927 validSecond.push_back(parsedNames[i]);
931 if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
932 for (int i = 0; i < parsedNames.size(); i++) { names.insert(parsedNames[i]); }
933 out << firstCol << '\t' << hold << endl;
934 wroteSomething = true;
936 //if the name in the first column is in the set then print it and any other names in second column also in set
937 if (names.count(firstCol) != 0) {
939 wroteSomething = true;
941 out << firstCol << '\t';
943 //you know you have at least one valid second since first column is valid
944 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
945 out << validSecond[validSecond.size()-1] << endl;
948 //make first name in set you come to first column and then add the remaining names to second column
950 //you want part of this row
951 if (validSecond.size() != 0) {
953 wroteSomething = true;
955 out << validSecond[0] << '\t';
957 //you know you have at least one valid second since first column is valid
958 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
959 out << validSecond[validSecond.size()-1] << endl;
968 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
969 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
974 catch(exception& e) {
975 m->errorOut(e, "GetLineageCommand", "readName");
980 //**********************************************************************************************************************
981 int GetLineageCommand::readGroup(){
983 string thisOutputDir = outputDir;
984 if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
985 map<string, string> variables;
986 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
987 variables["[extension]"] = m->getExtension(groupfile);
988 string outputFileName = getOutputFileName("group", variables);
990 m->openOutputFile(outputFileName, out);
994 m->openInputFile(groupfile, in);
997 bool wroteSomething = false;
1001 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
1004 in >> name; //read from first column
1005 in >> group; //read from second column
1007 //if this name is in the accnos file
1008 if (names.count(name) != 0) {
1009 wroteSomething = true;
1011 out << name << '\t' << group << endl;
1019 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
1020 outputNames.push_back(outputFileName); outputTypes["group"].push_back(outputFileName);
1025 catch(exception& e) {
1026 m->errorOut(e, "GetLineageCommand", "readGroup");
1030 //**********************************************************************************************************************
1031 int GetLineageCommand::readTax(){
1033 string thisOutputDir = outputDir;
1034 if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
1035 map<string, string> variables;
1036 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
1037 variables["[extension]"] = m->getExtension(taxfile);
1038 string outputFileName = getOutputFileName("taxonomy", variables);
1040 m->openOutputFile(outputFileName, out);
1043 m->openInputFile(taxfile, in);
1046 //bool wroteSomething = false;
1047 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
1048 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
1049 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
1051 for (int i = 0; i < listOfTaxons.size(); i++) {
1052 noConfidenceTaxons[i] = listOfTaxons[i];
1053 int hasConPos = listOfTaxons[i].find_first_of('(');
1054 if (hasConPos != string::npos) {
1055 taxonsHasConfidence[i] = true;
1056 searchTaxons[i] = getTaxons(listOfTaxons[i]);
1057 noConfidenceTaxons[i] = listOfTaxons[i];
1058 m->removeConfidences(noConfidenceTaxons[i]);
1065 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
1067 in >> name; //read from first column
1068 in >> tax; //read from second column
1070 string noQuotesTax = m->removeQuotes(tax);
1072 for (int j = 0; j < listOfTaxons.size(); j++) {
1074 string newtax = noQuotesTax;
1076 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
1077 if (!taxonsHasConfidence[j]) {
1078 int hasConfidences = noQuotesTax.find_first_of('(');
1079 if (hasConfidences != string::npos) {
1080 newtax = noQuotesTax;
1081 m->removeConfidences(newtax);
1084 int pos = newtax.find(noConfidenceTaxons[j]);
1086 if (pos != string::npos) { //this sequence contains the taxon the user wants
1088 out << name << '\t' << tax << endl;
1089 //since you belong to at least one of the taxons we want you are included so no need to search for other
1092 }else{//if listOfTaxons[i] has them and you don't them remove taxons
1093 int hasConfidences = noQuotesTax.find_first_of('(');
1094 if (hasConfidences == string::npos) {
1096 int pos = newtax.find(noConfidenceTaxons[j]);
1098 if (pos != string::npos) { //this sequence contains the taxon the user wants
1100 out << name << '\t' << tax << endl;
1101 //since you belong to at least one of the taxons we want you are included so no need to search for other
1104 }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
1105 //first remove confidences from both and see if the taxonomy exists
1107 string noNewTax = noQuotesTax;
1108 int hasConfidences = noQuotesTax.find_first_of('(');
1109 if (hasConfidences != string::npos) {
1110 noNewTax = noQuotesTax;
1111 m->removeConfidences(noNewTax);
1114 int pos = noNewTax.find(noConfidenceTaxons[j]);
1116 if (pos != string::npos) { //if yes, then are the confidences okay
1119 vector< map<string, float> > usersTaxon = getTaxons(newtax);
1121 //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
1122 //we want to "line them up", so we will find the the index where the searchstring starts
1124 for (int i = 0; i < usersTaxon.size(); i++) {
1126 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) {
1129 bool goodspot = true;
1130 //is this really the start, or are we dealing with a taxon of the same name?
1131 while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
1132 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
1136 if (goodspot) { break; }
1140 for (int i = 0; i < searchTaxons[j].size(); i++) {
1142 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
1143 if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
1153 //passed the test so add you
1156 out << name << '\t' << tax << endl;
1170 if (names.size() == 0) { m->mothurOut("Your taxonomy file does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
1171 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
1176 catch(exception& e) {
1177 m->errorOut(e, "GetLineageCommand", "readTax");
1181 //**********************************************************************************************************************
1182 int GetLineageCommand::readConsTax(){
1184 string thisOutputDir = outputDir;
1185 if (outputDir == "") { thisOutputDir += m->hasPath(constaxonomy); }
1186 map<string, string> variables;
1187 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(constaxonomy));
1188 variables["[extension]"] = m->getExtension(constaxonomy);
1189 string outputFileName = getOutputFileName("constaxonomy", variables);
1191 m->openOutputFile(outputFileName, out);
1194 m->openInputFile(constaxonomy, in);
1195 string otuLabel, tax;
1199 string headers = m->getline(in);
1200 out << headers << endl;
1202 //bool wroteSomething = false;
1203 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
1204 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
1205 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
1207 for (int i = 0; i < listOfTaxons.size(); i++) {
1208 noConfidenceTaxons[i] = listOfTaxons[i];
1209 int hasConPos = listOfTaxons[i].find_first_of('(');
1210 if (hasConPos != string::npos) {
1211 taxonsHasConfidence[i] = true;
1212 searchTaxons[i] = getTaxons(listOfTaxons[i]);
1213 noConfidenceTaxons[i] = listOfTaxons[i];
1214 m->removeConfidences(noConfidenceTaxons[i]);
1221 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
1223 in >> otuLabel; m->gobble(in);
1224 in >> numReps; m->gobble(in);
1225 in >> tax; m->gobble(in);
1227 string noQuotesTax = m->removeQuotes(tax);
1229 for (int j = 0; j < listOfTaxons.size(); j++) {
1231 string newtax = noQuotesTax;
1233 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
1234 if (!taxonsHasConfidence[j]) {
1235 int hasConfidences = noQuotesTax.find_first_of('(');
1236 if (hasConfidences != string::npos) {
1237 newtax = noQuotesTax;
1238 m->removeConfidences(newtax);
1241 int pos = newtax.find(noConfidenceTaxons[j]);
1243 if (pos != string::npos) { //this sequence contains the taxon the user wants
1244 names.insert(otuLabel);
1245 out << otuLabel << '\t' << numReps << '\t' << tax << endl;
1246 //since you belong to at least one of the taxons we want you are included so no need to search for other
1249 }else{//if listOfTaxons[i] has them and you don't them remove taxons
1250 int hasConfidences = noQuotesTax.find_first_of('(');
1251 if (hasConfidences == string::npos) {
1253 int pos = newtax.find(noConfidenceTaxons[j]);
1255 if (pos != string::npos) { //this sequence contains the taxon the user wants
1256 names.insert(otuLabel);
1257 out << otuLabel << '\t' << numReps << '\t' << tax << endl;
1258 //since you belong to at least one of the taxons we want you are included so no need to search for other
1261 }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
1262 //first remove confidences from both and see if the taxonomy exists
1264 string noNewTax = noQuotesTax;
1265 int hasConfidences = noQuotesTax.find_first_of('(');
1266 if (hasConfidences != string::npos) {
1267 noNewTax = noQuotesTax;
1268 m->removeConfidences(noNewTax);
1271 int pos = noNewTax.find(noConfidenceTaxons[j]);
1273 if (pos != string::npos) { //if yes, then are the confidences okay
1276 vector< map<string, float> > usersTaxon = getTaxons(newtax);
1278 //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
1279 //we want to "line them up", so we will find the the index where the searchstring starts
1281 for (int i = 0; i < usersTaxon.size(); i++) {
1283 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) {
1286 bool goodspot = true;
1287 //is this really the start, or are we dealing with a taxon of the same name?
1288 while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
1289 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
1293 if (goodspot) { break; }
1297 for (int i = 0; i < searchTaxons[j].size(); i++) {
1299 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
1300 if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
1310 //passed the test so add you
1312 names.insert(otuLabel);
1313 out << otuLabel << '\t' << numReps << '\t' << tax << endl;
1325 if (names.size() == 0) { m->mothurOut("Your taxonomy file does not contain any OTUs from " + taxons + "."); m->mothurOutEndLine(); }
1326 outputNames.push_back(outputFileName); outputTypes["constaxonomy"].push_back(outputFileName);
1331 catch(exception& e) {
1332 m->errorOut(e, "GetLineageCommand", "readConsTax");
1336 /**************************************************************************************************/
1337 vector< map<string, float> > GetLineageCommand::getTaxons(string tax) {
1340 vector< map<string, float> > t;
1342 int taxLength = tax.length();
1344 for(int i=0;i<taxLength;i++){
1347 int openParen = taxon.find_last_of('(');
1348 int closeParen = taxon.find_last_of(')');
1350 string newtaxon, confidence;
1351 if ((openParen != string::npos) && (closeParen != string::npos)) {
1352 string confidenceScore = taxon.substr(openParen+1, (closeParen-(openParen+1)));
1353 if (m->isNumeric1(confidenceScore)) { //its a confidence
1354 newtaxon = taxon.substr(0, openParen); //rip off confidence
1355 confidence = taxon.substr((openParen+1), (closeParen-openParen-1));
1356 }else { //its part of the taxon
1365 convert(confidence, con);
1367 map<string, float> temp;
1368 temp[newtaxon] = con;
1381 catch(exception& e) {
1382 m->errorOut(e, "GetLineageCommand", "getTaxons");
1386 //**********************************************************************************************************************
1387 //alignreport file has a column header line then all other lines contain 16 columns. we just want the first column since that contains the name
1388 int GetLineageCommand::readAlign(){
1390 string thisOutputDir = outputDir;
1391 if (outputDir == "") { thisOutputDir += m->hasPath(alignfile); }
1392 map<string, string> variables;
1393 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(alignfile));
1394 variables["[extension]"] = m->getExtension(alignfile);
1395 string outputFileName = getOutputFileName("alignreport", variables);
1398 m->openOutputFile(outputFileName, out);
1402 m->openInputFile(alignfile, in);
1405 bool wroteSomething = false;
1407 //read column headers
1408 for (int i = 0; i < 16; i++) {
1409 if (!in.eof()) { in >> junk; out << junk << '\t'; }
1416 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
1419 in >> name; //read from first column
1421 //if this name is in the accnos file
1422 if (names.count(name) != 0) {
1423 wroteSomething = true;
1425 out << name << '\t';
1428 for (int i = 0; i < 15; i++) {
1429 if (!in.eof()) { in >> junk; out << junk << '\t'; }
1434 }else {//still read just don't do anything with it
1436 for (int i = 0; i < 15; i++) {
1437 if (!in.eof()) { in >> junk; }
1447 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine(); }
1448 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
1453 catch(exception& e) {
1454 m->errorOut(e, "GetLineageCommand", "readAlign");
1458 //**********************************************************************************************************************