X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=removelineagecommand.cpp;h=5b6fc252c4ae8c78619d0f3ad76afbb9792cbddd;hp=b72f4b669d43b4b44fa1c20df4812155aaff82fc;hb=1a20e24ee786195ab0e1cccd4f5aede7a88f3f4e;hpb=19fcbbdba99658f5eca244803280f9ee7f9f6607 diff --git a/removelineagecommand.cpp b/removelineagecommand.cpp index b72f4b6..5b6fc25 100644 --- a/removelineagecommand.cpp +++ b/removelineagecommand.cpp @@ -10,20 +10,22 @@ #include "removelineagecommand.h" #include "sequence.hpp" #include "listvector.hpp" +#include "counttable.h" //********************************************************************************************************************** vector RemoveLineageCommand::setParameters(){ try { - CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta); - CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname); - CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup); - CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist); - CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(ptaxonomy); - CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport); - CommandParameter ptaxon("taxon", "String", "", "", "", "", "",false,true); parameters.push_back(ptaxon); - CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup); + CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist); + CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none","taxonomy",false,true,true); parameters.push_back(ptaxonomy); + CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport); + CommandParameter ptaxon("taxon", "String", "", "", "", "", "","",false,true,true); parameters.push_back(ptaxon); + CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -38,9 +40,9 @@ vector RemoveLineageCommand::setParameters(){ string RemoveLineageCommand::getHelpString(){ try { string helpString = ""; - helpString += "The remove.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, list or alignreport file.\n"; + helpString += "The remove.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, count, list or alignreport file.\n"; helpString += "It outputs a file containing only the sequences from the taxonomy file that are not from the taxon you requested to be removed.\n"; - helpString += "The remove.lineage command parameters are taxon, fasta, name, group, list, taxonomy, alignreport and dups. You must provide taxonomy unless you have a valid current taxonomy file.\n"; + helpString += "The remove.lineage command parameters are taxon, fasta, name, group, list, taxonomy, count, alignreport and dups. You must provide taxonomy unless you have a valid current taxonomy file.\n"; helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n"; helpString += "The taxon parameter allows you to select the taxons you would like to remove, and is required.\n"; helpString += "You may enter your taxons with confidence scores, doing so will remove only those sequences that belong to the taxonomy and whose cofidence scores fall below the scores you give.\n"; @@ -57,7 +59,27 @@ string RemoveLineageCommand::getHelpString(){ exit(1); } } - +//********************************************************************************************************************** +string RemoveLineageCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "fasta") { pattern = "[filename],pick,[extension]"; } + else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; } + else if (type == "name") { pattern = "[filename],pick,[extension]"; } + else if (type == "group") { pattern = "[filename],pick,[extension]"; } + else if (type == "count") { pattern = "[filename],pick,[extension]"; } + else if (type == "list") { pattern = "[filename],pick,[extension]"; } + else if (type == "alignreport") { pattern = "[filename],pick.align.report"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "RemoveLineageCommand", "getOutputPattern"); + exit(1); + } +} //********************************************************************************************************************** RemoveLineageCommand::RemoveLineageCommand(){ @@ -71,6 +93,7 @@ RemoveLineageCommand::RemoveLineageCommand(){ outputTypes["group"] = tempOutNames; outputTypes["alignreport"] = tempOutNames; outputTypes["list"] = tempOutNames; + outputTypes["count"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand"); @@ -108,6 +131,7 @@ RemoveLineageCommand::RemoveLineageCommand(string option) { outputTypes["group"] = tempOutNames; outputTypes["alignreport"] = tempOutNames; outputTypes["list"] = tempOutNames; + outputTypes["count"] = tempOutNames; //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } @@ -164,17 +188,25 @@ RemoveLineageCommand::RemoveLineageCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["taxonomy"] = inputDir + it->second; } } + + it = parameters.find("count"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["count"] = inputDir + it->second; } + } } //check for required parameters fastafile = validParameter.validFile(parameters, "fasta", true); - if (fastafile == "not open") { abort = true; } + if (fastafile == "not open") { fastafile = ""; abort = true; } else if (fastafile == "not found") { fastafile = ""; } else { m->setFastaFile(fastafile); } namefile = validParameter.validFile(parameters, "name", true); - if (namefile == "not open") { abort = true; } + if (namefile == "not open") { namefile = ""; abort = true; } else if (namefile == "not found") { namefile = ""; } else { m->setNameFile(namefile); } @@ -193,13 +225,26 @@ RemoveLineageCommand::RemoveLineageCommand(string option) { else { m->setListFile(listfile); } taxfile = validParameter.validFile(parameters, "taxonomy", true); - if (taxfile == "not open") { abort = true; } + if (taxfile == "not open") { taxfile = ""; abort = true; } else if (taxfile == "not found") { taxfile = m->getTaxonomyFile(); if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); } else { m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; } }else { m->setTaxonomyFile(taxfile); } + countfile = validParameter.validFile(parameters, "count", true); + if (countfile == "not open") { countfile = ""; abort = true; } + else if (countfile == "not found") { countfile = ""; } + else { m->setCountTableFile(countfile); } + + if ((namefile != "") && (countfile != "")) { + m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true; + } + + if ((groupfile != "") && (countfile != "")) { + m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true; + } + string usedDups = "true"; string temp = validParameter.validFile(parameters, "dups", false); if (temp == "not found") { @@ -217,10 +262,17 @@ RemoveLineageCommand::RemoveLineageCommand(string option) { } m->splitAtChar(taxons, listOfTaxons, '-'); - if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; } + if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; } if ((usedDups != "") && (namefile == "")) { m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine(); abort = true; } - + + if (countfile == "") { + if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){ + vector files; files.push_back(fastafile); files.push_back(taxfile); + parser.getNameFile(files); + } + } + } } @@ -237,6 +289,12 @@ int RemoveLineageCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } if (m->control_pressed) { return 0; } + + if (countfile != "") { + if ((fastafile != "") || (listfile != "") || (taxfile != "")) { + m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n"); + } + } //read through the correct file and output lines you want to keep if (taxfile != "") { readTax(); } //fills the set of names to remove @@ -245,6 +303,7 @@ int RemoveLineageCommand::execute(){ if (groupfile != "") { readGroup(); } if (alignfile != "") { readAlign(); } if (listfile != "") { readList(); } + if (countfile != "") { readCount(); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -281,6 +340,11 @@ int RemoveLineageCommand::execute(){ if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); } } + + itTypes = outputTypes.find("count"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); } + } } return 0; @@ -297,8 +361,10 @@ int RemoveLineageCommand::readFasta(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" + m->getExtension(fastafile); - + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)); + variables["[extension]"] = m->getExtension(fastafile); + string outputFileName = getOutputFileName("fasta", variables); ofstream out; m->openOutputFile(outputFileName, out); @@ -343,8 +409,10 @@ int RemoveLineageCommand::readList(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(listfile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" + m->getExtension(listfile); - + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile)); + variables["[extension]"] = m->getExtension(listfile); + string outputFileName = getOutputFileName("list", variables); ofstream out; m->openOutputFile(outputFileName, out); @@ -414,8 +482,10 @@ int RemoveLineageCommand::readName(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(namefile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" + m->getExtension(namefile); - + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile)); + variables["[extension]"] = m->getExtension(namefile); + string outputFileName = getOutputFileName("name", variables); ofstream out; m->openOutputFile(outputFileName, out); @@ -486,14 +556,73 @@ int RemoveLineageCommand::readName(){ exit(1); } } +//********************************************************************************************************************** +int RemoveLineageCommand::readCount(){ + try { + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(countfile); } + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile)); + variables["[extension]"] = m->getExtension(countfile); + string outputFileName = getOutputFileName("count", variables); + + ofstream out; + m->openOutputFile(outputFileName, out); + + ifstream in; + m->openInputFile(countfile, in); + + bool wroteSomething = false; + + string headers = m->getline(in); m->gobble(in); + out << headers << endl; + + string name, rest; int thisTotal; + while (!in.eof()) { + + if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; } + + in >> name; m->gobble(in); + in >> thisTotal; m->gobble(in); + rest = m->getline(in); m->gobble(in); + if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); } + + if (names.count(name) == 0) { + out << name << '\t' << thisTotal << '\t' << rest << endl; + wroteSomething = true; + } + } + in.close(); + out.close(); + + //check for groups that have been eliminated + CountTable ct; + if (ct.testGroups(outputFileName)) { + ct.readTable(outputFileName); + ct.printTable(outputFileName); + } + + if (wroteSomething == false) { m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine(); } + outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "RemoveLineageCommand", "readCount"); + exit(1); + } +} //********************************************************************************************************************** int RemoveLineageCommand::readGroup(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile); - + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)); + variables["[extension]"] = m->getExtension(groupfile); + string outputFileName = getOutputFileName("group", variables); + ofstream out; m->openOutputFile(outputFileName, out); @@ -535,7 +664,10 @@ int RemoveLineageCommand::readTax(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile); + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)); + variables["[extension]"] = m->getExtension(taxfile); + string outputFileName = getOutputFileName("taxonomy", variables); ofstream out; m->openOutputFile(outputFileName, out); @@ -555,7 +687,8 @@ int RemoveLineageCommand::readTax(){ if (hasConPos != string::npos) { taxonsHasConfidence[i] = true; searchTaxons[i] = getTaxons(listOfTaxons[i]); - noConfidenceTaxons[i] = removeConfidences(listOfTaxons[i]); + noConfidenceTaxons[i] = listOfTaxons[i]; + m->removeConfidences(noConfidenceTaxons[i]); } } @@ -569,15 +702,18 @@ int RemoveLineageCommand::readTax(){ bool remove = false; + string noQuotesTax = m->removeQuotes(tax); + for (int j = 0; j < listOfTaxons.size(); j++) { - string newtax = tax; + string newtax = noQuotesTax; //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them if (!taxonsHasConfidence[j]) { - int hasConfidences = tax.find_first_of('('); + int hasConfidences = noQuotesTax.find_first_of('('); if (hasConfidences != string::npos) { - newtax = removeConfidences(tax); + newtax = noQuotesTax; + m->removeConfidences(newtax); } int pos = newtax.find(noConfidenceTaxons[j]); @@ -591,7 +727,7 @@ int RemoveLineageCommand::readTax(){ } }else{//if taxons has them and you don't them remove taxons - int hasConfidences = tax.find_first_of('('); + int hasConfidences = noQuotesTax.find_first_of('('); if (hasConfidences == string::npos) { int pos = newtax.find(noConfidenceTaxons[j]); @@ -606,10 +742,11 @@ int RemoveLineageCommand::readTax(){ }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons //first remove confidences from both and see if the taxonomy exists - string noNewTax = tax; - int hasConfidences = tax.find_first_of('('); + string noNewTax = noQuotesTax; + int hasConfidences = noQuotesTax.find_first_of('('); if (hasConfidences != string::npos) { - noNewTax = removeConfidences(tax); + noNewTax = noQuotesTax; + m->removeConfidences(noNewTax); } int pos = noNewTax.find(noConfidenceTaxons[j]); @@ -693,25 +830,30 @@ vector< map > RemoveLineageCommand::getTaxons(string tax) { int taxLength = tax.length(); for(int i=0;iisNumeric1(confidenceScore)) { //its a confidence + newtaxon = taxon.substr(0, openParen); //rip off confidence + confidence = taxon.substr((openParen+1), (closeParen-openParen-1)); + }else { //its part of the taxon + newtaxon = taxon; + confidence = "0"; + } }else{ newtaxon = taxon; confidence = "0"; - } + } float con = 0; convert(confidence, con); map temp; temp[newtaxon] = con; t.push_back(temp); - taxon = ""; } else{ @@ -726,36 +868,16 @@ vector< map > RemoveLineageCommand::getTaxons(string tax) { exit(1); } } -/**************************************************************************************************/ -string RemoveLineageCommand::removeConfidences(string tax) { - try { - - string taxon = ""; - int taxLength = tax.length(); - for(int i=0;ierrorOut(e, "RemoveLineageCommand", "removeConfidences"); - exit(1); - } -} //********************************************************************************************************************** //alignreport file has a column header line then all other lines contain 16 columns. we just want the first column since that contains the name int RemoveLineageCommand::readAlign(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(alignfile); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + "pick.align.report"; + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)); + variables["[extension]"] = m->getExtension(alignfile); + string outputFileName = getOutputFileName("alignreport", variables); ofstream out; m->openOutputFile(outputFileName, out);