X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=screenseqscommand.cpp;h=2b5ebc1a844420e699b04df0dcd6e144befbf1fe;hb=79a7d3273749b08d4f9f8dfe350c964ff0c4351e;hp=7ac910d4a74b9fc1bffeacb32f5893941f3eb60f;hpb=91a27e0483827c06c21c4fe89558923bbfe86573;p=mothur.git diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index 7ac910d..2b5ebc1 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -8,14 +8,15 @@ */ #include "screenseqscommand.h" - +#include "counttable.h" //********************************************************************************************************************** vector ScreenSeqsCommand::setParameters(){ try { CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); - CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname); - CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup); CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile); CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(palignreport); CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptax); @@ -44,12 +45,12 @@ vector ScreenSeqsCommand::setParameters(){ string ScreenSeqsCommand::getHelpString(){ try { string helpString = ""; - helpString += "The screen.seqs command reads a fastafile and creates .....\n"; - helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, qfile, alignreport, taxonomy, optimize, criteria and processors.\n"; + helpString += "The screen.seqs command reads a fastafile and screens sequences.\n"; + helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, count, qfile, alignreport, taxonomy, optimize, criteria and processors.\n"; helpString += "The fasta parameter is required.\n"; helpString += "The alignreport and taxonomy parameters allow you to remove bad seqs from taxonomy and alignreport files.\n"; - helpString += "The start parameter .... The default is -1.\n"; - helpString += "The end parameter .... The default is -1.\n"; + helpString += "The start parameter is used to set a position the \"good\" sequences must start by. The default is -1.\n"; + helpString += "The end parameter is used to set a position the \"good\" sequences must end after. The default is -1.\n"; helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n"; helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n"; helpString += "The minlength parameter allows you to set and minimum sequence length. \n"; @@ -70,6 +71,34 @@ string ScreenSeqsCommand::getHelpString(){ exit(1); } } +//********************************************************************************************************************** +string ScreenSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "fasta") { outputFileName = "good" + m->getExtension(inputName); } + else if (type == "taxonomy") { outputFileName = "good" + m->getExtension(inputName); } + else if (type == "name") { outputFileName = "good" + m->getExtension(inputName); } + else if (type == "count") { outputFileName = "good" + m->getExtension(inputName); } + else if (type == "group") { outputFileName = "good" + m->getExtension(inputName); } + else if (type == "accnos") { outputFileName = "bad.accnos"; } + else if (type == "qfile") { outputFileName = "good" + m->getExtension(inputName); } + else if (type == "alignreport") { outputFileName = "good.align.report"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "ScreenSeqsCommand", "getOutputFileNameTag"); + exit(1); + } +} + //********************************************************************************************************************** ScreenSeqsCommand::ScreenSeqsCommand(){ try { @@ -83,6 +112,7 @@ ScreenSeqsCommand::ScreenSeqsCommand(){ outputTypes["accnos"] = tempOutNames; outputTypes["qfile"] = tempOutNames; outputTypes["taxonomy"] = tempOutNames; + outputTypes["count"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand"); @@ -122,6 +152,7 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option) { outputTypes["accnos"] = tempOutNames; outputTypes["qfile"] = tempOutNames; outputTypes["taxonomy"] = tempOutNames; + outputTypes["count"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -175,6 +206,14 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["taxonomy"] = inputDir + it->second; } } + + it = parameters.find("count"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["count"] = inputDir + it->second; } + } } //check for required parameters @@ -202,6 +241,19 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option) { else if (namefile == "not found") { namefile = ""; } else { m->setNameFile(namefile); } + countfile = validParameter.validFile(parameters, "count", true); + if (countfile == "not open") { countfile = ""; abort = true; } + else if (countfile == "not found") { countfile = ""; } + else { m->setCountTableFile(countfile); } + + if ((namefile != "") && (countfile != "")) { + m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true; + } + + if ((groupfile != "") && (countfile != "")) { + m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true; + } + alignreport = validParameter.validFile(parameters, "alignreport", true); if (alignreport == "not open") { abort = true; } else if (alignreport == "not found") { alignreport = ""; } @@ -261,10 +313,12 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option) { temp = validParameter.validFile(parameters, "criteria", false); if (temp == "not found"){ temp = "90"; } m->mothurConvert(temp, criteria); - if (namefile == "") { - vector files; files.push_back(fastafile); - parser.getNameFile(files); - } + if (countfile == "") { + if (namefile == "") { + vector files; files.push_back(fastafile); + parser.getNameFile(files); + } + } } } @@ -285,6 +339,11 @@ int ScreenSeqsCommand::execute(){ if (optimize.size() != 0) { //get summary is paralellized so we need to divideFile, no need to do this step twice so I moved it here //use the namefile to optimize correctly if (namefile != "") { nameMap = m->readNames(namefile); } + else if (countfile != "") { + CountTable ct; + ct.readTable(countfile); + nameMap = ct.getNameMap(); + } getSummary(positions); } else { @@ -309,8 +368,8 @@ int ScreenSeqsCommand::execute(){ #endif } - string goodSeqFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "good" + m->getExtension(fastafile); - string badAccnosFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "bad.accnos"; + string goodSeqFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile); + string badAccnosFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("accnos"); int numFastaSeqs = 0; set badSeqNames; @@ -445,7 +504,9 @@ int ScreenSeqsCommand::execute(){ screenNameGroupFile(badSeqNames); if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; } }else if(groupfile != "") { screenGroupFile(badSeqNames); } // this screens just the group - + else if (countfile != "") { screenCountFile(badSeqNames); } + + if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; } if(alignreport != "") { screenAlignReport(badSeqNames); } @@ -492,6 +553,11 @@ int ScreenSeqsCommand::execute(){ if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); } } + + itTypes = outputTypes.find("count"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); } + } m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); @@ -514,7 +580,7 @@ int ScreenSeqsCommand::screenNameGroupFile(set badSeqNames){ string seqName, seqList, group; set::iterator it; - string goodNameFile = outputDir + m->getRootName(m->getSimpleName(namefile)) + "good" + m->getExtension(namefile); + string goodNameFile = outputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile); outputNames.push_back(goodNameFile); outputTypes["name"].push_back(goodNameFile); ofstream goodNameOut; m->openOutputFile(goodNameFile, goodNameOut); @@ -560,7 +626,7 @@ int ScreenSeqsCommand::screenNameGroupFile(set badSeqNames){ ifstream inputGroups; m->openInputFile(groupfile, inputGroups); - string goodGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "good" + m->getExtension(groupfile); + string goodGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile); outputNames.push_back(goodGroupFile); outputTypes["group"].push_back(goodGroupFile); ofstream goodGroupOut; m->openOutputFile(goodGroupFile, goodGroupOut); @@ -893,8 +959,8 @@ int ScreenSeqsCommand::screenGroupFile(set badSeqNames){ string seqName, group; set::iterator it; - string goodGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "good" + m->getExtension(groupfile); - outputNames.push_back(goodGroupFile); outputTypes["group"].push_back(goodGroupFile); + string goodGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile); + outputNames.push_back(goodGroupFile); outputTypes["group"].push_back(goodGroupFile); ofstream goodGroupOut; m->openOutputFile(goodGroupFile, goodGroupOut); while(!inputGroups.eof()){ @@ -935,7 +1001,69 @@ int ScreenSeqsCommand::screenGroupFile(set badSeqNames){ exit(1); } } +//*************************************************************************************************************** +int ScreenSeqsCommand::screenCountFile(set badSeqNames){ + try { + ifstream in; + m->openInputFile(countfile, in); + set::iterator it; + + string goodCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile); + outputNames.push_back(goodCountFile); outputTypes["count"].push_back(goodCountFile); + ofstream goodCountOut; m->openOutputFile(goodCountFile, goodCountOut); + + string headers = m->getline(in); m->gobble(in); + goodCountOut << headers << endl; + + string name, rest; int thisTotal; + while (!in.eof()) { + if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; } + + in >> name; m->gobble(in); + in >> thisTotal; m->gobble(in); + rest = m->getline(in); m->gobble(in); + + it = badSeqNames.find(name); + + if(it != badSeqNames.end()){ + badSeqNames.erase(it); + } + else{ + goodCountOut << name << '\t' << thisTotal << '\t' << rest << endl; + } + } + + if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; } + + //we were unable to remove some of the bad sequences + if (badSeqNames.size() != 0) { + for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) { + m->mothurOut("Your count file does not include the sequence " + *it + " please correct."); + m->mothurOutEndLine(); + } + } + + in.close(); + goodCountOut.close(); + + //check for groups that have been eliminated + CountTable ct; + if (ct.testGroups(goodCountFile)) { + ct.readTable(goodCountFile); + ct.printTable(goodCountFile); + } + + if (m->control_pressed) { m->mothurRemove(goodCountFile); } + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "ScreenSeqsCommand", "screenCountFile"); + exit(1); + } +} //*************************************************************************************************************** int ScreenSeqsCommand::screenAlignReport(set badSeqNames){ @@ -945,7 +1073,7 @@ int ScreenSeqsCommand::screenAlignReport(set badSeqNames){ string seqName, group; set::iterator it; - string goodAlignReportFile = outputDir + m->getRootName(m->getSimpleName(alignreport)) + "good" + m->getExtension(alignreport); + string goodAlignReportFile = outputDir + m->getRootName(m->getSimpleName(alignreport)) + getOutputFileNameTag("alignreport"); outputNames.push_back(goodAlignReportFile); outputTypes["alignreport"].push_back(goodAlignReportFile); ofstream goodAlignReportOut; m->openOutputFile(goodAlignReportFile, goodAlignReportOut); @@ -1009,7 +1137,7 @@ int ScreenSeqsCommand::screenTaxonomy(set badSeqNames){ string seqName, tax; set::iterator it; - string goodTaxFile = outputDir + m->getRootName(m->getSimpleName(taxonomy)) + "good" + m->getExtension(taxonomy); + string goodTaxFile = outputDir + m->getRootName(m->getSimpleName(taxonomy)) + getOutputFileNameTag("taxonomy", taxonomy); outputNames.push_back(goodTaxFile); outputTypes["taxonomy"].push_back(goodTaxFile); ofstream goodTaxOut; m->openOutputFile(goodTaxFile, goodTaxOut); @@ -1058,7 +1186,7 @@ int ScreenSeqsCommand::screenQual(set badSeqNames){ m->openInputFile(qualfile, in); set::iterator it; - string goodQualFile = outputDir + m->getRootName(m->getSimpleName(qualfile)) + "good" + m->getExtension(qualfile); + string goodQualFile = outputDir + m->getRootName(m->getSimpleName(qualfile)) + getOutputFileNameTag("qfile", qualfile); outputNames.push_back(goodQualFile); outputTypes["qfile"].push_back(goodQualFile); ofstream goodQual; m->openOutputFile(goodQualFile, goodQual); @@ -1209,7 +1337,6 @@ int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& int length = MPIPos[start+i+1] - MPIPos[start+i]; char* buf4 = new char[length]; - memcpy(buf4, outputString.c_str(), length); MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);