X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=chimeraslayercommand.cpp;h=a1be2359c1516aa26395a2ba00c6101fb3a996f3;hb=7d9e2bc58855132983da7ac9342880bc95055f77;hp=bd9bdbf2f3dcb4d5ed3ce3a7ced35dfe7a4a1ba5;hpb=6ede3bf5c0a9eedb23f24577a97da81ab3e1f7df;p=mothur.git diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index bd9bdbf..a1be235 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -16,33 +16,35 @@ //********************************************************************************************************************** vector ChimeraSlayerCommand::setParameters(){ try { - CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate); - CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); - CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname); - CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount); - CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup); - CommandParameter pwindow("window", "Number", "", "50", "", "", "",false,false); parameters.push_back(pwindow); - CommandParameter pksize("ksize", "Number", "", "7", "", "", "",false,false); parameters.push_back(pksize); - CommandParameter pmatch("match", "Number", "", "5.0", "", "", "",false,false); parameters.push_back(pmatch); - CommandParameter pmismatch("mismatch", "Number", "", "-4.0", "", "", "",false,false); parameters.push_back(pmismatch); - CommandParameter pminsim("minsim", "Number", "", "90", "", "", "",false,false); parameters.push_back(pminsim); - CommandParameter pmincov("mincov", "Number", "", "70", "", "", "",false,false); parameters.push_back(pmincov); - CommandParameter pminsnp("minsnp", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminsnp); - CommandParameter pminbs("minbs", "Number", "", "90", "", "", "",false,false); parameters.push_back(pminbs); - CommandParameter psearch("search", "Multiple", "kmer-blast", "blast", "", "", "",false,false); parameters.push_back(psearch); - CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter prealign("realign", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(prealign); - CommandParameter ptrim("trim", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptrim); - CommandParameter psplit("split", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psplit); - CommandParameter pnumwanted("numwanted", "Number", "", "15", "", "", "",false,false); parameters.push_back(pnumwanted); - CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters); - CommandParameter pdivergence("divergence", "Number", "", "1.007", "", "", "",false,false); parameters.push_back(pdivergence); - CommandParameter pparents("parents", "Number", "", "3", "", "", "",false,false); parameters.push_back(pparents); - CommandParameter pincrement("increment", "Number", "", "5", "", "", "",false,false); parameters.push_back(pincrement); - CommandParameter pblastlocation("blastlocation", "String", "", "", "", "", "",false,false); parameters.push_back(pblastlocation); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); - CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave); + CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate); + CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); + CommandParameter pwindow("window", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pwindow); + CommandParameter pksize("ksize", "Number", "", "7", "", "", "","",false,false); parameters.push_back(pksize); + CommandParameter pmatch("match", "Number", "", "5.0", "", "", "","",false,false); parameters.push_back(pmatch); + CommandParameter pmismatch("mismatch", "Number", "", "-4.0", "", "", "","",false,false); parameters.push_back(pmismatch); + CommandParameter pminsim("minsim", "Number", "", "90", "", "", "","",false,false); parameters.push_back(pminsim); + CommandParameter pmincov("mincov", "Number", "", "70", "", "", "","",false,false); parameters.push_back(pmincov); + CommandParameter pminsnp("minsnp", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminsnp); + CommandParameter pminbs("minbs", "Number", "", "90", "", "", "","",false,false); parameters.push_back(pminbs); + CommandParameter psearch("search", "Multiple", "kmer-blast", "blast", "", "", "","",false,false); parameters.push_back(psearch); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); + + CommandParameter prealign("realign", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(prealign); + CommandParameter ptrim("trim", "Boolean", "", "F", "", "", "","fasta",false,false); parameters.push_back(ptrim); + CommandParameter psplit("split", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psplit); + CommandParameter pnumwanted("numwanted", "Number", "", "15", "", "", "","",false,false); parameters.push_back(pnumwanted); + CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); + CommandParameter pdivergence("divergence", "Number", "", "1.007", "", "", "","",false,false); parameters.push_back(pdivergence); + CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdups); + CommandParameter pparents("parents", "Number", "", "3", "", "", "","",false,false); parameters.push_back(pparents); + CommandParameter pincrement("increment", "Number", "", "5", "", "", "","",false,false); parameters.push_back(pincrement); + CommandParameter pblastlocation("blastlocation", "String", "", "", "", "", "","",false,false); parameters.push_back(pblastlocation); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); + CommandParameter psave("save", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psave); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -59,7 +61,7 @@ string ChimeraSlayerCommand::getHelpString(){ string helpString = ""; helpString += "The chimera.slayer command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n"; helpString += "This command was modeled after the chimeraSlayer written by the Broad Institute.\n"; - helpString += "The chimera.slayer command parameters are fasta, name, group, template, processors, trim, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment, numwanted, blastlocation and realign.\n"; + helpString += "The chimera.slayer command parameters are fasta, name, group, template, processors, dereplicate, trim, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment, numwanted, blastlocation and realign.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n"; helpString += "The name parameter allows you to provide a name file, if you are using reference=self. \n"; helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n"; @@ -70,6 +72,7 @@ string ChimeraSlayerCommand::getHelpString(){ #ifdef USE_MPI helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n"; #endif + helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n"; helpString += "The trim parameter allows you to output a new fasta file containing your sequences with the chimeric ones trimmed to include only their longest piece, default=F. \n"; helpString += "The split parameter allows you to check both pieces of non-chimeric sequence for chimeras, thus looking for trimeras and quadmeras. default=F. \n"; helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=50. \n"; @@ -101,26 +104,21 @@ string ChimeraSlayerCommand::getHelpString(){ } } //********************************************************************************************************************** -string ChimeraSlayerCommand::getOutputFileNameTag(string type, string inputName=""){ - try { - string outputFileName = ""; - map >::iterator it; +string ChimeraSlayerCommand::getOutputPattern(string type) { + try { + string pattern = ""; - //is this a type this command creates - it = outputTypes.find(type); - if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } - else { - if (type == "chimera") { outputFileName = "slayer.chimeras"; } - else if (type == "accnos") { outputFileName = "slayer.accnos"; } - else if (type == "fasta") { outputFileName = "slayer.fasta"; } - else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } - } - return outputFileName; - } - catch(exception& e) { - m->errorOut(e, "ChimeraSlayerCommand", "getOutputFileNameTag"); - exit(1); - } + if (type == "chimera") { pattern = "[filename],slayer.chimeras"; } + else if (type == "accnos") { pattern = "[filename],slayer.accnos"; } + else if (type == "fasta") { pattern = "[filename],slayer.fasta"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "ChimeraSlayerCommand", "getOutputPattern"); + exit(1); + } } //********************************************************************************************************************** ChimeraSlayerCommand::ChimeraSlayerCommand(){ @@ -600,6 +598,13 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) { temp = validParameter.validFile(parameters, "numwanted", false); if (temp == "not found") { temp = "15"; } m->mothurConvert(temp, numwanted); + + temp = validParameter.validFile(parameters, "dereplicate", false); + if (temp == "not found") { + if (groupfile != "") { temp = "false"; } + else { temp = "true"; } + } + dups = m->isTrue(temp); blastlocation = validParameter.validFile(parameters, "blastlocation", false); if (blastlocation == "not found") { blastlocation = ""; } @@ -666,10 +671,12 @@ int ChimeraSlayerCommand::execute(){ m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine(); int start = time(NULL); - if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it - string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera"); - string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos"); - string trimFastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("fasta"); + if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it + map variables; + variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])); + string outputFileName = getOutputFileName("chimera", variables); + string accnosFileName = getOutputFileName("accnos", variables); + string trimFastaFileName = getOutputFileName("fasta", variables); //clears files ofstream out, out1, out2; @@ -752,8 +759,10 @@ int ChimeraSlayerCommand::execute(){ if (pid == 0) { #endif - totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, trimFastaFileName); - m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine(); + if (!dups) { + totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, trimFastaFileName); + m->mothurOutEndLine(); m->mothurOut(toString(totalChimeras) + " chimera found."); m->mothurOutEndLine(); + } #ifdef USE_MPI } MPI_Barrier(MPI_COMM_WORLD); //make everyone wait @@ -1488,6 +1497,9 @@ int ChimeraSlayerCommand::createProcessesGroups(string outputFName, string accno //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ + if (pDataArray[i]->count != pDataArray[i]->end) { + m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; + } num += pDataArray[i]->count; CloseHandle(hThreadArray[i]); delete pDataArray[i];