X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=chimerauchimecommand.cpp;h=6f7ba106ead04e9ccd3b7b290dce4c3a55dabd20;hb=a9dbc22713bfc056a797361dd757b1a5c98e1c01;hp=54b1d9b4caace40335934db39c6306687ddafdf5;hpb=e0dc0bcef2a0f7e1f63abb531dbb1ad533da98ca;p=mothur.git diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp index 54b1d9b..6f7ba10 100644 --- a/chimerauchimecommand.cpp +++ b/chimerauchimecommand.cpp @@ -12,15 +12,16 @@ //#include "uc.h" #include "sequence.hpp" #include "referencedb.h" - +#include "systemcommand.h" //********************************************************************************************************************** vector ChimeraUchimeCommand::setParameters(){ try { CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); - CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname); - CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -34,6 +35,8 @@ vector ChimeraUchimeCommand::setParameters(){ CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks); CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk); CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow); + CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdups); + //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid); CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp); CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps); @@ -58,11 +61,13 @@ string ChimeraUchimeCommand::getHelpString(){ string helpString = ""; helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n"; helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n"; - helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n"; + helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n"; helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n"; + helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n"; helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n"; helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n"; + helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n"; helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n"; @@ -98,6 +103,28 @@ string ChimeraUchimeCommand::getHelpString(){ } } //********************************************************************************************************************** +string ChimeraUchimeCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "chimera") { outputFileName = "uchime.chimeras"; } + else if (type == "accnos") { outputFileName = "uchime.accnos"; } + else if (type == "alns") { outputFileName = "uchime.alns"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "ChimeraUchimeCommand", "getOutputFileNameTag"); + exit(1); + } +} +//********************************************************************************************************************** ChimeraUchimeCommand::ChimeraUchimeCommand(){ try { abort = true; calledHelp = true; @@ -115,7 +142,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(){ //*************************************************************************************************************** ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { try { - abort = false; calledHelp = false; + abort = false; calledHelp = false; hasName=false; hasCount=false; ReferenceDB* rdb = ReferenceDB::getInstance(); //allow user to run help @@ -225,9 +252,8 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { //check for required parameters - bool hasName = true; namefile = validParameter.validFile(parameters, "name", false); - if (namefile == "not found") { namefile = ""; hasName = false; } + if (namefile == "not found") { namefile = ""; } else { m->splitAtDash(namefile, nameFileNames); @@ -294,12 +320,91 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { } } } + } + + if (nameFileNames.size() != 0) { hasName = true; } + + //check for required parameters + vector countfileNames; + countfile = validParameter.validFile(parameters, "count", false); + if (countfile == "not found") { + countfile = ""; + }else { + m->splitAtDash(countfile, countfileNames); - //make sure there is at least one valid file left - if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; } + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < countfileNames.size(); i++) { + + bool ignore = false; + if (countfileNames[i] == "current") { + countfileNames[i] = m->getCountTableFile(); + if (nameFileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); } + else { + m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; + //erase from file list + countfileNames.erase(countfileNames.begin()+i); + i--; + } + } + + if (!ignore) { + + if (inputDir != "") { + string path = m->hasPath(countfileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; } + } + + int ableToOpen; + ifstream in; + + ableToOpen = m->openInputFile(countfileNames[i], in, "noerror"); + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]); + m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + countfileNames[i] = tryPath; + } + } + + if (ableToOpen == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]); + m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + countfileNames[i] = tryPath; + } + } + + in.close(); + + if (ableToOpen == 1) { + m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); + //erase from file list + countfileNames.erase(countfileNames.begin()+i); + i--; + }else { + m->setCountTableFile(countfileNames[i]); + } + } + } } - - if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; } + + if (countfileNames.size() != 0) { hasCount = true; } + + //make sure there is at least one valid file left + if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; } + + if (!hasName && hasCount) { nameFileNames = countfileNames; } + + if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; } bool hasGroup = true; groupfile = validParameter.validFile(parameters, "group", false); @@ -377,6 +482,10 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; } + if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; } + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } @@ -405,6 +514,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { } } }else if (hasName) { templatefile = "self"; } + else if (hasCount) { templatefile = "self"; } else { if (rdb->getSavedReference() != "") { templatefile = rdb->getSavedReference(); @@ -418,7 +528,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); - convert(temp, processors); + m->mothurConvert(temp, processors); abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; } if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; } @@ -450,6 +560,15 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; } skipgaps2 = m->isTrue(temp); + + string usedDups = "false"; + temp = validParameter.validFile(parameters, "dereplicate", false); + if (temp == "not found") { + if (groupfile != "") { temp = "false"; } + else { temp = "true"; usedDups = ""; } + } + dups = m->isTrue(temp); + if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; } if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; } @@ -461,18 +580,46 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { path = path.substr(0, (tempPath.find_last_of('m'))); string uchimeCommand; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability + if (m->debug) { + m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); + Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine(); + newCommand->execute(); + delete newCommand; + m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); + newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine(); + newCommand->execute(); + delete newCommand; + } #else uchimeCommand = path + "uchime.exe"; #endif - + //test to make sure uchime exists ifstream in; uchimeCommand = m->getFullPathName(uchimeCommand); int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close(); - if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uchimeCommand + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } - } + if(ableToOpen == 1) { + m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n"); + //check to see if uchime is in the path?? + + string uLocation = m->findProgramPath("uchime"); + + + ifstream in2; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close(); +#else + ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close(); +#endif + + if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } + else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; } + }else { uchimeLocation = uchimeCommand; } + + uchimeLocation = m->getFullPathName(uchimeLocation); + } } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand"); @@ -483,7 +630,8 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { int ChimeraUchimeCommand::execute(){ try{ - if (abort == true) { if (calledHelp) { return 0; } return 2; } + + if (abort == true) { if (calledHelp) { return 0; } return 2; } m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n"); @@ -494,28 +642,47 @@ int ChimeraUchimeCommand::execute(){ int start = time(NULL); string nameFile = ""; if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it - string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera"; - string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.accnos"; - string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.alns"; + string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera"); + string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos"); + string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("alns"); string newFasta = m->getRootName(fastaFileNames[s]) + "temp"; //you provided a groupfile string groupFile = ""; - if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; } - - if ((templatefile == "self") && (groupFile == "")) { //you want to run uchime with a reference template + bool hasGroup = false; + if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; } + else if (hasCount) { + CountTable ct; + if (ct.testGroups(nameFileNames[s])) { hasGroup = true; } + } + + if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; } if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one nameFile = nameFileNames[s]; }else { nameFile = getNamesFile(fastaFileNames[s]); } - + map seqs; readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } //read namefile vector nameMapCount; - int error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + int error; + if (hasCount) { + CountTable ct; + ct.readTable(nameFile); + for(map::iterator it = seqs.begin(); it != seqs.end(); it++) { + int num = ct.getNumSeqs(it->first); + if (num == 0) { error = 1; } + else { + seqPriorityNode temp(num, it->second, it->first); + nameMapCount.push_back(temp); + } + } + }else { + error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + } if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } @@ -525,14 +692,23 @@ int ChimeraUchimeCommand::execute(){ if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } - if (groupFile != "") { + if (hasGroup) { if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one nameFile = nameFileNames[s]; }else { nameFile = getNamesFile(fastaFileNames[s]); } //Parse sequences by group - SequenceParser parser(groupFile, fastaFileNames[s], nameFile); - vector groups = parser.getNamesOfGroups(); + vector groups; + map uniqueNames; + if (hasCount) { + cparser = new SequenceCountParser(nameFile, fastaFileNames[s]); + groups = cparser->getNamesOfGroups(); + uniqueNames = cparser->getAllSeqsMap(); + }else{ + sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile); + groups = sparser->getNamesOfGroups(); + uniqueNames = sparser->getAllSeqsMap(); + } if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } @@ -543,16 +719,20 @@ int ChimeraUchimeCommand::execute(){ if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); } int totalSeqs = 0; - if(processors == 1) { totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); } - else { totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups); } + if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); } + else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]); } if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } - - int totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, alnsFileName); - - m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine(); - m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); + if (hasCount) { delete cparser; } + else { delete sparser; } + + if (!dups) { + int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName); + m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine(); + m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); + } + if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } }else{ @@ -607,9 +787,8 @@ int ChimeraUchimeCommand::execute(){ } } //********************************************************************************************************************** -int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outputFileName, string accnosFileName, string alnsFileName){ +int ChimeraUchimeCommand::deconvoluteResults(map& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){ try { - map uniqueNames = parser.getAllSeqsMap(); map::iterator itUnique; int total = 0; @@ -635,7 +814,7 @@ int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outp //find unique name itUnique = uniqueNames.find(name); - if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; } + if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; } else { itChimeras = chimerasInFile.find((itUnique->second)); @@ -927,14 +1106,15 @@ string ChimeraUchimeCommand::getNamesFile(string& inputFile){ string inputString = "fasta=" + inputFile; m->mothurOut("/******************************************/"); m->mothurOutEndLine(); m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); - + m->mothurCalling = true; + Command* uniqueCommand = new DeconvoluteCommand(inputString); uniqueCommand->execute(); map > filenames = uniqueCommand->getOutputFiles(); delete uniqueCommand; - + m->mothurCalling = false; m->mothurOut("/******************************************/"); m->mothurOutEndLine(); nameFile = filenames["name"][0]; @@ -948,7 +1128,7 @@ string ChimeraUchimeCommand::getNamesFile(string& inputFile){ } } //********************************************************************************************************************** -int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, int start, int end, vector groups){ +int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, int start, int end, vector groups){ try { int totalSeqs = 0; @@ -956,8 +1136,10 @@ int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFNam for (int i = start; i < end; i++) { int start = time(NULL); if (m->control_pressed) { return 0; } - - int error = parser.getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } + + int error; + if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } } + else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } } int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras); totalSeqs += numSeqs; @@ -965,7 +1147,8 @@ int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFNam if (m->control_pressed) { return 0; } //remove file made for uchime - m->mothurRemove(filename); + if (!m->debug) { m->mothurRemove(filename); } + else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); } //append files m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i])); @@ -974,7 +1157,6 @@ int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFNam m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine(); } - return totalSeqs; } @@ -987,6 +1169,11 @@ int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFNam int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){ try { + + outputFName = m->getFullPathName(outputFName); + filename = m->getFullPathName(filename); + alns = m->getFullPathName(alns); + //to allow for spaces in the path outputFName = "\"" + outputFName + "\""; filename = "\"" + filename + "\""; @@ -994,36 +1181,21 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc vector cPara; - string path = m->argv; - string tempPath = path; - for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); } - path = path.substr(0, (tempPath.find_last_of('m'))); - - string uchimeCommand = path; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - uchimeCommand += "uchime "; -#else - uchimeCommand += "uchime"; - uchimeCommand = "\"" + uchimeCommand + "\""; -#endif - - char* tempUchime; + string uchimeCommand = uchimeLocation; + uchimeCommand = "\"" + uchimeCommand + "\" "; + + char* tempUchime; tempUchime= new char[uchimeCommand.length()+1]; *tempUchime = '\0'; strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length()); cPara.push_back(tempUchime); - char* tempIn = new char[8]; - *tempIn = '\0'; strncat(tempIn, "--input", 7); - //strcpy(tempIn, "--input"); - cPara.push_back(tempIn); - char* temp = new char[filename.length()+1]; - *temp = '\0'; strncat(temp, filename.c_str(), filename.length()); - //strcpy(temp, filename.c_str()); - cPara.push_back(temp); - - //are you using a reference file + //are you using a reference file if (templatefile != "self") { + string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted"; + prepFile(filename.substr(1, filename.length()-2), outputFileName); + filename = outputFileName; + filename = "\"" + filename + "\""; //add reference file char* tempRef = new char[5]; //strcpy(tempRef, "--db"); @@ -1035,6 +1207,15 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc cPara.push_back(tempR); } + char* tempIn = new char[8]; + *tempIn = '\0'; strncat(tempIn, "--input", 7); + //strcpy(tempIn, "--input"); + cPara.push_back(tempIn); + char* temp = new char[filename.length()+1]; + *temp = '\0'; strncat(temp, filename.c_str(), filename.length()); + //strcpy(temp, filename.c_str()); + cPara.push_back(temp); + char* tempO = new char[12]; *tempO = '\0'; strncat(tempO, "--uchimeout", 11); //strcpy(tempO, "--uchimeout"); @@ -1238,10 +1419,11 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc //uchime_main(numArgs, uchimeParameters); //cout << "commandString = " << commandString << endl; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #else commandString = "\"" + commandString + "\""; #endif + if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); } system(commandString.c_str()); //free memory @@ -1270,15 +1452,21 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc string name = ""; string chimeraFlag = ""; - in >> chimeraFlag >> name; - - //fix name if needed - if (templatefile == "self") { - name = name.substr(0, name.length()-1); //rip off last / - name = name.substr(0, name.find_last_of('/')); + //in >> chimeraFlag >> name; + + string line = m->getline(in); + vector pieces = m->splitWhiteSpace(line); + if (pieces.size() > 2) { + name = pieces[1]; + //fix name if needed + if (templatefile == "self") { + name = name.substr(0, name.length()-1); //rip off last / + name = name.substr(0, name.find_last_of('/')); + } + + chimeraFlag = pieces[pieces.size()-1]; } - - for (int i = 0; i < 15; i++) { in >> chimeraFlag; } + //for (int i = 0; i < 15; i++) { in >> chimeraFlag; } m->gobble(in); if (chimeraFlag == "Y") { out << name << endl; numChimeras++; } @@ -1287,6 +1475,8 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc in.close(); out.close(); + //if (templatefile != "self") { m->mothurRemove(filename); } + return num; } catch(exception& e) { @@ -1295,6 +1485,34 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc } } /**************************************************************************************************/ +//uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file. +int ChimeraUchimeCommand::prepFile(string filename, string output) { + try { + + ifstream in; + m->openInputFile(filename, in); + + ofstream out; + m->openOutputFile(output, out); + + while (!in.eof()) { + if (m->control_pressed) { break; } + + Sequence seq(in); m->gobble(in); + + if (seq.getName() != "") { seq.printSequence(out); } + } + in.close(); + out.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "ChimeraUchimeCommand", "prepFile"); + exit(1); + } +} +/**************************************************************************************************/ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) { try { @@ -1304,7 +1522,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename int num = 0; vector files; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //break up file into multiple files m->divideFile(filename, processors, files); @@ -1414,8 +1632,8 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename // Allocate memory for thread data. string extension = toString(i) + ".temp"; - uchimeData* tempUchime = new uchimeData(outputFileName+extension, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i); - tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract); + uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i); + tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount); tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract); pDataArray.push_back(tempUchime); @@ -1467,7 +1685,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename } /**************************************************************************************************/ -int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector groups) { +int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, vector groups, string nameFile, string groupFile, string fastaFile) { try { processIDS.clear(); @@ -1487,7 +1705,7 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o lines.push_back(linePair(startIndex, endIndex)); } -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { @@ -1497,7 +1715,7 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driverGroups(parser, outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups); + num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups); //pass numSeqs to parent ofstream out; @@ -1515,7 +1733,7 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o } //do my part - num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups); + num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups); //force parent to wait until all the processes are done for (int i=0;isetBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract); + uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end, i); + tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount); tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract); pDataArray.push_back(tempUchime); @@ -1560,7 +1778,7 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o //using the main process as a worker saves time and memory - num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups); + num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups); //Wait until all threads have terminated. WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);