X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=chimerauchimecommand.cpp;h=6f7ba106ead04e9ccd3b7b290dce4c3a55dabd20;hb=a9dbc22713bfc056a797361dd757b1a5c98e1c01;hp=af427d8b6445e39185661bf51782bec1efc54fe6;hpb=f2408a11ad53c148e3a3329018ca7d04b567f11d;p=mothur.git diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp index af427d8..6f7ba10 100644 --- a/chimerauchimecommand.cpp +++ b/chimerauchimecommand.cpp @@ -9,16 +9,19 @@ #include "chimerauchimecommand.h" #include "deconvolutecommand.h" -#include "uc.h" +//#include "uc.h" #include "sequence.hpp" - +#include "referencedb.h" +#include "systemcommand.h" //********************************************************************************************************************** vector ChimeraUchimeCommand::setParameters(){ try { CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate); CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta); - CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount); + CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -32,7 +35,9 @@ vector ChimeraUchimeCommand::setParameters(){ CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks); CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk); CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow); - CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid); + CommandParameter pdups("dereplicate", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdups); + + //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid); CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp); CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps); CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2); @@ -40,7 +45,7 @@ vector ChimeraUchimeCommand::setParameters(){ CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen); CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl); CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract); - + vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; @@ -56,10 +61,13 @@ string ChimeraUchimeCommand::getHelpString(){ string helpString = ""; helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n"; helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n"; - helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n"; + helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n"; helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n"; + helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n"; helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n"; + helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n"; + helpString += "If the dereplicate parameter is false, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=f.\n"; helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n"; @@ -72,7 +80,7 @@ string ChimeraUchimeCommand::getHelpString(){ helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n"; helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n"; helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n"; - helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n"; + //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n"; helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n"; helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n"; helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n"; @@ -95,6 +103,28 @@ string ChimeraUchimeCommand::getHelpString(){ } } //********************************************************************************************************************** +string ChimeraUchimeCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "chimera") { outputFileName = "uchime.chimeras"; } + else if (type == "accnos") { outputFileName = "uchime.accnos"; } + else if (type == "alns") { outputFileName = "uchime.alns"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "ChimeraUchimeCommand", "getOutputFileNameTag"); + exit(1); + } +} +//********************************************************************************************************************** ChimeraUchimeCommand::ChimeraUchimeCommand(){ try { abort = true; calledHelp = true; @@ -112,7 +142,8 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(){ //*************************************************************************************************************** ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { try { - abort = false; calledHelp = false; + abort = false; calledHelp = false; hasName=false; hasCount=false; + ReferenceDB* rdb = ReferenceDB::getInstance(); //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } @@ -221,9 +252,8 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { //check for required parameters - bool hasName = true; namefile = validParameter.validFile(parameters, "name", false); - if (namefile == "not found") { namefile = ""; hasName = false; } + if (namefile == "not found") { namefile = ""; } else { m->splitAtDash(namefile, nameFileNames); @@ -290,17 +320,176 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { } } } + } + + if (nameFileNames.size() != 0) { hasName = true; } + + //check for required parameters + vector countfileNames; + countfile = validParameter.validFile(parameters, "count", false); + if (countfile == "not found") { + countfile = ""; + }else { + m->splitAtDash(countfile, countfileNames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < countfileNames.size(); i++) { + + bool ignore = false; + if (countfileNames[i] == "current") { + countfileNames[i] = m->getCountTableFile(); + if (nameFileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); } + else { + m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; + //erase from file list + countfileNames.erase(countfileNames.begin()+i); + i--; + } + } + + if (!ignore) { + + if (inputDir != "") { + string path = m->hasPath(countfileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; } + } + + int ableToOpen; + ifstream in; + + ableToOpen = m->openInputFile(countfileNames[i], in, "noerror"); + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]); + m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + countfileNames[i] = tryPath; + } + } + + if (ableToOpen == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]); + m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + countfileNames[i] = tryPath; + } + } + + in.close(); + + if (ableToOpen == 1) { + m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); + //erase from file list + countfileNames.erase(countfileNames.begin()+i); + i--; + }else { + m->setCountTableFile(countfileNames[i]); + } + } + } + } + + if (countfileNames.size() != 0) { hasCount = true; } + + //make sure there is at least one valid file left + if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; } + + if (!hasName && hasCount) { nameFileNames = countfileNames; } + + if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; } + + bool hasGroup = true; + groupfile = validParameter.validFile(parameters, "group", false); + if (groupfile == "not found") { groupfile = ""; hasGroup = false; } + else { + m->splitAtDash(groupfile, groupFileNames); + + //go through files and make sure they are good, if not, then disregard them + for (int i = 0; i < groupFileNames.size(); i++) { + + bool ignore = false; + if (groupFileNames[i] == "current") { + groupFileNames[i] = m->getGroupFile(); + if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); } + else { + m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true; + //erase from file list + groupFileNames.erase(groupFileNames.begin()+i); + i--; + } + } + + if (!ignore) { + + if (inputDir != "") { + string path = m->hasPath(groupFileNames[i]); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; } + } + + int ableToOpen; + ifstream in; + + ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror"); + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]); + m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + groupFileNames[i] = tryPath; + } + } + + if (ableToOpen == 1) { + if (m->getOutputDir() != "") { //default path is set + string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]); + m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + groupFileNames[i] = tryPath; + } + } + + in.close(); + + if (ableToOpen == 1) { + m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); + //erase from file list + groupFileNames.erase(groupFileNames.begin()+i); + i--; + }else { + m->setGroupFile(groupFileNames[i]); + } + } + } //make sure there is at least one valid file left - if (nameFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid name files."); m->mothurOutEndLine(); abort = true; } + if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; } } - if (hasName && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of namefiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; } + if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; } + if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; } //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + string path; it = parameters.find("reference"); //user has given a template file @@ -313,15 +502,33 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { templatefile = validParameter.validFile(parameters, "reference", true); if (templatefile == "not open") { abort = true; } - else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.uchime command."); m->mothurOutEndLine(); abort = true; } + else if (templatefile == "not found") { //check for saved reference sequences + if (rdb->getSavedReference() != "") { + templatefile = rdb->getSavedReference(); + m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine(); + }else { + m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); + m->mothurOutEndLine(); + abort = true; + } + } } }else if (hasName) { templatefile = "self"; } - else { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.uchime command, unless you have a namefile."); m->mothurOutEndLine(); abort = true; } - - + else if (hasCount) { templatefile = "self"; } + else { + if (rdb->getSavedReference() != "") { + templatefile = rdb->getSavedReference(); + m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine(); + }else { + m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); + m->mothurOutEndLine(); + templatefile = ""; abort = true; + } + } + string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); - convert(temp, processors); + m->mothurConvert(temp, processors); abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; } if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; } @@ -337,7 +544,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; } minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; } idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; } - minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; } + //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; } maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; } minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; } maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; } @@ -353,8 +560,66 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; } skipgaps2 = m->isTrue(temp); + + string usedDups = "false"; + temp = validParameter.validFile(parameters, "dereplicate", false); + if (temp == "not found") { + if (groupfile != "") { temp = "false"; } + else { temp = "true"; usedDups = ""; } + } + dups = m->isTrue(temp); - } + + if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; } + if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; } + + //look for uchime exe + path = m->argv; + string tempPath = path; + for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); } + path = path.substr(0, (tempPath.find_last_of('m'))); + + string uchimeCommand; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability + if (m->debug) { + m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); + Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine(); + newCommand->execute(); + delete newCommand; + m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); + newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine(); + newCommand->execute(); + delete newCommand; + } +#else + uchimeCommand = path + "uchime.exe"; +#endif + + //test to make sure uchime exists + ifstream in; + uchimeCommand = m->getFullPathName(uchimeCommand); + int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close(); + if(ableToOpen == 1) { + m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n"); + //check to see if uchime is in the path?? + + string uLocation = m->findProgramPath("uchime"); + + + ifstream in2; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close(); +#else + ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close(); +#endif + + if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } + else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; } + }else { uchimeLocation = uchimeCommand; } + + uchimeLocation = m->getFullPathName(uchimeLocation); + } } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand"); @@ -365,7 +630,8 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { int ChimeraUchimeCommand::execute(){ try{ - if (abort == true) { if (calledHelp) { return 0; } return 2; } + + if (abort == true) { if (calledHelp) { return 0; } return 2; } m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n"); @@ -375,109 +641,131 @@ int ChimeraUchimeCommand::execute(){ int start = time(NULL); string nameFile = ""; - - if (templatefile == "self") { //you want to run uchime with a refernce template - - #ifdef USE_MPI - int pid; - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - if (pid == 0) { //you are the root process - #endif + if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it + string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera"); + string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos"); + string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("alns"); + string newFasta = m->getRootName(fastaFileNames[s]) + "temp"; + //you provided a groupfile + string groupFile = ""; + bool hasGroup = false; + if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; } + else if (hasCount) { + CountTable ct; + if (ct.testGroups(nameFileNames[s])) { hasGroup = true; } + } + + if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups + if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; } if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one nameFile = nameFileNames[s]; - }else { - m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine(); - - //use unique.seqs to create new name and fastafile - string inputString = "fasta=" + fastaFileNames[s]; - m->mothurOut("/******************************************/"); m->mothurOutEndLine(); - m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); - - Command* uniqueCommand = new DeconvoluteCommand(inputString); - uniqueCommand->execute(); - - map > filenames = uniqueCommand->getOutputFiles(); - - delete uniqueCommand; - - m->mothurOut("/******************************************/"); m->mothurOutEndLine(); - - nameFile = filenames["name"][0]; - fastaFileNames[s] = filenames["fasta"][0]; - } - - //create input file for uchime - //read through fastafile and store info - map seqs; - ifstream in; - m->openInputFile(fastaFileNames[s], in); - - while (!in.eof()) { - - if (m->control_pressed) { in.close(); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; } - - Sequence seq(in); m->gobble(in); - seqs[seq.getName()] = seq.getAligned(); - } - in.close(); - + }else { nameFile = getNamesFile(fastaFileNames[s]); } + + map seqs; + readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + //read namefile vector nameMapCount; - int error = m->readNames(nameFile, nameMapCount, seqs); + int error; + if (hasCount) { + CountTable ct; + ct.readTable(nameFile); + for(map::iterator it = seqs.begin(); it != seqs.end(); it++) { + int num = ct.getNumSeqs(it->first); + if (num == 0) { error = 1; } + else { + seqPriorityNode temp(num, it->second, it->first); + nameMapCount.push_back(temp); + } + } + }else { + error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + } + if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } - if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; } - - if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; } - if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; } + printFile(nameMapCount, newFasta); + fastaFileNames[s] = newFasta; + } + + if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + + if (hasGroup) { + if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one + nameFile = nameFileNames[s]; + }else { nameFile = getNamesFile(fastaFileNames[s]); } - sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes); + //Parse sequences by group + vector groups; + map uniqueNames; + if (hasCount) { + cparser = new SequenceCountParser(nameFile, fastaFileNames[s]); + groups = cparser->getNamesOfGroups(); + uniqueNames = cparser->getAllSeqsMap(); + }else{ + sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile); + groups = sparser->getNamesOfGroups(); + uniqueNames = sparser->getAllSeqsMap(); + } + + if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + + //clears files + ofstream out, out1, out2; + m->openOutputFile(outputFileName, out); out.close(); + m->openOutputFile(accnosFileName, out1); out1.close(); + if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); } + int totalSeqs = 0; - string newFasta = fastaFileNames[s] + ".temp"; - ofstream out; - m->openOutputFile(newFasta, out); + if(processors == 1) { totalSeqs = driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); } + else { totalSeqs = createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]); } + + if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + if (hasCount) { delete cparser; } + else { delete sparser; } + + if (!dups) { + int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName); - //print new file in order of - for (int i = 0; i < nameMapCount.size(); i++) { - out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl; + m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine(); + m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); } + + if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + + }else{ + if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + + int numSeqs = 0; + int numChimeras = 0; + + if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); } + else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); } + + //add headings + ofstream out; + m->openOutputFile(outputFileName+".temp", out); + out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n"; out.close(); - fastaFileNames[s] = newFasta; - - #ifdef USE_MPI - } - #endif - if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; } - } - - if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it - string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera"; - string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.accnos"; - string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.alns"; + m->appendFiles(outputFileName, outputFileName+".temp"); + m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str()); + + if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } - if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; } + //remove file made for uchime + if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); } - int numSeqs = 0; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); } - else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); } -#else - numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); -#endif - if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; } - - //remove file made for uchime - if (templatefile == "self") { remove(fastaFileNames[s].c_str()); } + m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine(); + } outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName); outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); } - - m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); } - + //set accnos file as new current accnosfile string current = ""; itTypes = outputTypes.find("accnos"); @@ -499,177 +787,609 @@ int ChimeraUchimeCommand::execute(){ } } //********************************************************************************************************************** +int ChimeraUchimeCommand::deconvoluteResults(map& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){ + try { + map::iterator itUnique; + int total = 0; + + //edit accnos file + ifstream in2; + m->openInputFile(accnosFileName, in2); + + ofstream out2; + m->openOutputFile(accnosFileName+".temp", out2); + + string name; + set namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once + set::iterator itNames; + set chimerasInFile; + set::iterator itChimeras; -int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns){ + + while (!in2.eof()) { + if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; } + + in2 >> name; m->gobble(in2); + + //find unique name + itUnique = uniqueNames.find(name); + + if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; } + else { + itChimeras = chimerasInFile.find((itUnique->second)); + + if (itChimeras == chimerasInFile.end()) { + out2 << itUnique->second << endl; + chimerasInFile.insert((itUnique->second)); + total++; + } + } + } + in2.close(); + out2.close(); + + m->mothurRemove(accnosFileName); + rename((accnosFileName+".temp").c_str(), accnosFileName.c_str()); + + + + //edit chimera file + ifstream in; + m->openInputFile(outputFileName, in); + + ofstream out; + m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); + out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n"; + + float temp1; + string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag; + name = ""; + namesInFile.clear(); + //assumptions - in file each read will always look like - if uchime source is updated, revisit this code. + /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + 0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N + 0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N + */ + + while (!in.eof()) { + + if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; } + + bool print = false; + in >> temp1; m->gobble(in); + in >> name; m->gobble(in); + in >> parent1; m->gobble(in); + in >> parent2; m->gobble(in); + in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag; + m->gobble(in); + + //parse name - name will look like U68590/ab=1/ + string restOfName = ""; + int pos = name.find_first_of('/'); + if (pos != string::npos) { + restOfName = name.substr(pos); + name = name.substr(0, pos); + } + + //find unique name + itUnique = uniqueNames.find(name); + + if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; } + else { + name = itUnique->second; + //is this name already in the file + itNames = namesInFile.find((name)); + + if (itNames == namesInFile.end()) { //no not in file + if (flag == "N") { //are you really a no?? + //is this sequence really not chimeric?? + itChimeras = chimerasInFile.find(name); + + //then you really are a no so print, otherwise skip + if (itChimeras == chimerasInFile.end()) { print = true; } + }else{ print = true; } + } + } + + if (print) { + out << temp1 << '\t' << name << restOfName << '\t'; + namesInFile.insert(name); + + //parse parent1 names + if (parent1 != "*") { + restOfName = ""; + pos = parent1.find_first_of('/'); + if (pos != string::npos) { + restOfName = parent1.substr(pos); + parent1 = parent1.substr(0, pos); + } + + itUnique = uniqueNames.find(parent1); + if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; } + else { out << itUnique->second << restOfName << '\t'; } + }else { out << parent1 << '\t'; } + + //parse parent2 names + if (parent2 != "*") { + restOfName = ""; + pos = parent2.find_first_of('/'); + if (pos != string::npos) { + restOfName = parent2.substr(pos); + parent2 = parent2.substr(0, pos); + } + + itUnique = uniqueNames.find(parent2); + if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; } + else { out << itUnique->second << restOfName << '\t'; } + }else { out << parent2 << '\t'; } + + out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << temp13 << '\t' << flag << endl; + } + } + in.close(); + out.close(); + + m->mothurRemove(outputFileName); + rename((outputFileName+".temp").c_str(), outputFileName.c_str()); + + + //edit anls file + //assumptions - in file each read will always look like - if uchime source is updated, revisit this code. + /* + ------------------------------------------------------------------------ + Query ( 179 nt) F21Fcsw_11639/ab=591/ + ParentA ( 179 nt) F11Fcsw_6529/ab=1625/ + ParentB ( 181 nt) F21Fcsw_12128/ab=1827/ + + A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78 + Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78 + B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80 + Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN + Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000 + Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB + + A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158 + Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158 + B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160 + Diffs NNN N N N N N BB NNN + Votes 000 0 0 0 0 0 ++ 000 + Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB + + A 159 TGGAACTGAGACACGGTCCAA 179 + Q 159 TGGAACTGAGACACGGTCCAA 179 + B 161 TGGAACTGAGACACGGTCCAA 181 + Diffs + Votes + Model BBBBBBBBBBBBBBBBBBBBB + + Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5% + Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047 + */ + if (chimealns) { + ifstream in3; + m->openInputFile(alnsFileName, in3); + + ofstream out3; + m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint); + + name = ""; + namesInFile.clear(); + string line = ""; + + while (!in3.eof()) { + if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; } + + line = ""; + line = m->getline(in3); + string temp = ""; + + if (line != "") { + istringstream iss(line); + iss >> temp; + + //are you a name line + if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) { + int spot = 0; + for (int i = 0; i < line.length(); i++) { + spot = i; + if (line[i] == ')') { break; } + else { out3 << line[i]; } + } + + if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; } + else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; } + else { + out << line[spot] << line[spot+1]; + + name = line.substr(spot+2); + + //parse name - name will either look like U68590/ab=1/ or U68590 + string restOfName = ""; + int pos = name.find_first_of('/'); + if (pos != string::npos) { + restOfName = name.substr(pos); + name = name.substr(0, pos); + } + + //find unique name + itUnique = uniqueNames.find(name); + + if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; } + else { + //only limit repeats on query names + if (temp == "Query") { + itNames = namesInFile.find((itUnique->second)); + + if (itNames == namesInFile.end()) { + out << itUnique->second << restOfName << endl; + namesInFile.insert((itUnique->second)); + } + }else { out << itUnique->second << restOfName << endl; } + } + + } + + }else { //not need to alter line + out3 << line << endl; + } + }else { out3 << endl; } + } + in3.close(); + out3.close(); + + m->mothurRemove(alnsFileName); + rename((alnsFileName+".temp").c_str(), alnsFileName.c_str()); + } + + return total; + } + catch(exception& e) { + m->errorOut(e, "ChimeraUchimeCommand", "deconvoluteResults"); + exit(1); + } +} +//********************************************************************************************************************** +int ChimeraUchimeCommand::printFile(vector& nameMapCount, string filename){ try { + sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes); + + ofstream out; + m->openOutputFile(filename, out); + + //print new file in order of + for (int i = 0; i < nameMapCount.size(); i++) { + out << ">" << nameMapCount[i].name << "/ab=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl; + } + out.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "ChimeraUchimeCommand", "printFile"); + exit(1); + } +} +//********************************************************************************************************************** +int ChimeraUchimeCommand::readFasta(string filename, map& seqs){ + try { + //create input file for uchime + //read through fastafile and store info + ifstream in; + m->openInputFile(filename, in); + + while (!in.eof()) { + + if (m->control_pressed) { in.close(); return 0; } + + Sequence seq(in); m->gobble(in); + seqs[seq.getName()] = seq.getAligned(); + } + in.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "ChimeraUchimeCommand", "readFasta"); + exit(1); + } +} +//********************************************************************************************************************** + +string ChimeraUchimeCommand::getNamesFile(string& inputFile){ + try { + string nameFile = ""; + + m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine(); + + //use unique.seqs to create new name and fastafile + string inputString = "fasta=" + inputFile; + m->mothurOut("/******************************************/"); m->mothurOutEndLine(); + m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); + m->mothurCalling = true; + + Command* uniqueCommand = new DeconvoluteCommand(inputString); + uniqueCommand->execute(); + + map > filenames = uniqueCommand->getOutputFiles(); + + delete uniqueCommand; + m->mothurCalling = false; + m->mothurOut("/******************************************/"); m->mothurOutEndLine(); + + nameFile = filenames["name"][0]; + inputFile = filenames["fasta"][0]; + + return nameFile; + } + catch(exception& e) { + m->errorOut(e, "ChimeraUchimeCommand", "getNamesFile"); + exit(1); + } +} +//********************************************************************************************************************** +int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, string accnos, string alns, int start, int end, vector groups){ + try { + + int totalSeqs = 0; + int numChimeras = 0; + + for (int i = start; i < end; i++) { + int start = time(NULL); if (m->control_pressed) { return 0; } + + int error; + if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } } + else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } } + + int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras); + totalSeqs += numSeqs; + + if (m->control_pressed) { return 0; } + + //remove file made for uchime + if (!m->debug) { m->mothurRemove(filename); } + else { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); } + + //append files + m->appendFiles((outputFName+groups[i]), outputFName); m->mothurRemove((outputFName+groups[i])); + m->appendFiles((accnos+groups[i]), accnos); m->mothurRemove((accnos+groups[i])); + if (chimealns) { m->appendFiles((alns+groups[i]), alns); m->mothurRemove((alns+groups[i])); } + + m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine(); + } + return totalSeqs; + + } + catch(exception& e) { + m->errorOut(e, "ChimeraUchimeCommand", "driverGroups"); + exit(1); + } +} +//********************************************************************************************************************** + +int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){ + try { + + outputFName = m->getFullPathName(outputFName); + filename = m->getFullPathName(filename); + alns = m->getFullPathName(alns); + + //to allow for spaces in the path + outputFName = "\"" + outputFName + "\""; + filename = "\"" + filename + "\""; + alns = "\"" + alns + "\""; + vector cPara; - char* tempUchime = new char[8]; - strcpy(tempUchime, "./uchime "); + string uchimeCommand = uchimeLocation; + uchimeCommand = "\"" + uchimeCommand + "\" "; + + char* tempUchime; + tempUchime= new char[uchimeCommand.length()+1]; + *tempUchime = '\0'; + strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length()); cPara.push_back(tempUchime); - char* tempIn = new char[7]; - strcpy(tempIn, "--input"); - cPara.push_back(tempIn); - char* temp = new char[filename.length()]; - strcpy(temp, filename.c_str()); - cPara.push_back(temp); - - //are you using a reference file + //are you using a reference file if (templatefile != "self") { - + string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted"; + prepFile(filename.substr(1, filename.length()-2), outputFileName); + filename = outputFileName; + filename = "\"" + filename + "\""; //add reference file - char* tempRef = new char[4]; - strcpy(tempRef, "--db"); + char* tempRef = new char[5]; + //strcpy(tempRef, "--db"); + *tempRef = '\0'; strncat(tempRef, "--db", 4); cPara.push_back(tempRef); - char* tempR = new char[templatefile.length()]; - strcpy(tempR, templatefile.c_str()); + char* tempR = new char[templatefile.length()+1]; + //strcpy(tempR, templatefile.c_str()); + *tempR = '\0'; strncat(tempR, templatefile.c_str(), templatefile.length()); cPara.push_back(tempR); } - char* tempO = new char[11]; - strcpy(tempO, "--uchimeout"); + char* tempIn = new char[8]; + *tempIn = '\0'; strncat(tempIn, "--input", 7); + //strcpy(tempIn, "--input"); + cPara.push_back(tempIn); + char* temp = new char[filename.length()+1]; + *temp = '\0'; strncat(temp, filename.c_str(), filename.length()); + //strcpy(temp, filename.c_str()); + cPara.push_back(temp); + + char* tempO = new char[12]; + *tempO = '\0'; strncat(tempO, "--uchimeout", 11); + //strcpy(tempO, "--uchimeout"); cPara.push_back(tempO); - char* tempout = new char[outputFName.length()]; - strcpy(tempout, outputFName.c_str()); + char* tempout = new char[outputFName.length()+1]; + //strcpy(tempout, outputFName.c_str()); + *tempout = '\0'; strncat(tempout, outputFName.c_str(), outputFName.length()); cPara.push_back(tempout); if (chimealns) { - char* tempA = new char[12]; - strcpy(tempA, "--uchimealns"); + char* tempA = new char[13]; + *tempA = '\0'; strncat(tempA, "--uchimealns", 12); + //strcpy(tempA, "--uchimealns"); cPara.push_back(tempA); - char* tempa = new char[alns.length()]; - strcpy(tempa, alns.c_str()); + char* tempa = new char[alns.length()+1]; + //strcpy(tempa, alns.c_str()); + *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length()); cPara.push_back(tempa); } if (useAbskew) { - char* tempskew = new char[8]; - strcpy(tempskew, "--abskew"); + char* tempskew = new char[9]; + *tempskew = '\0'; strncat(tempskew, "--abskew", 8); + //strcpy(tempskew, "--abskew"); cPara.push_back(tempskew); - char* tempSkew = new char[abskew.length()]; - strcpy(tempSkew, abskew.c_str()); + char* tempSkew = new char[abskew.length()+1]; + //strcpy(tempSkew, abskew.c_str()); + *tempSkew = '\0'; strncat(tempSkew, abskew.c_str(), abskew.length()); cPara.push_back(tempSkew); } if (useMinH) { - char* tempminh = new char[6]; - strcpy(tempminh, "--minh"); + char* tempminh = new char[7]; + *tempminh = '\0'; strncat(tempminh, "--minh", 6); + //strcpy(tempminh, "--minh"); cPara.push_back(tempminh); - char* tempMinH = new char[minh.length()]; - strcpy(tempMinH, minh.c_str()); + char* tempMinH = new char[minh.length()+1]; + *tempMinH = '\0'; strncat(tempMinH, minh.c_str(), minh.length()); + //strcpy(tempMinH, minh.c_str()); cPara.push_back(tempMinH); } if (useMindiv) { - char* tempmindiv = new char[8]; - strcpy(tempmindiv, "--mindiv"); + char* tempmindiv = new char[9]; + *tempmindiv = '\0'; strncat(tempmindiv, "--mindiv", 8); + //strcpy(tempmindiv, "--mindiv"); cPara.push_back(tempmindiv); - char* tempMindiv = new char[mindiv.length()]; - strcpy(tempMindiv, mindiv.c_str()); + char* tempMindiv = new char[mindiv.length()+1]; + *tempMindiv = '\0'; strncat(tempMindiv, mindiv.c_str(), mindiv.length()); + //strcpy(tempMindiv, mindiv.c_str()); cPara.push_back(tempMindiv); } if (useXn) { - char* tempxn = new char[4]; - strcpy(tempxn, "--xn"); + char* tempxn = new char[5]; + //strcpy(tempxn, "--xn"); + *tempxn = '\0'; strncat(tempxn, "--xn", 4); cPara.push_back(tempxn); - char* tempXn = new char[xn.length()]; - strcpy(tempXn, xn.c_str()); + char* tempXn = new char[xn.length()+1]; + //strcpy(tempXn, xn.c_str()); + *tempXn = '\0'; strncat(tempXn, xn.c_str(), xn.length()); cPara.push_back(tempXn); } if (useDn) { - char* tempdn = new char[4]; - strcpy(tempdn, "--dn"); + char* tempdn = new char[5]; + //strcpy(tempdn, "--dn"); + *tempdn = '\0'; strncat(tempdn, "--dn", 4); cPara.push_back(tempdn); - char* tempDn = new char[dn.length()]; - strcpy(tempDn, dn.c_str()); + char* tempDn = new char[dn.length()+1]; + *tempDn = '\0'; strncat(tempDn, dn.c_str(), dn.length()); + //strcpy(tempDn, dn.c_str()); cPara.push_back(tempDn); } if (useXa) { - char* tempxa = new char[4]; - strcpy(tempxa, "--xa"); + char* tempxa = new char[5]; + //strcpy(tempxa, "--xa"); + *tempxa = '\0'; strncat(tempxa, "--xa", 4); cPara.push_back(tempxa); - char* tempXa = new char[xa.length()]; - strcpy(tempXa, xa.c_str()); + char* tempXa = new char[xa.length()+1]; + *tempXa = '\0'; strncat(tempXa, xa.c_str(), xa.length()); + //strcpy(tempXa, xa.c_str()); cPara.push_back(tempXa); } if (useChunks) { - char* tempchunks = new char[8]; - strcpy(tempchunks, "--chunks"); + char* tempchunks = new char[9]; + //strcpy(tempchunks, "--chunks"); + *tempchunks = '\0'; strncat(tempchunks, "--chunks", 8); cPara.push_back(tempchunks); - char* tempChunks = new char[chunks.length()]; - strcpy(tempChunks, chunks.c_str()); + char* tempChunks = new char[chunks.length()+1]; + *tempChunks = '\0'; strncat(tempChunks, chunks.c_str(), chunks.length()); + //strcpy(tempChunks, chunks.c_str()); cPara.push_back(tempChunks); } if (useMinchunk) { - char* tempminchunk = new char[10]; - strcpy(tempminchunk, "--minchunk"); + char* tempminchunk = new char[11]; + //strcpy(tempminchunk, "--minchunk"); + *tempminchunk = '\0'; strncat(tempminchunk, "--minchunk", 10); cPara.push_back(tempminchunk); - char* tempMinchunk = new char[minchunk.length()]; - strcpy(tempMinchunk, minchunk.c_str()); + char* tempMinchunk = new char[minchunk.length()+1]; + *tempMinchunk = '\0'; strncat(tempMinchunk, minchunk.c_str(), minchunk.length()); + //strcpy(tempMinchunk, minchunk.c_str()); cPara.push_back(tempMinchunk); } if (useIdsmoothwindow) { - char* tempidsmoothwindow = new char[16]; - strcpy(tempidsmoothwindow, "--idsmoothwindow"); + char* tempidsmoothwindow = new char[17]; + *tempidsmoothwindow = '\0'; strncat(tempidsmoothwindow, "--idsmoothwindow", 16); + //strcpy(tempidsmoothwindow, "--idsmoothwindow"); cPara.push_back(tempidsmoothwindow); - char* tempIdsmoothwindow = new char[idsmoothwindow.length()]; - strcpy(tempIdsmoothwindow, idsmoothwindow.c_str()); + char* tempIdsmoothwindow = new char[idsmoothwindow.length()+1]; + *tempIdsmoothwindow = '\0'; strncat(tempIdsmoothwindow, idsmoothwindow.c_str(), idsmoothwindow.length()); + //strcpy(tempIdsmoothwindow, idsmoothwindow.c_str()); cPara.push_back(tempIdsmoothwindow); } - if (useMinsmoothid) { - char* tempminsmoothid = new char[13]; - strcpy(tempminsmoothid, "--minsmoothid"); + /*if (useMinsmoothid) { + char* tempminsmoothid = new char[14]; + //strcpy(tempminsmoothid, "--minsmoothid"); + *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13); cPara.push_back(tempminsmoothid); - char* tempMinsmoothid = new char[minsmoothid.length()]; - strcpy(tempMinsmoothid, minsmoothid.c_str()); + char* tempMinsmoothid = new char[minsmoothid.length()+1]; + *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length()); + //strcpy(tempMinsmoothid, minsmoothid.c_str()); cPara.push_back(tempMinsmoothid); - } + }*/ if (useMaxp) { - char* tempmaxp = new char[6]; - strcpy(tempmaxp, "--maxp"); + char* tempmaxp = new char[7]; + //strcpy(tempmaxp, "--maxp"); + *tempmaxp = '\0'; strncat(tempmaxp, "--maxp", 6); cPara.push_back(tempmaxp); - char* tempMaxp = new char[maxp.length()]; - strcpy(tempMaxp, maxp.c_str()); + char* tempMaxp = new char[maxp.length()+1]; + *tempMaxp = '\0'; strncat(tempMaxp, maxp.c_str(), maxp.length()); + //strcpy(tempMaxp, maxp.c_str()); cPara.push_back(tempMaxp); } if (!skipgaps) { - char* tempskipgaps = new char[14]; - strcpy(tempskipgaps, "--[no]skipgaps"); + char* tempskipgaps = new char[13]; + //strcpy(tempskipgaps, "--[no]skipgaps"); + *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12); cPara.push_back(tempskipgaps); } if (!skipgaps2) { - char* tempskipgaps2 = new char[15]; - strcpy(tempskipgaps2, "--[no]skipgaps2"); + char* tempskipgaps2 = new char[14]; + //strcpy(tempskipgaps2, "--[no]skipgaps2"); + *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13); cPara.push_back(tempskipgaps2); } if (useMinlen) { - char* tempminlen = new char[8]; - strcpy(tempminlen, "--minlen"); + char* tempminlen = new char[9]; + *tempminlen = '\0'; strncat(tempminlen, "--minlen", 8); + //strcpy(tempminlen, "--minlen"); cPara.push_back(tempminlen); - char* tempMinlen = new char[minlen.length()]; - strcpy(tempMinlen, minlen.c_str()); + char* tempMinlen = new char[minlen.length()+1]; + //strcpy(tempMinlen, minlen.c_str()); + *tempMinlen = '\0'; strncat(tempMinlen, minlen.c_str(), minlen.length()); cPara.push_back(tempMinlen); } if (useMaxlen) { - char* tempmaxlen = new char[8]; - strcpy(tempmaxlen, "--maxlen"); + char* tempmaxlen = new char[9]; + //strcpy(tempmaxlen, "--maxlen"); + *tempmaxlen = '\0'; strncat(tempmaxlen, "--maxlen", 8); cPara.push_back(tempmaxlen); - char* tempMaxlen = new char[maxlen.length()]; - strcpy(tempMaxlen, maxlen.c_str()); + char* tempMaxlen = new char[maxlen.length()+1]; + *tempMaxlen = '\0'; strncat(tempMaxlen, maxlen.c_str(), maxlen.length()); + //strcpy(tempMaxlen, maxlen.c_str()); cPara.push_back(tempMaxlen); } @@ -680,26 +1400,41 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc } if (useQueryfract) { - char* tempqueryfract = new char[12]; - strcpy(tempqueryfract, "--queryfract"); + char* tempqueryfract = new char[13]; + *tempqueryfract = '\0'; strncat(tempqueryfract, "--queryfract", 12); + //strcpy(tempqueryfract, "--queryfract"); cPara.push_back(tempqueryfract); - char* tempQueryfract = new char[queryfract.length()]; - strcpy(tempQueryfract, queryfract.c_str()); + char* tempQueryfract = new char[queryfract.length()+1]; + *tempQueryfract = '\0'; strncat(tempQueryfract, queryfract.c_str(), queryfract.length()); + //strcpy(tempQueryfract, queryfract.c_str()); cPara.push_back(tempQueryfract); } char** uchimeParameters; uchimeParameters = new char*[cPara.size()]; - for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; } - int numArgs = cPara.size(); + string commandString = ""; + for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; } + //int numArgs = cPara.size(); - uchime_main(numArgs, uchimeParameters); + //uchime_main(numArgs, uchimeParameters); + //cout << "commandString = " << commandString << endl; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#else + commandString = "\"" + commandString + "\""; +#endif + if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); } + system(commandString.c_str()); //free memory - for(int i = 0; i < cPara.size(); i++) { delete[] cPara[i]; } + for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; } delete[] uchimeParameters; + //remove "" from filenames + outputFName = outputFName.substr(1, outputFName.length()-2); + filename = filename.substr(1, filename.length()-2); + alns = alns.substr(1, alns.length()-2); + if (m->control_pressed) { return 0; } //create accnos file from uchime results @@ -710,29 +1445,38 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc m->openOutputFile(accnos, out); int num = 0; + numChimeras = 0; while(!in.eof()) { if (m->control_pressed) { break; } string name = ""; string chimeraFlag = ""; - in >> chimeraFlag >> name; + //in >> chimeraFlag >> name; - //fix name if needed - if (templatefile == "self") { - name = name.substr(0, name.length()-1); //rip off last / - name = name.substr(0, name.find_last_of('/')); + string line = m->getline(in); + vector pieces = m->splitWhiteSpace(line); + if (pieces.size() > 2) { + name = pieces[1]; + //fix name if needed + if (templatefile == "self") { + name = name.substr(0, name.length()-1); //rip off last / + name = name.substr(0, name.find_last_of('/')); + } + + chimeraFlag = pieces[pieces.size()-1]; } - - for (int i = 0; i < 15; i++) { in >> chimeraFlag; } + //for (int i = 0; i < 15; i++) { in >> chimeraFlag; } m->gobble(in); - if (chimeraFlag == "Y") { out << name << endl; } + if (chimeraFlag == "Y") { out << name << endl; numChimeras++; } num++; } in.close(); out.close(); + //if (templatefile != "self") { m->mothurRemove(filename); } + return num; } catch(exception& e) { @@ -741,62 +1485,228 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc } } /**************************************************************************************************/ +//uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file. +int ChimeraUchimeCommand::prepFile(string filename, string output) { + try { + + ifstream in; + m->openInputFile(filename, in); + + ofstream out; + m->openOutputFile(output, out); + + while (!in.eof()) { + if (m->control_pressed) { break; } + + Sequence seq(in); m->gobble(in); + + if (seq.getName() != "") { seq.printSequence(out); } + } + in.close(); + out.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "ChimeraUchimeCommand", "prepFile"); + exit(1); + } +} +/**************************************************************************************************/ -int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns) { +int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) { try { processIDS.clear(); int process = 1; int num = 0; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - //break up file into multiple files vector files; + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + //break up file into multiple files m->divideFile(filename, processors, files); if (m->control_pressed) { return 0; } + + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", numChimeras); + + //pass numSeqs to parent + ofstream out; + string tempFile = outputFileName + toString(getpid()) + ".num.temp"; + m->openOutputFile(tempFile, out); + out << num << endl; + out << numChimeras << endl; + out.close(); + + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } + } -#ifdef USE_MPI - int pid, numSeqsPerProcessor; - int tag = 2001; + //do my part + num = driver(outputFileName, files[0], accnos, alns, numChimeras); - MPI_Status status; - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - MPI_Comm_size(MPI_COMM_WORLD, &processors); - - if (pid == 0) { //you are the root process - num = driver(outputFileName, files[0], accnos, alns); + //force parent to wait until all the processes are done + for (int i=0;iopenInputFile(tempFile, in); + if (!in.eof()) { + int tempNum = 0; + in >> tempNum; m->gobble(in); + num += tempNum; + in >> tempNum; + numChimeras += tempNum; + } + in.close(); m->mothurRemove(tempFile); + } +#else + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the preClusterData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + //divide file + int count = 0; + int spot = 0; + map filehandles; + map::iterator it3; + + ofstream* temp; + for (int i = 0; i < processors; i++) { + temp = new ofstream; + filehandles[i] = temp; + m->openOutputFile(filename+toString(i)+".temp", *(temp)); + files.push_back(filename+toString(i)+".temp"); + } + + ifstream in; + m->openInputFile(filename, in); + + while(!in.eof()) { - if (templatefile != "self") { - //wait on chidren - for(int j = 1; j < processors; j++) { - int temp; - MPI_Recv(&temp, 1, MPI_INT, j, tag, MPI_COMM_WORLD, &status); - num += temp; - - m->appendFiles((outputFileName + toString(j) + ".temp"), outputFileName); - remove((outputFileName + toString(j) + ".temp").c_str()); - - m->appendFiles((accnos + toString(j) + ".temp"), accnos); - remove((accnos + toString(j) + ".temp").c_str()); - - if (chimealns) { - m->appendFiles((alns + toString(j) + ".temp"), alns); - remove((alns + toString(j) + ".temp").c_str()); - } - } + if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; } + + Sequence tempSeq(in); m->gobble(in); + + if (tempSeq.getName() != "") { + tempSeq.printSequence(*(filehandles[spot])); + spot++; count++; + if (spot == processors) { spot = 0; } } - }else{ //you are a child process - if (templatefile != "self") { //if template=self we can only use 1 processor - num = driver(outputFileName+toString(pid) + ".temp", files[pid], accnos+toString(pid) + ".temp", alns+toString(pid) + ".temp"); - - //send numSeqs to parent - MPI_Send(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); + } + in.close(); + + //delete memory + for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { + (*(it3->second)).close(); + delete it3->second; + } + + //sanity check for number of processors + if (count < processors) { processors = count; } + + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + vector dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction + + //Create processor worker threads. + for( int i=1; isetBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount); + tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract); + + pDataArray.push_back(tempUchime); + processIDS.push_back(i); + + //MySeqSumThreadFunction is in header. It must be global or static to work with the threads. + //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier + hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } + + + //using the main process as a worker saves time and memory + num = driver(outputFileName, files[0], accnos, alns, numChimeras); + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + num += pDataArray[i]->count; + numChimeras += pDataArray[i]->numChimeras; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } +#endif + + //append output files + for(int i=0;iappendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName); + m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp")); + + m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos); + m->mothurRemove((accnos + toString(processIDS[i]) + ".temp")); + + if (chimealns) { + m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns); + m->mothurRemove((alns + toString(processIDS[i]) + ".temp")); } } + + //get rid of the file pieces. + for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); } + return num; + } + catch(exception& e) { + m->errorOut(e, "ChimeraUchimeCommand", "createProcesses"); + exit(1); + } +} +/**************************************************************************************************/ - MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case -#else +int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, vector groups, string nameFile, string groupFile, string fastaFile) { + try { + + processIDS.clear(); + int process = 1; + int num = 0; + //sanity check + if (groups.size() < processors) { processors = groups.size(); } + + //divide the groups between the processors + vector lines; + int numGroupsPerProcessor = groups.size() / processors; + for (int i = 0; i < processors; i++) { + int startIndex = i * numGroupsPerProcessor; + int endIndex = (i+1) * numGroupsPerProcessor; + if(i == (processors - 1)){ endIndex = groups.size(); } + lines.push_back(linePair(startIndex, endIndex)); + } + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + //loop through and create all the processes you want while (process != processors) { int pid = fork(); @@ -805,11 +1715,11 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp"); + num = driverGroups(outputFName + toString(getpid()) + ".temp", filename + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp", lines[process].start, lines[process].end, groups); //pass numSeqs to parent ofstream out; - string tempFile = outputFileName + toString(getpid()) + ".num.temp"; + string tempFile = outputFName + toString(getpid()) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); @@ -823,7 +1733,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename } //do my part - num = driver(outputFileName, files[0], accnos, alns); + num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups); //force parent to wait until all the processes are done for (int i=0;iopenInputFile(tempFile, in); if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; } - in.close(); remove(tempFile.c_str()); + in.close(); m->mothurRemove(tempFile); + } + +#else + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the uchimeData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + + //Create processor worker threads. + for( int i=1; isetBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount); + tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract); + + pDataArray.push_back(tempUchime); + processIDS.push_back(i); + + //MyUchimeThreadFunction is in header. It must be global or static to work with the threads. + //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier + hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); } + //using the main process as a worker saves time and memory + num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups); + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + num += pDataArray[i]->count; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } +#endif + + //append output files - for(int i=0;iappendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName); - remove((outputFileName + toString(processIDS[i]) + ".temp").c_str()); + for(int i=0;iappendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName); + m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp")); m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos); - remove((accnos + toString(processIDS[i]) + ".temp").c_str()); + m->mothurRemove((accnos + toString(processIDS[i]) + ".temp")); if (chimealns) { m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns); - remove((alns + toString(processIDS[i]) + ".temp").c_str()); + m->mothurRemove((alns + toString(processIDS[i]) + ".temp")); } } -#endif - //get rid of the file pieces. - for (int i = 0; i < files.size(); i++) { remove(files[i].c_str()); } -#endif + return num; + } catch(exception& e) { - m->errorOut(e, "ChimeraUchimeCommand", "createProcesses"); + m->errorOut(e, "ChimeraUchimeCommand", "createProcessesGroups"); exit(1); } } - /**************************************************************************************************/