X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=chimerauchimecommand.cpp;h=478156814cb4e429bebc7f7f1c4f4d55db148d4a;hb=cc19310422f125d6628980bd1148e1e816792382;hp=bad4c96b8c74d645208be1158ad11cd2ef924faf;hpb=01f8d2c7d982a6209211f5abbcf2a086fdf60d0a;p=mothur.git diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp index bad4c96..4781568 100644 --- a/chimerauchimecommand.cpp +++ b/chimerauchimecommand.cpp @@ -35,6 +35,8 @@ vector ChimeraUchimeCommand::setParameters(){ CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks); CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk); CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow); + CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups); + //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid); CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp); CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps); @@ -59,12 +61,13 @@ string ChimeraUchimeCommand::getHelpString(){ string helpString = ""; helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n"; helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n"; - helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n"; + helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dups, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n"; helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n"; helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n"; helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n"; helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n"; + helpString += "If the dups parameter is true, then if one group finds the seqeunce to be chimeric, then all groups find it to be chimeric, default=t.\n"; helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n"; @@ -557,6 +560,15 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; } skipgaps2 = m->isTrue(temp); + + string usedDups = "true"; + temp = validParameter.validFile(parameters, "dups", false); + if (temp == "not found") { + if (groupfile != "") { temp = "true"; } + else { temp = "false"; usedDups = ""; } + } + dups = m->isTrue(temp); + if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; } if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; } @@ -713,12 +725,14 @@ int ChimeraUchimeCommand::execute(){ if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } if (hasCount) { delete cparser; } else { delete sparser; } - - int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName); - - m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine(); - m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); + + if (dups) { + int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName); + m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(totalSeqs) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine(); + m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); + } + if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } }else{ @@ -800,7 +814,7 @@ int ChimeraUchimeCommand::deconvoluteResults(map& uniqueNames, s //find unique name itUnique = uniqueNames.find(name); - if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; } + if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; } else { itChimeras = chimerasInFile.find((itUnique->second)); @@ -1168,29 +1182,20 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc vector cPara; string uchimeCommand = uchimeLocation; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) - uchimeCommand += " "; -#else - uchimeCommand = "\"" + uchimeCommand + "\""; -#endif - - char* tempUchime; + uchimeCommand = "\"" + uchimeCommand + "\" "; + + char* tempUchime; tempUchime= new char[uchimeCommand.length()+1]; *tempUchime = '\0'; strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length()); cPara.push_back(tempUchime); - char* tempIn = new char[8]; - *tempIn = '\0'; strncat(tempIn, "--input", 7); - //strcpy(tempIn, "--input"); - cPara.push_back(tempIn); - char* temp = new char[filename.length()+1]; - *temp = '\0'; strncat(temp, filename.c_str(), filename.length()); - //strcpy(temp, filename.c_str()); - cPara.push_back(temp); - - //are you using a reference file + //are you using a reference file if (templatefile != "self") { + string outputFileName = filename.substr(1, filename.length()-2) + ".uchime_formatted"; + prepFile(filename.substr(1, filename.length()-2), outputFileName); + filename = outputFileName; + filename = "\"" + filename + "\""; //add reference file char* tempRef = new char[5]; //strcpy(tempRef, "--db"); @@ -1202,6 +1207,15 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc cPara.push_back(tempR); } + char* tempIn = new char[8]; + *tempIn = '\0'; strncat(tempIn, "--input", 7); + //strcpy(tempIn, "--input"); + cPara.push_back(tempIn); + char* temp = new char[filename.length()+1]; + *temp = '\0'; strncat(temp, filename.c_str(), filename.length()); + //strcpy(temp, filename.c_str()); + cPara.push_back(temp); + char* tempO = new char[12]; *tempO = '\0'; strncat(tempO, "--uchimeout", 11); //strcpy(tempO, "--uchimeout"); @@ -1455,6 +1469,8 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc in.close(); out.close(); + //if (templatefile != "self") { m->mothurRemove(filename); } + return num; } catch(exception& e) { @@ -1463,6 +1479,34 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc } } /**************************************************************************************************/ +//uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file. +int ChimeraUchimeCommand::prepFile(string filename, string output) { + try { + + ifstream in; + m->openInputFile(filename, in); + + ofstream out; + m->openOutputFile(output, out); + + while (!in.eof()) { + if (m->control_pressed) { break; } + + Sequence seq(in); m->gobble(in); + + if (seq.getName() != "") { seq.printSequence(out); } + } + in.close(); + out.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "ChimeraUchimeCommand", "prepFile"); + exit(1); + } +} +/**************************************************************************************************/ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) { try { @@ -1583,7 +1627,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename string extension = toString(i) + ".temp"; uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i); - tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract); + tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount); tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract); pDataArray.push_back(tempUchime); @@ -1715,7 +1759,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen string extension = toString(i) + ".temp"; uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end, i); - tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract); + tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount); tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract); pDataArray.push_back(tempUchime); @@ -1728,7 +1772,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen //using the main process as a worker saves time and memory - num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups); + num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups); //Wait until all threads have terminated. WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);