From: westcott Date: Tue, 17 May 2011 19:06:36 +0000 (+0000) Subject: added parameter to chimera.uchime X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=1e8d08e96f4fe99604a6b3502568de464bf60891 added parameter to chimera.uchime --- diff --git a/addtargets2.cpp b/addtargets2.cpp index 4e0dbd1..57a4ef5 100644 --- a/addtargets2.cpp +++ b/addtargets2.cpp @@ -1,5 +1,5 @@ //#if UCHIMES - +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. #include "myutils.h" #include "chime.h" #include "ultra.h" diff --git a/alignchime.cpp b/alignchime.cpp index d7b05a8..c97c333 100644 --- a/alignchime.cpp +++ b/alignchime.cpp @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include "myutils.h" #include "seq.h" #include "chime.h" diff --git a/alignchimel.cpp b/alignchimel.cpp index ae152af..8f4cbd5 100644 --- a/alignchimel.cpp +++ b/alignchimel.cpp @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include "myutils.h" #include "seq.h" #include "chime.h" diff --git a/alnheuristics.h b/alnheuristics.h index 9a8d283..cd346cf 100644 --- a/alnheuristics.h +++ b/alnheuristics.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef alnheuristics_h #define alnheuristics_h diff --git a/alnparams.cpp b/alnparams.cpp index d1b9036..41ee738 100644 --- a/alnparams.cpp +++ b/alnparams.cpp @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include "myutils.h" #include // for FLT_MAX #include "mx.h" @@ -6,12 +8,12 @@ #define TEST 0 -void SetBLOSUM62(); +void SetBLOSUM62(); void SetNucSubstMx(double Match, double Mismatch); void ReadSubstMx(const string &FileName, Mx &Mxf); - -extern Mx g_SubstMxf; -extern float **g_SubstMx; + +extern Mx g_SubstMxf; +extern float **g_SubstMx; void AlnParams::Clear() { @@ -313,14 +315,14 @@ void AlnParams::SetPenalties(const string &OpenStr, const string &ExtStr) void AlnParams::SetMxFromCmdLine(bool IsNucleo) { if (IsNucleo) - SetNucSubstMx(opt_match, opt_mismatch); + SetNucSubstMx(opt_match, opt_mismatch); else { if (opt_matrix == "") { SubstMxName = "BLOSUM62"; - SetBLOSUM62(); - } + SetBLOSUM62(); + } else { ReadSubstMx(opt_matrix, g_SubstMxf); @@ -361,9 +363,9 @@ void AlnParams::InitFromCmdLine(bool IsNucleo) // Global if (IsNucleo) - Init4(g_SubstMx, -10.0, -1.0, -0.5, -0.5); + Init4(g_SubstMx, -10.0, -1.0, -0.5, -0.5); else - Init4(g_SubstMx, -17.0, -1.0, -0.5, -0.5); + Init4(g_SubstMx, -17.0, -1.0, -0.5, -0.5); SetPenalties(opt_gapopen, opt_gapext); } diff --git a/alnparams.h b/alnparams.h index 4037912..b159179 100644 --- a/alnparams.h +++ b/alnparams.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef alnparams_h #define alnparams_h diff --git a/alpha.cpp b/alpha.cpp index 0efca3b..9d82f35 100644 --- a/alpha.cpp +++ b/alpha.cpp @@ -1,4 +1,5 @@ // Generated by /p/py/alphac.py +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. #include "alpha.h" unsigned g_CharToLetterAminoStop[256] = diff --git a/alpha.h b/alpha.h index e021b7f..e491c9c 100644 --- a/alpha.h +++ b/alpha.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef alpha_h #define alpha_h diff --git a/alpha2.cpp b/alpha2.cpp index 26bc1c6..08f6e21 100644 --- a/alpha2.cpp +++ b/alpha2.cpp @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include "myutils.h" #include "alpha.h" #include "timing.h" diff --git a/blastdb.cpp b/blastdb.cpp index 70349e5..b1a7b48 100644 --- a/blastdb.cpp +++ b/blastdb.cpp @@ -83,8 +83,10 @@ vector BlastDB::findClosestSequences(Sequence* seq, int n) { blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);; blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName()); #else - blastCommand = "\"" + path + "blast\\bin\\blastall\" -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);; + blastCommand = "\"" + path + "blast\\bin\\blastall\" -p blastn -d " + "\"" + dbFileName + "\"" + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n); blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName()); + //wrap entire string in "" + blastCommand = "\"" + blastCommand + "\""; #endif system(blastCommand.c_str()); @@ -140,8 +142,11 @@ vector BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) { blastCommand = path + "blast/bin/megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName()); #else - blastCommand = "\"" + path + "blast\\bin\\megablast\" -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn + blastCommand = "\"" + path + "blast\\bin\\megablast\" -e 1e-10 -d " + "\"" + dbFileName + "\"" + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName()); + //wrap entire string in "" + blastCommand = "\"" + blastCommand + "\""; + #endif system(blastCommand.c_str()); @@ -213,7 +218,9 @@ void BlastDB::generateDB() { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName; // format the database, -o option gives us the ability #else - formatdbCommand = "\"" + path + "blast\\bin\\formatdb\" -p F -o T -i " + dbFileName; + formatdbCommand = "\"" + path + "blast\\bin\\formatdb\" -p F -o T -i " + "\"" + dbFileName + "\""; + //wrap entire string in "" + formatdbCommand = "\"" + formatdbCommand + "\""; #endif system(formatdbCommand.c_str()); // to get the right sequence names, i think. -p F // option tells formatdb that seqs are DNA, not prot diff --git a/chainer.h b/chainer.h index a954dc0..7f54131 100644 --- a/chainer.h +++ b/chainer.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef chainer_h #define chainer_h diff --git a/chime.h b/chime.h index 1b0662a..f143ea1 100644 --- a/chime.h +++ b/chime.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef chime_h #define chime_h diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp index 891b77b..a47aed0 100644 --- a/chimerauchimecommand.cpp +++ b/chimerauchimecommand.cpp @@ -22,6 +22,24 @@ vector ChimeraUchimeCommand::setParameters(){ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "",false,false); parameters.push_back(pabskew); + CommandParameter pchimealns("chimealns", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pchimealns); + CommandParameter pminh("minh", "Number", "", "0.3", "", "", "",false,false); parameters.push_back(pminh); + CommandParameter pmindiv("mindiv", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pmindiv); + CommandParameter pxn("xn", "Number", "", "8.0", "", "", "",false,false); parameters.push_back(pxn); + CommandParameter pdn("dn", "Number", "", "1.4", "", "", "",false,false); parameters.push_back(pdn); + CommandParameter pxa("xa", "Number", "", "1", "", "", "",false,false); parameters.push_back(pxa); + CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks); + CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk); + CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow); + CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid); + CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp); + CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps); + CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2); + CommandParameter pminlen("minlen", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminlen); + CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen); + CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl); + CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -38,12 +56,30 @@ string ChimeraUchimeCommand::getHelpString(){ string helpString = ""; helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n"; helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n"; - helpString += "The chimera.uchime command parameters are fasta, name, reference and processors.\n"; + helpString += "The chimera.uchime command parameters are fasta, name, reference, processors, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n"; helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n"; helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n"; helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n"; helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n"; helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; + helpString += "The abskew parameter can only be used with template=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n"; + helpString += "The chimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n"; + helpString += "The minh parameter - mininum score to report chimera. Default 0.3. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n"; + helpString += "The mindiv parameter - minimum divergence ratio, default 0.5. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease --min h, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n"; + helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n"; + helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n"; + helpString += "The xa parameter - weight of an abstain vote. Default 1. So far, results do not seem to be very sensitive to this parameter, but if you have a good training set might be worth trying. Reasonable values might range from 0.1 to 2.\n"; + helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n"; + helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n"; + helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n"; + helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n"; + helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n"; + helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n"; + helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n"; + helpString += "The minlen parameter is the minimum unaligned sequence length. Defaults 10. Applies to both query and reference sequences.\n"; + helpString += "The maxlen parameter is the maximum unaligned sequence length. Defaults 10000. Applies to both query and reference sequences.\n"; + helpString += "The ucl parameter - use local-X alignments. Default is global-X or false. On tests so far, global-X is always better; this option is retained because it just might work well on some future type of data.\n"; + helpString += "The queryfract parameter - minimum fraction of the query sequence that must be covered by a local-X alignment. Default 0.5. Applies only when ucl is true.\n"; #ifdef USE_MPI helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n"; #endif @@ -66,6 +102,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(){ vector tempOutNames; outputTypes["chimera"] = tempOutNames; outputTypes["accnos"] = tempOutNames; + outputTypes["alns"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "ChimeraUchimeCommand", "ChimeraUchimeCommand"); @@ -98,6 +135,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { vector tempOutNames; outputTypes["chimera"] = tempOutNames; outputTypes["accnos"] = tempOutNames; + outputTypes["alns"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -278,6 +316,38 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); convert(temp, processors); + + abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; } + if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; } + + temp = validParameter.validFile(parameters, "chimealns", false); if (temp == "not found") { temp = "f"; } + chimealns = m->isTrue(temp); + + minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.3"; } else{ useMinH = true; } + mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.5"; } else{ useMindiv = true; } + xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; } + dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; } + xa = validParameter.validFile(parameters, "xa", false); if (xa == "not found") { useXa = false; xa = "1"; } else{ useXa = true; } + chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; } + minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; } + idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; } + minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; } + maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; } + minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; } + maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; } + + temp = validParameter.validFile(parameters, "ucl", false); if (temp == "not found") { temp = "f"; } + ucl = m->isTrue(temp); + + queryfract = validParameter.validFile(parameters, "queryfract", false); if (queryfract == "not found") { useQueryfract = false; queryfract = "0.5"; } else{ useQueryfract = true; } + if (!ucl && useQueryfract) { m->mothurOut("queryfact may only be used when ucl=t, ignoring."); m->mothurOutEndLine(); useQueryfract = false; } + + temp = validParameter.validFile(parameters, "skipgaps", false); if (temp == "not found") { temp = "t"; } + skipgaps = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; } + skipgaps2 = m->isTrue(temp); + } } catch(exception& e) { @@ -291,6 +361,8 @@ int ChimeraUchimeCommand::execute(){ try{ if (abort == true) { if (calledHelp) { return 0; } return 2; } + m->mothurOut("\nuchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\n\n"); + for (int s = 0; s < fastaFileNames.size(); s++) { m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine(); @@ -375,23 +447,27 @@ int ChimeraUchimeCommand::execute(){ } if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it - string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.chimera"; - string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.accnos"; + string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.chimera"; + string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.accnos"; + string alnsFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "uchime.alns"; if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; } int numSeqs = 0; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName); } - else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); } + if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); } + else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); } #else - numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName); + numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName); #endif if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; } - + + //remove file made for uchime + if (templatefile == "self") { remove(fastaFileNames[s].c_str()); } outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName); outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); + if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); } m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); } @@ -418,7 +494,7 @@ int ChimeraUchimeCommand::execute(){ } //********************************************************************************************************************** -int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos){ +int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns){ try { vector cPara; @@ -453,6 +529,160 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc strcpy(tempout, outputFName.c_str()); cPara.push_back(tempout); + if (chimealns) { + char* tempA = new char[12]; + strcpy(tempA, "--uchimealns"); + cPara.push_back(tempA); + char* tempa = new char[alns.length()]; + strcpy(tempa, alns.c_str()); + cPara.push_back(tempa); + } + + if (useAbskew) { + char* tempskew = new char[8]; + strcpy(tempskew, "--abskew"); + cPara.push_back(tempskew); + char* tempSkew = new char[abskew.length()]; + strcpy(tempSkew, abskew.c_str()); + cPara.push_back(tempSkew); + } + + if (useMinH) { + char* tempminh = new char[6]; + strcpy(tempminh, "--minh"); + cPara.push_back(tempminh); + char* tempMinH = new char[minh.length()]; + strcpy(tempMinH, minh.c_str()); + cPara.push_back(tempMinH); + } + + if (useMindiv) { + char* tempmindiv = new char[8]; + strcpy(tempmindiv, "--mindiv"); + cPara.push_back(tempmindiv); + char* tempMindiv = new char[mindiv.length()]; + strcpy(tempMindiv, mindiv.c_str()); + cPara.push_back(tempMindiv); + } + + if (useXn) { + char* tempxn = new char[4]; + strcpy(tempxn, "--xn"); + cPara.push_back(tempxn); + char* tempXn = new char[xn.length()]; + strcpy(tempXn, xn.c_str()); + cPara.push_back(tempXn); + } + + if (useDn) { + char* tempdn = new char[4]; + strcpy(tempdn, "--dn"); + cPara.push_back(tempdn); + char* tempDn = new char[dn.length()]; + strcpy(tempDn, dn.c_str()); + cPara.push_back(tempDn); + } + + if (useXa) { + char* tempxa = new char[4]; + strcpy(tempxa, "--xa"); + cPara.push_back(tempxa); + char* tempXa = new char[xa.length()]; + strcpy(tempXa, xa.c_str()); + cPara.push_back(tempXa); + } + + if (useChunks) { + char* tempchunks = new char[8]; + strcpy(tempchunks, "--chunks"); + cPara.push_back(tempchunks); + char* tempChunks = new char[chunks.length()]; + strcpy(tempChunks, chunks.c_str()); + cPara.push_back(tempChunks); + } + + if (useMinchunk) { + char* tempminchunk = new char[10]; + strcpy(tempminchunk, "--minchunk"); + cPara.push_back(tempminchunk); + char* tempMinchunk = new char[minchunk.length()]; + strcpy(tempMinchunk, minchunk.c_str()); + cPara.push_back(tempMinchunk); + } + + if (useIdsmoothwindow) { + char* tempidsmoothwindow = new char[16]; + strcpy(tempidsmoothwindow, "--idsmoothwindow"); + cPara.push_back(tempidsmoothwindow); + char* tempIdsmoothwindow = new char[idsmoothwindow.length()]; + strcpy(tempIdsmoothwindow, idsmoothwindow.c_str()); + cPara.push_back(tempIdsmoothwindow); + } + + if (useMinsmoothid) { + char* tempminsmoothid = new char[13]; + strcpy(tempminsmoothid, "--minsmoothid"); + cPara.push_back(tempminsmoothid); + char* tempMinsmoothid = new char[minsmoothid.length()]; + strcpy(tempMinsmoothid, minsmoothid.c_str()); + cPara.push_back(tempMinsmoothid); + } + + if (useMaxp) { + char* tempmaxp = new char[6]; + strcpy(tempmaxp, "--maxp"); + cPara.push_back(tempmaxp); + char* tempMaxp = new char[maxp.length()]; + strcpy(tempMaxp, maxp.c_str()); + cPara.push_back(tempMaxp); + } + + if (!skipgaps) { + char* tempskipgaps = new char[14]; + strcpy(tempskipgaps, "--[no]skipgaps"); + cPara.push_back(tempskipgaps); + } + + if (!skipgaps2) { + char* tempskipgaps2 = new char[15]; + strcpy(tempskipgaps2, "--[no]skipgaps2"); + cPara.push_back(tempskipgaps2); + } + + if (useMinlen) { + char* tempminlen = new char[8]; + strcpy(tempminlen, "--minlen"); + cPara.push_back(tempminlen); + char* tempMinlen = new char[minlen.length()]; + strcpy(tempMinlen, minlen.c_str()); + cPara.push_back(tempMinlen); + } + + if (useMaxlen) { + char* tempmaxlen = new char[8]; + strcpy(tempmaxlen, "--maxlen"); + cPara.push_back(tempmaxlen); + char* tempMaxlen = new char[maxlen.length()]; + strcpy(tempMaxlen, maxlen.c_str()); + cPara.push_back(tempMaxlen); + } + + if (ucl) { + char* tempucl = new char[5]; + strcpy(tempucl, "--ucl"); + cPara.push_back(tempucl); + } + + if (useQueryfract) { + char* tempqueryfract = new char[12]; + strcpy(tempqueryfract, "--queryfract"); + cPara.push_back(tempqueryfract); + char* tempQueryfract = new char[queryfract.length()]; + strcpy(tempQueryfract, queryfract.c_str()); + cPara.push_back(tempQueryfract); + } + + char** uchimeParameters; uchimeParameters = new char*[cPara.size()]; for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; } @@ -504,7 +734,7 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc } /**************************************************************************************************/ -int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos) { +int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename, string accnos, string alns) { try { processIDS.clear(); @@ -526,7 +756,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename MPI_Comm_size(MPI_COMM_WORLD, &processors); if (pid == 0) { //you are the root process - num = driver(outputFileName, files[0], accnos); + num = driver(outputFileName, files[0], accnos, alns); if (templatefile != "self") { //wait on chidren @@ -540,11 +770,16 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename m->appendFiles((accnos + toString(j) + ".temp"), accnos); remove((accnos + toString(j) + ".temp").c_str()); + + if (chimealns) { + m->appendFiles((alns + toString(j) + ".temp"), alns); + remove((alns + toString(j) + ".temp").c_str()); + } } } }else{ //you are a child process if (templatefile != "self") { //if template=self we can only use 1 processor - num = driver(outputFileName+toString(pid) + ".temp", files[pid], accnos+toString(pid) + ".temp"); + num = driver(outputFileName+toString(pid) + ".temp", files[pid], accnos+toString(pid) + ".temp", alns+toString(pid) + ".temp"); //send numSeqs to parent MPI_Send(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); @@ -562,7 +797,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp"); + num = driver(outputFileName + toString(getpid()) + ".temp", files[process], accnos + toString(getpid()) + ".temp", alns + toString(getpid()) + ".temp"); //pass numSeqs to parent ofstream out; @@ -580,7 +815,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename } //do my part - num = driver(outputFileName, files[0], accnos); + num = driver(outputFileName, files[0], accnos, alns); //force parent to wait until all the processes are done for (int i=0;iappendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos); remove((accnos + toString(processIDS[i]) + ".temp").c_str()); + + if (chimealns) { + m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns); + remove((alns + toString(processIDS[i]) + ".temp").c_str()); + } } #endif //get rid of the file pieces. diff --git a/chimerauchimecommand.h b/chimerauchimecommand.h index 36e4a39..a7d5ad6 100644 --- a/chimerauchimecommand.h +++ b/chimerauchimecommand.h @@ -26,7 +26,7 @@ public: string getCommandName() { return "chimera.uchime"; } string getCommandCategory() { return "Sequence Processing"; } string getHelpString(); - string getCitation() { return "http://drive5.com/uchime/ \nhttp://www.mothur.org/wiki/Chimera.uchime"; } + string getCitation() { return "uchime by Robert C. Edgar\nhttp://drive5.com/uchime\nThis code is donated to the public domain.\nhttp://www.mothur.org/wiki/Chimera.uchime"; } int execute(); @@ -34,15 +34,11 @@ public: private: vector processIDS; //processid - int driver(string, string, string); - int createProcesses(string, string, string); - -#ifdef USE_MPI - int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector&); -#endif - - bool abort; - string fastafile, templatefile, outputDir, namefile; + int driver(string, string, string, string); + int createProcesses(string, string, string, string); + + bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract; + string fastafile, templatefile, outputDir, namefile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract; int processors; vector outputNames; diff --git a/diagbox.h b/diagbox.h index 0c5846c..2491a58 100644 --- a/diagbox.h +++ b/diagbox.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef diagbox_h #define diagbox_h @@ -184,10 +186,10 @@ Don't have proof.. } }; -typedef const char *(*NWDIAG)(const byte *A, unsigned LA, const byte *B, unsigned LB, - unsigned DiagLo, unsigned DiagHi, bool LeftTerm, bool RightTerm); - -const char *NWBandWrap(NWDIAG NW, const byte *A, unsigned LA, const byte *B, unsigned LB, - unsigned DiagLo, unsigned DiagHi, bool LeftTerm, bool RightTerm); +typedef const char *(*NWDIAG)(const byte *A, unsigned LA, const byte *B, unsigned LB, + unsigned DiagLo, unsigned DiagHi, bool LeftTerm, bool RightTerm); + +const char *NWBandWrap(NWDIAG NW, const byte *A, unsigned LA, const byte *B, unsigned LB, + unsigned DiagLo, unsigned DiagHi, bool LeftTerm, bool RightTerm); #endif // diagbox_h diff --git a/dp.h b/dp.h index c771538..d8581fb 100644 --- a/dp.h +++ b/dp.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef dp_h #define dp_h diff --git a/evalue.h b/evalue.h index c9308db..fe324f8 100644 --- a/evalue.h +++ b/evalue.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef evalue_h #define evalue_h diff --git a/fractid.cpp b/fractid.cpp index f298877..ad4c157 100644 --- a/fractid.cpp +++ b/fractid.cpp @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include "myutils.h" #include "alpha.h" diff --git a/getparents.cpp b/getparents.cpp index d82f902..26a2cd1 100644 --- a/getparents.cpp +++ b/getparents.cpp @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include "myutils.h" #include "chime.h" #include "ultra.h" diff --git a/globalalign2.cpp b/globalalign2.cpp index 6bb35a9..10f8032 100644 --- a/globalalign2.cpp +++ b/globalalign2.cpp @@ -1,5 +1,5 @@ //#if UCHIMES - +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. #include "dp.h" #include "seq.h" diff --git a/help.h b/help.h index 9d7a89f..1534cd1 100644 --- a/help.h +++ b/help.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + "\n" "Usage\n" "-----\n" diff --git a/hsp.h b/hsp.h index 339256f..7f003bf 100644 --- a/hsp.h +++ b/hsp.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef hsp_h #define hsp_h 1 diff --git a/hspfinder.h b/hspfinder.h index 2b8e9d8..dc287e9 100644 --- a/hspfinder.h +++ b/hspfinder.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef hspfinder_h #define hspfinder_h diff --git a/make3way.cpp b/make3way.cpp index ce88f86..dbadd1a 100644 --- a/make3way.cpp +++ b/make3way.cpp @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include "myutils.h" #include "sfasta.h" #include "path.h" diff --git a/mx.cpp b/mx.cpp index 48c347e..9e61d1b 100644 --- a/mx.cpp +++ b/mx.cpp @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include "myutils.h" #include "mx.h" #include "seqdb.h" diff --git a/mx.h b/mx.h index 1438900..2462937 100644 --- a/mx.h +++ b/mx.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef mx_h #define mx_h diff --git a/myopts.h b/myopts.h index ba901ea..812dd8b 100644 --- a/myopts.h +++ b/myopts.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef MY_VERSION #define MY_VERSION "4.2" #endif diff --git a/myutils.cpp b/myutils.cpp index ea983eb..e9d52c2 100755 --- a/myutils.cpp +++ b/myutils.cpp @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include #include #include diff --git a/myutils.h b/myutils.h index b63ad3c..6122054 100644 --- a/myutils.h +++ b/myutils.h @@ -1,8 +1,10 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef myutils_h #define myutils_h #define RCE_MALLOC 0 - + #include #include #include @@ -267,8 +269,8 @@ void GetCmdLine(string &s); extern const char *SVN_VERSION; extern const char *SVN_MODS; -extern bool opt_quiet; -extern bool opt_version; -extern FILE *g_fLog; +extern bool opt_quiet; +extern bool opt_version; +extern FILE *g_fLog; #endif // myutils_h diff --git a/orf.h b/orf.h index 90b29d1..06f73a1 100644 --- a/orf.h +++ b/orf.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef orf_h #define orf_h diff --git a/out.h b/out.h index 4ca50c7..595e08c 100644 --- a/out.h +++ b/out.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef out_h #define out_h diff --git a/path.cpp b/path.cpp index 9340344..555830a 100644 --- a/path.cpp +++ b/path.cpp @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include "myutils.h" #include "path.h" #include "timing.h" diff --git a/path.h b/path.h index f63be7e..b04ab74 100644 --- a/path.h +++ b/path.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef path_h #define path_h diff --git a/searchchime.cpp b/searchchime.cpp index c00a9c4..a1c981a 100644 --- a/searchchime.cpp +++ b/searchchime.cpp @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include "myutils.h" #include "ultra.h" #include "chime.h" diff --git a/seq.h b/seq.h index 9014641..83ba30f 100644 --- a/seq.h +++ b/seq.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef seq_h #define seq_h diff --git a/seqdb.cpp b/seqdb.cpp index 03de189..be3373a 100644 --- a/seqdb.cpp +++ b/seqdb.cpp @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include "myutils.h" #include "seqdb.h" #include "alpha.h" diff --git a/seqdb.h b/seqdb.h index fafbdd9..6907015 100644 --- a/seqdb.h +++ b/seqdb.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef seqdb_h #define seqdb_h diff --git a/setnucmx.cpp b/setnucmx.cpp index 030ff5a..123791d 100644 --- a/setnucmx.cpp +++ b/setnucmx.cpp @@ -1,11 +1,13 @@ -#include "myutils.h" -#include "mx.h" - -Mx g_SubstMxf; -float **g_SubstMx; - -static const char Alphabet[] = "ACGTU"; - +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + +#include "myutils.h" +#include "mx.h" + +Mx g_SubstMxf; +float **g_SubstMx; + +static const char Alphabet[] = "ACGTU"; + void SetNucSubstMx(double Match, double Mismatch) { static bool Done = false; diff --git a/sfasta.cpp b/sfasta.cpp index 5e794c6..9559040 100644 --- a/sfasta.cpp +++ b/sfasta.cpp @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include "sfasta.h" #include "orf.h" #include "alpha.h" @@ -225,8 +227,9 @@ const byte *SFasta::GetNextSeqLo() if (!WarningDone) { if (isgap(c)) - Warning("Ignoring gaps in FASTA file '%s'", - m_FileName.c_str()); + //Warning("Ignoring gaps in FASTA file '%s'", + //m_FileName.c_str()); + ; else if (isprint(c)) Warning("Invalid FASTA file '%s', non-letter '%c' in sequence >%s", m_FileName.c_str(), c, Label); diff --git a/sfasta.h b/sfasta.h index ed2f2ff..79ab961 100644 --- a/sfasta.h +++ b/sfasta.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef sfasta_h #define sfasta_h diff --git a/timing.h b/timing.h index b566e1b..10bd1f7 100644 --- a/timing.h +++ b/timing.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #define TIMING 0 #ifndef timing_h #define timing_h diff --git a/tracebackbit.cpp b/tracebackbit.cpp index 94159cd..d58b900 100644 --- a/tracebackbit.cpp +++ b/tracebackbit.cpp @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include "dp.h" #define TRACE 0 diff --git a/uc.h b/uc.h index 631ea36..8edef83 100644 --- a/uc.h +++ b/uc.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef uc_h #define uc_h diff --git a/uchime_main.cpp b/uchime_main.cpp index 40e7f44..0465c72 100644 --- a/uchime_main.cpp +++ b/uchime_main.cpp @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include "myutils.h" #include "chime.h" #include "seqdb.h" @@ -107,11 +109,7 @@ int uchime_main(int argc, char *argv[]) return 0; } - //printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION); - //printf("by Robert C. Edgar\n"); - //printf("http://drive5.com/uchime\n"); - //printf("This code is donated to the public domain.\n"); - //printf("\n"); + if (!optset_w) opt_w = 8; @@ -203,10 +201,16 @@ int uchime_main(int argc, char *argv[]) WriteChimeHit(g_fUChime, Hit); - ProgressStep(i, QuerySeqCount, "%u/%u chimeras found (%.1f%%)", HitCount, i, Pct(HitCount, i+1)); - + //ProgressStep(i, QuerySeqCount, "%u/%u chimeras found (%.1f%%)", HitCount, i, Pct(HitCount, i+1)); + //report progress + if((i+1) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(i+1) + ", " + toString(HitCount) + " chimeras found."); m->mothurOutEndLine(); } + } + if (!m->control_pressed) { + //report progress + if((QuerySeqCount) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(QuerySeqCount) + ", " + toString(HitCount) + " chimeras found."); m->mothurOutEndLine(); } } + Log("\n"); Log("%s: %u/%u chimeras found (%.1f%%)\n", opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount)); diff --git a/usort.cpp b/usort.cpp index 7afbf42..33f102e 100644 --- a/usort.cpp +++ b/usort.cpp @@ -1,5 +1,5 @@ //#if UCHIMES - +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. #include "myutils.h" #include "seqdb.h" #include "seq.h" diff --git a/viterbifast.cpp b/viterbifast.cpp index 2b20174..3f3fc97 100644 --- a/viterbifast.cpp +++ b/viterbifast.cpp @@ -1,9 +1,11 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include "dp.h" #include "out.h" #include "evalue.h" #define CMP_SIMPLE 0 - + #if SAVE_FAST static Mx g_MxDPM; static Mx g_MxDPD; @@ -66,21 +68,21 @@ static void AllocSave(unsigned LA, unsigned LB) g_DPDSimple = g_DPDSimpleMx->GetData(); g_DPISimple = g_DPISimpleMx->GetData(); #endif - g_MxDPM.Alloc("FastM", LA+1, LB+1); - g_MxDPD.Alloc("FastD", LA+1, LB+1); - g_MxDPI.Alloc("FastI", LA+1, LB+1); - - g_MxTBM.Alloc("FastTBM", LA+1, LB+1); - g_MxTBD.Alloc("FastTBD", LA+1, LB+1); - g_MxTBI.Alloc("FastTBI", LA+1, LB+1); - - g_DPM = g_MxDPM.GetData(); - g_DPD = g_MxDPD.GetData(); - g_DPI = g_MxDPI.GetData(); - - g_TBM = g_MxTBM.GetData(); - g_TBD = g_MxTBD.GetData(); - g_TBI = g_MxTBI.GetData(); + g_MxDPM.Alloc("FastM", LA+1, LB+1); + g_MxDPD.Alloc("FastD", LA+1, LB+1); + g_MxDPI.Alloc("FastI", LA+1, LB+1); + + g_MxTBM.Alloc("FastTBM", LA+1, LB+1); + g_MxTBD.Alloc("FastTBD", LA+1, LB+1); + g_MxTBI.Alloc("FastTBI", LA+1, LB+1); + + g_DPM = g_MxDPM.GetData(); + g_DPD = g_MxDPD.GetData(); + g_DPI = g_MxDPI.GetData(); + + g_TBM = g_MxTBM.GetData(); + g_TBD = g_MxTBD.GetData(); + g_TBI = g_MxTBI.GetData(); } static void SAVE_DPM(unsigned i, unsigned j, float x) diff --git a/windex.h b/windex.h index 0b324ca..b1e002d 100644 --- a/windex.h +++ b/windex.h @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #ifndef windex_h #define windex_h diff --git a/writechhit.cpp b/writechhit.cpp index ea67061..85ec26d 100644 --- a/writechhit.cpp +++ b/writechhit.cpp @@ -1,3 +1,5 @@ +//uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain. + #include "myutils.h" #include "chime.h"