X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=chimeraperseuscommand.h;h=b3d6ccf56858deb00435ace345826450f7d4cd9e;hb=4e3080b15a3d0c02f7ef2fd779ead433ffdece31;hp=3608835cd16ed30f648469f308d2320f23bf423e;hpb=c7e8c2d15bd7cedcfdf18675cb0ea1a0dcd0e3c0;p=mothur.git diff --git a/chimeraperseuscommand.h b/chimeraperseuscommand.h index 3608835..b3d6ccf 100644 --- a/chimeraperseuscommand.h +++ b/chimeraperseuscommand.h @@ -30,9 +30,10 @@ public: vector setParameters(); string getCommandName() { return "chimera.perseus"; } string getCommandCategory() { return "Sequence Processing"; } - string getOutputFileNameTag(string, string); + string getHelpString(); - string getCitation() { return "http://www.mothur.org/wiki/Chimera.perseus\n"; } + string getOutputPattern(string); + string getCitation() { return "Quince C, Lanzen A, Davenport RJ, Turnbaugh PJ (2011). Removing noise from pyrosequenced amplicons. BMC Bioinformatics 12:38.\nEdgar,R.C., Haas,B.J., Clemente,J.C., Quince,C. and Knight,R. (2011), UCHIME improves sensitivity and speed of chimera detection. Bioinformatics 27:2194.\nhttp://www.mothur.org/wiki/Chimera.perseus\n"; } string getDescription() { return "detect chimeric sequences"; } int execute(); @@ -45,7 +46,7 @@ private: linePair(int i, int j) : start(i), end(j) {} }; - bool abort, hasName, hasCount; + bool abort, hasName, hasCount, dups; string fastafile, groupfile, countfile, outputDir, namefile; int processors, alignLength; double cutoff, alpha, beta; @@ -65,6 +66,7 @@ private: int deconvoluteResults(map&, string, string); int driverGroups(string, string, int, int, vector); int createProcessesGroups(string, string, vector, string, string, string); + string removeNs(string); }; /**************************************************************************************************/ @@ -136,11 +138,11 @@ static DWORD WINAPI MyPerseusThreadFunction(LPVOID lpParam){ int totalSeqs = 0; int numChimeras = 0; - for (int i = pDataArray->start; i < pDataArray->end; i++) { + for (int u = pDataArray->start; u < pDataArray->end; u++) { int start = time(NULL); if (pDataArray->m->control_pressed) { if (pDataArray->hasCount) { delete cparser; } { delete parser; } pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); return 0; } - pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Checking sequences from group " + pDataArray->groups[i] + "..."); pDataArray->m->mothurOutEndLine(); + pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Checking sequences from group " + pDataArray->groups[u] + "..."); pDataArray->m->mothurOutEndLine(); //vector sequences = loadSequences(parser, groups[i]); - same function below //////////////////////////////////////////////////////////////////////////////////////// @@ -148,8 +150,8 @@ static DWORD WINAPI MyPerseusThreadFunction(LPVOID lpParam){ int alignLength = 0; vector sequences; if (pDataArray->hasCount) { - vector thisGroupsSeqs = cparser->getSeqs(pDataArray->groups[i]); - map counts = cparser->getCountTable(pDataArray->groups[i]); + vector thisGroupsSeqs = cparser->getSeqs(pDataArray->groups[u]); + map counts = cparser->getCountTable(pDataArray->groups[u]); map::iterator it; for (int i = 0; i < thisGroupsSeqs.size(); i++) { @@ -159,13 +161,18 @@ static DWORD WINAPI MyPerseusThreadFunction(LPVOID lpParam){ it = counts.find(thisGroupsSeqs[i].getName()); if (it == counts.end()) { error = true; pDataArray->m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); } else { + string newSeq = ""; + string tempSeq = thisGroupsSeqs[i].getUnaligned(); + for (int j = 0; j < tempSeq.length(); j++) { if (tempSeq[j] != 'N') { newSeq += tempSeq[j]; } } + thisGroupsSeqs[i].setAligned(newSeq); + sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), it->second)); if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); } } } }else{ - vector thisGroupsSeqs = parser->getSeqs(pDataArray->groups[i]); - map nameMap = parser->getNameMap(pDataArray->groups[i]); + vector thisGroupsSeqs = parser->getSeqs(pDataArray->groups[u]); + map nameMap = parser->getNameMap(pDataArray->groups[u]); map::iterator it; for (int i = 0; i < thisGroupsSeqs.size(); i++) { @@ -176,6 +183,11 @@ static DWORD WINAPI MyPerseusThreadFunction(LPVOID lpParam){ if (it == nameMap.end()) { error = true; pDataArray->m->mothurOut("[ERROR]: " + thisGroupsSeqs[i].getName() + " is in your fasta file and not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); } else { int num = pDataArray->m->getNumNames(it->second); + string newSeq = ""; + string tempSeq = thisGroupsSeqs[i].getUnaligned(); + for (int j = 0; j < tempSeq.length(); j++) { if (tempSeq[j] != 'N') { newSeq += tempSeq[j]; } } + thisGroupsSeqs[i].setAligned(newSeq); + sequences.push_back(seqData(thisGroupsSeqs[i].getName(), thisGroupsSeqs[i].getUnaligned(), num)); if (thisGroupsSeqs[i].getUnaligned().length() > alignLength) { alignLength = thisGroupsSeqs[i].getUnaligned().length(); } } @@ -194,8 +206,8 @@ static DWORD WINAPI MyPerseusThreadFunction(LPVOID lpParam){ //int numSeqs = driver((outputFName + groups[i]), sequences, (accnos+groups[i]), numChimeras); - same function below //////////////////////////////////////////////////////////////////////////////////////// - string chimeraFileName = pDataArray->outputFName+pDataArray->groups[i]; - string accnosFileName = pDataArray->accnos+pDataArray->groups[i]; + string chimeraFileName = pDataArray->outputFName+pDataArray->groups[u]; + string accnosFileName = pDataArray->accnos+pDataArray->groups[u]; vector > correctModel(4); //could be an option in the future to input own model matrix for(int j=0;j<4;j++){ correctModel[j].resize(4); } @@ -339,7 +351,7 @@ static DWORD WINAPI MyPerseusThreadFunction(LPVOID lpParam){ //append files pDataArray->m->appendFiles(chimeraFileName, pDataArray->outputFName); pDataArray->m->mothurRemove(chimeraFileName); pDataArray->m->appendFiles(accnosFileName, pDataArray->accnos); pDataArray->m->mothurRemove(accnosFileName); - pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + pDataArray->groups[i] + "."); pDataArray->m->mothurOutEndLine(); + pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + pDataArray->groups[u] + "."); pDataArray->m->mothurOutEndLine(); if (pDataArray->m->control_pressed) { if (pDataArray->hasCount) { delete cparser; } { delete parser; } pDataArray->m->mothurRemove(pDataArray->outputFName); pDataArray->m->mothurRemove(pDataArray->accnos); return 0; } }