X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=classifyseqscommand.h;h=acee70c5cb474dee4e25eea4a6dcf6b5bd55fa06;hb=4c302368ef34f0d897afefc7853edf86fb45b9f3;hp=58de96937d995d5eb8a2169fcda3950a5f34b4d2;hpb=ae57e166b2ed7b475ec3f466106bd76fabadd063;p=mothur.git diff --git a/classifyseqscommand.h b/classifyseqscommand.h index 58de969..acee70c 100644 --- a/classifyseqscommand.h +++ b/classifyseqscommand.h @@ -10,7 +10,7 @@ * */ -#include "mothur.h" + #include "command.hpp" #include "classify.h" #include "referencedb.h" @@ -72,16 +72,16 @@ private: string fastaFileName, templateFileName, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile; int processors, kmerSize, numWanted, cutoff, iters; float match, misMatch, gapOpen, gapExtend; - bool abort, probs, save; + bool abort, probs, save, flip; - int driver(linePair*, string, string, string); + int driver(linePair*, string, string, string, string); void appendTaxFiles(string, string); - int createProcesses(string, string, string); + int createProcesses(string, string, string, string); string addUnclassifieds(string, int); int MPIReadNamesFile(string); #ifdef USE_MPI - int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector&); + int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector&); #endif }; @@ -89,20 +89,21 @@ private: //custom data structure for threads to use. // This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). -typedef struct classifyData { +struct classifyData { string taxFName; string tempTFName; string filename; - string search, taxonomyFileName, templateFileName, method; + string search, taxonomyFileName, templateFileName, method, accnos; unsigned long long start; unsigned long long end; MothurOut* m; float match, misMatch, gapOpen, gapExtend; int count, kmerSize, threadID, cutoff, iters, numWanted; - bool probs; + bool probs, flip; classifyData(){} - classifyData(bool p, string me, string te, string tx, string a, string r, string f, string se, int ks, int i, int numW, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int cut, int tid) { + classifyData(string acc, bool p, string me, string te, string tx, string a, string r, string f, string se, int ks, int i, int numW, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int cut, int tid, bool fli) { + accnos = acc; taxonomyFileName = tx; templateFileName = te; taxFName = a; @@ -124,11 +125,12 @@ typedef struct classifyData { threadID = tid; probs = p; count = 0; + flip = fli; } }; /**************************************************************************************************/ -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #else static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){ classifyData* pDataArray; @@ -141,6 +143,9 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){ ofstream outTaxSimple; pDataArray->m->openOutputFile(pDataArray->tempTFName, outTaxSimple); + ofstream outAcc; + pDataArray->m->openOutputFile(pDataArray->accnos, outAcc); + ifstream inFASTA; pDataArray->m->openInputFile(pDataArray->filename, inFASTA); @@ -157,12 +162,12 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){ //make classify Classify* myclassify; - if(pDataArray->method == "bayesian"){ myclassify = new Bayesian("saved", "saved", pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID); } - else if(pDataArray->method == "knn"){ myclassify = new Knn("saved", "saved", pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID); } + if(pDataArray->method == "bayesian"){ myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip); } + else if(pDataArray->method == "knn"){ myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID); } else { pDataArray->m->mothurOut(pDataArray->search + " is not a valid method option. I will run the command using bayesian."); pDataArray->m->mothurOutEndLine(); - myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID); + myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip); } if (pDataArray->m->control_pressed) { delete myclassify; return 0; } @@ -180,16 +185,19 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){ if (pDataArray->m->control_pressed) { delete candidateSeq; return 0; } - if (taxonomy != "bad seq") { - //output confidence scores or not - if (pDataArray->probs) { - outTax << candidateSeq->getName() << '\t' << taxonomy << endl; - }else{ - outTax << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl; - } - - outTaxSimple << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl; + if (taxonomy == "unknown;") { pDataArray->m->mothurOut("[WARNING]: " + candidateSeq->getName() + " could not be classified. You can use the remove.lineage command with taxon=unknown; to remove such sequences."); pDataArray->m->mothurOutEndLine(); } + + //output confidence scores or not + if (pDataArray->probs) { + outTax << candidateSeq->getName() << '\t' << taxonomy << endl; + }else{ + outTax << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl; } + + outTaxSimple << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl; + + if (myclassify->getFlipped()) { outAcc << candidateSeq->getName() << endl; } + count++; } delete candidateSeq;