From: Pat Schloss Date: Fri, 21 Dec 2012 13:15:45 +0000 (-0500) Subject: Working on seq.error changes X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=3c5a709812a966bf3cabc8bb15f86f0c36cf1e34 Working on seq.error changes --- diff --git a/refchimeratest.cpp b/refchimeratest.cpp index 2f397e8..ab55ca4 100644 --- a/refchimeratest.cpp +++ b/refchimeratest.cpp @@ -14,7 +14,7 @@ int MAXINT = numeric_limits::max(); //*************************************************************************************************************** -RefChimeraTest::RefChimeraTest(vector& refs){ +RefChimeraTest::RefChimeraTest(vector& refs, bool aligned) : aligned(aligned){ m = MothurOut::getInstance(); @@ -29,7 +29,6 @@ RefChimeraTest::RefChimeraTest(vector& refs){ alignLength = referenceSeqs[0].length(); - } //*************************************************************************************************************** @@ -61,28 +60,12 @@ int RefChimeraTest::analyzeQuery(string queryName, string querySeq, ofstream& ch int leftParentBi, rightParentBi, breakPointBi; int minMismatchToChimera = getChimera(left, right, leftParentBi, rightParentBi, breakPointBi, singleLeft, bestLeft, singleRight, bestRight); -// int minMismatchToTrimera = MAXINT; -// int leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB; - int nMera = 0; string chimeraRefSeq = ""; if(bestSequenceMismatch - minMismatchToChimera >= 3){// || (minMismatchToChimera == 0 && bestSequenceMismatch != 0)){ - nMera = 2; chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi); - -// minMismatchToTrimera = getTrimera(left, right, leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB, singleLeft, bestLeft, singleRight, bestRight); -// -// if(minMismatchToChimera - minMismatchToTrimera <= 3){ -// nMera = 2; -// chimeraRefSeq = stitchBimera(leftParentBi, rightParentBi, breakPointBi); -// } -// else{ -// nMera = 3; -// chimeraRefSeq = stitchTrimera(leftParentTri, middleParentTri, rightParentTri, breakPointTriA, breakPointTriB); -// } - } else{ nMera = 1; @@ -92,20 +75,10 @@ int RefChimeraTest::analyzeQuery(string queryName, string querySeq, ofstream& ch double distToChimera = calcDistToChimera(querySeq, chimeraRefSeq); -// double loonIndex = calcLoonIndex(querySeq, referenceSeqs[leftParentBi], referenceSeqs[rightParentBi], breakPointBi, binMatrix); - chimeraReportFile << queryName << '\t' << referenceNames[bestMatch] << '\t' << bestSequenceMismatch << '\t'; chimeraReportFile << referenceNames[leftParentBi] << ',' << referenceNames[rightParentBi] << '\t' << breakPointBi << '\t'; chimeraReportFile << minMismatchToChimera << '\t'; - -// if(nMera == 1){ -// chimeraReportFile << "NA" << '\t' << "NA" << '\t' << "NA"; -// } -// else{ -// chimeraReportFile << referenceNames[leftParentTri] << ',' << referenceNames[middleParentTri] << ',' << referenceNames[rightParentTri] << '\t' << breakPointTriA << ',' << breakPointTriB << '\t' << minMismatchToTrimera; -// } - - chimeraReportFile << '\t' << distToChimera << '\t' << nMera << endl; + chimeraReportFile << '\t' << distToChimera << '\t' << nMera << endl; return nMera; } @@ -121,8 +94,6 @@ int RefChimeraTest::getMismatches(string& querySeq, vector >& left, int lDiffs = 0; for(int l=0;l >& left, int rDiffs = 0; int index = 0; for(int l=alignLength-1;l>=0;l--){ -// if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){ if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l] && referenceSeqs[i][l] != 'N'){ rDiffs++; } diff --git a/refchimeratest.h b/refchimeratest.h index d4983e0..41df4eb 100644 --- a/refchimeratest.h +++ b/refchimeratest.h @@ -16,7 +16,7 @@ class RefChimeraTest { public: - RefChimeraTest(vector&); + RefChimeraTest(vector&, bool); int printHeader(ofstream&); int analyzeQuery(string, string, ofstream&); int getClosestRefIndex(); @@ -34,7 +34,8 @@ private: int alignLength; int bestMatch; //ofstream chimeraReportFile; - + bool aligned; + MothurOut* m; }; diff --git a/seqerrorcommand.cpp b/seqerrorcommand.cpp index 87d4524..4ff1b6f 100644 --- a/seqerrorcommand.cpp +++ b/seqerrorcommand.cpp @@ -24,6 +24,7 @@ vector SeqErrorCommand::setParameters(){ CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pname); CommandParameter pignorechimeras("ignorechimeras", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pignorechimeras); CommandParameter pthreshold("threshold", "Number", "", "1.0", "", "", "","",false,false); parameters.push_back(pthreshold); + CommandParameter paligned("aligned", "Boolean", "T", "", "", "", "","",false,false); parameters.push_back(paligned); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter psave("save", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psave); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); @@ -243,6 +244,13 @@ SeqErrorCommand::SeqErrorCommand(string option) { // ...at some point should added some additional type checking... temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found") { temp = "1.00"; } m->mothurConvert(temp, threshold); + + temp = validParameter.validFile(parameters, "aligned", true); if (temp == "not found"){ temp = "t"; } + aligned = m->isTrue(temp); +// rdb->aligned = aligned; #do we need these lines for aligned? +// if (aligned) { //clear out old references +// rdb->clearMemory(); +// } temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; } save = m->isTrue(temp); @@ -686,8 +694,10 @@ int SeqErrorCommand::driver(string filename, string qFileName, string rFileName, ofstream outChimeraReport; m->openOutputFile(chimeraOutputFileName, outChimeraReport); - RefChimeraTest chimeraTest(referenceSeqs); - if (line.start == 0) { chimeraTest.printHeader(outChimeraReport); } + + RefChimeraTest chimeraTest(referenceSeqs, aligned); + + if (line.start == 0) { chimeraTest.printHeader(outChimeraReport); } ofstream errorSummaryFile; m->openOutputFile(summaryFileName, errorSummaryFile); diff --git a/seqerrorcommand.h b/seqerrorcommand.h index 6c7af2c..9f400b3 100644 --- a/seqerrorcommand.h +++ b/seqerrorcommand.h @@ -92,7 +92,7 @@ private: string queryFileName, referenceFileName, qualFileName, reportFileName, namesFileName, outputDir; double threshold; - bool ignoreChimeras, save; + bool ignoreChimeras, save, aligned; int numRefs, processors; int maxLength, totalBases, totalMatches; //ofstream errorSummaryFile, errorSeqFile;