From eccface807733d7b770c95716d05067d276ead26 Mon Sep 17 00:00:00 2001 From: pschloss Date: Fri, 6 May 2011 00:41:14 +0000 Subject: [PATCH] chimera.slayer debugging --- blastdb.cpp | 1 + chimeraslayer.cpp | 62 ++++++++++++++++++++++++-------------- maligner.cpp | 77 +++++++++++++++++++++++++++-------------------- 3 files changed, 85 insertions(+), 55 deletions(-) diff --git a/blastdb.cpp b/blastdb.cpp index 6d7ab18..438d90c 100644 --- a/blastdb.cpp +++ b/blastdb.cpp @@ -128,6 +128,7 @@ vector BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) { // wordsize used in megablast. I'm sure we're sacrificing accuracy for speed, but anyother way would take way too // long. With this setting, it seems comparable in speed to the suffix tree approach. //7000004128189528left 0 100 66 0 0 1 66 61 126 1e-31 131 + string blastCommand = path + "blast/bin/megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName()); system(blastCommand.c_str()); diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index 1d22bdf..a182b84 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -545,6 +545,8 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef bool rightChimeric = false; bool leftChimeric = false; + + cout << endl; if (chimeraFlag == "yes") { //which peice is chimeric or are both @@ -552,7 +554,7 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } } if (rightChimeric || leftChimeric) { -// cout << querySeq->getName() << "\tyes" << endl; + cout << querySeq->getName() << "\tyes" << endl; outAccString += querySeq->getName() + "\n"; results = true; @@ -1049,46 +1051,66 @@ vector ChimeraSlayer::getBlastSeqs(Sequence* q, vector& db vector tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim); vector tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim); - //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl; - vector smaller; - vector larger; + - if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; } - else { smaller = tempIndexesLeft; larger = tempIndexesRight; } + //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl; +// vector smaller; +// vector larger; +// +// if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; } +// else { smaller = tempIndexesLeft; larger = tempIndexesRight; } //merge results map seen; map::iterator it; vector mergedResults; - for (int i = 0; i < smaller.size(); i++) { + + int index = 0; +// for (int i = 0; i < smaller.size(); i++) { + while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){ + if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } //add left if you havent already - it = seen.find(smaller[i]); + it = seen.find(tempIndexesLeft[index]); if (it == seen.end()) { - mergedResults.push_back(smaller[i]); - seen[smaller[i]] = smaller[i]; + mergedResults.push_back(tempIndexesLeft[index]); + seen[tempIndexesLeft[index]] = tempIndexesLeft[index]; } //add right if you havent already - it = seen.find(larger[i]); + it = seen.find(tempIndexesRight[index]); if (it == seen.end()) { - mergedResults.push_back(larger[i]); - seen[larger[i]] = larger[i]; + mergedResults.push_back(tempIndexesRight[index]); + seen[tempIndexesRight[index]] = tempIndexesRight[index]; } + index++; } + - for (int i = smaller.size(); i < larger.size(); i++) { + for (int i = index; i < tempIndexesLeft.size(); i++) { if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } //add right if you havent already - it = seen.find(larger[i]); + it = seen.find(tempIndexesLeft[i]); if (it == seen.end()) { - mergedResults.push_back(larger[i]); - seen[larger[i]] = larger[i]; + mergedResults.push_back(tempIndexesLeft[i]); + seen[tempIndexesLeft[i]] = tempIndexesLeft[i]; } } + for (int i = index; i < tempIndexesRight.size(); i++) { + if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } + + //add right if you havent already + it = seen.find(tempIndexesRight[i]); + if (it == seen.end()) { + mergedResults.push_back(tempIndexesRight[i]); + seen[tempIndexesRight[i]] = tempIndexesRight[i]; + } + } + + for (int i = 0; i < mergedResults.size(); i++) { //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl; if (db[mergedResults[i]]->getName() != q->getName()) { @@ -1098,11 +1120,7 @@ vector ChimeraSlayer::getBlastSeqs(Sequence* q, vector& db } } - -// for(int i=0;igetName() << endl; -// } - + delete queryRight; delete queryLeft; diff --git a/maligner.cpp b/maligner.cpp index d7731fa..1205cef 100644 --- a/maligner.cpp +++ b/maligner.cpp @@ -73,6 +73,11 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { if (query->getAligned() == "") { return "no"; } vector temp = refSeqs; + +// for(int i=0;igetName() << endl; +// } + temp.push_back(query); verticalFilter(temp); @@ -90,7 +95,7 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { if (m->control_pressed) { return chimera; } vector trace = mapTraceRegionsToAlignment(path, refSeqs); - + if (trace.size() > 1) { chimera = "yes"; } else { chimera = "no"; return chimera; } @@ -98,13 +103,16 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { int traceEnd = path[path.size()-1].col; string queryInRange = query->getAligned(); queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart+1)); - +// cout << queryInRange << endl; string chimeraSeq = constructChimericSeq(trace, refSeqs); +// cout << chimeraSeq << endl; + // cout << queryInRange.length() << endl; // cout << chimeraSeq.length() << endl; percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq); +// cout << percentIdenticalQueryChimera << endl; /* vector trace = extractHighestPath(matrix); @@ -129,6 +137,7 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { int regionEnd = trace[i].oldCol; int seqIndex = trace[i].row; +// cout << regionStart << '\t' << regionEnd << '\t' << seqIndex << endl; results temp; temp.parent = refSeqs[seqIndex]->getName(); @@ -316,6 +325,8 @@ void Maligner::fillScoreMatrix(vector >& ms, vectorgetAligned().length(); int numRows = seqs.size(); +// cout << numRows << endl; + //initialize first col string queryAligned = query->getAligned(); for (int i = 0; i < numRows; i++) { @@ -337,6 +348,7 @@ void Maligner::fillScoreMatrix(vector >& ms, vectorgetAligned(); @@ -346,16 +358,13 @@ void Maligner::fillScoreMatrix(vector >& ms, vector >& ms, vectorgetName(); - for(int j=0;jgetName() << endl << seqs[i]->getAligned() << endl << endl; - if ((seqs[i]->getName() == "S000003470") || (seqs[i]->getName() == "S000383265") || (seqs[i]->getName() == "7000004128191054")) { - for(int j=0;jgetName(); - for(int j=0;jgetName(); +// for(int j=0;jgetName(); +// for(int j=0;j Maligner::extractHighestPath(vector > } } +// cout << highestScore << endl; vector path; int rowIndex = highestStruct.row; @@ -466,7 +474,8 @@ vector Maligner::mapTraceRegionsToAlignment(vector p for (int i = 1; i < path.size(); i++) { int next_region_index = path[i].row; - +// cout << i << '\t' << next_region_index << endl; + if (next_region_index != region_index) { // add trace region @@ -735,6 +744,8 @@ float Maligner::computePercentID(string queryAlign, string chimera) { if (numBases == 0) { return 0; } +// cout << numIdentical << '\t' << numBases << endl; + float percentIdentical = (numIdentical/(float)numBases) * 100; // cout << percentIdentical << endl; -- 2.39.2