From 2405cc589aaaf0c44809a48fe98d3b96863dac0b Mon Sep 17 00:00:00 2001 From: pschloss Date: Thu, 5 May 2011 20:42:45 +0000 Subject: [PATCH] chimera.slayer debugging --- chimerarealigner.cpp | 2 +- chimeraslayer.cpp | 15 +++++++++++++-- chimeraslayercommand.cpp | 2 +- maligner.cpp | 28 ++++++++++++++++++++-------- slayer.cpp | 37 +++++++++++++++++++------------------ 5 files changed, 54 insertions(+), 30 deletions(-) diff --git a/chimerarealigner.cpp b/chimerarealigner.cpp index 82c4427..abad480 100644 --- a/chimerarealigner.cpp +++ b/chimerarealigner.cpp @@ -310,7 +310,7 @@ string ChimeraReAligner::getNewAlignment(string query){ for(int i=alignmentLength-1;i>=0;i--){ flipSeq += queryAlignment[i]; } - + return flipSeq; } catch(exception& e) { diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index 73c61de..826f38b 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -552,7 +552,7 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS)) { leftChimeric = true; } } if (rightChimeric || leftChimeric) { - cout << querySeq->getName() << "\tyes" << endl; +// cout << querySeq->getName() << "\tyes" << endl; outAccString += querySeq->getName() + "\n"; results = true; @@ -827,8 +827,14 @@ int ChimeraSlayer::getChimeras(Sequence* query) { } //put seqs into vector to send to slayer + +// cout << query->getAligned() << endl; vector seqsForSlayer; - for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); } + for (int k = 0; k < seqs.size(); k++) { +// cout << seqs[k].seq->getAligned() << endl; + seqsForSlayer.push_back(seqs[k].seq); + + } if (m->control_pressed) { for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } return 0; } @@ -1086,6 +1092,11 @@ vector ChimeraSlayer::getBlastSeqs(Sequence* q, vector& db } } + + +// for(int i=0;igetName() << endl; +// } delete queryRight; delete queryLeft; diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index 97edf11..7dfebb0 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -25,7 +25,7 @@ vector ChimeraSlayerCommand::setParameters(){ CommandParameter pmincov("mincov", "Number", "", "70", "", "", "",false,false); parameters.push_back(pmincov); CommandParameter pminsnp("minsnp", "Number", "", "10", "", "", "",false,false); parameters.push_back(pminsnp); CommandParameter pminbs("minbs", "Number", "", "90", "", "", "",false,false); parameters.push_back(pminbs); - CommandParameter psearch("search", "Multiple", "kmer-blast-distance", "distance", "", "", "",false,false); parameters.push_back(psearch); + CommandParameter psearch("search", "Multiple", "kmer-blast-distance", "blast", "", "", "",false,false); parameters.push_back(psearch); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); CommandParameter prealign("realign", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(prealign); CommandParameter ptrim("trim", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptrim); diff --git a/maligner.cpp b/maligner.cpp index 5ffba32..d7731fa 100644 --- a/maligner.cpp +++ b/maligner.cpp @@ -97,9 +97,11 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { int traceStart = path[0].col; int traceEnd = path[path.size()-1].col; string queryInRange = query->getAligned(); - queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart)); + queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart+1)); string chimeraSeq = constructChimericSeq(trace, refSeqs); +// cout << queryInRange.length() << endl; +// cout << chimeraSeq.length() << endl; percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq); @@ -137,20 +139,20 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { temp.regionEnd = regionEnd; string parentInRange = refSeqs[seqIndex]->getAligned(); - parentInRange = parentInRange.substr(traceStart, (traceEnd-traceStart)); + parentInRange = parentInRange.substr(traceStart, (traceEnd-traceStart+1)); temp.queryToParent = computePercentID(queryInRange, parentInRange); temp.divR = (percentIdenticalQueryChimera / temp.queryToParent); string queryInRegion = query->getAligned(); - queryInRegion = queryInRegion.substr(regionStart, (regionEnd-regionStart)); + queryInRegion = queryInRegion.substr(regionStart, (regionEnd-regionStart+1)); string parentInRegion = refSeqs[seqIndex]->getAligned(); - parentInRegion = parentInRegion.substr(regionStart, (regionEnd-regionStart)); + parentInRegion = parentInRegion.substr(regionStart, (regionEnd-regionStart+1)); temp.queryToParentLocal = computePercentID(queryInRegion, parentInRegion); - //cout << query->getName() << '\t' << temp.parent << '\t' << "NAST:" << temp.nastRegionStart << '-' << temp.nastRegionEnd << " G:" << temp.queryToParent << " L:" << temp.queryToParentLocal << ", " << temp.divR << endl; +// cout << query->getName() << '\t' << temp.parent << '\t' << "NAST:" << temp.nastRegionStart << '-' << temp.nastRegionEnd << " G:" << temp.queryToParent << " L:" << temp.queryToParentLocal << ", " << temp.divR << endl; outputResults.push_back(temp); } @@ -652,8 +654,9 @@ string Maligner::constructChimericSeq(vector trace, vector refSeqs) { selectedDivs.push_back(divs[k]); } } - + //save selected for (int mi = 0; mi < selectedDivs.size(); mi++) { all.push_back(selectedDivs[mi]); } @@ -90,6 +90,7 @@ string Slayer::getResults(Sequence* query, vector refSeqs) { } } + // compute bootstrap support if (all.size() > 0) { //sort them @@ -144,16 +145,16 @@ vector Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence* int breakpoint = i; int leftLength = breakpoint + 1; int rightLength = length - leftLength; - + float QLA = computePercentID(query, parentA, 0, breakpoint); - float QRB = computePercentID(query, parentB, breakpoint+1, length - 1); + float QRB = computePercentID(query, parentB, breakpoint+1, length); float QLB = computePercentID(query, parentB, 0, breakpoint); - float QRA = computePercentID(query, parentA, breakpoint+1, length - 1); + float QRA = computePercentID(query, parentA, breakpoint+1, length); float LAB = computePercentID(parentA, parentB, 0, breakpoint); - float RAB = computePercentID(parentA, parentB, breakpoint+1, length - 1); - + float RAB = computePercentID(parentA, parentB, breakpoint+1, length); + float AB = ((LAB*leftLength) + (RAB*rightLength)) / (float) length; float QA = ((QLA*leftLength) + (QRA*rightLength)) / (float) length; float QB = ((QLB*leftLength) + (QRB*rightLength)) / (float) length; @@ -166,6 +167,7 @@ vector Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence* float divR_QLA_QRB = min((QLA_QRB/QA), (QLA_QRB/QB)); float divR_QLB_QRA = min((QLB_QRA/QA), (QLB_QRA/QB)); + //cout << q->getName() << '\t'; //cout << pA->getName() << '\t'; //cout << pB->getName() << '\t'; @@ -200,7 +202,7 @@ vector Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence* member.winLEnd = breakpoint; member.winRStart = breakpoint+1; member.winREnd = length-1; - member.querySeq = *(q); + member.querySeq = *(q); member.parentA = *(pA); member.parentB = *(pB); member.bsa = 0; @@ -227,7 +229,7 @@ vector Slayer::getSNPS(string parentA, string query, string parentB, int l try { vector data; -//cout << left << '\t' << right << endl; + for (int i = left; i <= right; i++) { char A = parentA[i]; @@ -235,33 +237,31 @@ vector Slayer::getSNPS(string parentA, string query, string parentB, int l char B = parentB[i]; if ((A != Q) || (B != Q)) { -//cout << "not equal " << Q << '\t' << A << '\t' << B << endl; - + //ensure not neighboring a gap. change to 12/09 release of chimeraSlayer - not sure what this adds, but it eliminates alot of SNPS + + if ( //did query loose a base here during filter?? ( i == 0 || abs (baseSpots[0][i] - baseSpots[0][i-1]) == 1) && - ( i == query.length() || abs (baseSpots[0][i] - baseSpots[0][i+1]) == 1) + ( i == query.length()-1 || abs (baseSpots[0][i] - baseSpots[0][i+1]) == 1) && //did parentA loose a base here during filter?? ( i == 0 || abs (baseSpots[1][i] - baseSpots[1][i-1]) == 1) && - ( i == parentA.length() || abs (baseSpots[1][i] - baseSpots[1][i+1]) == 1) + ( i == parentA.length()-1 || abs (baseSpots[1][i] - baseSpots[1][i+1]) == 1) && //did parentB loose a base here during filter?? ( i == 0 || abs (baseSpots[2][i] - baseSpots[2][i-1]) == 1) && - ( i == parentB.length() || abs (baseSpots[2][i] - baseSpots[2][i+1]) == 1) + ( i == parentB.length()-1 || abs (baseSpots[2][i] - baseSpots[2][i+1]) == 1) ) { - snps member; member.queryChar = Q; member.parentAChar = A; member.parentBChar = B; -//cout << "not neighboring a gap " << Q << '\t' << A << '\t' << B << '\t' << baseSpots[0][i] << '\t' << baseSpots[0][i+1] << '\t' << baseSpots[0][i-1] << '\t' << baseSpots[1][i] << '\t' << baseSpots[1][i+1] << '\t' << baseSpots[1][i-1] << '\t' << baseSpots[2][i] << '\t' << baseSpots[2][i+1] << '\t' << baseSpots[2][i-1] << endl; data.push_back(member); } } -// cout << i << '\t' << data.size() << endl; } return data; @@ -283,7 +283,7 @@ int Slayer::bootstrapSNPS(vector left, vector right, float& BSA, flo int numLeft = max(1, int(left.size() * percentSNPSample/(float)100 + 0.5)); int numRight = max(1, int(right.size() * percentSNPSample/(float)100 + 0.5)); - //cout << numLeft << '\t' << numRight << endl; + for (int i = 0; i < numIters; i++) { //random sampling with replacement. @@ -451,6 +451,7 @@ float Slayer::computePercentID(string queryAlign, string chimera, int left, int } } } + } float numBases = (countA + countB) /(float) 2; @@ -458,7 +459,7 @@ float Slayer::computePercentID(string queryAlign, string chimera, int left, int if (numBases == 0) { return 0; } float percentIdentical = (numIdentical/(float)numBases) * 100; - + return percentIdentical; } -- 2.39.2