X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=maligner.cpp;h=3d1d1c9fa5ac995a65265b62ed4c59f25ed6679d;hp=f06884fcad747a84e8c3d8ae673a5eabf839f2ec;hb=615301e57c25e241356a9c2380648d117709458d;hpb=aa9238c0a9e6e7aa0ed8b8b606b08ad4fd7dcfe3 diff --git a/maligner.cpp b/maligner.cpp index f06884f..3d1d1c9 100644 --- a/maligner.cpp +++ b/maligner.cpp @@ -8,53 +8,50 @@ */ #include "maligner.h" -#include "kmerdb.hpp" -#include "blastdb.hpp" +/***********************************************************************/ //int num, int match, int misMatch, , string mode, Database* dataLeft, Database* dataRight +Maligner::Maligner(vector temp, int match, int misMatch, float div, int ms, int minCov) : db(temp), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minSimilarity(ms), minCoverage(minCov) { + //numWanted(num), , searchMethod(mode), databaseLeft(dataLeft), databaseRight(dataRight) + + m = MothurOut::getInstance(); + +} /***********************************************************************/ -Maligner::Maligner(vector temp, int num, int match, int misMatch, float div, int ms, int minCov, string mode, Database* dataLeft, Database* dataRight) : - db(temp), numWanted(num), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minSimilarity(ms), minCoverage(minCov), searchMethod(mode), databaseLeft(dataLeft), databaseRight(dataRight) { m = MothurOut::getInstance(); } -/***********************************************************************/ -string Maligner::getResults(Sequence* q, DeCalculator* decalc) { +string Maligner::getResults(Sequence q, DeCalculator decalc) { try { outputResults.clear(); //make copy so trimming doesn't destroy query from calling class - remember to deallocate - query = new Sequence(q->getName(), q->getAligned()); + query.setName(q.getName()); query.setAligned(q.getAligned()); string chimera; - if (searchMethod == "distance") { - //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate - refSeqs = decalc->findClosest(query, db, numWanted, indexes); - }else if (searchMethod == "blast") { - refSeqs = getBlastSeqs(query, numWanted); //fills indexes - }else if (searchMethod == "kmer") { - refSeqs = getKmerSeqs(query, numWanted); //fills indexes - }else { m->mothurOut("not valid search."); exit(1); } //should never get here - - if (m->control_pressed) { return chimera; } + //copy refSeqs so that filter does not effect original + for(int i = 0; i < db.size(); i++) { + Sequence newSeq(db[i].getName(), db[i].getAligned()); + refSeqs.push_back(newSeq); + } refSeqs = minCoverageFilter(refSeqs); - + if (refSeqs.size() < 2) { - for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } + //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } percentIdenticalQueryChimera = 0.0; return "unknown"; } int chimeraPenalty = computeChimeraPenalty(); - + //fills outputResults chimera = chimeraMaligner(chimeraPenalty, decalc); if (m->control_pressed) { return chimera; } //free memory - delete query; + //delete query; - for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } + //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } return chimera; } @@ -64,44 +61,70 @@ string Maligner::getResults(Sequence* q, DeCalculator* decalc) { } } /***********************************************************************/ -string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { +string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator decalc) { try { string chimera; - //trims seqs to first non gap char in all seqs and last non gap char in all seqs - spotMap = decalc->trimSeqs(query, refSeqs); + spotMap = decalc.trimSeqs(query, refSeqs); - vector temp = refSeqs; + //you trimmed the whole sequence, skip + if (query.getAligned() == "") { return "no"; } + + vector temp = refSeqs; temp.push_back(query); + + temp = verticalFilter(temp); + query = temp[temp.size()-1]; + for (int i = 0; i < temp.size()-1; i++) { refSeqs[i] = temp[i]; } - verticalFilter(temp); -//for (int i = 0; i < temp.size(); i++) { cout << temp[i]->getName() << '\n' << temp[i]->getAligned() << endl; } return "no"; + //for (int i = 0; i < refSeqs.size(); i++) { cout << refSeqs[i]->getName() << endl ; }//<< refSeqs[i]->getAligned() << endl - vector< vector > matrix = buildScoreMatrix(query->getAligned().length(), refSeqs.size()); //builds and initializes + vector< vector > matrix = buildScoreMatrix(query.getAligned().length(), refSeqs.size()); //builds and initializes if (m->control_pressed) { return chimera; } fillScoreMatrix(matrix, refSeqs, chimeraPenalty); - + vector path = extractHighestPath(matrix); if (m->control_pressed) { return chimera; } - vector trace = mapTraceRegionsToAlignment(path, refSeqs); - + vector trace = mapTraceRegionsToAlignment(path); + if (trace.size() > 1) { chimera = "yes"; } - else { chimera = "no"; } + else { chimera = "no"; return chimera; } int traceStart = path[0].col; int traceEnd = path[path.size()-1].col; - string queryInRange = query->getAligned(); + string queryInRange = query.getAligned(); queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart+1)); - +// cout << queryInRange << endl; string chimeraSeq = constructChimericSeq(trace, refSeqs); - +// cout << chimeraSeq << endl; + +// cout << queryInRange.length() << endl; +// cout << chimeraSeq.length() << endl; + percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq); +// cout << percentIdenticalQueryChimera << endl; + /* + vector trace = extractHighestPath(matrix); + + //cout << "traces\n"; + //for(int i=0;igetName() << endl; + //} + + if (trace.size() > 1) { chimera = "yes"; } + else { chimera = "no"; return chimera; } + + int traceStart = trace[0].col; + int traceEnd = trace[trace.size()-1].oldCol; + string queryInRange = query->getAligned(); + queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart));*/ + if (m->control_pressed) { return chimera; } //save output results @@ -110,32 +133,35 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { int regionEnd = trace[i].oldCol; int seqIndex = trace[i].row; +// cout << regionStart << '\t' << regionEnd << '\t' << seqIndex << endl; results temp; - temp.parent = refSeqs[seqIndex]->getName(); - temp.parentAligned = db[indexes[seqIndex]]->getAligned(); + temp.parent = refSeqs[seqIndex].getName(); + temp.parentAligned = db[seqIndex].getAligned(); temp.nastRegionStart = spotMap[regionStart]; temp.nastRegionEnd = spotMap[regionEnd]; - temp.regionStart = regionStart; - temp.regionEnd = regionEnd; + temp.regionStart = unalignedMap[regionStart]; + temp.regionEnd = unalignedMap[regionEnd]; - string parentInRange = refSeqs[seqIndex]->getAligned(); + string parentInRange = refSeqs[seqIndex].getAligned(); parentInRange = parentInRange.substr(traceStart, (traceEnd-traceStart+1)); temp.queryToParent = computePercentID(queryInRange, parentInRange); temp.divR = (percentIdenticalQueryChimera / temp.queryToParent); - - string queryInRegion = query->getAligned(); + + string queryInRegion = query.getAligned(); queryInRegion = queryInRegion.substr(regionStart, (regionEnd-regionStart+1)); - string parentInRegion = refSeqs[seqIndex]->getAligned(); + string parentInRegion = refSeqs[seqIndex].getAligned(); parentInRegion = parentInRegion.substr(regionStart, (regionEnd-regionStart+1)); temp.queryToParentLocal = computePercentID(queryInRegion, parentInRegion); - + + //cout << query->getName() << '\t' << temp.parent << '\t' << "NAST:" << temp.nastRegionStart << '-' << temp.nastRegionEnd << " G:" << temp.queryToParent << " L:" << temp.queryToParentLocal << ", " << temp.divR << endl; + outputResults.push_back(temp); } - + return chimera; } catch(exception& e) { @@ -145,15 +171,15 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { } /***********************************************************************/ //removes top matches that do not have minimum coverage with query. -vector Maligner::minCoverageFilter(vector ref){ +vector Maligner::minCoverageFilter(vector ref){ try { - vector newRefs; + vector newRefs; - string queryAligned = query->getAligned(); + string queryAligned = query.getAligned(); for (int i = 0; i < ref.size(); i++) { - string refAligned = ref[i]->getAligned(); + string refAligned = ref[i].getAligned(); int numBases = 0; int numCovered = 0; @@ -175,7 +201,9 @@ vector Maligner::minCoverageFilter(vector ref){ //if coverage above minimum if (coverage > minCoverage) { newRefs.push_back(ref[i]); - } + }//else { + //delete ref[i]; + //} } return newRefs; @@ -190,10 +218,12 @@ vector Maligner::minCoverageFilter(vector ref){ int Maligner::computeChimeraPenalty() { try { - int numAllowable = ((1.0 - (1.0/minDivR)) * query->getNumBases()); - + int numAllowable = ((1.0 - (1.0/minDivR)) * query.getNumBases()); + +// if(numAllowable < 1){ numAllowable = 1; } + int penalty = int(numAllowable + 1) * misMatchPenalty; - + return penalty; } @@ -204,16 +234,16 @@ int Maligner::computeChimeraPenalty() { } /***********************************************************************/ //this is a vertical filter -void Maligner::verticalFilter(vector seqs) { +vector Maligner::verticalFilter(vector seqs) { try { - vector gaps; gaps.resize(query->getAligned().length(), 0); + vector gaps; gaps.resize(query.getAligned().length(), 0); - string filterString = (string(query->getAligned().length(), '1')); + string filterString = (string(query.getAligned().length(), '1')); //for each sequence for (int i = 0; i < seqs.size(); i++) { - string seqAligned = seqs[i]->getAligned(); + string seqAligned = seqs[i].getAligned(); for (int j = 0; j < seqAligned.length(); j++) { //if this spot is a gap @@ -223,7 +253,7 @@ void Maligner::verticalFilter(vector seqs) { //zero out spot where all sequences have blanks int numColRemoved = 0; - for(int i = 0; i < seqs[0]->getAligned().length(); i++){ + for(int i = 0; i < seqs[0].getAligned().length(); i++){ if(gaps[i] == seqs.size()) { filterString[i] = '0'; numColRemoved++; } } @@ -231,7 +261,7 @@ void Maligner::verticalFilter(vector seqs) { //for each sequence for (int i = 0; i < seqs.size(); i++) { - string seqAligned = seqs[i]->getAligned(); + string seqAligned = seqs[i].getAligned(); string newAligned = ""; int count = 0; @@ -244,10 +274,27 @@ void Maligner::verticalFilter(vector seqs) { } } - seqs[i]->setAligned(newAligned); + seqs[i].setAligned(newAligned); } - + + string query = seqs[seqs.size()-1].getAligned(); + int queryLength = query.length(); + + unalignedMap.resize(queryLength, 0); + + + for(int i=1;ierrorOut(e, "Maligner", "verticalFilter"); @@ -258,9 +305,9 @@ void Maligner::verticalFilter(vector seqs) { vector< vector > Maligner::buildScoreMatrix(int cols, int rows) { try{ - vector< vector > m; m.resize(rows); + vector< vector > m(rows); - for (int i = 0; i < m.size(); i++) { + for (int i = 0; i < rows; i++) { for (int j = 0; j < cols; j++) { //initialize each cell @@ -281,35 +328,43 @@ vector< vector > Maligner::buildScoreMatrix(int cols, int rows) { exit(1); } } + //*************************************************************************************************************** -void Maligner::fillScoreMatrix(vector >& ms, vector seqs, int penalty) { + +void Maligner::fillScoreMatrix(vector >& ms, vector seqs, int penalty) { try{ //get matrix dimensions - int numCols = query->getAligned().length(); + int numCols = query.getAligned().length(); int numRows = seqs.size(); +// cout << numRows << endl; + //initialize first col - string queryAligned = query->getAligned(); + string queryAligned = query.getAligned(); for (int i = 0; i < numRows; i++) { - string subjectAligned = seqs[i]->getAligned(); + string subjectAligned = seqs[i].getAligned(); //are you both gaps? if ((!isalpha(queryAligned[0])) && (!isalpha(subjectAligned[0]))) { ms[i][0].score = 0; - }else if (queryAligned[0] == subjectAligned[0]) { +// ms[i][0].mismatches = 0; + }else if (queryAligned[0] == subjectAligned[0]) { //|| subjectAligned[0] == 'N') ms[i][0].score = matchScore; +// ms[i][0].mismatches = 0; }else{ ms[i][0].score = 0; +// ms[i][0].mismatches = 1; } } //fill rest of matrix for (int j = 1; j < numCols; j++) { //iterate through matrix columns +// for (int i = 0; i < 1; i++) { //iterate through matrix rows for (int i = 0; i < numRows; i++) { //iterate through matrix rows - string subjectAligned = seqs[i]->getAligned(); + string subjectAligned = seqs[i].getAligned(); int matchMisMatchScore = 0; //are you both gaps? @@ -322,7 +377,7 @@ void Maligner::fillScoreMatrix(vector >& ms, vector >& ms, vectorgetName(); +// for(int j=0;jgetName(); +// for(int j=0;jerrorOut(e, "Maligner", "fillScoreMatrix"); @@ -349,12 +430,12 @@ void Maligner::fillScoreMatrix(vector >& ms, vector Maligner::extractHighestPath(vector > ms) { try { - + //get matrix dimensions - int numCols = query->getAligned().length(); + int numCols = query.getAligned().length(); int numRows = ms.size(); - - + + //find highest score scoring matrix score_struct highestStruct; int highestScore = 0; @@ -367,12 +448,14 @@ vector Maligner::extractHighestPath(vector > } } } - + vector path; int rowIndex = highestStruct.row; int pos = highestStruct.col; int score = highestStruct.score; + +// cout << rowIndex << '\t' << pos << '\t' << score << endl; while (pos >= 0 && score > 0) { score_struct temp = ms[rowIndex][pos]; @@ -383,9 +466,9 @@ vector Maligner::extractHighestPath(vector > rowIndex = temp.prev; pos--; } - + reverse(path.begin(), path.end()); - + return path; } @@ -395,6 +478,142 @@ vector Maligner::extractHighestPath(vector > } } //*************************************************************************************************************** +vector Maligner::mapTraceRegionsToAlignment(vector path) { + try { + vector trace; + + int region_index = path[0].row; + int region_start = path[0].col; + + for (int i = 1; i < path.size(); i++) { + + int next_region_index = path[i].row; + //cout << i << '\t' << next_region_index << endl; + + if (next_region_index != region_index) { + + // add trace region + int col_index = path[i].col; + trace_struct temp; + temp.col = region_start; + temp.oldCol = col_index-1; + temp.row = region_index; + + trace.push_back(temp); + + region_index = path[i].row; + region_start = col_index; + } + } + + // get last one + trace_struct temp; + temp.col = region_start; + temp.oldCol = path[path.size()-1].col; + temp.row = region_index; + trace.push_back(temp); + + return trace; + + } + catch(exception& e) { + m->errorOut(e, "Maligner", "mapTraceRegionsToAlignment"); + exit(1); + } +} + +/*************************************************************************************************************** + +vector Maligner::extractHighestPath(vector > ms) { + try { + + + //get matrix dimensions + int numCols = query->getAligned().length(); + int numRows = ms.size(); + + + //find highest score scoring matrix + vector highestStruct; + int highestScore = 0; + + for (int i = 0; i < numRows; i++) { + for (int j = 0; j < numCols; j++) { + if (ms[i][j].score > highestScore) { + highestScore = ms[i][j].score; + highestStruct.resize(0); + highestStruct.push_back(ms[i][j]); + } + else if(ms[i][j].score == highestScore){ + highestStruct.push_back(ms[i][j]); + } + } + } + + //cout << endl << highestScore << '\t' << highestStruct.size() << '\t' << highestStruct[0].row << endl; + + vector maxTrace; + double maxPercentIdenticalQueryAntiChimera = 0; + + for(int i=0;i path; + + int rowIndex = highestStruct[i].row; + int pos = highestStruct[i].col; + int score = highestStruct[i].score; + + while (pos >= 0 && score > 0) { + score_struct temp = ms[rowIndex][pos]; + score = temp.score; + + if (score > 0) { path.push_back(temp); } + + rowIndex = temp.prev; + pos--; + } + + reverse(path.begin(), path.end()); + + vector trace = mapTraceRegionsToAlignment(path, refSeqs); + + //cout << "traces\n"; + //for(int j=0;jgetName() << endl; + //} + + int traceStart = path[0].col; + int traceEnd = path[path.size()-1].col; +// cout << "traceStart/End\t" << traceStart << '\t' << traceEnd << endl; + + string queryInRange = query->getAligned(); + queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart+1)); +// cout << "here" << endl; + string chimeraSeq = constructChimericSeq(trace, refSeqs); + string antiChimeraSeq = constructAntiChimericSeq(trace, refSeqs); + + percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq); + double percentIdenticalQueryAntiChimera = computePercentID(queryInRange, antiChimeraSeq); +// cout << i << '\t' << percentIdenticalQueryChimera << '\t' << percentIdenticalQueryAntiChimera << endl; + + if(percentIdenticalQueryAntiChimera > maxPercentIdenticalQueryAntiChimera){ + maxPercentIdenticalQueryAntiChimera = percentIdenticalQueryAntiChimera; + maxTrace = trace; + } + } +// cout << maxPercentIdenticalQueryAntiChimera << endl; + return maxTrace; + + + } + catch(exception& e) { + m->errorOut(e, "Maligner", "extractHighestPath"); + exit(1); + } +} + +*************************************************************************************************************** + vector Maligner::mapTraceRegionsToAlignment(vector path, vector seqs) { try { vector trace; @@ -429,6 +648,13 @@ vector Maligner::mapTraceRegionsToAlignment(vector p temp.row = region_index; trace.push_back(temp); +// cout << endl; +// cout << trace.size() << endl; +// for(int i=0;igetName() << endl; +// } +// cout << endl; + return trace; } @@ -437,17 +663,23 @@ vector Maligner::mapTraceRegionsToAlignment(vector p exit(1); } } +*/ //*************************************************************************************************************** -string Maligner::constructChimericSeq(vector trace, vector seqs) { + +string Maligner::constructChimericSeq(vector trace, vector seqs) { try { string chimera = ""; for (int i = 0; i < trace.size(); i++) { - string seqAlign = seqs[trace[i].row]->getAligned(); +// cout << i << '\t' << trace[i].row << '\t' << trace[i].col << '\t' << trace[i].oldCol << endl; + + string seqAlign = seqs[trace[i].row].getAligned(); seqAlign = seqAlign.substr(trace[i].col, (trace[i].oldCol-trace[i].col+1)); chimera += seqAlign; } - +// cout << chimera << endl; +// if (chimera != "") { chimera = chimera.substr(0, (chimera.length()-1)); } //this was introducing a fence post error +// cout << chimera << endl; return chimera; } catch(exception& e) { @@ -455,179 +687,89 @@ string Maligner::constructChimericSeq(vector trace, vector trace, vector seqs) { + try { + string antiChimera = ""; + + for (int i = 0; i < trace.size(); i++) { +// cout << i << '\t' << (trace.size() - i - 1) << '\t' << trace[i].row << '\t' << trace[i].col << '\t' << trace[i].oldCol << endl; + + int oppositeIndex = trace.size() - i - 1; + + string seqAlign = seqs[trace[oppositeIndex].row].getAligned(); + seqAlign = seqAlign.substr(trace[i].col, (trace[i].oldCol-trace[i].col+1)); + antiChimera += seqAlign; + } + + return antiChimera; + } + catch(exception& e) { + m->errorOut(e, "Maligner", "constructChimericSeq"); + exit(1); + } +} + //*************************************************************************************************************** float Maligner::computePercentID(string queryAlign, string chimera) { try { if (queryAlign.length() != chimera.length()) { m->mothurOut("Error, alignment strings are of different lengths: "); m->mothurOutEndLine(); - m->mothurOut(toString(queryAlign.length())); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOutEndLine(); + m->mothurOut(toString(queryAlign.length())); m->mothurOutEndLine(); m->mothurOut(toString(chimera.length())); m->mothurOutEndLine(); return -1.0; } - - int numBases = 0; +// cout << queryAlign.length() << endl; int numIdentical = 0; - + int countA = 0; + int countB = 0; for (int i = 0; i < queryAlign.length(); i++) { - if ((isalpha(queryAlign[i])) || (isalpha(chimera[i]))) { - numBases++; - if (queryAlign[i] == chimera[i]) { - numIdentical++; + if (((queryAlign[i] != 'G') && (queryAlign[i] != 'T') && (queryAlign[i] != 'A') && (queryAlign[i] != 'C')&& (queryAlign[i] != '.') && (queryAlign[i] != '-')) || + ((chimera[i] != 'G') && (chimera[i] != 'T') && (chimera[i] != 'A') && (chimera[i] != 'C')&& (chimera[i] != '.') && (chimera[i] != '-'))) {} + else { + + bool charA = false; bool charB = false; + if ((queryAlign[i] == 'G') || (queryAlign[i] == 'T') || (queryAlign[i] == 'A') || (queryAlign[i] == 'C')) { charA = true; } + if ((chimera[i] == 'G') || (chimera[i] == 'T') || (chimera[i] == 'A') || (chimera[i] == 'C')) { charB = true; } + + + if (charA || charB) { + + if (charA) { countA++; } + if (charB) { countB++; } + + if (queryAlign[i] == chimera[i]) { + numIdentical++; + } } +// cout << queryAlign[i] << '\t' << chimera[i] << '\t' << countA << '\t' << countB << endl; + } } - - if (numBases == 0) { return 0; } - - float percentIdentical = (numIdentical/(float)numBases) * 100; - - return percentIdentical; - } - catch(exception& e) { - m->errorOut(e, "Maligner", "computePercentID"); - exit(1); - } -} -//*************************************************************************************************************** -vector Maligner::getBlastSeqs(Sequence* q, int num) { - try { - indexes.clear(); - vector refResults; - //generate blastdb - Database* database = new BlastDB(-2.0, -1.0, matchScore, misMatchPenalty); - for (int i = 0; i < db.size(); i++) { database->addSequence(*db[i]); } - database->generateDB(); - database->setNumSeqs(db.size()); - - //get parts of query - string queryUnAligned = q->getUnaligned(); - string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence - string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence - - Sequence* queryLeft = new Sequence(q->getName(), leftQuery); - Sequence* queryRight = new Sequence(q->getName(), rightQuery); - - vector tempIndexesRight = database->findClosestMegaBlast(queryRight, num+1); - vector tempIndexesLeft = database->findClosestMegaBlast(queryLeft, num+1); - - //if ((tempIndexesRight.size() != (num+1)) || (tempIndexesLeft.size() != (num+1))) { m->mothurOut("megablast returned " + toString(tempIndexesRight.size()) + " results for the right end, and " + toString(tempIndexesLeft.size()) + " for the left end. Needed " + toString(num+1) + ". Unable to porcess sequence " + q->getName()); m->mothurOutEndLine(); return refResults; } - - vector smaller; - vector larger; - - if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; larger = tempIndexesLeft; } - else { smaller = tempIndexesLeft; larger = tempIndexesRight; } - - //merge results - map seen; - map::iterator it; - - vector mergedResults; - for (int i = 0; i < smaller.size(); i++) { - //add left if you havent already - it = seen.find(smaller[i]); - if (it == seen.end()) { - mergedResults.push_back(smaller[i]); - seen[smaller[i]] = smaller[i]; - } +// cout << "pat\t" << countA << '\t' << countB << '\t' << numIdentical << endl; - //add right if you havent already - it = seen.find(larger[i]); - if (it == seen.end()) { - mergedResults.push_back(larger[i]); - seen[larger[i]] = larger[i]; - } - } - for (int i = smaller.size(); i < larger.size(); i++) { - it = seen.find(larger[i]); - if (it == seen.end()) { - mergedResults.push_back(larger[i]); - seen[larger[i]] = larger[i]; - } - } + float numBases = (countA + countB) /(float) 2; - if (mergedResults.size() < numWanted) { numWanted = mergedResults.size(); } -//cout << q->getName() << endl; - for (int i = 0; i < numWanted; i++) { -//cout << db[mergedResults[i]]->getName() << endl; - if (db[mergedResults[i]]->getName() != q->getName()) { - Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned()); - refResults.push_back(temp); - indexes.push_back(mergedResults[i]); - } -//cout << mergedResults[i] << endl; - } -//cout << endl; - delete queryRight; - delete queryLeft; - delete database; + if (numBases == 0) { return 0; } + +// cout << numIdentical << '\t' << numBases << endl; - return refResults; - } - catch(exception& e) { - m->errorOut(e, "Maligner", "getBlastSeqs"); - exit(1); - } -} -//*************************************************************************************************************** -vector Maligner::getKmerSeqs(Sequence* q, int num) { - try { - indexes.clear(); - - //get parts of query - string queryUnAligned = q->getUnaligned(); - string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence - string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence - - Sequence* queryLeft = new Sequence(q->getName(), leftQuery); - Sequence* queryRight = new Sequence(q->getName(), rightQuery); - - vector tempIndexesLeft = databaseLeft->findClosestSequences(queryLeft, numWanted); - vector tempIndexesRight = databaseRight->findClosestSequences(queryRight, numWanted); - - //merge results - map seen; - map::iterator it; - - vector mergedResults; - for (int i = 0; i < tempIndexesLeft.size(); i++) { - //add left if you havent already - it = seen.find(tempIndexesLeft[i]); - if (it == seen.end()) { - mergedResults.push_back(tempIndexesLeft[i]); - seen[tempIndexesLeft[i]] = tempIndexesLeft[i]; - } - - //add right if you havent already - it = seen.find(tempIndexesRight[i]); - if (it == seen.end()) { - mergedResults.push_back(tempIndexesRight[i]); - seen[tempIndexesRight[i]] = tempIndexesRight[i]; - } - } + float percentIdentical = (numIdentical/(float)numBases) * 100; -//cout << q->getName() << endl; - vector refResults; - for (int i = 0; i < numWanted; i++) { -//cout << db[mergedResults[i]]->getName() << endl; - Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned()); - refResults.push_back(temp); - indexes.push_back(mergedResults[i]); - } -//cout << endl; - delete queryRight; - delete queryLeft; +// cout << percentIdentical << endl; + + return percentIdentical; - return refResults; } catch(exception& e) { - m->errorOut(e, "Maligner", "getBlastSeqs"); + m->errorOut(e, "Maligner", "computePercentID"); exit(1); } } //*************************************************************************************************************** -