X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=maligner.cpp;h=3d1d1c9fa5ac995a65265b62ed4c59f25ed6679d;hp=f834db1db319badab0ddb70cde9e54c7dbb3193f;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=1cf188b912d6da8f2cd03dd71cecef664a699c1a diff --git a/maligner.cpp b/maligner.cpp index f834db1..3d1d1c9 100644 --- a/maligner.cpp +++ b/maligner.cpp @@ -10,48 +10,48 @@ #include "maligner.h" /***********************************************************************/ //int num, int match, int misMatch, , string mode, Database* dataLeft, Database* dataRight -Maligner::Maligner(vector temp, int match, int misMatch, float div, int ms, int minCov) : db(temp), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minSimilarity(ms), minCoverage(minCov) { +Maligner::Maligner(vector temp, int match, int misMatch, float div, int ms, int minCov) : db(temp), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minSimilarity(ms), minCoverage(minCov) { //numWanted(num), , searchMethod(mode), databaseLeft(dataLeft), databaseRight(dataRight) m = MothurOut::getInstance(); } /***********************************************************************/ -string Maligner::getResults(Sequence* q, DeCalculator* decalc) { +string Maligner::getResults(Sequence q, DeCalculator decalc) { try { outputResults.clear(); //make copy so trimming doesn't destroy query from calling class - remember to deallocate - query = new Sequence(q->getName(), q->getAligned()); + query.setName(q.getName()); query.setAligned(q.getAligned()); string chimera; //copy refSeqs so that filter does not effect original for(int i = 0; i < db.size(); i++) { - Sequence* newSeq = new Sequence(db[i]->getName(), db[i]->getAligned()); + Sequence newSeq(db[i].getName(), db[i].getAligned()); refSeqs.push_back(newSeq); } refSeqs = minCoverageFilter(refSeqs); if (refSeqs.size() < 2) { - for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } + //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } percentIdenticalQueryChimera = 0.0; return "unknown"; } int chimeraPenalty = computeChimeraPenalty(); - + //fills outputResults chimera = chimeraMaligner(chimeraPenalty, decalc); if (m->control_pressed) { return chimera; } //free memory - delete query; + //delete query; - for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } + //for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; } return chimera; } @@ -61,25 +61,26 @@ string Maligner::getResults(Sequence* q, DeCalculator* decalc) { } } /***********************************************************************/ -string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { +string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator decalc) { try { string chimera; - //trims seqs to first non gap char in all seqs and last non gap char in all seqs - spotMap = decalc->trimSeqs(query, refSeqs); - + spotMap = decalc.trimSeqs(query, refSeqs); + //you trimmed the whole sequence, skip - if (query->getAligned() == "") { return "no"; } + if (query.getAligned() == "") { return "no"; } - vector temp = refSeqs; + vector temp = refSeqs; temp.push_back(query); - verticalFilter(temp); + temp = verticalFilter(temp); + query = temp[temp.size()-1]; + for (int i = 0; i < temp.size()-1; i++) { refSeqs[i] = temp[i]; } - //for (int i = 0; i < refSeqs.size(); i++) { cout << refSeqs[i]->getName() << endl << refSeqs[i]->getAligned() << endl; } + //for (int i = 0; i < refSeqs.size(); i++) { cout << refSeqs[i]->getName() << endl ; }//<< refSeqs[i]->getAligned() << endl - vector< vector > matrix = buildScoreMatrix(query->getAligned().length(), refSeqs.size()); //builds and initializes + vector< vector > matrix = buildScoreMatrix(query.getAligned().length(), refSeqs.size()); //builds and initializes if (m->control_pressed) { return chimera; } @@ -89,20 +90,25 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { if (m->control_pressed) { return chimera; } - vector trace = mapTraceRegionsToAlignment(path, refSeqs); - + vector trace = mapTraceRegionsToAlignment(path); + if (trace.size() > 1) { chimera = "yes"; } else { chimera = "no"; return chimera; } int traceStart = path[0].col; int traceEnd = path[path.size()-1].col; - string queryInRange = query->getAligned(); - queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart)); - + string queryInRange = query.getAligned(); + queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart+1)); +// cout << queryInRange << endl; string chimeraSeq = constructChimericSeq(trace, refSeqs); +// cout << chimeraSeq << endl; + +// cout << queryInRange.length() << endl; +// cout << chimeraSeq.length() << endl; percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq); +// cout << percentIdenticalQueryChimera << endl; /* vector trace = extractHighestPath(matrix); @@ -127,30 +133,31 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { int regionEnd = trace[i].oldCol; int seqIndex = trace[i].row; +// cout << regionStart << '\t' << regionEnd << '\t' << seqIndex << endl; results temp; - temp.parent = refSeqs[seqIndex]->getName(); - temp.parentAligned = db[seqIndex]->getAligned(); + temp.parent = refSeqs[seqIndex].getName(); + temp.parentAligned = db[seqIndex].getAligned(); temp.nastRegionStart = spotMap[regionStart]; temp.nastRegionEnd = spotMap[regionEnd]; - temp.regionStart = regionStart; - temp.regionEnd = regionEnd; + temp.regionStart = unalignedMap[regionStart]; + temp.regionEnd = unalignedMap[regionEnd]; - string parentInRange = refSeqs[seqIndex]->getAligned(); - parentInRange = parentInRange.substr(traceStart, (traceEnd-traceStart)); + string parentInRange = refSeqs[seqIndex].getAligned(); + parentInRange = parentInRange.substr(traceStart, (traceEnd-traceStart+1)); temp.queryToParent = computePercentID(queryInRange, parentInRange); temp.divR = (percentIdenticalQueryChimera / temp.queryToParent); - string queryInRegion = query->getAligned(); - queryInRegion = queryInRegion.substr(regionStart, (regionEnd-regionStart)); + string queryInRegion = query.getAligned(); + queryInRegion = queryInRegion.substr(regionStart, (regionEnd-regionStart+1)); - string parentInRegion = refSeqs[seqIndex]->getAligned(); - parentInRegion = parentInRegion.substr(regionStart, (regionEnd-regionStart)); + string parentInRegion = refSeqs[seqIndex].getAligned(); + parentInRegion = parentInRegion.substr(regionStart, (regionEnd-regionStart+1)); temp.queryToParentLocal = computePercentID(queryInRegion, parentInRegion); -// cout << temp.parent << '\t' << "NAST:" << temp.nastRegionStart << '-' << temp.nastRegionEnd << " G:" << temp.queryToParent << " L:" << temp.queryToParentLocal << endl; + //cout << query->getName() << '\t' << temp.parent << '\t' << "NAST:" << temp.nastRegionStart << '-' << temp.nastRegionEnd << " G:" << temp.queryToParent << " L:" << temp.queryToParentLocal << ", " << temp.divR << endl; outputResults.push_back(temp); } @@ -164,15 +171,15 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { } /***********************************************************************/ //removes top matches that do not have minimum coverage with query. -vector Maligner::minCoverageFilter(vector ref){ +vector Maligner::minCoverageFilter(vector ref){ try { - vector newRefs; + vector newRefs; - string queryAligned = query->getAligned(); + string queryAligned = query.getAligned(); for (int i = 0; i < ref.size(); i++) { - string refAligned = ref[i]->getAligned(); + string refAligned = ref[i].getAligned(); int numBases = 0; int numCovered = 0; @@ -194,9 +201,9 @@ vector Maligner::minCoverageFilter(vector ref){ //if coverage above minimum if (coverage > minCoverage) { newRefs.push_back(ref[i]); - }else { - delete ref[i]; - } + }//else { + //delete ref[i]; + //} } return newRefs; @@ -211,7 +218,7 @@ vector Maligner::minCoverageFilter(vector ref){ int Maligner::computeChimeraPenalty() { try { - int numAllowable = ((1.0 - (1.0/minDivR)) * query->getNumBases()); + int numAllowable = ((1.0 - (1.0/minDivR)) * query.getNumBases()); // if(numAllowable < 1){ numAllowable = 1; } @@ -227,16 +234,16 @@ int Maligner::computeChimeraPenalty() { } /***********************************************************************/ //this is a vertical filter -void Maligner::verticalFilter(vector seqs) { +vector Maligner::verticalFilter(vector seqs) { try { - vector gaps; gaps.resize(query->getAligned().length(), 0); + vector gaps; gaps.resize(query.getAligned().length(), 0); - string filterString = (string(query->getAligned().length(), '1')); + string filterString = (string(query.getAligned().length(), '1')); //for each sequence for (int i = 0; i < seqs.size(); i++) { - string seqAligned = seqs[i]->getAligned(); + string seqAligned = seqs[i].getAligned(); for (int j = 0; j < seqAligned.length(); j++) { //if this spot is a gap @@ -246,7 +253,7 @@ void Maligner::verticalFilter(vector seqs) { //zero out spot where all sequences have blanks int numColRemoved = 0; - for(int i = 0; i < seqs[0]->getAligned().length(); i++){ + for(int i = 0; i < seqs[0].getAligned().length(); i++){ if(gaps[i] == seqs.size()) { filterString[i] = '0'; numColRemoved++; } } @@ -254,7 +261,7 @@ void Maligner::verticalFilter(vector seqs) { //for each sequence for (int i = 0; i < seqs.size(); i++) { - string seqAligned = seqs[i]->getAligned(); + string seqAligned = seqs[i].getAligned(); string newAligned = ""; int count = 0; @@ -267,10 +274,27 @@ void Maligner::verticalFilter(vector seqs) { } } - seqs[i]->setAligned(newAligned); + seqs[i].setAligned(newAligned); } - + + string query = seqs[seqs.size()-1].getAligned(); + int queryLength = query.length(); + + unalignedMap.resize(queryLength, 0); + + + for(int i=1;ierrorOut(e, "Maligner", "verticalFilter"); @@ -307,17 +331,19 @@ vector< vector > Maligner::buildScoreMatrix(int cols, int rows) { //*************************************************************************************************************** -void Maligner::fillScoreMatrix(vector >& ms, vector seqs, int penalty) { +void Maligner::fillScoreMatrix(vector >& ms, vector seqs, int penalty) { try{ //get matrix dimensions - int numCols = query->getAligned().length(); + int numCols = query.getAligned().length(); int numRows = seqs.size(); +// cout << numRows << endl; + //initialize first col - string queryAligned = query->getAligned(); + string queryAligned = query.getAligned(); for (int i = 0; i < numRows; i++) { - string subjectAligned = seqs[i]->getAligned(); + string subjectAligned = seqs[i].getAligned(); //are you both gaps? if ((!isalpha(queryAligned[0])) && (!isalpha(subjectAligned[0]))) { @@ -335,25 +361,23 @@ void Maligner::fillScoreMatrix(vector >& ms, vectorgetAligned(); + string subjectAligned = seqs[i].getAligned(); int matchMisMatchScore = 0; //are you both gaps? if ((!isalpha(queryAligned[j])) && (!isalpha(subjectAligned[j]))) { //leave the same }else if ((toupper(queryAligned[j]) == 'N') || (toupper(subjectAligned[j]) == 'N')) { - //matchMisMatchScore = matchScore; //leave the same }else if (queryAligned[j] == subjectAligned[j]) { matchMisMatchScore = matchScore; -// ms[i][j].mismatches = ms[i][j-1].mismatches; }else if (queryAligned[j] != subjectAligned[j]) { matchMisMatchScore = misMatchPenalty; -// ms[i][j].mismatches = ms[i][j-1].mismatches + 1; } - + //compute score based on previous columns scores for (int prevIndex = 0; prevIndex < numRows; prevIndex++) { //iterate through rows @@ -368,35 +392,33 @@ void Maligner::fillScoreMatrix(vector >& ms, vectorgetName(); - for(int j=0;jgetName() << endl << seqs[i]->getAligned() << endl << endl; - if ((seqs[i]->getName() == "S000003470") || (seqs[i]->getName() == "S000383265") || (seqs[i]->getName() == "7000004128191054")) { - for(int j=0;jgetName(); - for(int j=0;jgetName(); +// for(int j=0;jgetName(); +// for(int j=0;j Maligner::extractHighestPath(vector > try { //get matrix dimensions - int numCols = query->getAligned().length(); + int numCols = query.getAligned().length(); int numRows = ms.size(); @@ -432,6 +454,8 @@ vector Maligner::extractHighestPath(vector > int rowIndex = highestStruct.row; int pos = highestStruct.col; int score = highestStruct.score; + +// cout << rowIndex << '\t' << pos << '\t' << score << endl; while (pos >= 0 && score > 0) { score_struct temp = ms[rowIndex][pos]; @@ -454,7 +478,7 @@ vector Maligner::extractHighestPath(vector > } } //*************************************************************************************************************** -vector Maligner::mapTraceRegionsToAlignment(vector path, vector seqs) { +vector Maligner::mapTraceRegionsToAlignment(vector path) { try { vector trace; @@ -464,7 +488,8 @@ vector Maligner::mapTraceRegionsToAlignment(vector p for (int i = 1; i < path.size(); i++) { int next_region_index = path[i].row; - + //cout << i << '\t' << next_region_index << endl; + if (next_region_index != region_index) { // add trace region @@ -587,7 +612,7 @@ vector Maligner::extractHighestPath(vector > } } -//*************************************************************************************************************** +*************************************************************************************************************** vector Maligner::mapTraceRegionsToAlignment(vector path, vector seqs) { try { @@ -641,19 +666,20 @@ vector Maligner::mapTraceRegionsToAlignment(vector p */ //*************************************************************************************************************** -string Maligner::constructChimericSeq(vector trace, vector seqs) { +string Maligner::constructChimericSeq(vector trace, vector seqs) { try { string chimera = ""; for (int i = 0; i < trace.size(); i++) { // cout << i << '\t' << trace[i].row << '\t' << trace[i].col << '\t' << trace[i].oldCol << endl; - string seqAlign = seqs[trace[i].row]->getAligned(); + string seqAlign = seqs[trace[i].row].getAligned(); seqAlign = seqAlign.substr(trace[i].col, (trace[i].oldCol-trace[i].col+1)); chimera += seqAlign; } - - if (chimera != "") { chimera = chimera.substr(0, (chimera.length()-1)); } +// cout << chimera << endl; +// if (chimera != "") { chimera = chimera.substr(0, (chimera.length()-1)); } //this was introducing a fence post error +// cout << chimera << endl; return chimera; } catch(exception& e) { @@ -664,7 +690,7 @@ string Maligner::constructChimericSeq(vector trace, vector trace, vector seqs) { +string Maligner::constructAntiChimericSeq(vector trace, vector seqs) { try { string antiChimera = ""; @@ -673,7 +699,7 @@ string Maligner::constructAntiChimericSeq(vector trace, vectorgetAligned(); + string seqAlign = seqs[trace[oppositeIndex].row].getAligned(); seqAlign = seqAlign.substr(trace[i].col, (trace[i].oldCol-trace[i].col+1)); antiChimera += seqAlign; } @@ -697,6 +723,7 @@ float Maligner::computePercentID(string queryAlign, string chimera) { return -1.0; } +// cout << queryAlign.length() << endl; int numIdentical = 0; int countA = 0; int countB = 0; @@ -709,6 +736,7 @@ float Maligner::computePercentID(string queryAlign, string chimera) { if ((queryAlign[i] == 'G') || (queryAlign[i] == 'T') || (queryAlign[i] == 'A') || (queryAlign[i] == 'C')) { charA = true; } if ((chimera[i] == 'G') || (chimera[i] == 'T') || (chimera[i] == 'A') || (chimera[i] == 'C')) { charB = true; } + if (charA || charB) { if (charA) { countA++; } @@ -718,15 +746,24 @@ float Maligner::computePercentID(string queryAlign, string chimera) { numIdentical++; } } +// cout << queryAlign[i] << '\t' << chimera[i] << '\t' << countA << '\t' << countB << endl; + } } +// cout << "pat\t" << countA << '\t' << countB << '\t' << numIdentical << endl; + + float numBases = (countA + countB) /(float) 2; if (numBases == 0) { return 0; } +// cout << numIdentical << '\t' << numBases << endl; + float percentIdentical = (numIdentical/(float)numBases) * 100; - + +// cout << percentIdentical << endl; + return percentIdentical; }