verticalFilter(temp);
- //for (int i = 0; i < refSeqs.size(); i++) { cout << refSeqs[i]->getName() << endl << refSeqs[i]->getAligned() << endl; }
+ //for (int i = 0; i < refSeqs.size(); i++) { cout << refSeqs[i]->getName() << endl ; }//<< refSeqs[i]->getAligned() << endl
vector< vector<score_struct> > matrix = buildScoreMatrix(query->getAligned().length(), refSeqs.size()); //builds and initializes
if (m->control_pressed) { return chimera; }
fillScoreMatrix(matrix, refSeqs, chimeraPenalty);
-
+
+ vector<score_struct> path = extractHighestPath(matrix);
+
+ if (m->control_pressed) { return chimera; }
+
+ vector<trace_struct> trace = mapTraceRegionsToAlignment(path, refSeqs);
+
+ if (trace.size() > 1) { chimera = "yes"; }
+ else { chimera = "no"; return chimera; }
+
+ int traceStart = path[0].col;
+ int traceEnd = path[path.size()-1].col;
+ string queryInRange = query->getAligned();
+ queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart+1));
+
+ string chimeraSeq = constructChimericSeq(trace, refSeqs);
+// cout << queryInRange.length() << endl;
+// cout << chimeraSeq.length() << endl;
+
+ percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq);
+
+ /*
vector<trace_struct> trace = extractHighestPath(matrix);
//cout << "traces\n";
int traceStart = trace[0].col;
int traceEnd = trace[trace.size()-1].oldCol;
string queryInRange = query->getAligned();
- queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart));
+ queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart));*/
if (m->control_pressed) { return chimera; }
temp.regionEnd = regionEnd;
string parentInRange = refSeqs[seqIndex]->getAligned();
- parentInRange = parentInRange.substr(traceStart, (traceEnd-traceStart));
+ parentInRange = parentInRange.substr(traceStart, (traceEnd-traceStart+1));
temp.queryToParent = computePercentID(queryInRange, parentInRange);
temp.divR = (percentIdenticalQueryChimera / temp.queryToParent);
string queryInRegion = query->getAligned();
- queryInRegion = queryInRegion.substr(regionStart, (regionEnd-regionStart));
+ queryInRegion = queryInRegion.substr(regionStart, (regionEnd-regionStart+1));
string parentInRegion = refSeqs[seqIndex]->getAligned();
- parentInRegion = parentInRegion.substr(regionStart, (regionEnd-regionStart));
+ parentInRegion = parentInRegion.substr(regionStart, (regionEnd-regionStart+1));
temp.queryToParentLocal = computePercentID(queryInRegion, parentInRegion);
- cout << temp.parent << '\t' << "NAST:" << temp.nastRegionStart << '-' << temp.nastRegionEnd << " G:" << temp.queryToParent << " L:" << temp.queryToParentLocal << endl;
+// cout << query->getName() << '\t' << temp.parent << '\t' << "NAST:" << temp.nastRegionStart << '-' << temp.nastRegionEnd << " G:" << temp.queryToParent << " L:" << temp.queryToParentLocal << ", " << temp.divR << endl;
+
outputResults.push_back(temp);
}
exit(1);
}
}
-
//***************************************************************************************************************
+vector<score_struct> Maligner::extractHighestPath(vector<vector<score_struct> > ms) {
+ try {
+
+ //get matrix dimensions
+ int numCols = query->getAligned().length();
+ int numRows = ms.size();
+
+
+ //find highest score scoring matrix
+ score_struct highestStruct;
+ int highestScore = 0;
+
+ for (int i = 0; i < numRows; i++) {
+ for (int j = 0; j < numCols; j++) {
+ if (ms[i][j].score > highestScore) {
+ highestScore = ms[i][j].score;
+ highestStruct = ms[i][j];
+ }
+ }
+ }
+
+ vector<score_struct> path;
+
+ int rowIndex = highestStruct.row;
+ int pos = highestStruct.col;
+ int score = highestStruct.score;
+
+ while (pos >= 0 && score > 0) {
+ score_struct temp = ms[rowIndex][pos];
+ score = temp.score;
+
+ if (score > 0) { path.push_back(temp); }
+
+ rowIndex = temp.prev;
+ pos--;
+ }
+
+ reverse(path.begin(), path.end());
+
+ return path;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Maligner", "extractHighestPath");
+ exit(1);
+ }
+}
+//***************************************************************************************************************
+vector<trace_struct> Maligner::mapTraceRegionsToAlignment(vector<score_struct> path, vector<Sequence*> seqs) {
+ try {
+ vector<trace_struct> trace;
+
+ int region_index = path[0].row;
+ int region_start = path[0].col;
+
+ for (int i = 1; i < path.size(); i++) {
+
+ int next_region_index = path[i].row;
+
+ if (next_region_index != region_index) {
+
+ // add trace region
+ int col_index = path[i].col;
+ trace_struct temp;
+ temp.col = region_start;
+ temp.oldCol = col_index-1;
+ temp.row = region_index;
+
+ trace.push_back(temp);
+
+ region_index = path[i].row;
+ region_start = col_index;
+ }
+ }
+
+ // get last one
+ trace_struct temp;
+ temp.col = region_start;
+ temp.oldCol = path[path.size()-1].col;
+ temp.row = region_index;
+ trace.push_back(temp);
+
+ return trace;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Maligner", "mapTraceRegionsToAlignment");
+ exit(1);
+ }
+}
+
+/***************************************************************************************************************
vector<trace_struct> Maligner::extractHighestPath(vector<vector<score_struct> > ms) {
try {
exit(1);
}
}
-
+*/
//***************************************************************************************************************
string Maligner::constructChimericSeq(vector<trace_struct> trace, vector<Sequence*> seqs) {
seqAlign = seqAlign.substr(trace[i].col, (trace[i].oldCol-trace[i].col+1));
chimera += seqAlign;
}
-
+// cout << chimera << endl;
+// if (chimera != "") { chimera = chimera.substr(0, (chimera.length()-1)); } //this was introducing a fence post error
+// cout << chimera << endl;
return chimera;
}
catch(exception& e) {
if (queryAlign.length() != chimera.length()) {
m->mothurOut("Error, alignment strings are of different lengths: "); m->mothurOutEndLine();
- m->mothurOut(toString(queryAlign.length())); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOutEndLine();
+ m->mothurOut(toString(queryAlign.length())); m->mothurOutEndLine();
m->mothurOut(toString(chimera.length())); m->mothurOutEndLine();
return -1.0;
}
+// cout << queryAlign.length() << endl;
int numIdentical = 0;
int countA = 0;
int countB = 0;
if ((queryAlign[i] == 'G') || (queryAlign[i] == 'T') || (queryAlign[i] == 'A') || (queryAlign[i] == 'C')) { charA = true; }
if ((chimera[i] == 'G') || (chimera[i] == 'T') || (chimera[i] == 'A') || (chimera[i] == 'C')) { charB = true; }
+
if (charA || charB) {
if (charA) { countA++; }
numIdentical++;
}
}
+// cout << queryAlign[i] << '\t' << chimera[i] << '\t' << countA << '\t' << countB << endl;
+
}
}
+// cout << "pat\t" << countA << '\t' << countB << '\t' << numIdentical << endl;
+
+
float numBases = (countA + countB) /(float) 2;
if (numBases == 0) { return 0; }
float percentIdentical = (numIdentical/(float)numBases) * 100;
-
+
+// cout << percentIdentical << endl;
+
return percentIdentical;
}