+ m->errorOut(e, "DeCalculator", "getCoef");
+ exit(1);
+ }
+}
+//***************************************************************************************************************
+//gets closest matches to each end, since chimeras will most likely have different parents on each end
+//Picks candidate parent sequences for querySeq by comparing only its two ends.
+//  querySeq             - sequence being checked; its aligned string is split by counting bases
+//                         (about 33% of the unaligned length per end).
+//  thisTemplate         - sequences that are copied into the result.
+//  thisFilteredTemplate - sequences the distances are calculated on. Position j here is used to
+//                         look up thisTemplate[j], so both vectors must be in the same order and
+//                         thisTemplate must be at least as long.
+//                         NOTE(review): aligned strings are assumed to be as long as the query's
+//                         alignment - substr() throws otherwise and the catch below exits.
+//  numWanted            - the numWanted+1 nearest subjects of each end are considered.
+//  minSim               - minimum similarity as a percentage (it is divided by 100 to get a distance).
+//Returns copies of the selected template sequences, never including one that has the query's name.
+//Returns an empty vector if the filtered template is empty or m->control_pressed is set while merging.
+//Any exception is reported through m->errorOut and the program exits.
+vector<Sequence> DeCalculator::findClosest(Sequence querySeq, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate, int numWanted, int minSim) {
+    try {
+        vector<Sequence> seqsMatches;
+
+        vector<SeqDist> distsLeft;
+        vector<SeqDist> distsRight;
+
+        Dist* distcalculator = new eachGapDist();
+
+        string queryUnAligned = querySeq.getUnaligned();
+        int numBases = int(queryUnAligned.length() * 0.33);
+
+        string leftQuery = "";  //first 1/3 of the sequence
+        string rightQuery = ""; //last 1/3 of the sequence
+        string queryAligned = querySeq.getAligned();
+        int alignedLength = queryAligned.length();
+
+        //left side - walk forward until numBases bases have been collected
+        bool foundFirstBase = false;
+        int baseCount = 0;
+        int leftSpot = 0;
+        int firstBaseSpot = 0;
+        for (int i = 0; i < alignedLength; i++) {
+            //if you are a base
+            if (isalpha(queryAligned[i])) {
+                baseCount++;
+                if (!foundFirstBase) { foundFirstBase = true; firstBaseSpot = i; }
+            }
+
+            //eliminate opening .'s - nothing is kept until the first base has been seen
+            if (foundFirstBase) { leftQuery += queryAligned[i]; }
+
+            //if you have 1/3
+            if (baseCount >= numBases) { leftSpot = i; break; }
+        }
+
+        //right side - count through another 1/3, so you are at last third.
+        //the scan restarts on leftSpot itself, which is why the threshold is numBases + 1.
+        baseCount = 0;
+        int rightSpot = 0;
+        for (int i = leftSpot; i < alignedLength; i++) {
+            if (isalpha(queryAligned[i])) { baseCount++; }
+            if (baseCount > numBases + 1) { rightSpot = i; break; }
+        }
+
+        //trim end - find last position in query that is a non gap character
+        int lastBaseSpot = alignedLength - 1;
+        for (int j = alignedLength - 1; j >= 0; j--) {
+            if (isalpha(queryAligned[j])) { lastBaseSpot = j; break; }
+        }
+        rightQuery = queryAligned.substr(rightSpot, (lastBaseSpot-rightSpot+1)); //sequence from rightSpot to the last base
+
+        Sequence queryLeft(querySeq.getName(), leftQuery);
+        Sequence queryRight(querySeq.getName(), rightQuery);
+
+        //distance of every filtered template sequence to each end of the query,
+        //measured over the same alignment columns that were cut from the query
+        int numSubjects = thisFilteredTemplate.size();
+        distsLeft.reserve(numSubjects);
+        distsRight.reserve(numSubjects);
+
+        for (int j = 0; j < numSubjects; j++) {
+
+            string dbAligned = thisFilteredTemplate[j]->getAligned();
+            string leftDB = dbAligned.substr(firstBaseSpot, (leftSpot-firstBaseSpot+1)); //first 1/3 of the sequence
+            string rightDB = dbAligned.substr(rightSpot, (lastBaseSpot-rightSpot+1));    //last 1/3 of the sequence
+
+            Sequence dbLeft(thisFilteredTemplate[j]->getName(), leftDB);
+            Sequence dbRight(thisFilteredTemplate[j]->getName(), rightDB);
+
+            distcalculator->calcDist(queryLeft, dbLeft);
+            float distLeft = distcalculator->getDist();
+
+            distcalculator->calcDist(queryRight, dbRight);
+            float distRight = distcalculator->getDist();
+
+            SeqDist subjectLeft;
+            subjectLeft.seq = NULL;
+            subjectLeft.dist = distLeft;
+            subjectLeft.index = j;
+            distsLeft.push_back(subjectLeft);
+
+            SeqDist subjectRight;
+            subjectRight.seq = NULL;
+            subjectRight.dist = distRight;
+            subjectRight.index = j;
+            distsRight.push_back(subjectRight);
+        }
+
+        delete distcalculator;
+
+        //an empty template has no closest match; this also protects the [0] reads below
+        if (distsLeft.empty()) { return seqsMatches; }
+
+        //sort by smallest distance
+        sort(distsRight.begin(), distsRight.end(), compareSeqDist);
+        sort(distsLeft.begin(), distsLeft.end(), compareSeqDist);
+
+        //merge results - seen holds the names already selected so nothing is added twice
+        map<string, string> seen;
+
+        vector<SeqDist> dists;
+        float lastRight = distsRight[0].dist;
+        float lastLeft = distsLeft[0].dist;
+
+        float maxDist = 1.0 - (minSim / 100.0);
+
+        //look at the numWanted+1 nearest subjects of each end, but never past the end of the lists
+        int numToCheck = numWanted + 1;
+        if (numToCheck > numSubjects) { numToCheck = numSubjects; }
+
+        for (int i = 0; i < numToCheck; i++) {
+            if (m->control_pressed) { return seqsMatches; }
+
+            //add left if you havent already
+            string leftName = thisTemplate[distsLeft[i].index]->getName();
+            if ((seen.find(leftName) == seen.end()) && (distsLeft[i].dist <= maxDist)) {
+                dists.push_back(distsLeft[i]);
+                seen[leftName] = leftName;
+                lastLeft = distsLeft[i].dist;
+            }
+
+            //add right if you havent already
+            string rightName = thisTemplate[distsRight[i].index]->getName();
+            if ((seen.find(rightName) == seen.end()) && (distsRight[i].dist <= maxDist)) {
+                dists.push_back(distsRight[i]);
+                seen[rightName] = rightName;
+                lastRight = distsRight[i].dist;
+            }
+        }
+
+        //are we still above the minimum similarity cutoff?
+        //lastLeft/lastRight are distances, so they are compared to maxDist rather than to the minSim percentage.
+        if ((lastLeft <= maxDist) || (lastRight <= maxDist)) {
+            //add in ties from left, skipping subjects that were already selected
+            for (int i = numWanted; (i >= 0) && (i < numSubjects); i++) {
+                if (distsLeft[i].dist != lastLeft) { break; }
+
+                string tieName = thisTemplate[distsLeft[i].index]->getName();
+                if (seen.find(tieName) == seen.end()) {
+                    dists.push_back(distsLeft[i]);
+                    seen[tieName] = tieName;
+                }
+            }
+
+            //add in ties from right, skipping subjects that were already selected
+            for (int i = numWanted; (i >= 0) && (i < numSubjects); i++) {
+                if (distsRight[i].dist != lastRight) { break; }
+
+                string tieName = thisTemplate[distsRight[i].index]->getName();
+                if (seen.find(tieName) == seen.end()) {
+                    dists.push_back(distsRight[i]);
+                    seen[tieName] = tieName;
+                }
+            }
+        }
+
+        //copy out everything that is not the query itself and still meets the similarity cutoff
+        for (int i = 0; i < dists.size(); i++) {
+
+            if ((thisTemplate[dists[i].index]->getName() != querySeq.getName()) && (((1.0-dists[i].dist)*100) >= minSim)) {
+                Sequence temp(thisTemplate[dists[i].index]->getName(), thisTemplate[dists[i].index]->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother.
+                seqsMatches.push_back(temp);
+            }
+
+        }
+
+        return seqsMatches;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "DeCalculator", "findClosest");
+        exit(1);
+    }
+}
+//***************************************************************************************************************
+//Finds the sequence in db with the smallest eachGapDist distance to querySeq.
+//  querySeq - sequence to compare against; it is dereferenced, so it must not be NULL.
+//  db       - candidate sequences.
+//Returns a newly allocated copy (name and aligned string) of the closest candidate, so the copy
+//can be trimmed and filtered without touching db; the caller is responsible for deleting it.
+//When several candidates share the smallest distance the first one in db is used.
+//Returns NULL if db is empty.
+//Any exception is reported through m->errorOut and the program exits.
+Sequence* DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*> db) {
+    try {
+        //nothing to choose from - indexing db[0] below would be out of bounds
+        if (db.empty()) { return NULL; }
+
+        Dist* distcalculator = new eachGapDist();
+        int index = 0;
+
+        //kept as a float: storing the running minimum in an int truncated it,
+        //which made every later "dist < smallest" comparison fail
+        float smallest = 1000000;
+
+        int numSeqs = db.size();
+        for (int j = 0; j < numSeqs; j++) {
+
+            distcalculator->calcDist(*querySeq, *db[j]);
+            float dist = distcalculator->getDist();
+
+            if (dist < smallest) {
+                smallest = dist;
+                index = j;
+            }
+        }
+
+        delete distcalculator;
+
+        Sequence* seqsMatch = new Sequence(db[index]->getName(), db[index]->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother.
+
+        return seqsMatch;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "DeCalculator", "findClosest");
+        exit(1);
+    }
+}
+/***************************************************************************************************************/
+map<int, int> DeCalculator::trimSeqs(Sequence& query, vector<Sequence>& topMatches) {
+ try {
+
+ int frontPos = 0; //should contain first position in all seqs that is not a gap character
+ int rearPos = query.getAligned().length();
+
+ //********find first position in topMatches that is a non gap character***********//
+ //find first position all query seqs that is a non gap character
+ for (int i = 0; i < topMatches.size(); i++) {
+
+ string aligned = topMatches[i].getAligned();
+ int pos = 0;
+
+ //find first spot in this seq
+ for (int j = 0; j < aligned.length(); j++) {
+ if (isalpha(aligned[j])) {
+ pos = j;
+ break;
+ }
+ }
+
+ //save this spot if it is the farthest
+ if (pos > frontPos) { frontPos = pos; }
+ }
+
+
+ string aligned = query.getAligned();
+ int pos = 0;
+
+ //find first position in query that is a non gap character
+ for (int j = 0; j < aligned.length(); j++) {
+ if (isalpha(aligned[j])) {
+ pos = j;
+ break;
+ }
+ }
+
+ //save this spot if it is the farthest
+ if (pos > frontPos) { frontPos = pos; }
+
+
+ //********find last position in topMatches that is a non gap character***********//
+ for (int i = 0; i < topMatches.size(); i++) {
+
+ string aligned = topMatches[i].getAligned();
+ int pos = aligned.length();
+
+ //find first spot in this seq
+ for (int j = aligned.length()-1; j >= 0; j--) {
+ if (isalpha(aligned[j])) {
+ pos = j;
+ break;
+ }
+ }
+
+ //save this spot if it is the farthest
+ if (pos < rearPos) { rearPos = pos; }
+ }
+
+
+ aligned = query.getAligned();
+ pos = aligned.length();
+
+ //find last position in query that is a non gap character
+ for (int j = aligned.length()-1; j >= 0; j--) {
+ if (isalpha(aligned[j])) {
+ pos = j;
+ break;
+ }
+ }
+
+ //save this spot if it is the farthest
+ if (pos < rearPos) { rearPos = pos; }
+
+ map<int, int> trimmedPos;
+ //check to make sure that is not whole seq
+ if ((rearPos - frontPos - 1) <= 0) {
+ query.setAligned("");
+ //trim topMatches
+ for (int i = 0; i < topMatches.size(); i++) {
+ topMatches[i].setAligned("");
+ }
+
+ }else {
+
+ //trim query
+ string newAligned = query.getAligned();
+ newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1));
+ query.setAligned(newAligned);
+
+ //trim topMatches
+ for (int i = 0; i < topMatches.size(); i++) {
+ newAligned = topMatches[i].getAligned();
+ newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1));
+ topMatches[i].setAligned(newAligned);
+ }
+
+ for (int i = 0; i < newAligned.length(); i++) {
+ trimmedPos[i] = i+frontPos;
+ }
+ }
+ return trimmedPos;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "DeCalculator", "trimSequences");