*/
#include "decalc.h"
+#include "chimera.h"
+#include "dist.h"
+#include "eachgapdist.h"
+
//***************************************************************************************************************
void DeCalculator::setMask(string m) {
if (size == 0) { if (cutoff > 1200) { size = 300; }
else{ size = (cutoff / 4); } }
else if (size > (cutoff / 4)) {
- mothurOut("You have selected to large a window size for sequence " + query->getName() + ". I will choose an appropriate window size."); mothurOutEndLine();
+ mothurOut("You have selected too large a window size for sequence " + query->getName() + ". I will choose an appropriate window size."); mothurOutEndLine();
size = (cutoff / 4);
}
}
}
//***************************************************************************************************************
+//gets closest matches to each end, since chimeras will most likely have different parents on each end
+vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*> db, int numWanted) {
+ try {
+
+ vector<Sequence*> seqsMatches;
+ vector<SeqDist> dists;
+
+ Dist* distcalculator = new eachGapDist();
+
+ string queryAligned = querySeq->getAligned();
+ string leftQuery = queryAligned.substr(0, (queryAligned.length() / 3)); //first 1/3 of the sequence
+ string rightQuery = queryAligned.substr(((queryAligned.length() / 3)*2)); //last 1/3 of the sequence
+
+ Sequence queryLeft(querySeq->getName(), leftQuery);
+ Sequence queryRight(querySeq->getName(), rightQuery);
+
+ for(int j = 0; j < db.size(); j++){
+
+ string dbAligned = db[j]->getAligned();
+ string leftDB = dbAligned.substr(0, (dbAligned.length() / 3)); //first 1/3 of the sequence
+ string rightDB = dbAligned.substr(((dbAligned.length() / 3)*2)); //last 1/3 of the sequence
+
+ Sequence dbLeft(db[j]->getName(), leftDB);
+ Sequence dbRight(db[j]->getName(), rightDB);
+
+ distcalculator->calcDist(queryLeft, dbLeft);
+ float distLeft = distcalculator->getDist();
+
+ distcalculator->calcDist(queryRight, dbRight);
+ float distRight = distcalculator->getDist();
+
+ float dist = min(distLeft, distRight); //keep smallest distance
+
+ SeqDist subject;
+ subject.seq = db[j];
+ subject.dist = dist;
+
+ dists.push_back(subject);
+ }
+
+ delete distcalculator;
+
+ sort(dists.begin(), dists.end(), compareSeqDist);
+
+ for (int i = 0; i < numWanted; i++) {
+ Sequence* temp = new Sequence(dists[i].seq->getName(), dists[i].seq->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother.
+ seqsMatches.push_back(temp);
+ }
+
+ return seqsMatches;
+ }
+ catch(exception& e) {
+ errorOut(e, "DeCalculator", "findClosest");
+ exit(1);
+ }
+}
+/***************************************************************************************************************/
+map<int, int> DeCalculator::trimSeqs(Sequence* query, vector<Sequence*> topMatches) {
+ try {
+
+ int frontPos = 0; //should contain first position in all seqs that is not a gap character
+ int rearPos = query->getAligned().length();
+
+ //********find first position in topMatches that is a non gap character***********//
+ //find first position all query seqs that is a non gap character
+ for (int i = 0; i < topMatches.size(); i++) {
+
+ string aligned = topMatches[i]->getAligned();
+ int pos = 0;
+
+ //find first spot in this seq
+ for (int j = 0; j < aligned.length(); j++) {
+ if (isalpha(aligned[j])) {
+ pos = j;
+ break;
+ }
+ }
+
+ //save this spot if it is the farthest
+ if (pos > frontPos) { frontPos = pos; }
+ }
+
+
+ string aligned = query->getAligned();
+ int pos = 0;
+
+ //find first position in query that is a non gap character
+ for (int j = 0; j < aligned.length(); j++) {
+ if (isalpha(aligned[j])) {
+ pos = j;
+ break;
+ }
+ }
+
+ //save this spot if it is the farthest
+ if (pos > frontPos) { frontPos = pos; }
+
+
+ //********find last position in topMatches that is a non gap character***********//
+ for (int i = 0; i < topMatches.size(); i++) {
+
+ string aligned = topMatches[i]->getAligned();
+ int pos = aligned.length();
+
+ //find first spot in this seq
+ for (int j = aligned.length()-1; j >= 0; j--) {
+ if (isalpha(aligned[j])) {
+ pos = j;
+ break;
+ }
+ }
+
+ //save this spot if it is the farthest
+ if (pos < rearPos) { rearPos = pos; }
+ }
+
+
+ aligned = query->getAligned();
+ pos = aligned.length();
+
+ //find last position in query that is a non gap character
+ for (int j = aligned.length()-1; j >= 0; j--) {
+ if (isalpha(aligned[j])) {
+ pos = j;
+ break;
+ }
+ }
+
+ //save this spot if it is the farthest
+ if (pos < rearPos) { rearPos = pos; }
+
+ //check to make sure that is not whole seq
+ if ((rearPos - frontPos - 1) <= 0) { mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); mothurOutEndLine(); exit(1); }
+//cout << "front = " << frontPos << " rear = " << rearPos << endl;
+ //trim query
+ string newAligned = query->getAligned();
+ newAligned = newAligned.substr(frontPos, (rearPos-frontPos));
+ query->setAligned(newAligned);
+
+ //trim topMatches
+ for (int i = 0; i < topMatches.size(); i++) {
+ newAligned = topMatches[i]->getAligned();
+ newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1));
+ topMatches[i]->setAligned(newAligned);
+ }
+
+ map<int, int> trimmedPos;
+
+ for (int i = 0; i < newAligned.length(); i++) {
+ trimmedPos[i] = i+frontPos;
+ }
+
+ return trimmedPos;
+ }
+ catch(exception& e) {
+ errorOut(e, "DeCalculator", "trimSequences");
+ exit(1);
+ }
+
+}
+//***************************************************************************************************************
+