#include "dist.h"
#include "eachgapdist.h"
#include "ignoregaps.h"
-
+#include "eachgapdistignorens.h"
//***************************************************************************************************************
void DeCalculator::setMask(string ms) {
try {
vector<float> prob;
- string freqfile = getRootName(filename) + "freq";
+ string freqfile = m->getRootName(filename) + "freq";
ofstream outFreq;
- openOutputFile(freqfile, outFreq);
+ m->openOutputFile(freqfile, outFreq);
outFreq << "#" << m->getVersion() << endl;
indexes.clear();
vector<Sequence*> seqsMatches;
+
vector<SeqDist> distsLeft;
vector<SeqDist> distsRight;
- Dist* distcalculator = new eachGapDist();
+ Dist* distcalculator = new eachGapDistIgnoreNs();
string queryUnAligned = querySeq->getUnaligned();
int numBases = int(queryUnAligned.length() * 0.33);
float distRight = distcalculator->getDist();
SeqDist subjectLeft;
- subjectLeft.seq = db[j];
+ subjectLeft.seq = NULL;
subjectLeft.dist = distLeft;
subjectLeft.index = j;
distsLeft.push_back(subjectLeft);
SeqDist subjectRight;
- subjectRight.seq = db[j];
+ subjectRight.seq = NULL;
subjectRight.dist = distRight;
subjectRight.index = j;
//sort by smallest distance
sort(distsRight.begin(), distsRight.end(), compareSeqDist);
sort(distsLeft.begin(), distsLeft.end(), compareSeqDist);
+// cout << distsLeft.size() << '\t' << distsRight.size() << endl;
+// for(int i=0;i<15;i++){
+// cout << "left\t" << db[distsLeft[i].index]->getName() << '\t' << distsLeft[i].dist << endl;
+// }
+// for(int i=0;i<15;i++){
+// cout << "right\t" << db[distsRight[i].index]->getName() << '\t' << distsRight[i].dist << endl;
+// }
+
//merge results
map<string, string> seen;
int lasti = 0;
for (int i = 0; i < distsLeft.size(); i++) {
//add left if you havent already
- it = seen.find(distsLeft[i].seq->getName());
+ it = seen.find(db[distsLeft[i].index]->getName());
if (it == seen.end()) {
dists.push_back(distsLeft[i]);
- seen[distsLeft[i].seq->getName()] = distsLeft[i].seq->getName();
+ seen[db[distsLeft[i].index]->getName()] = db[distsLeft[i].index]->getName();
lastLeft = distsLeft[i].dist;
+// cout << "loop-left\t" << db[distsLeft[i].index]->getName() << '\t' << distsLeft[i].dist << endl;
}
//add right if you havent already
- it = seen.find(distsRight[i].seq->getName());
+ it = seen.find(db[distsRight[i].index]->getName());
if (it == seen.end()) {
dists.push_back(distsRight[i]);
- seen[distsRight[i].seq->getName()] = distsRight[i].seq->getName();
+ seen[db[distsRight[i].index]->getName()] = db[distsRight[i].index]->getName();
lastRight = distsRight[i].dist;
+// cout << "loop-right\t" << db[distsRight[i].index]->getName() << '\t' << distsRight[i].dist << endl;
}
if (dists.size() > numWanted) { lasti = i; break; } //you have enough results
}
- //add in dups
+// cout << "lastLeft\t" << lastLeft << endl;
+
+ //add in sequences with same distance as last sequence added
lasti++;
int i = lasti;
while (i < distsLeft.size()) {
- if (distsLeft[i].dist == lastLeft) { dists.push_back(distsLeft[i]); numWanted++; }
+ if (distsLeft[i].dist == lastLeft) {
+ it = seen.find(db[distsLeft[i].index]->getName());
+
+ if (it == seen.end()) {
+// cout << "newLoop-left\t" << db[distsLeft[i].index]->getName() << '\t' << distsLeft[i].dist << endl;
+ dists.push_back(distsLeft[i]);
+ seen[db[distsLeft[i].index]->getName()] = db[distsLeft[i].index]->getName(); //key on the LEFT sequence's own name (was distsRight[i]) so the sequence just added is the one marked as seen
+// numWanted++;
+ }
+ }
else { break; }
i++;
}
+// cout << "lastRight\t" << lastRight << endl;
+ //add in sequences with same distance as last sequence added
i = lasti;
while (i < distsRight.size()) {
- if (distsRight[i].dist == lastRight) { dists.push_back(distsRight[i]); numWanted++; }
+ if (distsRight[i].dist == lastRight) {
+ it = seen.find(db[distsRight[i].index]->getName());
+
+ if (it == seen.end()) {
+// cout << "newLoop-right\t" << db[distsRight[i].index]->getName() << '\t' << distsRight[i].dist << endl;
+ dists.push_back(distsRight[i]);
+ seen[db[distsRight[i].index]->getName()] = db[distsRight[i].index]->getName();
+// numWanted++;
+ }
+ }
else { break; }
i++;
}
- if (numWanted > dists.size()) { m->mothurOut("numwanted is larger than the number of template sequences, adjusting numwanted."); m->mothurOutEndLine(); numWanted = dists.size(); }
+ numWanted = seen.size();
+
+ if (numWanted > dists.size()) {
+ //m->mothurOut("numwanted is larger than the number of template sequences, adjusting numwanted."); m->mothurOutEndLine();
+ numWanted = dists.size();
+ }
//cout << numWanted << endl;
for (int i = 0; i < numWanted; i++) {
-//cout << dists[i].seq->getName() << '\t' << dists[i].dist << endl;
- Sequence* temp = new Sequence(dists[i].seq->getName(), dists[i].seq->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother.
- seqsMatches.push_back(temp);
- indexes.push_back(dists[i].index);
+// cout << db[dists[i].index]->getName() << '\t' << dists[i].dist << endl;
+
+ if (db[dists[i].index]->getName() != querySeq->getName()) {
+ Sequence* temp = new Sequence(db[dists[i].index]->getName(), db[dists[i].index]->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother.
+
+ seqsMatches.push_back(temp);
+ indexes.push_back(dists[i].index);
+ }
+
}
return seqsMatches;
Sequence* seqsMatch;
- Dist* distcalculator = new eachGapDist();
+ Dist* distcalculator = new eachGapDistIgnoreNs();
int index = 0;
int smallest = 1000000;
//save this spot if it is the farthest
if (pos < rearPos) { rearPos = pos; }
-
- //check to make sure that is not whole seq
- if ((rearPos - frontPos - 1) <= 0) { m->mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); m->mothurOutEndLine(); exit(1); }
-//cout << query->getName() << " front = " << frontPos << " rear = " << rearPos << endl;
- //trim query
- string newAligned = query->getAligned();
- newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1));
- query->setAligned(newAligned);
-
- //trim topMatches
- for (int i = 0; i < topMatches.size(); i++) {
- newAligned = topMatches[i]->getAligned();
- newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1));
- topMatches[i]->setAligned(newAligned);
- }
map<int, int> trimmedPos;
-
- for (int i = 0; i < newAligned.length(); i++) {
- trimmedPos[i] = i+frontPos;
+ //check to make sure that is not whole seq
+ if ((rearPos - frontPos - 1) <= 0) {
+ query->setAligned("");
+ //trim topMatches
+ for (int i = 0; i < topMatches.size(); i++) {
+ topMatches[i]->setAligned("");
+ }
+
+ }else {
+
+ //trim query
+ string newAligned = query->getAligned();
+ newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1));
+ query->setAligned(newAligned);
+
+ //trim topMatches
+ for (int i = 0; i < topMatches.size(); i++) {
+ newAligned = topMatches[i]->getAligned();
+ newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1));
+ topMatches[i]->setAligned(newAligned);
+ }
+
+ for (int i = 0; i < newAligned.length(); i++) {
+ trimmedPos[i] = i+frontPos;
+ }
}
-
return trimmedPos;
}
catch(exception& e) {