queryFile << '>' << seq->getName() << endl;
queryFile << seq->getUnaligned() << endl;
queryFile.close();
-
+ cout << seq->getUnaligned() << endl;
// the goal here is to quickly survey the database to find the closest match. To do this we are using the default
// wordsize used in megablast. I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
// long. With this setting, it seems comparable in speed to the suffix tree approach.
while(!m8FileHandle.eof()){
m8FileHandle >> dummy >> templateAccession >> searchScore >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score;
- //cout << dummy << '\t' << templateAccession << '\t' << searchScore << '\t' << numBases << '\t' << mismatch << '\t' << gap << '\t' << startQuery << '\t' << endQuery << '\t' << startRef << '\t' << endRef << '\t' << eScore << '\t' << score << endl;
+ cout << dummy << '\t' << templateAccession << '\t' << searchScore << '\t' << numBases << '\t' << mismatch << '\t' << gap << '\t' << startQuery << '\t' << endQuery << '\t' << startRef << '\t' << endRef << '\t' << eScore << '\t' << score << endl;
//get rest of junk in line
//while (!m8FileHandle.eof()) { char c = m8FileHandle.get(); if (c == 10 || c == 13){ break; }else{ cout << c; } } //
m8FileHandle.close();
remove((queryFileName+seq->getName()).c_str());
remove((blastFileName+seq->getName()).c_str());
-//cout << "\n" ;
+cout << "\n" ;
return topMatches;
}
catch(exception& e) {
//if you are a base
if (isalpha(queryAligned[i])) { baseCount++; }
//if you have 1/3
- if (baseCount >= numBases) { rightSpot = i; break; } //last 1/3
+ if (baseCount > numBases + 1) { rightSpot = i; break; } //last 1/3
}
//trim end
break;
}
}
- rightQuery = queryAligned.substr(rightSpot, (lastBaseSpot-rightSpot)); //sequence from pos spot to end
+ rightQuery = queryAligned.substr(rightSpot, (lastBaseSpot-rightSpot+1)); //sequence from pos spot to end
Sequence queryLeft(querySeq->getName(), leftQuery);
Sequence queryRight(querySeq->getName(), rightQuery);
+
//cout << querySeq->getName() << '\t' << leftSpot << '\t' << rightSpot << '\t' << firstBaseSpot << '\t' << lastBaseSpot << endl;
//cout << queryUnAligned.length() << '\t' << queryLeft.getUnaligned().length() << '\t' << queryRight.getUnaligned().length() << endl;
for(int j = 0; j < thisFilteredTemplate.size(); j++){
string dbAligned = thisFilteredTemplate[j]->getAligned();
string leftDB = dbAligned.substr(firstBaseSpot, (leftSpot-firstBaseSpot+1)); //first 1/3 of the sequence
- string rightDB = dbAligned.substr(rightSpot, (lastBaseSpot-rightSpot)); //last 1/3 of the sequence
-
+ string rightDB = dbAligned.substr(rightSpot, (lastBaseSpot-rightSpot+1)); //last 1/3 of the sequence
+
Sequence dbLeft(thisFilteredTemplate[j]->getName(), leftDB);
Sequence dbRight(thisFilteredTemplate[j]->getName(), rightDB);
distcalculator->calcDist(queryRight, dbRight);
float distRight = distcalculator->getDist();
-
+
SeqDist subjectLeft;
subjectLeft.seq = NULL;
subjectLeft.dist = distLeft;
//sort by smallest distance
sort(distsRight.begin(), distsRight.end(), compareSeqDist);
sort(distsLeft.begin(), distsLeft.end(), compareSeqDist);
+
//merge results
map<string, string> seen;
vector<SeqDist> dists;
float lastRight = distsRight[0].dist;
float lastLeft = distsLeft[0].dist;
- //int lasti = 0;
- for (int i = 0; i < distsLeft.size(); i++) {
-
+
+ float maxDist = 1.0 - (minSim / 100.0);
+
+ for (int i = 0; i < numWanted+1; i++) {
if (m->control_pressed) { return seqsMatches; }
//add left if you havent already
it = seen.find(thisTemplate[distsLeft[i].index]->getName());
- if (it == seen.end()) {
+ if (it == seen.end() && distsLeft[i].dist <= maxDist) {
dists.push_back(distsLeft[i]);
seen[thisTemplate[distsLeft[i].index]->getName()] = thisTemplate[distsLeft[i].index]->getName();
lastLeft = distsLeft[i].dist;
//add right if you havent already
it = seen.find(thisTemplate[distsRight[i].index]->getName());
- if (it == seen.end()) {
+ if (it == seen.end() && distsRight[i].dist <= maxDist) {
dists.push_back(distsRight[i]);
seen[thisTemplate[distsRight[i].index]->getName()] = thisTemplate[distsRight[i].index]->getName();
lastRight = distsRight[i].dist;
}
}
-
-
//cout << numWanted << endl;
for (int i = 0; i < dists.size(); i++) {
// cout << db[dists[i].index]->getName() << '\t' << dists[i].dist << endl;