//********************************************************************************************************************
//Comparator passed to sort(): returns true when left.dist is strictly less than right.dist,
//so the sorted sequence runs from lowest to highest dist.
//This patch only changes the element type from DistNode to seqDist; the comparison is unchanged.
-inline bool compareOverlap(DistNode left, DistNode right){
+inline bool compareOverlap(seqDist left, seqDist right){
return (left.dist < right.dist);
}
/*********************************************************************************************/
//Constructor: copies its arguments into blastfile, cutoff, penalty, length, minWanted and hclusterWanted.
//The patch renames the bool parameter m to ms; presumably this frees the name m for the
//MothurOut pointer assigned in the body (its declaration is not visible in this excerpt).
-ReadBlast::ReadBlast(string file, float c, float p, int l, bool m, bool h) : blastfile(file), cutoff(c), penalty(p), length(l), minWanted(m), hclusterWanted(h) {
+ReadBlast::ReadBlast(string file, float c, float p, int l, bool ms, bool h) : blastfile(file), cutoff(c), penalty(p), length(l), minWanted(ms), hclusterWanted(h) {
try {
//grab the MothurOut instance; the patched code routes error output and cancellation checks through it
+ m = MothurOut::getInstance();
//no matrix exists yet
matrix = NULL;
}
catch(exception& e) {
//report the failure (now through m rather than the free function) and terminate the program
- errorOut(e, "ReadBlast", "ReadBlast");
+ m->errorOut(e, "ReadBlast", "ReadBlast");
exit(1);
}
}
//Reads the blast file. This is a patch excerpt made of several hunks, so statements between
//hunks are not shown and the visible braces do not balance on their own.
//With this patch the function returns int instead of void; every return visible here yields 0,
//including the early exits taken when m->control_pressed is set.
//NOTE(review): cancellation and normal completion both return 0, so callers presumably
//re-check control_pressed themselves - confirm.
//assumptions about the blast file:
//1. if duplicate lines occur, the first line is always the best and is the one chosen
//2. blast scores are grouped together, i.e. a a .... score, a b .... score, a c .... score...
-void ReadBlast::read(NameAssignment* nameMap) {
+int ReadBlast::read(NameAssignment* nameMap) {
try {
//if the user has not given a names file read names from blastfile
if (nameMap->size() == 0) { readNames(nameMap); }
int nseqs = nameMap->size();
+
//readNames() also returns early on cancellation, so stop here before any file is opened
+ if (m->control_pressed) { return 0; }
ifstream fileHandle;
openInputFile(blastfile, fileHandle);
openOutputFile(overlapFile, outOverlap);
openOutputFile(distFile, outDist);
}
-
+
//cancelled: close the input, then either free the matrix (non-hcluster mode) or close and
//delete the overlap/dist output files (hcluster mode). The Progress object does not exist yet.
//NOTE(review): matrix is deleted but not reset to NULL - confirm nothing touches it afterwards.
+ if (m->control_pressed) {
+ fileHandle.close();
+ if (!hclusterWanted) { delete matrix; }
+ else { outOverlap.close(); remove(overlapFile.c_str()); outDist.close(); remove(distFile.c_str()); }
+ return 0;
+ }
+
//progress reporter sized to nseqs * nseqs
Progress* reading = new Progress("Reading blast: ", nseqs * nseqs);
//this is used to quickly find if we already have a distance for this combo
//if there is a valid overlap, add it
if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap < cutoff)) {
if (!hclusterWanted) {
//keep the overlap in memory; the patch switches the record type from DistNode to seqDist
- DistNode overlapValue(itA->second, itB->second, thisoverlap);
+ seqDist overlapValue(itA->second, itB->second, thisoverlap);
overlap.push_back(overlapValue);
}else {
//hcluster mode: write the overlap to the overlap file instead of storing it
outOverlap << itA->first << '\t' << itB->first << '\t' << thisoverlap << endl;
}
}
}
//unreadable blast file: print the message (now through m) and terminate the program
- }else { mothurOut("Error in your blast file, cannot read."); mothurOutEndLine(); exit(1); }
+ }else { m->mothurOut("Error in your blast file, cannot read."); m->mothurOutEndLine(); exit(1); }
//read file
//NOTE(review): the loop is driven by eof(); whether trailing whitespace is consumed after each
//record is not visible in this excerpt - confirm the last record is not processed twice.
while(!fileHandle.eof()){
+
//cancelled mid-read: same cleanup as above, plus the Progress object which now exists
+ if (m->control_pressed) {
+ fileHandle.close();
+ if (!hclusterWanted) { delete matrix; }
+ else { outOverlap.close(); remove(overlapFile.c_str()); outDist.close(); remove(distFile.c_str()); }
+ delete reading;
+ return 0;
+ }
//read in line from file
//each record is twelve whitespace-separated fields, read in this order
fileHandle >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score;
//if there is a valid overlap, add it
if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap < cutoff)) {
if (!hclusterWanted) {
- DistNode overlapValue(itA->second, itB->second, thisoverlap);
+ seqDist overlapValue(itA->second, itB->second, thisoverlap);
//cout << "overlap = " << itA->second << '\t' << itB->second << '\t' << thisoverlap << endl;
overlap.push_back(overlapValue);
}else {
map<int, float>::iterator itDist;
for(it=thisRowsBlastScores.begin(); it!=thisRowsBlastScores.end(); it++) {
//distance is one minus the ratio of this score to refScore
distance = 1.0 - (it->second / refScore);
+
//do we already have the distance calculated for b->a
map<string,int>::iterator itA = nameMap->find(currentRow);
//if we have it then compare
if (itDist != dists[it->first].end()) {
+
//if you want the minimum blast score ratio, then pick max distance
if(minWanted) { distance = max(itDist->second, distance); }
else{ distance = min(itDist->second, distance); }
-
+
//is this distance below cutoff
if (distance < cutoff) {
if (!hclusterWanted) {
//if there is a valid overlap, add it
if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap < cutoff)) {
if (!hclusterWanted) {
- DistNode overlapValue(itA->second, itB->second, thisoverlap);
+ seqDist overlapValue(itA->second, itB->second, thisoverlap);
overlap.push_back(overlapValue);
}else {
outOverlap << itA->first << '\t' << itB->first << '\t' << thisoverlap << endl;
//release the per-row score and distance lookups
thisRowsBlastScores.clear();
dists.clear();
//cancelled after reading: same cleanup as inside the loop
+ if (m->control_pressed) {
+ fileHandle.close();
+ if (!hclusterWanted) { delete matrix; }
+ else { outOverlap.close(); remove(overlapFile.c_str()); outDist.close(); remove(distFile.c_str()); }
+ delete reading;
+ return 0;
+ }
+
if (!hclusterWanted) {
//order the in-memory overlaps by ascending dist (see compareOverlap)
sort(overlap.begin(), overlap.end(), compareOverlap);
}else {
outOverlap.close();
}
//final cancellation check; in hcluster mode outOverlap was closed just above, so the
//output files are only removed here
//NOTE(review): no outDist.close() is visible before this remove - confirm it happens in omitted lines.
+ if (m->control_pressed) {
+ fileHandle.close();
+ if (!hclusterWanted) { delete matrix; }
+ else { remove(overlapFile.c_str()); remove(distFile.c_str()); }
+ delete reading;
+ return 0;
+ }
+
//normal completion: finish and free the progress reporter, then close the input
reading->finish();
delete reading;
fileHandle.close();
+
+ return 0;
}
catch(exception& e) {
//report the failure (now through m) and terminate the program
- errorOut(e, "ReadBlast", "read");
+ m->errorOut(e, "ReadBlast", "read");
exit(1);
}
}
/*********************************************************************************************/
//Scans the blast file and pushes sequence names into nameMap. This is a patch excerpt, so
//some statements between hunks (including the end of the while loop) are not shown.
//With this patch the function returns int instead of void; every return visible here yields 0,
//including the early exits taken when m->control_pressed is set.
-void ReadBlast::readNames(NameAssignment* nameMap) {
+int ReadBlast::readNames(NameAssignment* nameMap) {
try {
- mothurOut("Reading names... "); cout.flush();
+ m->mothurOut("Reading names... "); cout.flush();
string name, hold, prevName;
int num = 1;
ifstream in;
openInputFile(blastfile, in);
+ //ofstream outName;
+ //openOutputFile((blastfile + ".tempOutNames"), outName);
+
//read first line
in >> prevName;
+
//skip the remaining eleven fields of the line (each record has twelve fields in total)
for (int i = 0; i < 11; i++) { in >> hold; }
gobble(in);
-
+
//save name in nameMap
nameMap->push_back(prevName);
while (!in.eof()) {
//cancelled: close the input and stop
+ if (m->control_pressed) { in.close(); return 0; }
//read line
in >> name;
+
//skip the remaining eleven fields of this line
for (int i = 0; i < 11; i++) { in >> hold; }
gobble(in);
//nameMap->print(out);
//out.close();
- mothurOut(toString(num) + " names read."); mothurOutEndLine();
//cancelled: skip the summary message
//NOTE(review): no in.close() on this path - confirm the stream is closed in the omitted lines.
+ if (m->control_pressed) { return 0; }
+
+ m->mothurOut(toString(num) + " names read."); m->mothurOutEndLine();
+
+ return 0;
}
catch(exception& e) {
//report the failure (now through m) and terminate the program
- errorOut(e, "ReadBlast", "readNames");
+ m->errorOut(e, "ReadBlast", "readNames");
exit(1);
}
}