X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=readblast.cpp;h=1efaf5b5e3526b8ec53ff3651f9de4b540720b7e;hb=260ae19c36cb11a53ddc5a75b5e507f8dd8b31d6;hp=a6d23e95bfddf71afb20a2d4c2b2d8d1377db71d;hpb=b5a791c81d432082bf38755a08b33863f255341d;p=mothur.git diff --git a/readblast.cpp b/readblast.cpp index a6d23e9..1efaf5b 100644 --- a/readblast.cpp +++ b/readblast.cpp @@ -16,12 +16,13 @@ inline bool compareOverlap(seqDist left, seqDist right){ return (left.dist < right.dist); } /*********************************************************************************************/ -ReadBlast::ReadBlast(string file, float c, float p, int l, bool m, bool h) : blastfile(file), cutoff(c), penalty(p), length(l), minWanted(m), hclusterWanted(h) { +ReadBlast::ReadBlast(string file, float c, float p, int l, bool ms, bool h) : blastfile(file), cutoff(c), penalty(p), length(l), minWanted(ms), hclusterWanted(h) { try { + m = MothurOut::getInstance(); matrix = NULL; } catch(exception& e) { - errorOut(e, "ReadBlast", "ReadBlast"); + m->errorOut(e, "ReadBlast", "ReadBlast"); exit(1); } } @@ -29,15 +30,17 @@ ReadBlast::ReadBlast(string file, float c, float p, int l, bool m, bool h) : bla //assumptions about the blast file: //1. if duplicate lines occur the first line is always best and is chosen //2. blast scores are grouped together, ie. a a .... score, a b .... score, a c ....score... -void ReadBlast::read(NameAssignment* nameMap) { +int ReadBlast::read(NameAssignment* nameMap) { try { //if the user has not given a names file read names from blastfile if (nameMap->size() == 0) { readNames(nameMap); } int nseqs = nameMap->size(); + + if (m->control_pressed) { return 0; } ifstream fileHandle; - openInputFile(blastfile, fileHandle); + m->openInputFile(blastfile, fileHandle); string firstName, secondName, eScore, currentRow; string repeatName = ""; @@ -53,13 +56,20 @@ void ReadBlast::read(NameAssignment* nameMap) { if (!hclusterWanted) { matrix = new SparseMatrix(); }else{ - overlapFile = getRootName(blastfile) + "overlap.dist"; - distFile = getRootName(blastfile) + "hclusterDists.dist"; + overlapFile = m->getRootName(blastfile) + "overlap.dist"; + distFile = m->getRootName(blastfile) + "hclusterDists.dist"; - openOutputFile(overlapFile, outOverlap); - openOutputFile(distFile, outDist); + m->openOutputFile(overlapFile, outOverlap); + m->openOutputFile(distFile, outDist); } - + + if (m->control_pressed) { + fileHandle.close(); + if (!hclusterWanted) { delete matrix; } + else { outOverlap.close(); remove(overlapFile.c_str()); outDist.close(); remove(distFile.c_str()); } + return 0; + } + Progress* reading = new Progress("Reading blast: ", nseqs * nseqs); //this is used to quickly find if we already have a distance for this combo @@ -69,7 +79,7 @@ void ReadBlast::read(NameAssignment* nameMap) { if (!fileHandle.eof()) { //read in line from file fileHandle >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score; - gobble(fileHandle); + m->gobble(fileHandle); currentRow = firstName; lengthThisSeq = numBases; @@ -98,16 +108,24 @@ void ReadBlast::read(NameAssignment* nameMap) { } } } - }else { mothurOut("Error in your blast file, cannot read."); mothurOutEndLine(); exit(1); } + }else { m->mothurOut("Error in your blast file, cannot read."); m->mothurOutEndLine(); exit(1); } //read file while(!fileHandle.eof()){ + + if (m->control_pressed) { + fileHandle.close(); + if (!hclusterWanted) { delete matrix; } + else { outOverlap.close(); remove(overlapFile.c_str()); outDist.close(); remove(distFile.c_str()); } + delete reading; + return 0; + } //read in line from file fileHandle >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score; //cout << firstName << '\t' << secondName << '\t' << percentId << '\t' << numBases << '\t' << mismatch << '\t' << gap << '\t' << startQuery << '\t' << endQuery << '\t' << startRef << '\t' << endRef << '\t' << eScore << '\t' << score << endl; - gobble(fileHandle); + m->gobble(fileHandle); string temp = firstName + secondName; //to check if this file has repeat lines, ie. is this a blast instead of a blscreen file @@ -151,6 +169,7 @@ void ReadBlast::read(NameAssignment* nameMap) { map::iterator itDist; for(it=thisRowsBlastScores.begin(); it!=thisRowsBlastScores.end(); it++) { distance = 1.0 - (it->second / refScore); + //do we already have the distance calculated for b->a map::iterator itA = nameMap->find(currentRow); @@ -158,10 +177,11 @@ void ReadBlast::read(NameAssignment* nameMap) { //if we have it then compare if (itDist != dists[it->first].end()) { + //if you want the minimum blast score ratio, then pick max distance if(minWanted) { distance = max(itDist->second, distance); } else{ distance = min(itDist->second, distance); } - + //is this distance below cutoff if (distance < cutoff) { if (!hclusterWanted) { @@ -248,6 +268,14 @@ void ReadBlast::read(NameAssignment* nameMap) { thisRowsBlastScores.clear(); dists.clear(); + if (m->control_pressed) { + fileHandle.close(); + if (!hclusterWanted) { delete matrix; } + else { outOverlap.close(); remove(overlapFile.c_str()); outDist.close(); remove(distFile.c_str()); } + delete reading; + return 0; + } + if (!hclusterWanted) { sort(overlap.begin(), overlap.end(), compareOverlap); }else { @@ -255,40 +283,56 @@ void ReadBlast::read(NameAssignment* nameMap) { outOverlap.close(); } + if (m->control_pressed) { + fileHandle.close(); + if (!hclusterWanted) { delete matrix; } + else { remove(overlapFile.c_str()); remove(distFile.c_str()); } + delete reading; + return 0; + } + reading->finish(); delete reading; fileHandle.close(); + + return 0; } catch(exception& e) { - errorOut(e, "ReadBlast", "read"); + m->errorOut(e, "ReadBlast", "read"); exit(1); } } /*********************************************************************************************/ -void ReadBlast::readNames(NameAssignment* nameMap) { +int ReadBlast::readNames(NameAssignment* nameMap) { try { - mothurOut("Reading names... "); cout.flush(); + m->mothurOut("Reading names... "); cout.flush(); string name, hold, prevName; int num = 1; ifstream in; - openInputFile(blastfile, in); + m->openInputFile(blastfile, in); + + //ofstream outName; + //m->openOutputFile((blastfile + ".tempOutNames"), outName); //read first line in >> prevName; + for (int i = 0; i < 11; i++) { in >> hold; } - gobble(in); - + m->gobble(in); + //save name in nameMap nameMap->push_back(prevName); while (!in.eof()) { + if (m->control_pressed) { in.close(); return 0; } //read line in >> name; + for (int i = 0; i < 11; i++) { in >> hold; } - gobble(in); + m->gobble(in); //is this a new name? if (name != prevName) { @@ -301,17 +345,21 @@ void ReadBlast::readNames(NameAssignment* nameMap) { in.close(); //write out names file - //string outNames = getRootName(blastfile) + "names"; + //string outNames = m->getRootName(blastfile) + "names"; //ofstream out; - //openOutputFile(outNames, out); + //m->openOutputFile(outNames, out); //nameMap->print(out); //out.close(); - mothurOut(toString(num) + " names read."); mothurOutEndLine(); + if (m->control_pressed) { return 0; } + + m->mothurOut(toString(num) + " names read."); m->mothurOutEndLine(); + + return 0; } catch(exception& e) { - errorOut(e, "ReadBlast", "readNames"); + m->errorOut(e, "ReadBlast", "readNames"); exit(1); } }