X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=readblast.cpp;h=84fddcf263cfdeb572f72ff3bd658325f31271e0;hp=c3dfd4884f0d01d6c9dbb98c17aa9a8341a44c5d;hb=050a3ff02473a3d4c0980964e1a9ebe52e55d6b8;hpb=0c78e45408d8c099bc51579225ebfc227f7523b2 diff --git a/readblast.cpp b/readblast.cpp index c3dfd48..84fddcf 100644 --- a/readblast.cpp +++ b/readblast.cpp @@ -40,7 +40,7 @@ int ReadBlast::read(NameAssignment* nameMap) { if (m->control_pressed) { return 0; } ifstream fileHandle; - openInputFile(blastfile, fileHandle); + m->openInputFile(blastfile, fileHandle); string firstName, secondName, eScore, currentRow; string repeatName = ""; @@ -54,19 +54,20 @@ int ReadBlast::read(NameAssignment* nameMap) { //create objects needed for read if (!hclusterWanted) { - matrix = new SparseMatrix(); + matrix = new SparseDistanceMatrix(); + matrix->resize(nseqs); }else{ - overlapFile = getRootName(blastfile) + "overlap.dist"; - distFile = getRootName(blastfile) + "hclusterDists.dist"; + overlapFile = m->getRootName(blastfile) + "overlap.dist"; + distFile = m->getRootName(blastfile) + "hclusterDists.dist"; - openOutputFile(overlapFile, outOverlap); - openOutputFile(distFile, outDist); + m->openOutputFile(overlapFile, outOverlap); + m->openOutputFile(distFile, outDist); } if (m->control_pressed) { fileHandle.close(); if (!hclusterWanted) { delete matrix; } - else { outOverlap.close(); remove(overlapFile.c_str()); outDist.close(); remove(distFile.c_str()); } + else { outOverlap.close(); m->mothurRemove(overlapFile); outDist.close(); m->mothurRemove(distFile); } return 0; } @@ -79,7 +80,7 @@ int ReadBlast::read(NameAssignment* nameMap) { if (!fileHandle.eof()) { //read in line from file fileHandle >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score; - gobble(fileHandle); + m->gobble(fileHandle); currentRow = firstName; lengthThisSeq = numBases; @@ -90,8 +91,8 @@ int ReadBlast::read(NameAssignment* nameMap) { //convert name to number map::iterator itA = nameMap->find(firstName); map::iterator itB = nameMap->find(secondName); - if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); } - if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); } + if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } + if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } thisRowsBlastScores[itB->second] = score; @@ -110,14 +111,14 @@ int ReadBlast::read(NameAssignment* nameMap) { } }else { m->mothurOut("Error in your blast file, cannot read."); m->mothurOutEndLine(); exit(1); } - + //read file while(!fileHandle.eof()){ if (m->control_pressed) { fileHandle.close(); if (!hclusterWanted) { delete matrix; } - else { outOverlap.close(); remove(overlapFile.c_str()); outDist.close(); remove(distFile.c_str()); } + else { outOverlap.close(); m->mothurRemove(overlapFile); outDist.close(); m->mothurRemove(distFile); } delete reading; return 0; } @@ -125,7 +126,7 @@ int ReadBlast::read(NameAssignment* nameMap) { //read in line from file fileHandle >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score; //cout << firstName << '\t' << secondName << '\t' << percentId << '\t' << numBases << '\t' << mismatch << '\t' << gap << '\t' << startQuery << '\t' << endQuery << '\t' << startRef << '\t' << endRef << '\t' << eScore << '\t' << score << endl; - gobble(fileHandle); + m->gobble(fileHandle); string temp = firstName + secondName; //to check if this file has repeat lines, ie. is this a blast instead of a blscreen file @@ -143,8 +144,8 @@ int ReadBlast::read(NameAssignment* nameMap) { //convert name to number map::iterator itA = nameMap->find(firstName); map::iterator itB = nameMap->find(secondName); - if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); } - if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); } + if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } + if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } //save score thisRowsBlastScores[itB->second] = score; @@ -185,9 +186,14 @@ int ReadBlast::read(NameAssignment* nameMap) { //is this distance below cutoff if (distance < cutoff) { if (!hclusterWanted) { - PCell value(itA->second, it->first, distance); - matrix->addCell(value); - }else{ + if (itA->second < it->first) { + PDistCell value(it->first, distance); + matrix->addCell(itA->second, value); + }else { + PDistCell value(itA->second, distance); + matrix->addCell(it->first, value); + } + }else{ outDist << itA->first << '\t' << nameMap->get(it->first) << '\t' << distance << endl; } } @@ -210,8 +216,8 @@ int ReadBlast::read(NameAssignment* nameMap) { //convert name to number map::iterator itA = nameMap->find(firstName); map::iterator itB = nameMap->find(secondName); - if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); } - if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); } + if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } + if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } thisRowsBlastScores[itB->second] = score; @@ -252,8 +258,13 @@ int ReadBlast::read(NameAssignment* nameMap) { //is this distance below cutoff if (distance < cutoff) { if (!hclusterWanted) { - PCell value(itA->second, it->first, distance); - matrix->addCell(value); + if (itA->second < it->first) { + PDistCell value(it->first, distance); + matrix->addCell(itA->second, value); + }else { + PDistCell value(itA->second, distance); + matrix->addCell(it->first, value); + } }else{ outDist << itA->first << '\t' << nameMap->get(it->first) << '\t' << distance << endl; } @@ -271,7 +282,7 @@ int ReadBlast::read(NameAssignment* nameMap) { if (m->control_pressed) { fileHandle.close(); if (!hclusterWanted) { delete matrix; } - else { outOverlap.close(); remove(overlapFile.c_str()); outDist.close(); remove(distFile.c_str()); } + else { outOverlap.close(); m->mothurRemove(overlapFile); outDist.close(); m->mothurRemove(distFile); } delete reading; return 0; } @@ -286,7 +297,7 @@ int ReadBlast::read(NameAssignment* nameMap) { if (m->control_pressed) { fileHandle.close(); if (!hclusterWanted) { delete matrix; } - else { remove(overlapFile.c_str()); remove(distFile.c_str()); } + else { m->mothurRemove(overlapFile); m->mothurRemove(distFile); } delete reading; return 0; } @@ -311,16 +322,16 @@ int ReadBlast::readNames(NameAssignment* nameMap) { int num = 1; ifstream in; - openInputFile(blastfile, in); + m->openInputFile(blastfile, in); //ofstream outName; - //openOutputFile((blastfile + ".tempOutNames"), outName); + //m->openOutputFile((blastfile + ".tempOutNames"), outName); //read first line in >> prevName; for (int i = 0; i < 11; i++) { in >> hold; } - gobble(in); + m->gobble(in); //save name in nameMap nameMap->push_back(prevName); @@ -332,12 +343,17 @@ int ReadBlast::readNames(NameAssignment* nameMap) { in >> name; for (int i = 0; i < 11; i++) { in >> hold; } - gobble(in); + m->gobble(in); //is this a new name? if (name != prevName) { prevName = name; - nameMap->push_back(name); + + if (nameMap->get(name) != -1) { m->mothurOut("[ERROR]: trying to exact names from blast file, and I found dups. Are you sequence names unique? quitting.\n"); m->control_pressed = true; } + else { + nameMap->push_back(name); + } + //outName << name << '\t' << name << endl; num++; } } @@ -345,9 +361,9 @@ int ReadBlast::readNames(NameAssignment* nameMap) { in.close(); //write out names file - //string outNames = getRootName(blastfile) + "names"; + //string outNames = m->getRootName(blastfile) + "names"; //ofstream out; - //openOutputFile(outNames, out); + //m->openOutputFile(outNames, out); //nameMap->print(out); //out.close();