X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=readblast.cpp;h=84fddcf263cfdeb572f72ff3bd658325f31271e0;hp=1efaf5b5e3526b8ec53ff3651f9de4b540720b7e;hb=cf9987b67aa49777a4c91c2d21f96e58bf17aa82;hpb=260ae19c36cb11a53ddc5a75b5e507f8dd8b31d6 diff --git a/readblast.cpp b/readblast.cpp index 1efaf5b..84fddcf 100644 --- a/readblast.cpp +++ b/readblast.cpp @@ -54,7 +54,8 @@ int ReadBlast::read(NameAssignment* nameMap) { //create objects needed for read if (!hclusterWanted) { - matrix = new SparseMatrix(); + matrix = new SparseDistanceMatrix(); + matrix->resize(nseqs); }else{ overlapFile = m->getRootName(blastfile) + "overlap.dist"; distFile = m->getRootName(blastfile) + "hclusterDists.dist"; @@ -66,7 +67,7 @@ int ReadBlast::read(NameAssignment* nameMap) { if (m->control_pressed) { fileHandle.close(); if (!hclusterWanted) { delete matrix; } - else { outOverlap.close(); remove(overlapFile.c_str()); outDist.close(); remove(distFile.c_str()); } + else { outOverlap.close(); m->mothurRemove(overlapFile); outDist.close(); m->mothurRemove(distFile); } return 0; } @@ -90,8 +91,8 @@ int ReadBlast::read(NameAssignment* nameMap) { //convert name to number map::iterator itA = nameMap->find(firstName); map::iterator itB = nameMap->find(secondName); - if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); } - if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); } + if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } + if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } thisRowsBlastScores[itB->second] = score; @@ -110,14 +111,14 @@ int ReadBlast::read(NameAssignment* nameMap) { } }else { m->mothurOut("Error in your blast file, cannot read."); m->mothurOutEndLine(); exit(1); } - + //read file while(!fileHandle.eof()){ if (m->control_pressed) { fileHandle.close(); if (!hclusterWanted) { delete matrix; } - else { outOverlap.close(); remove(overlapFile.c_str()); outDist.close(); remove(distFile.c_str()); } + else { outOverlap.close(); m->mothurRemove(overlapFile); outDist.close(); m->mothurRemove(distFile); } delete reading; return 0; } @@ -143,8 +144,8 @@ int ReadBlast::read(NameAssignment* nameMap) { //convert name to number map::iterator itA = nameMap->find(firstName); map::iterator itB = nameMap->find(secondName); - if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); } - if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); } + if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } + if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } //save score thisRowsBlastScores[itB->second] = score; @@ -185,9 +186,14 @@ int ReadBlast::read(NameAssignment* nameMap) { //is this distance below cutoff if (distance < cutoff) { if (!hclusterWanted) { - PCell value(itA->second, it->first, distance); - matrix->addCell(value); - }else{ + if (itA->second < it->first) { + PDistCell value(it->first, distance); + matrix->addCell(itA->second, value); + }else { + PDistCell value(itA->second, distance); + matrix->addCell(it->first, value); + } + }else{ outDist << itA->first << '\t' << nameMap->get(it->first) << '\t' << distance << endl; } } @@ -210,8 +216,8 @@ int ReadBlast::read(NameAssignment* nameMap) { //convert name to number map::iterator itA = nameMap->find(firstName); map::iterator itB = nameMap->find(secondName); - if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); } - if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); } + if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } + if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } thisRowsBlastScores[itB->second] = score; @@ -252,8 +258,13 @@ int ReadBlast::read(NameAssignment* nameMap) { //is this distance below cutoff if (distance < cutoff) { if (!hclusterWanted) { - PCell value(itA->second, it->first, distance); - matrix->addCell(value); + if (itA->second < it->first) { + PDistCell value(it->first, distance); + matrix->addCell(itA->second, value); + }else { + PDistCell value(itA->second, distance); + matrix->addCell(it->first, value); + } }else{ outDist << itA->first << '\t' << nameMap->get(it->first) << '\t' << distance << endl; } @@ -271,7 +282,7 @@ int ReadBlast::read(NameAssignment* nameMap) { if (m->control_pressed) { fileHandle.close(); if (!hclusterWanted) { delete matrix; } - else { outOverlap.close(); remove(overlapFile.c_str()); outDist.close(); remove(distFile.c_str()); } + else { outOverlap.close(); m->mothurRemove(overlapFile); outDist.close(); m->mothurRemove(distFile); } delete reading; return 0; } @@ -286,7 +297,7 @@ int ReadBlast::read(NameAssignment* nameMap) { if (m->control_pressed) { fileHandle.close(); if (!hclusterWanted) { delete matrix; } - else { remove(overlapFile.c_str()); remove(distFile.c_str()); } + else { m->mothurRemove(overlapFile); m->mothurRemove(distFile); } delete reading; return 0; } @@ -337,7 +348,12 @@ int ReadBlast::readNames(NameAssignment* nameMap) { //is this a new name? if (name != prevName) { prevName = name; - nameMap->push_back(name); + + if (nameMap->get(name) != -1) { m->mothurOut("[ERROR]: trying to exact names from blast file, and I found dups. Are you sequence names unique? quitting.\n"); m->control_pressed = true; } + else { + nameMap->push_back(name); + } + //outName << name << '\t' << name << endl; num++; } }