From e321831a217a5c87550e930995816c9930f85f02 Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Fri, 14 Dec 2012 14:40:10 -0500 Subject: [PATCH] added warning to readBlast to warn about duplicate names in the blast file. --- distancecommand.cpp | 12 ++++++------ mgclustercommand.cpp | 5 ++++- readblast.cpp | 20 +++++++++++--------- sparsedistancematrix.cpp | 10 +++++++--- 4 files changed, 28 insertions(+), 19 deletions(-) diff --git a/distancecommand.cpp b/distancecommand.cpp index 73a38bf..16fcbf5 100644 --- a/distancecommand.cpp +++ b/distancecommand.cpp @@ -809,7 +809,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, flo } if(i % 100 == 0){ - //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); + m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); cout << i << '\t' << (time(NULL) - startTime) << endl; } @@ -826,7 +826,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, MPI_File& outMPI, flo } - //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); + m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl; delete distCalculator; return 1; @@ -904,7 +904,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned if(i % 100 == 0){ - //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); + m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); cout << i << '\t' << (time(NULL) - startTime) << endl; } @@ -920,7 +920,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned delete buf; } - //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); + m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl; MPI_File_close(&outMPI); delete distCalculator; @@ -999,7 +999,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned if(i % 100 == 0){ - //m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); + m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); cout << i << '\t' << (time(NULL) - startTime) << endl; } @@ -1015,7 +1015,7 @@ int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned delete buf; } - //m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); + m->mothurOut(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); cout << (endLine-1) << '\t' << (time(NULL) - startTime) << endl; MPI_File_close(&outMPI); delete distCalculator; diff --git a/mgclustercommand.cpp b/mgclustercommand.cpp index b9c9d1e..0a2f91f 100644 --- a/mgclustercommand.cpp +++ b/mgclustercommand.cpp @@ -312,10 +312,13 @@ int MGClusterCommand::execute(){ outputTypes.clear(); return 0; } - + + //cluster using cluster classes while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){ + if (m->debug) { cout << "numNodes=" << distMatrix->getNNodes() << " smallDist = " << distMatrix->getSmallDist() << endl; } + cluster->update(cutoff); if (m->control_pressed) { diff --git a/readblast.cpp b/readblast.cpp index e98b69c..84fddcf 100644 --- a/readblast.cpp +++ b/readblast.cpp @@ -110,9 +110,8 @@ int ReadBlast::read(NameAssignment* nameMap) { } } }else { m->mothurOut("Error in your blast file, cannot read."); m->mothurOutEndLine(); exit(1); } -string outDistFilem = "../kathryn/blastDist.dist"; - ofstream outMDist; - m->openOutputFile(outDistFilem, outMDist); + + //read file while(!fileHandle.eof()){ @@ -194,8 +193,7 @@ string outDistFilem = "../kathryn/blastDist.dist"; PDistCell value(itA->second, distance); matrix->addCell(it->first, value); } - outMDist << itA->first << '\t' << nameMap->get(it->first) << '\t' << distance << endl; - }else{ + }else{ outDist << itA->first << '\t' << nameMap->get(it->first) << '\t' << distance << endl; } } @@ -326,8 +324,8 @@ int ReadBlast::readNames(NameAssignment* nameMap) { ifstream in; m->openInputFile(blastfile, in); - ofstream outName; - m->openOutputFile((blastfile + ".tempOutNames"), outName); + //ofstream outName; + //m->openOutputFile((blastfile + ".tempOutNames"), outName); //read first line in >> prevName; @@ -350,8 +348,12 @@ int ReadBlast::readNames(NameAssignment* nameMap) { //is this a new name? if (name != prevName) { prevName = name; - nameMap->push_back(name); - outName << name << '\t' << name << endl; + + if (nameMap->get(name) != -1) { m->mothurOut("[ERROR]: trying to exact names from blast file, and I found dups. Are you sequence names unique? quitting.\n"); m->control_pressed = true; } + else { + nameMap->push_back(name); + } + //outName << name << '\t' << name << endl; num++; } } diff --git a/sparsedistancematrix.cpp b/sparsedistancematrix.cpp index b315c48..03e9fa8 100644 --- a/sparsedistancematrix.cpp +++ b/sparsedistancematrix.cpp @@ -39,8 +39,10 @@ int SparseDistanceMatrix::updateCellCompliment(ull row, ull col){ ull vcol = 0; //find the columns entry for this cell as well - for (int i = 0; i < seqVec[vrow].size(); i++) { if (seqVec[vrow][i].index == row) { vcol = i; break; } } - + for (int i = 0; i < seqVec[vrow].size(); i++) { + if (seqVec[vrow][i].index == row) { vcol = i; break; } + } + seqVec[vrow][vcol].dist = seqVec[row][col].dist; return 0; @@ -98,7 +100,9 @@ ull SparseDistanceMatrix::getSmallestCell(ull& row){ for (int i = 0; i < seqVec.size(); i++) { for (int j = 0; j < seqVec[i].size(); j++) { - + + if (m->control_pressed) { return smallDist; } + //already checked everyone else in row if (i < seqVec[i][j].index) { -- 2.39.2