X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=readcluster.cpp;h=b6cb71de5fdbb888f4a7c1614987f74cd0bf8b1a;hb=cbaa068e77aeb15bb06f0695a36d8f757977ed64;hp=233b8e13094cf560ef295522a7cd6a8d552dd55a;hpb=d2cbe389d0e42eccb6bbb888462fe17e22a487a8;p=mothur.git diff --git a/readcluster.cpp b/readcluster.cpp index 233b8e1..b6cb71d 100644 --- a/readcluster.cpp +++ b/readcluster.cpp @@ -11,96 +11,39 @@ /***********************************************************************/ -ReadCluster::ReadCluster(string distfile, float c){ - globaldata = GlobalData::getInstance(); +ReadCluster::ReadCluster(string distfile, float c, string o, bool s){ + m = MothurOut::getInstance(); distFile = distfile; cutoff = c; + outputDir = o; + sortWanted = s; + list = NULL; } /***********************************************************************/ -void ReadCluster::read(NameAssignment* nameMap){ +int ReadCluster::read(NameAssignment*& nameMap){ try { if (format == "phylip") { convertPhylip2Column(nameMap); } else { list = new ListVector(nameMap->getListVector()); } - createHClusterFile(); - - } - catch(exception& e) { - errorOut(e, "ReadCluster", "read"); - exit(1); - } -} -/***********************************************************************/ - -void ReadCluster::createHClusterFile(){ - try { - string outfile = getRootName(distFile) + "sorted.dist"; - - //if you can, use the unix sort since its been optimized for years - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - string command = "sort -n -k +3 " + distFile + " -o " + outfile; - system(command.c_str()); - #else //you are stuck with my best attempt... - //windows sort does not have a way to specify a column, only a character in the line - //since we cannot assume that the distance will always be at the the same character location on each line - //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back. - - //read in file line by file and put distance first - string tempDistFile = distFile + ".temp"; - ifstream input; - ofstream output; - openInputFile(distFile, input); - openOutputFile(tempDistFile, output); - - string firstName, secondName; - float dist; - while (input) { - input >> firstName >> secondName >> dist; - output << dist << '\t' << firstName << '\t' << secondName << endl; - gobble(input); - } - input.close(); - output.close(); - - - //sort using windows sort - string tempOutfile = outfile + ".temp"; - string command = "sort " + tempDistFile + " /O " + tempOutfile; - system(command.c_str()); - - //read in sorted file and put distance at end again - ifstream input2; - openInputFile(tempOutfile, input2); - openOutputFile(outfile, output); - - while (input2) { - input2 >> dist >> firstName >> secondName; - output << firstName << '\t' << secondName << '\t' << dist << endl; - gobble(input2); - } - input2.close(); - output.close(); + if (m->control_pressed) { return 0; } - //remove temp files - remove(tempDistFile.c_str()); - remove(tempOutfile.c_str()); - #endif + if (sortWanted) { OutPutFile = m->sortFile(distFile, outputDir); } + else { OutPutFile = distFile; } //for use by clusters splitMatrix to convert a phylip matrix to column - OutPutFile = outfile; + return 0; + } catch(exception& e) { - errorOut(e, "ReadCluster", "createHClusterFile"); + m->errorOut(e, "ReadCluster", "read"); exit(1); } } - - /***********************************************************************/ -void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ +int ReadCluster::convertPhylip2Column(NameAssignment*& nameMap){ try { //convert phylip file to column file map rowToName; @@ -110,15 +53,20 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ ofstream out; string tempFile = distFile + ".column.temp"; - openInputFile(distFile, in); - openOutputFile(tempFile, out); + m->openInputFile(distFile, in); m->gobble(in); + m->openOutputFile(tempFile, out); float distance; int square, nseqs; string name; vector matrixNames; - - in >> nseqs >> name; + + string numTest; + in >> numTest >> name; + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + else { convert(numTest, nseqs); } + rowToName[0] = name; matrixNames.push_back(name); @@ -128,7 +76,7 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ } else{ list = new ListVector(nameMap->getListVector()); - if(nameMap->count(name)==0){ mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); } + if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); } } char d; @@ -160,6 +108,9 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ list->set(i, name); for(int j=0;jcontrol_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; } + in >> distance; if (distance == -1) { distance = 1000000; } @@ -171,9 +122,12 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ } else{ - if(nameMap->count(name)==0){ mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); } + if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); } for(int j=0;jcontrol_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; } + in >> distance; if (distance == -1) { distance = 1000000; } @@ -194,6 +148,8 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ if(nameMap == NULL){ list->set(i, name); for(int j=0;jcontrol_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; } + in >> distance; if (distance == -1) { distance = 1000000; } @@ -204,9 +160,11 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ } } else{ - if(nameMap->count(name)==0){ mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); } + if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); } for(int j=0;jcontrol_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; } + in >> distance; if (distance == -1) { distance = 1000000; } @@ -223,39 +181,44 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ list->setLabel("0"); in.close(); out.close(); - + if(nameMap == NULL){ nameMap = new NameAssignment(); for(int i=0;ipush_back(matrixNames[i]); } - globaldata->nameMap = nameMap; } ifstream in2; ofstream out2; - string outputFile = getRootName(distFile) + "column.dist"; - openInputFile(tempFile, in2); - openOutputFile(outputFile, out2); + string outputFile = m->getRootName(distFile) + "column.dist"; + m->openInputFile(tempFile, in2); + m->openOutputFile(outputFile, out2); int first, second; float dist; while (in2) { + if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(tempFile); m->mothurRemove(outputFile); return 0; } + in2 >> first >> second >> dist; out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl; - gobble(in2); + m->gobble(in2); } in2.close(); out2.close(); - remove(tempFile.c_str()); + m->mothurRemove(tempFile); distFile = outputFile; + + if (m->control_pressed) { m->mothurRemove(outputFile); } + + return 0; } catch(exception& e) { - errorOut(e, "ReadCluster", "convertPhylip2Column"); + m->errorOut(e, "ReadCluster", "convertPhylip2Column"); exit(1); } }