X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=readcluster.cpp;h=a6adabb5b8aa155d3b633f885fd37f7ba051465f;hp=233b8e13094cf560ef295522a7cd6a8d552dd55a;hb=a8e2df1b96a57f5f29576b08361b86a96a8eff4f;hpb=d2cbe389d0e42eccb6bbb888462fe17e22a487a8 diff --git a/readcluster.cpp b/readcluster.cpp index 233b8e1..a6adabb 100644 --- a/readcluster.cpp +++ b/readcluster.cpp @@ -11,96 +11,59 @@ /***********************************************************************/ -ReadCluster::ReadCluster(string distfile, float c){ - globaldata = GlobalData::getInstance(); +ReadCluster::ReadCluster(string distfile, float c, string o, bool s){ + m = MothurOut::getInstance(); distFile = distfile; cutoff = c; + outputDir = o; + sortWanted = s; + list = NULL; } /***********************************************************************/ -void ReadCluster::read(NameAssignment* nameMap){ +int ReadCluster::read(NameAssignment*& nameMap){ try { if (format == "phylip") { convertPhylip2Column(nameMap); } else { list = new ListVector(nameMap->getListVector()); } - createHClusterFile(); + if (m->control_pressed) { return 0; } + + if (sortWanted) { OutPutFile = m->sortFile(distFile, outputDir); } + else { OutPutFile = distFile; } //for use by clusters splitMatrix to convert a phylip matrix to column + + return 0; } catch(exception& e) { - errorOut(e, "ReadCluster", "read"); + m->errorOut(e, "ReadCluster", "read"); exit(1); } } /***********************************************************************/ - -void ReadCluster::createHClusterFile(){ - try { - string outfile = getRootName(distFile) + "sorted.dist"; - - //if you can, use the unix sort since its been optimized for years - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - string command = "sort -n -k +3 " + distFile + " -o " + outfile; - system(command.c_str()); - #else //you are stuck with my best attempt... - //windows sort does not have a way to specify a column, only a character in the line - //since we cannot assume that the distance will always be at the the same character location on each line - //due to variable sequence name lengths, I chose to force the distance into first position, then sort and then put it back. - - //read in file line by file and put distance first - string tempDistFile = distFile + ".temp"; - ifstream input; - ofstream output; - openInputFile(distFile, input); - openOutputFile(tempDistFile, output); - - string firstName, secondName; - float dist; - while (input) { - input >> firstName >> secondName >> dist; - output << dist << '\t' << firstName << '\t' << secondName << endl; - gobble(input); - } - input.close(); - output.close(); +int ReadCluster::read(CountTable*& ct){ + try { + + if (format == "phylip") { convertPhylip2Column(ct); } + else { list = new ListVector(ct->getListVector()); } - - //sort using windows sort - string tempOutfile = outfile + ".temp"; - string command = "sort " + tempDistFile + " /O " + tempOutfile; - system(command.c_str()); - - //read in sorted file and put distance at end again - ifstream input2; - openInputFile(tempOutfile, input2); - openOutputFile(outfile, output); - - while (input2) { - input2 >> dist >> firstName >> secondName; - output << firstName << '\t' << secondName << '\t' << dist << endl; - gobble(input2); - } - input2.close(); - output.close(); + if (m->control_pressed) { return 0; } - //remove temp files - remove(tempDistFile.c_str()); - remove(tempOutfile.c_str()); - #endif + if (sortWanted) { OutPutFile = m->sortFile(distFile, outputDir); } + else { OutPutFile = distFile; } //for use by clusters splitMatrix to convert a phylip matrix to column - OutPutFile = outfile; + return 0; + } catch(exception& e) { - errorOut(e, "ReadCluster", "createHClusterFile"); + m->errorOut(e, "ReadCluster", "read"); exit(1); } } - - /***********************************************************************/ -void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ +int ReadCluster::convertPhylip2Column(NameAssignment*& nameMap){ try { //convert phylip file to column file map rowToName; @@ -110,15 +73,20 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ ofstream out; string tempFile = distFile + ".column.temp"; - openInputFile(distFile, in); - openOutputFile(tempFile, out); + m->openInputFile(distFile, in); m->gobble(in); + m->openOutputFile(tempFile, out); float distance; int square, nseqs; string name; vector matrixNames; - - in >> nseqs >> name; + + string numTest; + in >> numTest >> name; + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + else { convert(numTest, nseqs); } + rowToName[0] = name; matrixNames.push_back(name); @@ -128,7 +96,7 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ } else{ list = new ListVector(nameMap->getListVector()); - if(nameMap->count(name)==0){ mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); } + if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); } } char d; @@ -160,6 +128,9 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ list->set(i, name); for(int j=0;jcontrol_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; } + in >> distance; if (distance == -1) { distance = 1000000; } @@ -171,9 +142,12 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ } else{ - if(nameMap->count(name)==0){ mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); } + if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); } for(int j=0;jcontrol_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; } + in >> distance; if (distance == -1) { distance = 1000000; } @@ -194,6 +168,8 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ if(nameMap == NULL){ list->set(i, name); for(int j=0;jcontrol_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; } + in >> distance; if (distance == -1) { distance = 1000000; } @@ -204,9 +180,11 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ } } else{ - if(nameMap->count(name)==0){ mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); mothurOutEndLine(); } + if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); } for(int j=0;jcontrol_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; } + in >> distance; if (distance == -1) { distance = 1000000; } @@ -223,39 +201,219 @@ void ReadCluster::convertPhylip2Column(NameAssignment* nameMap){ list->setLabel("0"); in.close(); out.close(); - + if(nameMap == NULL){ nameMap = new NameAssignment(); for(int i=0;ipush_back(matrixNames[i]); } - globaldata->nameMap = nameMap; } ifstream in2; ofstream out2; - string outputFile = getRootName(distFile) + "column.dist"; - openInputFile(tempFile, in2); - openOutputFile(outputFile, out2); + string outputFile = m->getRootName(distFile) + "column.dist"; + m->openInputFile(tempFile, in2); + m->openOutputFile(outputFile, out2); + + int first, second; + float dist; + + while (in2) { + if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(tempFile); m->mothurRemove(outputFile); return 0; } + + in2 >> first >> second >> dist; + out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl; + m->gobble(in2); + } + in2.close(); + out2.close(); + + m->mothurRemove(tempFile); + distFile = outputFile; + + if (m->control_pressed) { m->mothurRemove(outputFile); } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "ReadCluster", "convertPhylip2Column"); + exit(1); + } +} +/***********************************************************************/ + +int ReadCluster::convertPhylip2Column(CountTable*& ct){ + try { + //convert phylip file to column file + map rowToName; + map::iterator it; + + ifstream in; + ofstream out; + string tempFile = distFile + ".column.temp"; + + m->openInputFile(distFile, in); m->gobble(in); + m->openOutputFile(tempFile, out); + + float distance; + int square, nseqs; + string name; + vector matrixNames; + + string numTest; + in >> numTest >> name; + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + else { convert(numTest, nseqs); } + + rowToName[0] = name; + matrixNames.push_back(name); + + if(ct == NULL){ + list = new ListVector(nseqs); + list->set(0, name); + } + else{ list = new ListVector(ct->getListVector()); } + + char d; + while((d=in.get()) != EOF){ + + if(isalnum(d)){ + square = 1; + in.putback(d); + for(int i=0;i> distance; + } + break; + } + if(d == '\n'){ + square = 0; + break; + } + } + + if(square == 0){ + + for(int i=1;i> name; + rowToName[i] = name; + matrixNames.push_back(name); + + //there's A LOT of repeated code throughout this method... + if(ct == NULL){ + list->set(i, name); + + for(int j=0;jcontrol_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; } + + in >> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff){ + out << i << '\t' << j << '\t' << distance << endl; + } + } + + } + else{ + + for(int j=0;jcontrol_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; } + + in >> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff){ + out << i << '\t' << j << '\t' << distance << endl; + } + } + } + } + } + else{ + for(int i=1;i> name; + rowToName[i] = name; + matrixNames.push_back(name); + + if(ct == NULL){ + list->set(i, name); + for(int j=0;jcontrol_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; } + + in >> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff && j < i){ + out << i << '\t' << j << '\t' << distance << endl; + } + } + } + else{ + for(int j=0;jcontrol_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); return 0; } + + in >> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff && j < i){ + out << i << '\t' << j << '\t' << distance << endl; + } + + } + } + } + } + + list->setLabel("0"); + in.close(); + out.close(); + + if(ct == NULL){ + ct = new CountTable(); + for(int i=0;ipush_back(matrixNames[i]); + } + } + + + ifstream in2; + ofstream out2; + + string outputFile = m->getRootName(distFile) + "column.dist"; + m->openInputFile(tempFile, in2); + m->openOutputFile(outputFile, out2); int first, second; float dist; while (in2) { + if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(tempFile); m->mothurRemove(outputFile); return 0; } + in2 >> first >> second >> dist; out2 << rowToName[first] << '\t' << rowToName[second] << '\t' << dist << endl; - gobble(in2); + m->gobble(in2); } in2.close(); out2.close(); - remove(tempFile.c_str()); + m->mothurRemove(tempFile); distFile = outputFile; + + if (m->control_pressed) { m->mothurRemove(outputFile); } + + return 0; } catch(exception& e) { - errorOut(e, "ReadCluster", "convertPhylip2Column"); + m->errorOut(e, "ReadCluster", "convertPhylip2Column"); exit(1); } }