X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=formatphylip.cpp;h=57bc5d7389b2ad8ed980b3612f950c46ce135af7;hp=aaf540aed0d800d111e1a8a7703520640a28d240;hb=cf9987b67aa49777a4c91c2d21f96e58bf17aa82;hpb=74844a60d80c6dd06e3fb02ee9b928424f9019b0 diff --git a/formatphylip.cpp b/formatphylip.cpp index aaf540a..57bc5d7 100644 --- a/formatphylip.cpp +++ b/formatphylip.cpp @@ -12,11 +12,11 @@ /***********************************************************************/ FormatPhylipMatrix::FormatPhylipMatrix(string df) : filename(df) { - openInputFile(filename, fileHandle); + m->openInputFile(filename, fileHandle); } /***********************************************************************/ //not using nameMap -void FormatPhylipMatrix::read(NameAssignment* nameMap){ +int FormatPhylipMatrix::read(NameAssignment* nameMap){ try { float distance; @@ -24,10 +24,20 @@ void FormatPhylipMatrix::read(NameAssignment* nameMap){ string name; ofstream out; - fileHandle >> nseqs >> name; - - list = new ListVector(nseqs); - list->set(0, name); + string numTest; + fileHandle >> numTest >> name; + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + else { convert(numTest, nseqs); } + + if(nameMap == NULL){ + list = new ListVector(nseqs); + list->set(0, name); + } + else{ + list = new ListVector(nameMap->getListVector()); + if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); } + } char d; while((d=fileHandle.get()) != EOF){ @@ -37,11 +47,11 @@ void FormatPhylipMatrix::read(NameAssignment* nameMap){ fileHandle.close(); //reset file //open and get through numSeqs, code below formats rest of file - openInputFile(filename, fileHandle); - fileHandle >> nseqs; gobble(fileHandle); + m->openInputFile(filename, fileHandle); + fileHandle >> nseqs; m->gobble(fileHandle); distFile = filename + ".rowFormatted"; - openOutputFile(distFile, out); + m->openOutputFile(distFile, out); break; } if(d == '\n'){ @@ -59,15 +69,21 @@ void FormatPhylipMatrix::read(NameAssignment* nameMap){ ofstream outTemp; string tempFile = filename + ".temp"; - openOutputFile(tempFile, outTemp); + m->openOutputFile(tempFile, outTemp); //convert to square column matrix for(int i=1;i> name; - list->set(i, name); + if(nameMap == NULL){ list->set(i, name); } + else { if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); } + } for(int j=0;jcontrol_pressed) { outTemp.close(); m->mothurRemove(tempFile); fileHandle.close(); delete reading; return 0; } + fileHandle >> distance; if (distance == -1) { distance = 1000000; } @@ -84,10 +100,10 @@ void FormatPhylipMatrix::read(NameAssignment* nameMap){ //format from square column to rowFormatted //sort file by first column so the distances for each row are together - string outfile = getRootName(tempFile) + "sorted.dist.temp"; + string outfile = m->getRootName(tempFile) + "sorted.dist.temp"; //use the unix sort - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) string command = "sort -n " + tempFile + " -o " + outfile; system(command.c_str()); #else //sort using windows sort @@ -95,13 +111,14 @@ void FormatPhylipMatrix::read(NameAssignment* nameMap){ system(command.c_str()); #endif + if (m->control_pressed) { m->mothurRemove(tempFile); m->mothurRemove(outfile); delete reading; return 0; } //output to new file distance for each row and save positions in file where new row begins ifstream in; - openInputFile(outfile, in); + m->openInputFile(outfile, in); distFile = outfile + ".rowFormatted"; - openOutputFile(distFile, out); + m->openOutputFile(distFile, out); rowPos.resize(nseqs, -1); int currentRow; @@ -118,7 +135,9 @@ void FormatPhylipMatrix::read(NameAssignment* nameMap){ for(int k = 0; k < firstString.length(); k++) { in.putback(firstString[k]); } while(!in.eof()) { - in >> first >> second >> dist; gobble(in); + if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); m->mothurRemove(distFile); m->mothurRemove(outfile); delete reading; return 0; } + + in >> first >> second >> dist; m->gobble(in); if (first != currentRow) { //save position in file of each new row @@ -158,8 +177,11 @@ void FormatPhylipMatrix::read(NameAssignment* nameMap){ in.close(); out.close(); - remove(tempFile.c_str()); - remove(outfile.c_str()); + m->mothurRemove(tempFile); + m->mothurRemove(outfile); + + if (m->control_pressed) { m->mothurRemove(distFile); delete reading; return 0; } + } else{ //square matrix convert directly to formatted row file int index = nseqs; @@ -170,9 +192,13 @@ void FormatPhylipMatrix::read(NameAssignment* nameMap){ for(int i=0;i> name; - list->set(i, name); + if(nameMap == NULL){ list->set(i, name); } + else { if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); } + } for(int j=0;jcontrol_pressed) { fileHandle.close(); out.close(); m->mothurRemove(distFile); delete reading; return 0; } + fileHandle >> distance; if (distance == -1) { distance = 1000000; } @@ -184,7 +210,7 @@ void FormatPhylipMatrix::read(NameAssignment* nameMap){ reading->update(index); } - gobble(fileHandle); + m->gobble(fileHandle); //save position in file of each new row rowPos[i] = out.tellp(); @@ -202,17 +228,252 @@ void FormatPhylipMatrix::read(NameAssignment* nameMap){ } reading->finish(); delete reading; - - list->setLabel("0"); fileHandle.close(); out.close(); + if (m->control_pressed) { m->mothurRemove(distFile); return 0; } + + list->setLabel("0"); + + return 1; + + } catch(exception& e) { m->errorOut(e, "FormatPhylipMatrix", "read"); exit(1); } } +/***********************************************************************/ +//not using nameMap +int FormatPhylipMatrix::read(CountTable* nameMap){ + try { + + float distance; + int square, nseqs; + string name; + ofstream out; + + string numTest; + fileHandle >> numTest >> name; + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + else { convert(numTest, nseqs); } + + if(nameMap == NULL){ + list = new ListVector(nseqs); + list->set(0, name); + } + else{ + list = new ListVector(nameMap->getListVector()); + nameMap->get(name); + } + + char d; + while((d=fileHandle.get()) != EOF){ + + if(isalnum(d)){ //you are square + square = 1; + fileHandle.close(); //reset file + + //open and get through numSeqs, code below formats rest of file + m->openInputFile(filename, fileHandle); + fileHandle >> nseqs; m->gobble(fileHandle); + + distFile = filename + ".rowFormatted"; + m->openOutputFile(distFile, out); + break; + } + if(d == '\n'){ + square = 0; + break; + } + } + + Progress* reading; + reading = new Progress("Formatting matrix: ", nseqs * nseqs); + + //lower triangle, so must go to column then formatted row file + if(square == 0){ + int index = 0; + + ofstream outTemp; + string tempFile = filename + ".temp"; + m->openOutputFile(tempFile, outTemp); + + //convert to square column matrix + for(int i=1;i> name; + + if(nameMap == NULL){ list->set(i, name); } + else { nameMap->get(name); } + + + for(int j=0;jcontrol_pressed) { outTemp.close(); m->mothurRemove(tempFile); fileHandle.close(); delete reading; return 0; } + + fileHandle >> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff){ + outTemp << i << '\t' << j << '\t' << distance << endl; + outTemp << j << '\t' << i << '\t' << distance << endl; + } + index++; + reading->update(index); + } + } + outTemp.close(); + + //format from square column to rowFormatted + //sort file by first column so the distances for each row are together + string outfile = m->getRootName(tempFile) + "sorted.dist.temp"; + + //use the unix sort +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + string command = "sort -n " + tempFile + " -o " + outfile; + system(command.c_str()); +#else //sort using windows sort + string command = "sort " + tempFile + " /O " + outfile; + system(command.c_str()); +#endif + + if (m->control_pressed) { m->mothurRemove(tempFile); m->mothurRemove(outfile); delete reading; return 0; } + + //output to new file distance for each row and save positions in file where new row begins + ifstream in; + m->openInputFile(outfile, in); + + distFile = outfile + ".rowFormatted"; + m->openOutputFile(distFile, out); + + rowPos.resize(nseqs, -1); + int currentRow; + int first, second; + float dist; + map rowMap; + map::iterator itRow; + + //get first currentRow + in >> first; + currentRow = first; + + string firstString = toString(first); + for(int k = 0; k < firstString.length(); k++) { in.putback(firstString[k]); } + + while(!in.eof()) { + if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); m->mothurRemove(distFile); m->mothurRemove(outfile); delete reading; return 0; } + + in >> first >> second >> dist; m->gobble(in); + + if (first != currentRow) { + //save position in file of each new row + rowPos[currentRow] = out.tellp(); + + out << currentRow << '\t' << rowMap.size() << '\t'; + + for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) { + out << itRow->first << '\t' << itRow->second << '\t'; + } + out << endl; + + currentRow = first; + rowMap.clear(); + + //save row you just read + rowMap[second] = dist; + + index++; + reading->update(index); + }else{ + rowMap[second] = dist; + } + } + + //print last Row + //save position in file of each new row + rowPos[currentRow] = out.tellp(); + + out << currentRow << '\t' << rowMap.size() << '\t'; + + for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) { + out << itRow->first << '\t' << itRow->second << '\t'; + } + out << endl; + + in.close(); + out.close(); + + m->mothurRemove(tempFile); + m->mothurRemove(outfile); + + if (m->control_pressed) { m->mothurRemove(distFile); delete reading; return 0; } + + } + else{ //square matrix convert directly to formatted row file + int index = nseqs; + map rowMap; + map::iterator itRow; + rowPos.resize(nseqs, -1); + + for(int i=0;i> name; + + if(nameMap == NULL){ list->set(i, name); } + else { nameMap->get(name); } + + for(int j=0;jcontrol_pressed) { fileHandle.close(); out.close(); m->mothurRemove(distFile); delete reading; return 0; } + + fileHandle >> distance; + + if (distance == -1) { distance = 1000000; } + + if((distance < cutoff) && (j != i)){ + rowMap[j] = distance; + } + index++; + reading->update(index); + } + + m->gobble(fileHandle); + + //save position in file of each new row + rowPos[i] = out.tellp(); + + //output row to file + out << i << '\t' << rowMap.size() << '\t'; + for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) { + out << itRow->first << '\t' << itRow->second << '\t'; + } + out << endl; + + //clear map for new row's info + rowMap.clear(); + } + } + reading->finish(); + delete reading; + fileHandle.close(); + out.close(); + + if (m->control_pressed) { m->mothurRemove(distFile); return 0; } + + list->setLabel("0"); + + return 1; + + + } + catch(exception& e) { + m->errorOut(e, "FormatPhylipMatrix", "read"); + exit(1); + } +} + /***********************************************************************/ FormatPhylipMatrix::~FormatPhylipMatrix(){} /***********************************************************************/