X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=formatcolumn.cpp;h=109c09ccb7c99af1f82e976078509a9239ce326b;hp=e68d85d71c32df15ca56cbee916c6c9dd1072df1;hb=a8e2df1b96a57f5f29576b08361b86a96a8eff4f;hpb=e1cf60b82a48d4d96e3a696a2d221c56cfb0b298 diff --git a/formatcolumn.cpp b/formatcolumn.cpp index e68d85d..109c09c 100644 --- a/formatcolumn.cpp +++ b/formatcolumn.cpp @@ -12,11 +12,11 @@ /***********************************************************************/ FormatColumnMatrix::FormatColumnMatrix(string df) : filename(df){ - openInputFile(filename, fileHandle); + m->openInputFile(filename, fileHandle); } /***********************************************************************/ -void FormatColumnMatrix::read(NameAssignment* nameMap){ +int FormatColumnMatrix::read(NameAssignment* nameMap){ try { string firstName, secondName; @@ -35,21 +35,22 @@ void FormatColumnMatrix::read(NameAssignment* nameMap){ ofstream out; string tempOutFile = filename + ".temp"; - openOutputFile(tempOutFile, out); + m->openOutputFile(tempOutFile, out); while(fileHandle && lt == 1){ //let's assume it's a triangular matrix... + if (m->control_pressed) { out.close(); m->mothurRemove(tempOutFile); fileHandle.close(); delete reading; return 0; } + fileHandle >> firstName >> secondName >> distance; // get the row and column names and distance map::iterator itA = nameMap->find(firstName); map::iterator itB = nameMap->find(secondName); - if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); } - if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); } + if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } + if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } if (distance == -1) { distance = 1000000; } - - if(distance < cutoff && itA != itB){ - + + if((distance < cutoff) && (itA != itB)){ if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol... refRow = itA->second; refCol = itB->second; @@ -67,21 +68,21 @@ void FormatColumnMatrix::read(NameAssignment* nameMap){ reading->update(itA->second * nseqs / 2); } - gobble(fileHandle); + m->gobble(fileHandle); } out.close(); fileHandle.close(); - + string squareFile; if(lt == 0){ // oops, it was square squareFile = filename; }else{ squareFile = tempOutFile; } //sort file by first column so the distances for each row are together - string outfile = getRootName(squareFile) + "sorted.dist.temp"; + string outfile = m->getRootName(squareFile) + "sorted.dist.temp"; //use the unix sort - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) string command = "sort -n " + squareFile + " -o " + outfile; system(command.c_str()); #else //sort using windows sort @@ -89,13 +90,14 @@ void FormatColumnMatrix::read(NameAssignment* nameMap){ system(command.c_str()); #endif + if (m->control_pressed) { m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; } //output to new file distance for each row and save positions in file where new row begins ifstream in; - openInputFile(outfile, in); + m->openInputFile(outfile, in); distFile = outfile + ".rowFormatted"; - openOutputFile(distFile, out); + m->openOutputFile(distFile, out); rowPos.resize(nseqs, -1); int currentRow; @@ -112,7 +114,10 @@ void FormatColumnMatrix::read(NameAssignment* nameMap){ for(int k = 0; k < firstString.length(); k++) { in.putback(firstString[k]); } while(!in.eof()) { - in >> first >> second >> dist; gobble(in); + + if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(distFile); m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; } + + in >> first >> second >> dist; m->gobble(in); if (first != currentRow) { //save position in file of each new row @@ -129,10 +134,13 @@ void FormatColumnMatrix::read(NameAssignment* nameMap){ rowMap.clear(); //save row you just read - rowMap[second] = dist; - + if (dist < cutoff) { + rowMap[second] = dist; + } }else{ - rowMap[second] = dist; + if (dist < cutoff) { + rowMap[second] = dist; + } } } @@ -151,16 +159,186 @@ void FormatColumnMatrix::read(NameAssignment* nameMap){ in.close(); out.close(); + if (m->control_pressed) { m->mothurRemove(distFile); m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; } - remove(tempOutFile.c_str()); - remove(outfile.c_str()); + m->mothurRemove(tempOutFile); + m->mothurRemove(outfile); reading->finish(); + + delete reading; list->setLabel("0"); + + if (m->control_pressed) { m->mothurRemove(distFile); return 0; } + return 1; + + } + catch(exception& e) { + m->errorOut(e, "FormatColumnMatrix", "read"); + exit(1); + } +} +/***********************************************************************/ + +int FormatColumnMatrix::read(CountTable* nameMap){ + try { + + string firstName, secondName; + float distance; + int nseqs = nameMap->size(); + + list = new ListVector(nameMap->getListVector()); + + Progress* reading = new Progress("Formatting matrix: ", nseqs * nseqs); + + int lt = 1; + int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose + int refCol = 0; //shows up later - Cell(refCol,refRow). If it does, then its a square matrix + + //need to see if this is a square or a triangular matrix... + + ofstream out; + string tempOutFile = filename + ".temp"; + m->openOutputFile(tempOutFile, out); + + while(fileHandle && lt == 1){ //let's assume it's a triangular matrix... + + if (m->control_pressed) { out.close(); m->mothurRemove(tempOutFile); fileHandle.close(); delete reading; return 0; } + + fileHandle >> firstName >> secondName >> distance; // get the row and column names and distance + + int itA = nameMap->get(firstName); + int itB = nameMap->get(secondName); + + if (distance == -1) { distance = 1000000; } + + if((distance < cutoff) && (itA != itB)){ + if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol... + refRow = itA; + refCol = itB; + + //making it square + out << itA << '\t' << itB << '\t' << distance << endl; + out << itB << '\t' << itA << '\t' << distance << endl; + } + else if(refRow == itA && refCol == itB){ lt = 0; } //you are square + else if(refRow == itB && refCol == itA){ lt = 0; } //you are square + else{ //making it square + out << itA << '\t' << itB << '\t' << distance << endl; + out << itB << '\t' << itA << '\t' << distance << endl; + } + + reading->update(itA * nseqs / 2); + } + m->gobble(fileHandle); + } + out.close(); + fileHandle.close(); + + string squareFile; + if(lt == 0){ // oops, it was square + squareFile = filename; + }else{ squareFile = tempOutFile; } + + //sort file by first column so the distances for each row are together + string outfile = m->getRootName(squareFile) + "sorted.dist.temp"; + + //use the unix sort +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + string command = "sort -n " + squareFile + " -o " + outfile; + system(command.c_str()); +#else //sort using windows sort + string command = "sort " + squareFile + " /O " + outfile; + system(command.c_str()); +#endif + + if (m->control_pressed) { m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; } + + //output to new file distance for each row and save positions in file where new row begins + ifstream in; + m->openInputFile(outfile, in); + + distFile = outfile + ".rowFormatted"; + m->openOutputFile(distFile, out); + + rowPos.resize(nseqs, -1); + int currentRow; + int first, second; + float dist; + map rowMap; + map::iterator itRow; + + //get first currentRow + in >> first; + currentRow = first; + + string firstString = toString(first); + for(int k = 0; k < firstString.length(); k++) { in.putback(firstString[k]); } + + while(!in.eof()) { + + if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(distFile); m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; } + + in >> first >> second >> dist; m->gobble(in); + + if (first != currentRow) { + //save position in file of each new row + rowPos[currentRow] = out.tellp(); + + out << currentRow << '\t' << rowMap.size() << '\t'; + + for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) { + out << itRow->first << '\t' << itRow->second << '\t'; + } + out << endl; + + currentRow = first; + rowMap.clear(); + + //save row you just read + if (dist < cutoff) { + rowMap[second] = dist; + } + }else{ + if (dist < cutoff) { + rowMap[second] = dist; + } + } + } + + //print last Row + //save position in file of each new row + rowPos[currentRow] = out.tellp(); + + out << currentRow << '\t' << rowMap.size() << '\t'; + + for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) { + out << itRow->first << '\t' << itRow->second << '\t'; + } + out << endl; + + + in.close(); + out.close(); + + if (m->control_pressed) { m->mothurRemove(distFile); m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; } + + m->mothurRemove(tempOutFile); + m->mothurRemove(outfile); + + reading->finish(); + + delete reading; + list->setLabel("0"); + + if (m->control_pressed) { m->mothurRemove(distFile); return 0; } + + return 1; + } catch(exception& e) { - errorOut(e, "FormatColumnMatrix", "read"); + m->errorOut(e, "FormatColumnMatrix", "read"); exit(1); } }