5 * Created by westcott on 1/13/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "formatphylip.h"
11 #include "progress.hpp"
13 /***********************************************************************/
// Constructs a formatter for the distance file `df`: the path is stored in
// `filename` and the file is opened on `fileHandle` through the shared `m`
// helper, which is the stream read() starts consuming from.
// NOTE(review): any failure indication from openInputFile is not inspected
// here -- confirm that the helper reports open errors itself.
14 FormatPhylipMatrix::FormatPhylipMatrix(string df) : filename(df) {
15 m->openInputFile(filename, fileHandle);
17 /***********************************************************************/
// Converts the PHYLIP distance matrix in `filename` into a ".rowFormatted"
// file (its path is left in distFile) and records in rowPos the output offset
// (out.tellp()) at which each row begins. Each output row is written as
//   row <TAB> entryCount <TAB> (col <TAB> distance <TAB>)*
//
// Two layouts are handled: a lower-triangle matrix is first dumped as
// "row<TAB>col<TAB>distance" lines to a temp file, sorted with the external
// `sort` program and then regrouped by row; a square matrix is converted
// directly, one row at a time.
//
// @param nameMap  names-file lookup, or NULL. With NULL the names read from the
//                 matrix are stored into `list`; otherwise each name is only
//                 checked with nameMap->count() and a message is logged when
//                 it is missing (processing continues).
// @return 0 on every cancellation path shown (m->control_pressed).
//         NOTE(review): the normal-completion return value is not visible in
//         this part of the file -- confirm before relying on it.
19 int FormatPhylipMatrix::read(NameAssignment* nameMap){
// First two tokens of the file: the sequence count (kept as text so it can be
// validated) and the name of the first sequence.
28 fileHandle >> numTest >> name;
// A non-numeric count is fatal: the message is logged and the whole process
// is terminated with exit(1).
// NOTE(review): exit() skips any cleanup in the caller; consider reporting the
// error through the return value instead.
30 if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
31 else { convert(numTest, nseqs); }
// The list is either sized from the file's sequence count or copied from the
// names file; presumably the choice depends on whether nameMap is NULL --
// the selecting condition is not shown here.
34 list = new ListVector(nseqs);
38 list = new ListVector(nameMap->getListVector());
// A name missing from the names file is only reported, not treated as fatal.
39 if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
// Shape detection: look at the characters that follow the first name. Finding
// an alphanumeric character is taken to mean distances follow on that line,
// i.e. the matrix is square.
43 while((d=fileHandle.get()) != EOF){
45 if(isalnum(d)){ //you are square
// Rewind by closing and reopening the input, then skip past the count again.
47 fileHandle.close(); //reset file
49 //open and get through numSeqs, code below formats rest of file
50 m->openInputFile(filename, fileHandle);
51 fileHandle >> nseqs; m->gobble(fileHandle);
// Default output target; the lower-triangle path below re-points distFile/out.
53 distFile = filename + ".rowFormatted";
54 m->openOutputFile(distFile, out);
// Progress reporter sized for nseqs * nseqs steps.
// NOTE(review): the product is computed in nseqs's own type; if that is int it
// can overflow for very large matrices -- confirm.
64 reading = new Progress("Formatting matrix: ", nseqs * nseqs);
66 //lower triangle, so must go to column then formatted row file
71 string tempFile = filename + ".temp";
72 m->openOutputFile(tempFile, outTemp);
74 //convert to square column matrix
// Rows start at 1 here; row 0 is not visited by this loop.
75 for(int i=1;i<nseqs;i++){
// Store the row's name when there is no names file, otherwise just verify it.
79 if(nameMap == NULL){ list->set(i, name); }
80 else { if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
// Cancellation: drop the partial temp file, release the progress object and
// return 0.
85 if (m->control_pressed) { outTemp.close(); m->mothurRemove(tempFile); fileHandle.close(); delete reading; return 0; }
87 fileHandle >> distance;
// A stored distance of -1 is replaced by 1000000 before the cutoff test.
89 if (distance == -1) { distance = 1000000; }
// Only distances below the cutoff are kept; each is written in both
// orientations so the column file describes a full (symmetric) matrix.
91 if(distance < cutoff){
92 outTemp << i << '\t' << j << '\t' << distance << endl;
93 outTemp << j << '\t' << i << '\t' << distance << endl;
96 reading->update(index);
101 //format from square column to rowFormatted
102 //sort file by first column so the distances for each row are together
103 string outfile = m->getRootName(tempFile) + "sorted.dist.temp";
// The sort is delegated to the platform's `sort` program via system().
// NOTE(review): the file names are concatenated into the command unquoted and
// the exit status of system() is ignored; paths containing spaces or shell
// metacharacters would break or alter the command.
// NOTE(review): only the first macro is wrapped in defined(); the others are
// evaluated by value in the #if expression.
106 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
107 string command = "sort -n " + tempFile + " -o " + outfile;
108 system(command.c_str());
109 #else //sort using windows sort
110 string command = "sort " + tempFile + " /O " + outfile;
111 system(command.c_str());
// Cancellation after sorting: both intermediate files are removed.
114 if (m->control_pressed) { m->mothurRemove(tempFile); m->mothurRemove(outfile); delete reading; return 0; }
116 //output to new file distance for each row and save positions in file where new row begins
118 m->openInputFile(outfile, in);
// From here on the result goes to "<sorted file>.rowFormatted".
// NOTE(review): distFile was already opened above as filename +
// ".rowFormatted"; nothing shown here removes that earlier file on this
// path -- confirm it is not left behind.
120 distFile = outfile + ".rowFormatted";
121 m->openOutputFile(distFile, out);
// Every row starts at -1; rows that never appear in the sorted file keep it.
123 rowPos.resize(nseqs, -1);
// Accumulates column -> distance for the row currently being gathered; being
// a map, its entries are written out in ascending column order.
127 map<int, float> rowMap;
128 map<int, float>::iterator itRow;
130 //get first currentRow
// Push the text of `first` back onto the stream so the main loop can read it
// again as part of a full triple.
// NOTE(review): the characters are put back first-to-last, which looks like it
// would restore a multi-digit value with its digits reversed, and more than
// one putback() is not guaranteed to succeed on a file stream -- confirm.
134 string firstString = toString(first);
135 for(int k = 0; k < firstString.length(); k++) { in.putback(firstString[k]); }
// Cancellation while regrouping: close both streams and remove all three files.
138 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); m->mothurRemove(distFile); m->mothurRemove(outfile); delete reading; return 0; }
// One sorted record: row index, column index, distance.
140 in >> first >> second >> dist; m->gobble(in);
// A change of row index means the previous row is complete: remember where it
// starts in the output and write it.
142 if (first != currentRow) {
143 //save position in file of each new row
144 rowPos[currentRow] = out.tellp();
146 out << currentRow << '\t' << rowMap.size() << '\t';
148 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
149 out << itRow->first << '\t' << itRow->second << '\t';
156 //save row you just read
157 rowMap[second] = dist;
160 reading->update(index);
// Same row as before: just add the entry.
162 rowMap[second] = dist;
// Write out the row that was still being gathered when the input ended.
167 //save position in file of each new row
168 rowPos[currentRow] = out.tellp();
170 out << currentRow << '\t' << rowMap.size() << '\t';
172 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
173 out << itRow->first << '\t' << itRow->second << '\t';
// The column-format and sorted intermediates are no longer needed.
180 m->mothurRemove(tempFile);
181 m->mothurRemove(outfile);
183 if (m->control_pressed) { m->mothurRemove(distFile); delete reading; return 0; }
186 else{ //square matrix convert directly to formatted row file
188 map<int, float> rowMap;
189 map<int, float>::iterator itRow;
190 rowPos.resize(nseqs, -1);
// One pass per matrix row: check/store the name, read nseqs distances, then
// write the kept entries as a single output row.
192 for(int i=0;i<nseqs;i++){
195 if(nameMap == NULL){ list->set(i, name); }
196 else { if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
199 for(int j=0;j<nseqs;j++){
200 if (m->control_pressed) { fileHandle.close(); out.close(); m->mothurRemove(distFile); delete reading; return 0; }
202 fileHandle >> distance;
// -1 is replaced by 1000000 before the cutoff test, as in the other branch.
204 if (distance == -1) { distance = 1000000; }
// Keep entries below the cutoff and skip the diagonal (j == i).
206 if((distance < cutoff) && (j != i)){
207 rowMap[j] = distance;
210 reading->update(index);
213 m->gobble(fileHandle);
215 //save position in file of each new row
// NOTE(review): rowPos's element type is not shown here; if it is int,
// offsets beyond its range would be truncated -- confirm.
216 rowPos[i] = out.tellp();
219 out << i << '\t' << rowMap.size() << '\t';
220 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
221 out << itRow->first << '\t' << itRow->second << '\t';
225 //clear map for new row's info
// Final cancellation check: discard the formatted output.
234 if (m->control_pressed) { m->mothurRemove(distFile); return 0; }
// Exceptions are reported through m->errorOut with this class/method tag.
242 catch(exception& e) {
243 m->errorOut(e, "FormatPhylipMatrix", "read");
247 /***********************************************************************/
// CountTable overload of read(): converts the PHYLIP distance matrix in
// `filename` into a ".rowFormatted" file (path left in distFile) and stores in
// rowPos the output offset (out.tellp()) where each row starts. Each output
// row has the form
//   row <TAB> entryCount <TAB> (col <TAB> distance <TAB>)*
//
// A lower-triangle matrix goes through a temp "row<TAB>col<TAB>distance" file
// that is sorted by the external `sort` program and regrouped by row; a square
// matrix is converted directly.
//
// @param nameMap  count table, or NULL. With NULL the names read from the
//                 matrix are stored into `list`; otherwise each name is passed
//                 to nameMap->get() and the returned value is discarded.
// @return 0 on every cancellation path shown (m->control_pressed).
//         NOTE(review): the normal-completion return value is not visible in
//         this part of the file -- confirm before relying on it.
249 int FormatPhylipMatrix::read(CountTable* nameMap){
// Leading tokens: sequence count (as text, for validation) and first name.
258 fileHandle >> numTest >> name;
// A count that is not all digits logs an error and ends the process via
// exit(1).
// NOTE(review): exit() bypasses caller cleanup; returning an error would be
// gentler.
260 if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
261 else { convert(numTest, nseqs); }
// `list` is built either from the sequence count or from the count table's
// list; presumably chosen by whether nameMap is NULL -- the condition is not
// shown here.
264 list = new ListVector(nseqs);
268 list = new ListVector(nameMap->getListVector());
// Shape detection: an alphanumeric character found while scanning after the
// first name marks the matrix as square.
273 while((d=fileHandle.get()) != EOF){
275 if(isalnum(d)){ //you are square
// Close and reopen the input to start over, then skip the count.
277 fileHandle.close(); //reset file
279 //open and get through numSeqs, code below formats rest of file
280 m->openInputFile(filename, fileHandle);
281 fileHandle >> nseqs; m->gobble(fileHandle);
// Default output target; replaced further down on the lower-triangle path.
283 distFile = filename + ".rowFormatted";
284 m->openOutputFile(distFile, out);
// NOTE(review): nseqs * nseqs is evaluated in nseqs's own type; if that is
// int it can overflow for very large matrices -- confirm.
294 reading = new Progress("Formatting matrix: ", nseqs * nseqs);
296 //lower triangle, so must go to column then formatted row file
301 string tempFile = filename + ".temp";
302 m->openOutputFile(tempFile, outTemp);
304 //convert to square column matrix
// The loop begins at row 1; row i is followed by i distances (j < i).
305 for(int i=1;i<nseqs;i++){
309 if(nameMap == NULL){ list->set(i, name); }
// The lookup result is thrown away; presumably get() is called for its own
// handling of unknown names -- verify against CountTable.
310 else { nameMap->get(name); }
313 for(int j=0;j<i;j++){
// Cancellation: remove the partial temp file and free the progress object.
315 if (m->control_pressed) { outTemp.close(); m->mothurRemove(tempFile); fileHandle.close(); delete reading; return 0; }
317 fileHandle >> distance;
// A stored -1 becomes 1000000 before being compared with the cutoff.
319 if (distance == -1) { distance = 1000000; }
// Kept distances are emitted as (i,j) and (j,i) so both rows receive the entry.
321 if(distance < cutoff){
322 outTemp << i << '\t' << j << '\t' << distance << endl;
323 outTemp << j << '\t' << i << '\t' << distance << endl;
326 reading->update(index);
331 //format from square column to rowFormatted
332 //sort file by first column so the distances for each row are together
333 string outfile = m->getRootName(tempFile) + "sorted.dist.temp";
// Sorting is done by the platform `sort` program through system().
// NOTE(review): file names go into the command unquoted and system()'s exit
// status is not checked; names with spaces or shell metacharacters would
// break or alter the command.
// NOTE(review): only the first macro is tested with defined(); the rest are
// evaluated by value in the #if expression.
336 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
337 string command = "sort -n " + tempFile + " -o " + outfile;
338 system(command.c_str());
339 #else //sort using windows sort
340 string command = "sort " + tempFile + " /O " + outfile;
341 system(command.c_str());
// Cancellation after the sort: both intermediates are deleted.
344 if (m->control_pressed) { m->mothurRemove(tempFile); m->mothurRemove(outfile); delete reading; return 0; }
346 //output to new file distance for each row and save positions in file where new row begins
348 m->openInputFile(outfile, in);
// Output is redirected to "<sorted file>.rowFormatted".
// NOTE(review): the file opened earlier as filename + ".rowFormatted" is not
// removed by anything shown on this path -- confirm it is not left behind.
350 distFile = outfile + ".rowFormatted";
351 m->openOutputFile(distFile, out);
// -1 marks rows for which no position has been recorded.
353 rowPos.resize(nseqs, -1);
// column -> distance for the row being assembled (iterated in column order).
357 map<int, float> rowMap;
358 map<int, float>::iterator itRow;
360 //get first currentRow
// Return the text of `first` to the stream so the loop below re-reads it as
// part of a complete record.
// NOTE(review): putting the characters back first-to-last looks like it would
// reverse a multi-digit value, and repeated putback() calls are not
// guaranteed to succeed on a file stream -- confirm.
364 string firstString = toString(first);
365 for(int k = 0; k < firstString.length(); k++) { in.putback(firstString[k]); }
// Cancellation during regrouping: close streams, delete all three files.
368 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); m->mothurRemove(distFile); m->mothurRemove(outfile); delete reading; return 0; }
// Next sorted record: row, column, distance.
370 in >> first >> second >> dist; m->gobble(in);
// Row index changed: the gathered row is complete, so record its offset and
// write it.
372 if (first != currentRow) {
373 //save position in file of each new row
374 rowPos[currentRow] = out.tellp();
376 out << currentRow << '\t' << rowMap.size() << '\t';
378 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
379 out << itRow->first << '\t' << itRow->second << '\t';
386 //save row you just read
387 rowMap[second] = dist;
390 reading->update(index);
// Still on the same row: add the entry to it.
392 rowMap[second] = dist;
// Emit the last row, which is still pending once the input is exhausted.
397 //save position in file of each new row
398 rowPos[currentRow] = out.tellp();
400 out << currentRow << '\t' << rowMap.size() << '\t';
402 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
403 out << itRow->first << '\t' << itRow->second << '\t';
// Intermediates are deleted once the row-formatted file has been written.
410 m->mothurRemove(tempFile);
411 m->mothurRemove(outfile);
413 if (m->control_pressed) { m->mothurRemove(distFile); delete reading; return 0; }
416 else{ //square matrix convert directly to formatted row file
418 map<int, float> rowMap;
419 map<int, float>::iterator itRow;
420 rowPos.resize(nseqs, -1);
// For each matrix row: handle the name, read nseqs distances, then write the
// retained entries as one output row.
422 for(int i=0;i<nseqs;i++){
425 if(nameMap == NULL){ list->set(i, name); }
// Result discarded, as in the lower-triangle branch -- verify the intent.
426 else { nameMap->get(name); }
428 for(int j=0;j<nseqs;j++){
429 if (m->control_pressed) { fileHandle.close(); out.close(); m->mothurRemove(distFile); delete reading; return 0; }
431 fileHandle >> distance;
// -1 becomes 1000000 before the cutoff comparison.
433 if (distance == -1) { distance = 1000000; }
// Diagonal entries (j == i) and distances at or above the cutoff are dropped.
435 if((distance < cutoff) && (j != i)){
436 rowMap[j] = distance;
439 reading->update(index);
442 m->gobble(fileHandle);
444 //save position in file of each new row
// NOTE(review): rowPos's element type is not shown here; if it is int, large
// offsets would be truncated -- confirm.
445 rowPos[i] = out.tellp();
448 out << i << '\t' << rowMap.size() << '\t';
449 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
450 out << itRow->first << '\t' << itRow->second << '\t';
454 //clear map for new row's info
// Last cancellation check: the formatted output is discarded.
463 if (m->control_pressed) { m->mothurRemove(distFile); return 0; }
// Exceptions are routed to m->errorOut tagged with this class and method.
471 catch(exception& e) {
472 m->errorOut(e, "FormatPhylipMatrix", "read");
477 /***********************************************************************/
478 FormatPhylipMatrix::~FormatPhylipMatrix(){}
479 /***********************************************************************/