5 * Created by westcott on 1/13/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "formatphylip.h"
11 #include "progress.hpp"
13 /***********************************************************************/
// Constructor: stores the distance-file path `df` in `filename` and opens that
// file for reading on `fileHandle` through m->openInputFile.
// NOTE(review): the closing brace (and any error handling) is not visible in this listing.
14 FormatPhylipMatrix::FormatPhylipMatrix(string df) : filename(df) {
15 m->openInputFile(filename, fileHandle);
17 /***********************************************************************/
// Reads the phylip distance matrix in `filename` and rewrites it into a
// ".rowFormatted" file. For each row the output holds, tab-separated:
//   row index, number of kept entries, then (column, distance) pairs.
// Only distances below `cutoff` are kept. rowPos[row] receives the output
// stream offset (out.tellp()) taken just before that row is written.
//
// Parameters:
//   nameMap - not referenced in any line visible in this listing.
// Returns:
//   0 on each early exit taken when m->control_pressed is set; the normal
//   return statement is not visible here.
//
// NOTE(review): this listing omits many lines of the function (local
// declarations, the `try`, inner loop headers, closing braces), so the
// comments below describe only the statements that are visible.
19 int FormatPhylipMatrix::read(NameAssignment* nameMap){
// Header: the leading count, then the token that follows it
// (presumably the first sequence's name - confirm against the file format).
27 fileHandle >> nseqs >> name;
// Allocate a ListVector sized to the number of sequences.
29 list = new ListVector(nseqs);
// Shape detection: scan characters after the first name; per the original
// comment, meeting an alphanumeric character means the matrix is square.
// NOTE(review): the condition that ends the scan for the non-square case is not visible.
33 while((d=fileHandle.get()) != EOF){
35 if(isalnum(d)){ //you are square
37 fileHandle.close(); //reset file
39 //open and get through numSeqs, code below formats rest of file
40 m->openInputFile(filename, fileHandle);
41 fileHandle >> nseqs; m->gobble(fileHandle);
// Output goes to "<filename>.rowFormatted".
43 distFile = filename + ".rowFormatted";
44 m->openOutputFile(distFile, out);
// Progress reporter sized for nseqs * nseqs updates.
54 reading = new Progress("Formatting matrix: ", nseqs * nseqs);
56 //lower triangle, so must go to column then formatted row file
// Intermediate column-format file: "<filename>.temp".
61 string tempFile = filename + ".temp";
62 m->openOutputFile(tempFile, outTemp);
64 //convert to square column matrix
// Rows start at 1; the inner loop over j is not visible in this listing.
65 for(int i=1;i<nseqs;i++){
// On user interrupt: close and delete the temp file, close the input, free the progress object.
73 if (m->control_pressed) { outTemp.close(); remove(tempFile.c_str()); fileHandle.close(); delete reading; return 0; }
75 fileHandle >> distance;
// A stored -1 is replaced by 1000000 before the cutoff comparison
// (presumably -1 marks a missing distance - confirm).
77 if (distance == -1) { distance = 1000000; }
// Keep only distances under the cutoff, and write both (i,j) and (j,i) so the
// column file describes the full symmetric matrix.
79 if(distance < cutoff){
80 outTemp << i << '\t' << j << '\t' << distance << endl;
81 outTemp << j << '\t' << i << '\t' << distance << endl;
84 reading->update(index);
89 //format from square column to rowFormatted
90 //sort file by first column so the distances for each row are together
91 string outfile = m->getRootName(tempFile) + "sorted.dist.temp";
// The sort is delegated to the platform's external `sort` program.
// NOTE(review): tempFile/outfile are concatenated into the shell command
// unquoted, so paths containing spaces or shell metacharacters will break the
// command or be interpreted by the shell; the result of system() is not checked.
94 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
95 string command = "sort -n " + tempFile + " -o " + outfile;
96 system(command.c_str());
97 #else //sort using windows sort
98 string command = "sort " + tempFile + " /O " + outfile;
99 system(command.c_str());
// On user interrupt after sorting: delete both intermediate files.
102 if (m->control_pressed) { remove(tempFile.c_str()); remove(outfile.c_str()); delete reading; return 0; }
104 //output to new file distance for each row and save positions in file where new row begins
106 m->openInputFile(outfile, in);
// Output for this path is "<outfile>.rowFormatted"; distFile is reassigned here.
108 distFile = outfile + ".rowFormatted";
109 m->openOutputFile(distFile, out);
// One offset slot per sequence; every slot starts at -1 until a row's offset is stored.
111 rowPos.resize(nseqs, -1);
// rowMap accumulates column -> distance for the row currently being collected;
// being a std::map, it is iterated in ascending column order when written out.
115 map<int, float> rowMap;
116 map<int, float>::iterator itRow;
118 //get first currentRow
// Push the characters of the first row id back onto the stream so the read
// below sees them again.
// NOTE(review): characters are put back in forward order, so a value with more
// than one character would be re-read with its characters reversed; putting
// back more than one character is also not guaranteed to succeed on every stream.
122 string firstString = toString(first);
123 for(int k = 0; k < firstString.length(); k++) { in.putback(firstString[k]); }
// On user interrupt: close both streams and delete all three files.
126 if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); remove(distFile.c_str()); remove(outfile.c_str()); delete reading; return 0; }
// One sorted record: row, column, distance.
128 in >> first >> second >> dist; m->gobble(in);
// A change of row id means the previous row is complete: record its offset and write it out.
130 if (first != currentRow) {
131 //save position in file of each new row
132 rowPos[currentRow] = out.tellp();
134 out << currentRow << '\t' << rowMap.size() << '\t';
136 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
137 out << itRow->first << '\t' << itRow->second << '\t';
144 //save row you just read
145 rowMap[second] = dist;
148 reading->update(index);
// Same row as before: just add this column's distance to the pending row.
150 rowMap[second] = dist;
// Write the row still held in rowMap (presumably after the read loop has
// finished - the loop boundaries are not visible here).
155 //save position in file of each new row
156 rowPos[currentRow] = out.tellp();
158 out << currentRow << '\t' << rowMap.size() << '\t';
160 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
161 out << itRow->first << '\t' << itRow->second << '\t';
// The intermediate files are no longer needed.
168 remove(tempFile.c_str());
169 remove(outfile.c_str());
171 if (m->control_pressed) { remove(distFile.c_str()); delete reading; return 0; }
174 else{ //square matrix convert directly to formatted row file
176 map<int, float> rowMap;
177 map<int, float>::iterator itRow;
// One offset slot per sequence; every slot starts at -1 until a row's offset is stored.
178 rowPos.resize(nseqs, -1);
// Every row supplies nseqs distances, read straight from the input file.
180 for(int i=0;i<nseqs;i++){
185 for(int j=0;j<nseqs;j++){
186 if (m->control_pressed) { fileHandle.close(); out.close(); remove(distFile.c_str()); delete reading; return 0; }
188 fileHandle >> distance;
// Same -1 -> 1000000 substitution as in the lower-triangle path.
190 if (distance == -1) { distance = 1000000; }
// Keep off-diagonal entries that are under the cutoff.
192 if((distance < cutoff) && (j != i)){
193 rowMap[j] = distance;
196 reading->update(index);
199 m->gobble(fileHandle);
201 //save position in file of each new row
202 rowPos[i] = out.tellp();
// Row record: row index, number of kept entries, then column/distance pairs.
205 out << i << '\t' << rowMap.size() << '\t';
206 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
207 out << itRow->first << '\t' << itRow->second << '\t';
211 //clear map for new row's info
// Final interrupt check: discard the formatted output file.
220 if (m->control_pressed) { remove(distFile.c_str()); return 0; }
// Any std::exception is reported through m->errorOut, tagged with the class and method name.
228 catch(exception& e) {
229 m->errorOut(e, "FormatPhylipMatrix", "read");
233 /***********************************************************************/
// Destructor with an empty body: no explicit cleanup is performed here.
234 FormatPhylipMatrix::~FormatPhylipMatrix(){}
235 /***********************************************************************/