5 * Created by westcott on 1/13/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "formatphylip.h"
11 #include "progress.hpp"
13 /***********************************************************************/
// Constructor: stores the path passed as df in the member `filename` and opens
// that file for reading on `fileHandle` via openInputFile().
// NOTE(review): the closing brace of this constructor is not part of the lines
// shown here; openInputFile()'s failure behaviour is defined elsewhere.
14 FormatPhylipMatrix::FormatPhylipMatrix(string df) : filename(df) {
15 openInputFile(filename, fileHandle);
17 /***********************************************************************/
// Reads the distance matrix opened on fileHandle and rewrites it into a
// ".rowFormatted" file: for every row it writes the row index, the number of
// kept entries, and then tab-separated column/distance pairs. The output offset
// at which each row starts is stored in rowPos. Only distances below `cutoff`
// are kept.
//
// nameMap: not referenced by any statement visible in this listing.
//
// Two input layouts are handled:
//   - lower triangle: pairs are first written to a temp file in column form,
//     sorted by an external `sort` command, then regrouped row by row;
//   - square: rows are converted directly.
//
// NOTE(review): this listing is an excerpt -- variable declarations, the try
// keyword, some loop headers and the closing braces are not shown, so the
// comments below describe only the statements that are visible.
19 void FormatPhylipMatrix::read(NameAssignment* nameMap){
// read the sequence count and the token after it (presumably the first sequence's name)
27 fileHandle >> nseqs >> name;
// NOTE(review): allocated with new; who releases `list` is not visible here
29 list = new ListVector(nseqs);
// scan forward one character at a time from just after the name to detect the layout
// NOTE(review): d's declaration is not shown; if it is a plain char, comparing it
// against EOF is unreliable (and isalnum on a negative char is undefined) -- confirm
33 while((d=fileHandle.get()) != EOF){
// an alphanumeric character found during the scan is taken to mean the matrix is square
35 if(isalnum(d)){ //you are square
37 fileHandle.close(); //reset file
39 //open and get through numSeqs, code below formats rest of file
40 openInputFile(filename, fileHandle);
41 fileHandle >> nseqs; gobble(fileHandle);
// square input is written straight to <filename>.rowFormatted
43 distFile = filename + ".rowFormatted";
44 openOutputFile(distFile, out);
// progress is sized for nseqs * nseqs updates
// NOTE(review): allocated with new; no matching delete is visible in this excerpt
54 reading = new Progress("Formatting matrix: ", nseqs * nseqs);
56 //lower triangle, so must go to column then formatted row file
// intermediate column-format file, removed again at the end of this branch
61 string tempFile = filename + ".temp";
62 openOutputFile(tempFile, outTemp);
64 //convert to square column matrix
// rows start at 1 here; the inner loop over j is not visible in this excerpt
65 for(int i=1;i<nseqs;i++){
71 fileHandle >> distance;
// a distance of -1 is replaced by 1000000 (presumably a "no distance" marker that
// should never pass the cutoff test -- confirm)
73 if (distance == -1) { distance = 1000000; }
75 if(distance < cutoff){
// write each kept pair in both orientations so it is listed under row i and under row j
76 outTemp << i << '\t' << j << '\t' << distance << endl;
77 outTemp << j << '\t' << i << '\t' << distance << endl;
80 reading->update(index);
85 //format from square column to rowFormatted
86 //sort file by first column so the distances for each row are together
87 string outfile = getRootName(tempFile) + "sorted.dist.temp";
// the sort is delegated to the platform's `sort` program through system()
// NOTE(review): the file names are concatenated into the command unquoted and the
// return value of system() is ignored -- a path containing spaces or shell
// metacharacters would break (or alter) the command; confirm inputs
90 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
// -n requests a numeric sort, -o names the output file
91 string command = "sort -n " + tempFile + " -o " + outfile;
92 system(command.c_str());
93 #else //sort using windows sort
// NOTE(review): no numeric-sort option is passed on this branch -- confirm the
// resulting order still groups rows as the loop below expects
94 string command = "sort " + tempFile + " /O " + outfile;
95 system(command.c_str());
99 //output to new file distance for each row and save positions in file where new row begins
101 openInputFile(outfile, in);
// on this path the result file is named after the sorted temp file, not after filename
103 distFile = outfile + ".rowFormatted";
104 openOutputFile(distFile, out);
// one slot per sequence; -1 is the fill value for slots that are never assigned
106 rowPos.resize(nseqs, -1);
// entries of the row currently being collected, keyed by column index (ordered by key)
110 map<int, float> rowMap;
111 map<int, float>::iterator itRow;
113 //get first currentRow
// push the text of `first` back onto the stream so the record can be re-read below
// (presumably `first` was just extracted from `in`; that statement is not shown)
// NOTE(review): the characters are put back in forward order, and each putback
// becomes the next character read, so a multi-digit value would come back with
// its digits reversed -- confirm
117 string firstString = toString(first);
118 for(int k = 0; k < firstString.length(); k++) { in.putback(firstString[k]); }
// one record of the sorted file: row, column, distance
121 in >> first >> second >> dist; gobble(in);
// the row index changed: write out the row collected so far
123 if (first != currentRow) {
124 //save position in file of each new row
125 rowPos[currentRow] = out.tellp();
// row index, number of entries, then the column/distance pairs
127 out << currentRow << '\t' << rowMap.size() << '\t';
129 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
130 out << itRow->first << '\t' << itRow->second << '\t';
137 //save row you just read
138 rowMap[second] = dist;
141 reading->update(index);
// same row as before: just add the entry (the enclosing branch is not shown)
143 rowMap[second] = dist;
// write out the row still held in rowMap (presumably after the read loop has ended)
148 //save position in file of each new row
149 rowPos[currentRow] = out.tellp();
151 out << currentRow << '\t' << rowMap.size() << '\t';
153 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
154 out << itRow->first << '\t' << itRow->second << '\t';
// delete both intermediate files; the return values of remove() are not checked
161 remove(tempFile.c_str());
162 remove(outfile.c_str());
164 else{ //square matrix convert directly to formatted row file
166 map<int, float> rowMap;
167 map<int, float>::iterator itRow;
// one pass per row i, reading nseqs distances for that row
170 for(int i=0;i<nseqs;i++){
175 for(int j=0;j<nseqs;j++){
176 fileHandle >> distance;
// same -1 -> 1000000 substitution as in the lower-triangle branch
178 if (distance == -1) { distance = 1000000; }
// keep only off-diagonal entries below the cutoff
180 if((distance < cutoff) && (j != i)){
181 rowMap[j] = distance;
184 reading->update(index);
187 //save position in file of each new row
// NOTE(review): no rowPos.resize() for this branch is visible in this excerpt -- confirm
188 rowPos[i] = out.tellp();
// row index, number of entries, then the column/distance pairs
191 out << i << '\t' << rowMap.size() << '\t';
192 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
193 out << itRow->first << '\t' << itRow->second << '\t';
197 //clear map for new row's info
// any std::exception raised above is handed to errorOut() with the class and method name
209 catch(exception& e) {
210 errorOut(e, "FormatPhylipMatrix", "read");
214 /***********************************************************************/
// Destructor with an empty body: nothing is released explicitly here.
// NOTE(review): read() allocates objects with new (a ListVector and a Progress);
// where those are freed is not visible in this file excerpt -- confirm.
215 FormatPhylipMatrix::~FormatPhylipMatrix(){}
216 /***********************************************************************/