]> git.donarmstrong.com Git - mothur.git/blob - formatphylip.cpp
206df7edfbd915f00947c46d6de07f44fc021c38
[mothur.git] / formatphylip.cpp
1 /*
2  *  formatphylip.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 1/13/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "formatphylip.h"
11 #include "progress.hpp"
12
13 /***********************************************************************/
14 FormatPhylipMatrix::FormatPhylipMatrix(string df) : filename(df) {
15         openInputFile(filename, fileHandle);
16 }
17 /***********************************************************************/
18 //not using nameMap
19 int FormatPhylipMatrix::read(NameAssignment* nameMap){
20         try {
21         
22                         float distance;
23                         int square, nseqs;
24                         string name;
25                         ofstream out;
26                         
27                         fileHandle >> nseqs >> name;
28                                                 
29                         list = new ListVector(nseqs);
30                         list->set(0, name);
31                         
32                         char d;
33                         while((d=fileHandle.get()) != EOF){
34                 
35                                 if(isalnum(d)){  //you are square
36                                         square = 1;
37                                         fileHandle.close();  //reset file
38                                         
39                                         //open and get through numSeqs, code below formats rest of file
40                                         openInputFile(filename, fileHandle);
41                                         fileHandle >> nseqs; gobble(fileHandle);
42                                         
43                                         distFile = filename + ".rowFormatted";
44                                         openOutputFile(distFile, out);
45                                         break;
46                                 }
47                                 if(d == '\n'){
48                                         square = 0;
49                                         break;
50                                 }
51                         }
52                         
53                         Progress* reading;
54                         reading = new Progress("Formatting matrix:     ", nseqs * nseqs);
55                         
56                         //lower triangle, so must go to column then formatted row file
57                         if(square == 0){
58                                 int  index = 0;
59                                 
60                                 ofstream outTemp;
61                                 string tempFile = filename + ".temp";
62                                 openOutputFile(tempFile, outTemp);
63                 
64                                 //convert to square column matrix
65                                 for(int i=1;i<nseqs;i++){
66                                 
67                                         fileHandle >> name;
68                                         
69                                         list->set(i, name);
70                                         
71                                         for(int j=0;j<i;j++){
72                                         
73                                                 if (m->control_pressed) { outTemp.close(); remove(tempFile.c_str()); fileHandle.close();  delete reading; return 0; }
74                                                                                         
75                                                 fileHandle >> distance;
76                                                 
77                                                 if (distance == -1) { distance = 1000000; }
78                                                 
79                                                 if(distance < cutoff){
80                                                         outTemp << i << '\t' << j << '\t' << distance << endl;
81                                                         outTemp << j << '\t' << i << '\t' << distance << endl;
82                                                 }
83                                                 index++;
84                                                 reading->update(index);
85                                         }
86                                 }
87                                 outTemp.close();
88                                 
89                                 //format from square column to rowFormatted
90                                 //sort file by first column so the distances for each row are together
91                                 string outfile = getRootName(tempFile) + "sorted.dist.temp";
92                                 
93                                 //use the unix sort 
94                                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
95                                         string command = "sort -n " + tempFile + " -o " + outfile;
96                                         system(command.c_str());
97                                 #else //sort using windows sort
98                                         string command = "sort " + tempFile + " /O " + outfile;
99                                         system(command.c_str());
100                                 #endif
101                                 
102                                 if (m->control_pressed) { remove(tempFile.c_str()); remove(outfile.c_str());  delete reading; return 0; }
103
104                                 //output to new file distance for each row and save positions in file where new row begins
105                                 ifstream in;
106                                 openInputFile(outfile, in);
107                                 
108                                 distFile = outfile + ".rowFormatted";
109                                 openOutputFile(distFile, out);
110                                 
111                                 rowPos.resize(nseqs, -1);
112                                 int currentRow;
113                                 int first, second;
114                                 float dist;
115                                 map<int, float> rowMap;
116                                 map<int, float>::iterator itRow;
117                                 
118                                 //get first currentRow
119                                 in >> first;
120                                 currentRow = first;
121                                 
122                                 string firstString = toString(first);
123                                 for(int k = 0; k < firstString.length(); k++)  {   in.putback(firstString[k]);  }
124                                 
125                                 while(!in.eof()) {
126                                         if (m->control_pressed) { in.close(); out.close(); remove(tempFile.c_str()); remove(distFile.c_str()); remove(outfile.c_str());  delete reading; return 0; }
127
128                                         in >> first >> second >> dist; gobble(in);
129                                         
130                                         if (first != currentRow) {
131                                                 //save position in file of each new row
132                                                 rowPos[currentRow] = out.tellp();
133                                                 
134                                                 out << currentRow << '\t' << rowMap.size() << '\t';
135                                                 
136                                                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
137                                                         out << itRow->first << '\t' << itRow->second << '\t';
138                                                 }
139                                                 out << endl;
140                                                 
141                                                 currentRow = first;
142                                                 rowMap.clear();
143                                                 
144                                                 //save row you just read
145                                                 rowMap[second] = dist;
146                                                 
147                                                 index++;
148                                                 reading->update(index);
149                                         }else{
150                                                 rowMap[second] = dist;
151                                         }
152                                 }
153                                 
154                                 //print last Row
155                                 //save position in file of each new row
156                                 rowPos[currentRow] = out.tellp();
157                                 
158                                 out << currentRow << '\t' << rowMap.size() << '\t';
159                                 
160                                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
161                                         out << itRow->first << '\t' << itRow->second << '\t';
162                                 }
163                                 out << endl;
164                                 
165                                 in.close();
166                                 out.close();
167                                 
168                                 remove(tempFile.c_str());
169                                 remove(outfile.c_str());
170                                 
171                                 if (m->control_pressed) {  remove(distFile.c_str());   delete reading; return 0; }
172
173                         }
174                         else{ //square matrix convert directly to formatted row file
175                                 int index = nseqs;
176                                 map<int, float> rowMap;
177                                 map<int, float>::iterator itRow;
178                                 rowPos.resize(nseqs, -1);
179                 
180                                 for(int i=0;i<nseqs;i++){
181                                         fileHandle >> name;                
182                                                                         
183                                         list->set(i, name);
184                                         
185                                         for(int j=0;j<nseqs;j++){
186                                                 if (m->control_pressed) {  fileHandle.close(); out.close(); remove(distFile.c_str());   delete reading; return 0; }
187                                                 
188                                                 fileHandle >> distance;
189                                         
190                                                 if (distance == -1) { distance = 1000000; }
191                                                 
192                                                 if((distance < cutoff) && (j != i)){
193                                                         rowMap[j] = distance;
194                                                 }
195                                                 index++;
196                                                 reading->update(index);
197                                         }
198                                         
199                                         gobble(fileHandle);
200                         
201                                         //save position in file of each new row
202                                         rowPos[i] = out.tellp();
203
204                                         //output row to file
205                                         out << i << '\t' << rowMap.size() << '\t';
206                                         for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
207                                                 out << itRow->first << '\t' << itRow->second << '\t';
208                                         }
209                                         out << endl;
210                                         
211                                         //clear map for new row's info
212                                         rowMap.clear();
213                                 }
214                         }
215                         reading->finish();
216                         delete reading;
217                         fileHandle.close();
218                         out.close();
219                         
220                         if (m->control_pressed) { remove(distFile.c_str());  return 0; }
221                         
222                         list->setLabel("0");
223                         
224                         return 1;
225                         
226                         
227         }
228         catch(exception& e) {
229                m->errorOut(e, "FormatPhylipMatrix", "read");
230                 exit(1);
231         }
232 }
233 /***********************************************************************/
234 FormatPhylipMatrix::~FormatPhylipMatrix(){}
235 /***********************************************************************/
236
237