git.donarmstrong.com Git - mothur.git/blob - formatphylip.cpp
fixed bugs for 1.8
[mothur.git] / formatphylip.cpp
1 /*
2  *  formatphylip.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 1/13/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "formatphylip.h"
11 #include "progress.hpp"
12
13 /***********************************************************************/
14 FormatPhylipMatrix::FormatPhylipMatrix(string df) : filename(df) {
15         openInputFile(filename, fileHandle);
16 }
17 /***********************************************************************/
18 //not using nameMap
19 void FormatPhylipMatrix::read(NameAssignment* nameMap){
20         try {
21         
22                         float distance;
23                         int square, nseqs;
24                         string name;
25                         ofstream out;
26                         
27                         fileHandle >> nseqs >> name;
28                                                 
29                         list = new ListVector(nseqs);
30                         list->set(0, name);
31                         
32                         char d;
33                         while((d=fileHandle.get()) != EOF){
34                 
35                                 if(isalnum(d)){  //you are square
36                                         square = 1;
37                                         fileHandle.close();  //reset file
38                                         
39                                         //open and get through numSeqs, code below formats rest of file
40                                         openInputFile(filename, fileHandle);
41                                         fileHandle >> nseqs; gobble(fileHandle);
42                                         
43                                         distFile = filename + ".rowFormatted";
44                                         openOutputFile(distFile, out);
45                                         break;
46                                 }
47                                 if(d == '\n'){
48                                         square = 0;
49                                         break;
50                                 }
51                         }
52                         
53                         Progress* reading;
54                         reading = new Progress("Formatting matrix:     ", nseqs * nseqs);
55                         
56                         //lower triangle, so must go to column then formatted row file
57                         if(square == 0){
58                                 int  index = 0;
59                                 
60                                 ofstream outTemp;
61                                 string tempFile = filename + ".temp";
62                                 openOutputFile(tempFile, outTemp);
63                 
64                                 //convert to square column matrix
65                                 for(int i=1;i<nseqs;i++){
66                                         fileHandle >> name;
67                                         
68                                         list->set(i, name);
69                                         
70                                         for(int j=0;j<i;j++){
71                                                 fileHandle >> distance;
72                                                 
73                                                 if (distance == -1) { distance = 1000000; }
74                                                 
75                                                 if(distance < cutoff){
76                                                         outTemp << i << '\t' << j << '\t' << distance << endl;
77                                                         outTemp << j << '\t' << i << '\t' << distance << endl;
78                                                 }
79                                                 index++;
80                                                 reading->update(index);
81                                         }
82                                 }
83                                 outTemp.close();
84                                 
85                                 //format from square column to rowFormatted
86                                 //sort file by first column so the distances for each row are together
87                                 string outfile = getRootName(tempFile) + "sorted.dist.temp";
88                                 
89                                 //use the unix sort 
90                                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
91                                         string command = "sort -n " + tempFile + " -o " + outfile;
92                                         system(command.c_str());
93                                 #else //sort using windows sort
94                                         string command = "sort " + tempFile + " /O " + outfile;
95                                         system(command.c_str());
96                                 #endif
97                                 
98
99                                 //output to new file distance for each row and save positions in file where new row begins
100                                 ifstream in;
101                                 openInputFile(outfile, in);
102                                 
103                                 distFile = outfile + ".rowFormatted";
104                                 openOutputFile(distFile, out);
105                                 
106                                 rowPos.resize(nseqs, -1);
107                                 int currentRow;
108                                 int first, second;
109                                 float dist;
110                                 map<int, float> rowMap;
111                                 map<int, float>::iterator itRow;
112                                 
113                                 //get first currentRow
114                                 in >> first;
115                                 currentRow = first;
116                                 
117                                 string firstString = toString(first);
118                                 for(int k = 0; k < firstString.length(); k++)  {   in.putback(firstString[k]);  }
119                                 
120                                 while(!in.eof()) {
121                                         in >> first >> second >> dist; gobble(in);
122                                         
123                                         if (first != currentRow) {
124                                                 //save position in file of each new row
125                                                 rowPos[currentRow] = out.tellp();
126                                                 
127                                                 out << currentRow << '\t' << rowMap.size() << '\t';
128                                                 
129                                                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
130                                                         out << itRow->first << '\t' << itRow->second << '\t';
131                                                 }
132                                                 out << endl;
133                                                 
134                                                 currentRow = first;
135                                                 rowMap.clear();
136                                                 
137                                                 //save row you just read
138                                                 rowMap[second] = dist;
139                                                 
140                                                 index++;
141                                                 reading->update(index);
142                                         }else{
143                                                 rowMap[second] = dist;
144                                         }
145                                 }
146                                 
147                                 //print last Row
148                                 //save position in file of each new row
149                                 rowPos[currentRow] = out.tellp();
150                                 
151                                 out << currentRow << '\t' << rowMap.size() << '\t';
152                                 
153                                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
154                                         out << itRow->first << '\t' << itRow->second << '\t';
155                                 }
156                                 out << endl;
157                                 
158                                 in.close();
159                                 out.close();
160                                 
161                                 remove(tempFile.c_str());
162                                 remove(outfile.c_str());
163                         }
164                         else{ //square matrix convert directly to formatted row file
165                                 int index = nseqs;
166                                 map<int, float> rowMap;
167                                 map<int, float>::iterator itRow;
168                                 rowPos.resize(nseqs, -1);
169                 
170                                 for(int i=0;i<nseqs;i++){
171                                         fileHandle >> name;                
172                                                                         
173                                         list->set(i, name);
174                                         
175                                         for(int j=0;j<nseqs;j++){
176                                                 fileHandle >> distance;
177                                         
178                                                 if (distance == -1) { distance = 1000000; }
179                                                 
180                                                 if((distance < cutoff) && (j != i)){
181                                                         rowMap[j] = distance;
182                                                 }
183                                                 index++;
184                                                 reading->update(index);
185                                         }
186                                         
187                                         gobble(fileHandle);
188                         
189                                         //save position in file of each new row
190                                         rowPos[i] = out.tellp();
191
192                                         //output row to file
193                                         out << i << '\t' << rowMap.size() << '\t';
194                                         for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
195                                                 out << itRow->first << '\t' << itRow->second << '\t';
196                                         }
197                                         out << endl;
198                                         
199                                         //clear map for new row's info
200                                         rowMap.clear();
201                                 }
202                         }
203                         reading->finish();
204                         delete reading;
205                         
206                         list->setLabel("0");
207                         fileHandle.close();
208                         out.close();
209                         
210         }
211         catch(exception& e) {
212                errorOut(e, "FormatPhylipMatrix", "read");
213                 exit(1);
214         }
215 }
216 /***********************************************************************/
217 FormatPhylipMatrix::~FormatPhylipMatrix(){}
218 /***********************************************************************/
219
220