]> git.donarmstrong.com Git - mothur.git/blob - formatphylip.cpp
fixes while testing 1.33.0
[mothur.git] / formatphylip.cpp
1 /*
2  *  formatphylip.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 1/13/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "formatphylip.h"
11 #include "progress.hpp"
12
13 /***********************************************************************/
14 FormatPhylipMatrix::FormatPhylipMatrix(string df) : filename(df) {
15         m->openInputFile(filename, fileHandle);
16 }
17 /***********************************************************************/
18 //not using nameMap
19 int FormatPhylipMatrix::read(NameAssignment* nameMap){
20         try {
21         
22                         float distance;
23                         int square, nseqs;
24                         string name;
25                         ofstream out;
26                         
27                         string numTest;
28                         fileHandle >> numTest >> name;
29                         
30                         if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
31                         else { convert(numTest, nseqs); }
32                 
33             if(nameMap == NULL){
34                 list = new ListVector(nseqs);
35                 list->set(0, name);
36             }
37             else{
38                 list = new ListVector(nameMap->getListVector());
39                 if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
40             }                   
41                         
42                         char d;
43                         while((d=fileHandle.get()) != EOF){
44                 
45                                 if(isalnum(d)){  //you are square
46                                         square = 1;
47                                         fileHandle.close();  //reset file
48                                         
49                                         //open and get through numSeqs, code below formats rest of file
50                                         m->openInputFile(filename, fileHandle);
51                                         fileHandle >> nseqs; m->gobble(fileHandle);
52                                         
53                                         distFile = filename + ".rowFormatted";
54                                         m->openOutputFile(distFile, out);
55                                         break;
56                                 }
57                                 if(d == '\n'){
58                                         square = 0;
59                                         break;
60                                 }
61                         }
62                         
63                         Progress* reading;
64                         reading = new Progress("Formatting matrix:     ", nseqs * nseqs);
65                         
66                         //lower triangle, so must go to column then formatted row file
67                         if(square == 0){
68                                 int  index = 0;
69                                 
70                                 ofstream outTemp;
71                                 string tempFile = filename + ".temp";
72                                 m->openOutputFile(tempFile, outTemp);
73                 
74                                 //convert to square column matrix
75                                 for(int i=1;i<nseqs;i++){
76                                 
77                                         fileHandle >> name;
78                                         
79                     if(nameMap == NULL){ list->set(i, name); }
80                     else { if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
81                     }
82                                         
83                                         for(int j=0;j<i;j++){
84                                         
85                                                 if (m->control_pressed) { outTemp.close(); m->mothurRemove(tempFile); fileHandle.close();  delete reading; return 0; }
86                                                                                         
87                                                 fileHandle >> distance;
88                                                 
89                                                 if (distance == -1) { distance = 1000000; }
90                                                 
91                                                 if(distance < cutoff){
92                                                         outTemp << i << '\t' << j << '\t' << distance << endl;
93                                                         outTemp << j << '\t' << i << '\t' << distance << endl;
94                                                 }
95                                                 index++;
96                                                 reading->update(index);
97                                         }
98                                 }
99                                 outTemp.close();
100                                 
101                                 //format from square column to rowFormatted
102                                 //sort file by first column so the distances for each row are together
103                                 string outfile = m->getRootName(tempFile) + "sorted.dist.temp";
104                                 
105                                 //use the unix sort 
106                                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
107                                         string command = "sort -n " + tempFile + " -o " + outfile;
108                                         system(command.c_str());
109                                 #else //sort using windows sort
110                                         string command = "sort " + tempFile + " /O " + outfile;
111                                         system(command.c_str());
112                                 #endif
113                                 
114                                 if (m->control_pressed) { m->mothurRemove(tempFile); m->mothurRemove(outfile);  delete reading; return 0; }
115
116                                 //output to new file distance for each row and save positions in file where new row begins
117                                 ifstream in;
118                                 m->openInputFile(outfile, in);
119                                 
120                                 distFile = outfile + ".rowFormatted";
121                                 m->openOutputFile(distFile, out);
122                                 
123                                 rowPos.resize(nseqs, -1);
124                                 int currentRow;
125                                 int first, second;
126                                 float dist;
127                                 map<int, float> rowMap;
128                                 map<int, float>::iterator itRow;
129                                 
130                                 //get first currentRow
131                                 in >> first;
132                                 currentRow = first;
133                                 
134                                 string firstString = toString(first);
135                                 for(int k = 0; k < firstString.length(); k++)  {   in.putback(firstString[k]);  }
136                                 
137                                 while(!in.eof()) {
138                                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); m->mothurRemove(distFile); m->mothurRemove(outfile);  delete reading; return 0; }
139
140                                         in >> first >> second >> dist; m->gobble(in);
141                                         
142                                         if (first != currentRow) {
143                                                 //save position in file of each new row
144                                                 rowPos[currentRow] = out.tellp();
145                                                 
146                                                 out << currentRow << '\t' << rowMap.size() << '\t';
147                                                 
148                                                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
149                                                         out << itRow->first << '\t' << itRow->second << '\t';
150                                                 }
151                                                 out << endl;
152                                                 
153                                                 currentRow = first;
154                                                 rowMap.clear();
155                                                 
156                                                 //save row you just read
157                                                 rowMap[second] = dist;
158                                                 
159                                                 index++;
160                                                 reading->update(index);
161                                         }else{
162                                                 rowMap[second] = dist;
163                                         }
164                                 }
165                                 
166                                 //print last Row
167                                 //save position in file of each new row
168                                 rowPos[currentRow] = out.tellp();
169                                 
170                                 out << currentRow << '\t' << rowMap.size() << '\t';
171                                 
172                                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
173                                         out << itRow->first << '\t' << itRow->second << '\t';
174                                 }
175                                 out << endl;
176                                 
177                                 in.close();
178                                 out.close();
179                                 
180                                 m->mothurRemove(tempFile);
181                                 m->mothurRemove(outfile);
182                                 
183                                 if (m->control_pressed) {  m->mothurRemove(distFile);   delete reading; return 0; }
184
185                         }
186                         else{ //square matrix convert directly to formatted row file
187                                 int index = nseqs;
188                                 map<int, float> rowMap;
189                                 map<int, float>::iterator itRow;
190                                 rowPos.resize(nseqs, -1);
191                 
192                                 for(int i=0;i<nseqs;i++){
193                                         fileHandle >> name;                
194                                                                         
195                                         if(nameMap == NULL){ list->set(i, name); }
196                     else { if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
197                     }
198                                         
199                                         for(int j=0;j<nseqs;j++){
200                                                 if (m->control_pressed) {  fileHandle.close(); out.close(); m->mothurRemove(distFile);   delete reading; return 0; }
201                                                 
202                                                 fileHandle >> distance;
203                                         
204                                                 if (distance == -1) { distance = 1000000; }
205                                                 
206                                                 if((distance < cutoff) && (j != i)){
207                                                         rowMap[j] = distance;
208                                                 }
209                                                 index++;
210                                                 reading->update(index);
211                                         }
212                                         
213                                         m->gobble(fileHandle);
214                         
215                                         //save position in file of each new row
216                                         rowPos[i] = out.tellp();
217
218                                         //output row to file
219                                         out << i << '\t' << rowMap.size() << '\t';
220                                         for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
221                                                 out << itRow->first << '\t' << itRow->second << '\t';
222                                         }
223                                         out << endl;
224                                         
225                                         //clear map for new row's info
226                                         rowMap.clear();
227                                 }
228                         }
229                         reading->finish();
230                         delete reading;
231                         fileHandle.close();
232                         out.close();
233                         
234                         if (m->control_pressed) { m->mothurRemove(distFile);  return 0; }
235                         
236                         list->setLabel("0");
237                         
238                         return 1;
239                         
240                         
241         }
242         catch(exception& e) {
243                m->errorOut(e, "FormatPhylipMatrix", "read");
244                 exit(1);
245         }
246 }
247 /***********************************************************************/
248 //not using nameMap
249 int FormatPhylipMatrix::read(CountTable* nameMap){
250         try {
251         
252         float distance;
253         int square, nseqs;
254         string name;
255         ofstream out;
256         
257         string numTest;
258         fileHandle >> numTest >> name;
259         
260         if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
261         else { convert(numTest, nseqs); }
262                 
263         if(nameMap == NULL){
264             list = new ListVector(nseqs);
265             list->set(0, name);
266         }
267         else{
268             list = new ListVector(nameMap->getListVector());
269             nameMap->get(name);
270         }                       
271         
272         char d;
273         while((d=fileHandle.get()) != EOF){
274             
275             if(isalnum(d)){  //you are square
276                 square = 1;
277                 fileHandle.close();  //reset file
278                 
279                 //open and get through numSeqs, code below formats rest of file
280                 m->openInputFile(filename, fileHandle);
281                 fileHandle >> nseqs; m->gobble(fileHandle);
282                 
283                 distFile = filename + ".rowFormatted";
284                 m->openOutputFile(distFile, out);
285                 break;
286             }
287             if(d == '\n'){
288                 square = 0;
289                 break;
290             }
291         }
292         
293         Progress* reading;
294         reading = new Progress("Formatting matrix:     ", nseqs * nseqs);
295         
296         //lower triangle, so must go to column then formatted row file
297         if(square == 0){
298             int  index = 0;
299             
300             ofstream outTemp;
301             string tempFile = filename + ".temp";
302             m->openOutputFile(tempFile, outTemp);
303             
304             //convert to square column matrix
305             for(int i=1;i<nseqs;i++){
306                                 
307                 fileHandle >> name;
308                 
309                 if(nameMap == NULL){ list->set(i, name); }
310                 else { nameMap->get(name); }
311                 
312                 
313                 for(int j=0;j<i;j++){
314                                         
315                     if (m->control_pressed) { outTemp.close(); m->mothurRemove(tempFile); fileHandle.close();  delete reading; return 0; }
316                     
317                     fileHandle >> distance;
318                     
319                     if (distance == -1) { distance = 1000000; }
320                     
321                     if(distance < cutoff){
322                         outTemp << i << '\t' << j << '\t' << distance << endl;
323                         outTemp << j << '\t' << i << '\t' << distance << endl;
324                     }
325                     index++;
326                     reading->update(index);
327                 }
328             }
329             outTemp.close();
330             
331             //format from square column to rowFormatted
332             //sort file by first column so the distances for each row are together
333             string outfile = m->getRootName(tempFile) + "sorted.dist.temp";
334             
335             //use the unix sort 
336 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
337             string command = "sort -n " + tempFile + " -o " + outfile;
338             system(command.c_str());
339 #else //sort using windows sort
340             string command = "sort " + tempFile + " /O " + outfile;
341             system(command.c_str());
342 #endif
343             
344             if (m->control_pressed) { m->mothurRemove(tempFile); m->mothurRemove(outfile);  delete reading; return 0; }
345             
346             //output to new file distance for each row and save positions in file where new row begins
347             ifstream in;
348             m->openInputFile(outfile, in);
349             
350             distFile = outfile + ".rowFormatted";
351             m->openOutputFile(distFile, out);
352             
353             rowPos.resize(nseqs, -1);
354             int currentRow;
355             int first, second;
356             float dist;
357             map<int, float> rowMap;
358             map<int, float>::iterator itRow;
359             
360             //get first currentRow
361             in >> first;
362             currentRow = first;
363             
364             string firstString = toString(first);
365             for(int k = 0; k < firstString.length(); k++)  {   in.putback(firstString[k]);  }
366             
367             while(!in.eof()) {
368                 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); m->mothurRemove(distFile); m->mothurRemove(outfile);  delete reading; return 0; }
369                 
370                 in >> first >> second >> dist; m->gobble(in);
371                 
372                 if (first != currentRow) {
373                     //save position in file of each new row
374                     rowPos[currentRow] = out.tellp();
375                     
376                     out << currentRow << '\t' << rowMap.size() << '\t';
377                     
378                     for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
379                         out << itRow->first << '\t' << itRow->second << '\t';
380                     }
381                     out << endl;
382                     
383                     currentRow = first;
384                     rowMap.clear();
385                     
386                     //save row you just read
387                     rowMap[second] = dist;
388                     
389                     index++;
390                     reading->update(index);
391                 }else{
392                     rowMap[second] = dist;
393                 }
394             }
395             
396             //print last Row
397             //save position in file of each new row
398             rowPos[currentRow] = out.tellp();
399             
400             out << currentRow << '\t' << rowMap.size() << '\t';
401             
402             for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
403                 out << itRow->first << '\t' << itRow->second << '\t';
404             }
405             out << endl;
406             
407             in.close();
408             out.close();
409             
410             m->mothurRemove(tempFile);
411             m->mothurRemove(outfile);
412             
413             if (m->control_pressed) {  m->mothurRemove(distFile);   delete reading; return 0; }
414             
415         }
416         else{ //square matrix convert directly to formatted row file
417             int index = nseqs;
418             map<int, float> rowMap;
419             map<int, float>::iterator itRow;
420             rowPos.resize(nseqs, -1);
421             
422             for(int i=0;i<nseqs;i++){
423                 fileHandle >> name;                
424                 
425                 if(nameMap == NULL){ list->set(i, name); }
426                 else { nameMap->get(name); }
427                 
428                 for(int j=0;j<nseqs;j++){
429                     if (m->control_pressed) {  fileHandle.close(); out.close(); m->mothurRemove(distFile);   delete reading; return 0; }
430                     
431                     fileHandle >> distance;
432                                         
433                     if (distance == -1) { distance = 1000000; }
434                     
435                     if((distance < cutoff) && (j != i)){
436                         rowMap[j] = distance;
437                     }
438                     index++;
439                     reading->update(index);
440                 }
441                 
442                 m->gobble(fileHandle);
443                 
444                 //save position in file of each new row
445                 rowPos[i] = out.tellp();
446                 
447                 //output row to file
448                 out << i << '\t' << rowMap.size() << '\t';
449                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
450                     out << itRow->first << '\t' << itRow->second << '\t';
451                 }
452                 out << endl;
453                 
454                 //clear map for new row's info
455                 rowMap.clear();
456             }
457         }
458         reading->finish();
459         delete reading;
460         fileHandle.close();
461         out.close();
462         
463         if (m->control_pressed) { m->mothurRemove(distFile);  return 0; }
464         
465         list->setLabel("0");
466         
467         return 1;
468         
469         
470         }
471         catch(exception& e) {
472         m->errorOut(e, "FormatPhylipMatrix", "read");
473         exit(1);
474         }
475 }
476
477 /***********************************************************************/
478 FormatPhylipMatrix::~FormatPhylipMatrix(){}
479 /***********************************************************************/
480
481