]> git.donarmstrong.com Git - mothur.git/blob - formatphylip.cpp
added threshold parameter to make.contigs command.
[mothur.git] / formatphylip.cpp
1 /*
2  *  formatphylip.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 1/13/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "formatphylip.h"
11 #include "progress.hpp"
12
13 /***********************************************************************/
14 FormatPhylipMatrix::FormatPhylipMatrix(string df) : filename(df) {
15         m->openInputFile(filename, fileHandle);
16 }
17 /***********************************************************************/
18 //not using nameMap
19 int FormatPhylipMatrix::read(NameAssignment* nameMap){
20         try {
21         
22                         float distance;
23                         int square, nseqs;
24                         string name;
25                         ofstream out;
26                         
27                         string numTest;
28                         fileHandle >> numTest >> name;
29                         
30                         if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
31                         else { convert(numTest, nseqs); }
32                 
33                                                 
34                         list = new ListVector(nseqs);
35                         list->set(0, name);
36                         
37                         char d;
38                         while((d=fileHandle.get()) != EOF){
39                 
40                                 if(isalnum(d)){  //you are square
41                                         square = 1;
42                                         fileHandle.close();  //reset file
43                                         
44                                         //open and get through numSeqs, code below formats rest of file
45                                         m->openInputFile(filename, fileHandle);
46                                         fileHandle >> nseqs; m->gobble(fileHandle);
47                                         
48                                         distFile = filename + ".rowFormatted";
49                                         m->openOutputFile(distFile, out);
50                                         break;
51                                 }
52                                 if(d == '\n'){
53                                         square = 0;
54                                         break;
55                                 }
56                         }
57                         
58                         Progress* reading;
59                         reading = new Progress("Formatting matrix:     ", nseqs * nseqs);
60                         
61                         //lower triangle, so must go to column then formatted row file
62                         if(square == 0){
63                                 int  index = 0;
64                                 
65                                 ofstream outTemp;
66                                 string tempFile = filename + ".temp";
67                                 m->openOutputFile(tempFile, outTemp);
68                 
69                                 //convert to square column matrix
70                                 for(int i=1;i<nseqs;i++){
71                                 
72                                         fileHandle >> name;
73                                         
74                                         list->set(i, name);
75                                         
76                                         for(int j=0;j<i;j++){
77                                         
78                                                 if (m->control_pressed) { outTemp.close(); m->mothurRemove(tempFile); fileHandle.close();  delete reading; return 0; }
79                                                                                         
80                                                 fileHandle >> distance;
81                                                 
82                                                 if (distance == -1) { distance = 1000000; }
83                                                 
84                                                 if(distance < cutoff){
85                                                         outTemp << i << '\t' << j << '\t' << distance << endl;
86                                                         outTemp << j << '\t' << i << '\t' << distance << endl;
87                                                 }
88                                                 index++;
89                                                 reading->update(index);
90                                         }
91                                 }
92                                 outTemp.close();
93                                 
94                                 //format from square column to rowFormatted
95                                 //sort file by first column so the distances for each row are together
96                                 string outfile = m->getRootName(tempFile) + "sorted.dist.temp";
97                                 
98                                 //use the unix sort 
99                                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
100                                         string command = "sort -n " + tempFile + " -o " + outfile;
101                                         system(command.c_str());
102                                 #else //sort using windows sort
103                                         string command = "sort " + tempFile + " /O " + outfile;
104                                         system(command.c_str());
105                                 #endif
106                                 
107                                 if (m->control_pressed) { m->mothurRemove(tempFile); m->mothurRemove(outfile);  delete reading; return 0; }
108
109                                 //output to new file distance for each row and save positions in file where new row begins
110                                 ifstream in;
111                                 m->openInputFile(outfile, in);
112                                 
113                                 distFile = outfile + ".rowFormatted";
114                                 m->openOutputFile(distFile, out);
115                                 
116                                 rowPos.resize(nseqs, -1);
117                                 int currentRow;
118                                 int first, second;
119                                 float dist;
120                                 map<int, float> rowMap;
121                                 map<int, float>::iterator itRow;
122                                 
123                                 //get first currentRow
124                                 in >> first;
125                                 currentRow = first;
126                                 
127                                 string firstString = toString(first);
128                                 for(int k = 0; k < firstString.length(); k++)  {   in.putback(firstString[k]);  }
129                                 
130                                 while(!in.eof()) {
131                                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempFile); m->mothurRemove(distFile); m->mothurRemove(outfile);  delete reading; return 0; }
132
133                                         in >> first >> second >> dist; m->gobble(in);
134                                         
135                                         if (first != currentRow) {
136                                                 //save position in file of each new row
137                                                 rowPos[currentRow] = out.tellp();
138                                                 
139                                                 out << currentRow << '\t' << rowMap.size() << '\t';
140                                                 
141                                                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
142                                                         out << itRow->first << '\t' << itRow->second << '\t';
143                                                 }
144                                                 out << endl;
145                                                 
146                                                 currentRow = first;
147                                                 rowMap.clear();
148                                                 
149                                                 //save row you just read
150                                                 rowMap[second] = dist;
151                                                 
152                                                 index++;
153                                                 reading->update(index);
154                                         }else{
155                                                 rowMap[second] = dist;
156                                         }
157                                 }
158                                 
159                                 //print last Row
160                                 //save position in file of each new row
161                                 rowPos[currentRow] = out.tellp();
162                                 
163                                 out << currentRow << '\t' << rowMap.size() << '\t';
164                                 
165                                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
166                                         out << itRow->first << '\t' << itRow->second << '\t';
167                                 }
168                                 out << endl;
169                                 
170                                 in.close();
171                                 out.close();
172                                 
173                                 m->mothurRemove(tempFile);
174                                 m->mothurRemove(outfile);
175                                 
176                                 if (m->control_pressed) {  m->mothurRemove(distFile);   delete reading; return 0; }
177
178                         }
179                         else{ //square matrix convert directly to formatted row file
180                                 int index = nseqs;
181                                 map<int, float> rowMap;
182                                 map<int, float>::iterator itRow;
183                                 rowPos.resize(nseqs, -1);
184                 
185                                 for(int i=0;i<nseqs;i++){
186                                         fileHandle >> name;                
187                                                                         
188                                         list->set(i, name);
189                                         
190                                         for(int j=0;j<nseqs;j++){
191                                                 if (m->control_pressed) {  fileHandle.close(); out.close(); m->mothurRemove(distFile);   delete reading; return 0; }
192                                                 
193                                                 fileHandle >> distance;
194                                         
195                                                 if (distance == -1) { distance = 1000000; }
196                                                 
197                                                 if((distance < cutoff) && (j != i)){
198                                                         rowMap[j] = distance;
199                                                 }
200                                                 index++;
201                                                 reading->update(index);
202                                         }
203                                         
204                                         m->gobble(fileHandle);
205                         
206                                         //save position in file of each new row
207                                         rowPos[i] = out.tellp();
208
209                                         //output row to file
210                                         out << i << '\t' << rowMap.size() << '\t';
211                                         for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
212                                                 out << itRow->first << '\t' << itRow->second << '\t';
213                                         }
214                                         out << endl;
215                                         
216                                         //clear map for new row's info
217                                         rowMap.clear();
218                                 }
219                         }
220                         reading->finish();
221                         delete reading;
222                         fileHandle.close();
223                         out.close();
224                         
225                         if (m->control_pressed) { m->mothurRemove(distFile);  return 0; }
226                         
227                         list->setLabel("0");
228                         
229                         return 1;
230                         
231                         
232         }
233         catch(exception& e) {
234                m->errorOut(e, "FormatPhylipMatrix", "read");
235                 exit(1);
236         }
237 }
238 /***********************************************************************/
239 FormatPhylipMatrix::~FormatPhylipMatrix(){}
240 /***********************************************************************/
241
242