]> git.donarmstrong.com Git - mothur.git/blob - formatcolumn.cpp
added modify names parameter to set.dir
[mothur.git] / formatcolumn.cpp
1 /*
2  *  formatcolumn.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 1/13/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "formatcolumn.h"
11 #include "progress.hpp"
12
13 /***********************************************************************/
14 FormatColumnMatrix::FormatColumnMatrix(string df) : filename(df){
15         m->openInputFile(filename, fileHandle);
16 }
17 /***********************************************************************/
18
19 int FormatColumnMatrix::read(NameAssignment* nameMap){
20         try {           
21
22                 string firstName, secondName;
23                 float distance;
24                 int nseqs = nameMap->size();
25
26                 list = new ListVector(nameMap->getListVector());
27         
28                 Progress* reading = new Progress("Formatting matrix:     ", nseqs * nseqs);
29
30                 int lt = 1;
31                 int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
32                 int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix
33
34                 //need to see if this is a square or a triangular matrix...
35                 
36                 ofstream out;
37                 string tempOutFile = filename + ".temp";
38                 m->openOutputFile(tempOutFile, out);
39         
40                 while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
41                 
42                         if (m->control_pressed) { out.close();  m->mothurRemove(tempOutFile); fileHandle.close();  delete reading; return 0; }
43                 
44                         fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
45         
46                         map<string,int>::iterator itA = nameMap->find(firstName);
47                         map<string,int>::iterator itB = nameMap->find(secondName);
48                         if(itA == nameMap->end()){  m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1);  }
49                         if(itB == nameMap->end()){  m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1);  }
50
51                         if (distance == -1) { distance = 1000000; }
52                 
53                         if((distance < cutoff) && (itA != itB)){
54                                 if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
55                                         refRow = itA->second;
56                                         refCol = itB->second;
57                                         
58                                         //making it square
59                                         out << itA->second << '\t' << itB->second << '\t' << distance << endl;
60                                         out << itB->second << '\t' << itA->second << '\t' << distance << endl;
61                                 }
62                                 else if(refRow == itA->second && refCol == itB->second){        lt = 0;         } //you are square
63                                 else if(refRow == itB->second && refCol == itA->second){        lt = 0;         }  //you are square
64                                 else{   //making it square
65                                         out << itA->second << '\t' << itB->second << '\t' << distance << endl;
66                                         out << itB->second << '\t' << itA->second << '\t' << distance << endl;
67                                 }
68                                 
69                                 reading->update(itA->second * nseqs / 2);
70                         }
71                         m->gobble(fileHandle);
72                 }
73                 out.close();
74                 fileHandle.close();
75         
76                 string squareFile;
77                 if(lt == 0){  // oops, it was square
78                         squareFile = filename;
79                 }else{ squareFile = tempOutFile; }
80                 
81                 //sort file by first column so the distances for each row are together
82                 string outfile = m->getRootName(squareFile) + "sorted.dist.temp";
83                 
84                 //use the unix sort 
85                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
86                         string command = "sort -n " + squareFile + " -o " + outfile;
87                         system(command.c_str());
88                 #else //sort using windows sort
89                         string command = "sort " + squareFile + " /O " + outfile;
90                         system(command.c_str());
91                 #endif
92                 
93                 if (m->control_pressed) { m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; }
94
95                 //output to new file distance for each row and save positions in file where new row begins
96                 ifstream in;
97                 m->openInputFile(outfile, in);
98                 
99                 distFile = outfile + ".rowFormatted";
100                 m->openOutputFile(distFile, out);
101                 
102                 rowPos.resize(nseqs, -1);
103                 int currentRow;
104                 int first, second;
105                 float dist;
106                 map<int, float> rowMap;
107                 map<int, float>::iterator itRow;
108                 
109                 //get first currentRow
110                 in >> first;
111                 currentRow = first;
112                 
113                 string firstString = toString(first);
114                 for(int k = 0; k < firstString.length(); k++)  {   in.putback(firstString[k]);  }
115                 
116                 while(!in.eof()) {
117                         
118                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(distFile); m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; }
119                         
120                         in >> first >> second >> dist; m->gobble(in);
121                         
122                         if (first != currentRow) {
123                                 //save position in file of each new row
124                                 rowPos[currentRow] = out.tellp();
125                                 
126                                 out << currentRow << '\t' << rowMap.size() << '\t';
127                                 
128                                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
129                                         out << itRow->first << '\t' << itRow->second << '\t';
130                                 }
131                                 out << endl;
132                                 
133                                 currentRow = first;
134                                 rowMap.clear();
135                                 
136                                 //save row you just read
137                                 if (dist < cutoff) {
138                                         rowMap[second] = dist;
139                                 }
140                         }else{
141                                 if (dist < cutoff) {
142                                         rowMap[second] = dist;
143                                 }
144                         }
145                 }
146                 
147                 //print last Row
148                 //save position in file of each new row
149                 rowPos[currentRow] = out.tellp();
150                 
151                 out << currentRow << '\t' << rowMap.size() << '\t';
152                 
153                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
154                         out << itRow->first << '\t' << itRow->second << '\t';
155                 }
156                 out << endl;
157                 
158                 
159                 in.close();
160                 out.close();
161                 
162                 if (m->control_pressed) {  m->mothurRemove(distFile); m->mothurRemove(tempOutFile); m->mothurRemove(outfile);  delete reading; return 0; }
163                 
164                 m->mothurRemove(tempOutFile);
165                 m->mothurRemove(outfile);
166                 
167                 reading->finish();
168                 
169                 delete reading;
170                 list->setLabel("0");
171                 
172                 if (m->control_pressed) {  m->mothurRemove(distFile);  return 0; }
173
174                 return 1;
175
176         }
177         catch(exception& e) {
178                 m->errorOut(e, "FormatColumnMatrix", "read");
179                 exit(1);
180         }
181 }
182 /***********************************************************************/
183
184 int FormatColumnMatrix::read(CountTable* nameMap){
185         try {           
186         
187                 string firstName, secondName;
188                 float distance;
189                 int nseqs = nameMap->size();
190         
191                 list = new ListVector(nameMap->getListVector());
192         
193                 Progress* reading = new Progress("Formatting matrix:     ", nseqs * nseqs);
194         
195                 int lt = 1;
196                 int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
197                 int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix
198         
199                 //need to see if this is a square or a triangular matrix...
200                 
201                 ofstream out;
202                 string tempOutFile = filename + ".temp";
203                 m->openOutputFile(tempOutFile, out);
204         
205                 while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
206             
207                         if (m->control_pressed) { out.close();  m->mothurRemove(tempOutFile); fileHandle.close();  delete reading; return 0; }
208             
209                         fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
210             
211                         int itA = nameMap->get(firstName);
212                         int itB = nameMap->get(secondName);
213             
214                         if (distance == -1) { distance = 1000000; }
215             
216                         if((distance < cutoff) && (itA != itB)){
217                                 if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
218                                         refRow = itA;
219                                         refCol = itB;
220                                         
221                                         //making it square
222                                         out << itA << '\t' << itB << '\t' << distance << endl;
223                                         out << itB << '\t' << itA << '\t' << distance << endl;
224                                 }
225                                 else if(refRow == itA && refCol == itB){        lt = 0;         } //you are square
226                                 else if(refRow == itB && refCol == itA){        lt = 0;         }  //you are square
227                                 else{   //making it square
228                                         out << itA << '\t' << itB << '\t' << distance << endl;
229                                         out << itB << '\t' << itA << '\t' << distance << endl;
230                                 }
231                                 
232                                 reading->update(itA * nseqs / 2);
233                         }
234                         m->gobble(fileHandle);
235                 }
236                 out.close();
237                 fileHandle.close();
238         
239                 string squareFile;
240                 if(lt == 0){  // oops, it was square
241                         squareFile = filename;
242                 }else{ squareFile = tempOutFile; }
243                 
244                 //sort file by first column so the distances for each row are together
245                 string outfile = m->getRootName(squareFile) + "sorted.dist.temp";
246                 
247                 //use the unix sort 
248 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
249         string command = "sort -n " + squareFile + " -o " + outfile;
250         system(command.c_str());
251 #else //sort using windows sort
252         string command = "sort " + squareFile + " /O " + outfile;
253         system(command.c_str());
254 #endif
255                 
256                 if (m->control_pressed) { m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; }
257         
258                 //output to new file distance for each row and save positions in file where new row begins
259                 ifstream in;
260                 m->openInputFile(outfile, in);
261                 
262                 distFile = outfile + ".rowFormatted";
263                 m->openOutputFile(distFile, out);
264                 
265                 rowPos.resize(nseqs, -1);
266                 int currentRow;
267                 int first, second;
268                 float dist;
269                 map<int, float> rowMap;
270                 map<int, float>::iterator itRow;
271                 
272                 //get first currentRow
273                 in >> first;
274                 currentRow = first;
275                 
276                 string firstString = toString(first);
277                 for(int k = 0; k < firstString.length(); k++)  {   in.putback(firstString[k]);  }
278                 
279                 while(!in.eof()) {
280                         
281                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(distFile); m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; }
282                         
283                         in >> first >> second >> dist; m->gobble(in);
284                         
285                         if (first != currentRow) {
286                                 //save position in file of each new row
287                                 rowPos[currentRow] = out.tellp();
288                                 
289                                 out << currentRow << '\t' << rowMap.size() << '\t';
290                                 
291                                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
292                                         out << itRow->first << '\t' << itRow->second << '\t';
293                                 }
294                                 out << endl;
295                                 
296                                 currentRow = first;
297                                 rowMap.clear();
298                                 
299                                 //save row you just read
300                                 if (dist < cutoff) {
301                                         rowMap[second] = dist;
302                                 }
303                         }else{
304                                 if (dist < cutoff) {
305                                         rowMap[second] = dist;
306                                 }
307                         }
308                 }
309                 
310                 //print last Row
311                 //save position in file of each new row
312                 rowPos[currentRow] = out.tellp();
313                 
314                 out << currentRow << '\t' << rowMap.size() << '\t';
315                 
316                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
317                         out << itRow->first << '\t' << itRow->second << '\t';
318                 }
319                 out << endl;
320                 
321                 
322                 in.close();
323                 out.close();
324                 
325                 if (m->control_pressed) {  m->mothurRemove(distFile); m->mothurRemove(tempOutFile); m->mothurRemove(outfile);  delete reading; return 0; }
326                 
327                 m->mothurRemove(tempOutFile);
328                 m->mothurRemove(outfile);
329                 
330                 reading->finish();
331                 
332                 delete reading;
333                 list->setLabel("0");
334                 
335                 if (m->control_pressed) {  m->mothurRemove(distFile);  return 0; }
336         
337                 return 1;
338         
339         }
340         catch(exception& e) {
341                 m->errorOut(e, "FormatColumnMatrix", "read");
342                 exit(1);
343         }
344 }
345
346 /***********************************************************************/
347 FormatColumnMatrix::~FormatColumnMatrix(){}
348 /***********************************************************************/
349
350
351