]> git.donarmstrong.com Git - mothur.git/blob - formatcolumn.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / formatcolumn.cpp
1 /*
2  *  formatcolumn.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 1/13/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "formatcolumn.h"
11 #include "progress.hpp"
12
13 /***********************************************************************/
14 FormatColumnMatrix::FormatColumnMatrix(string df) : filename(df){
15         m->openInputFile(filename, fileHandle);
16 }
17 /***********************************************************************/
18
19 int FormatColumnMatrix::read(NameAssignment* nameMap){
20         try {           
21
22                 string firstName, secondName;
23                 float distance;
24                 int nseqs = nameMap->size();
25
26                 list = new ListVector(nameMap->getListVector());
27         
28                 Progress* reading = new Progress("Formatting matrix:     ", nseqs * nseqs);
29
30                 int lt = 1;
31                 int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
32                 int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix
33
34                 //need to see if this is a square or a triangular matrix...
35                 
36                 ofstream out;
37                 string tempOutFile = filename + ".temp";
38                 m->openOutputFile(tempOutFile, out);
39         
40                 while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
41                 
42                         if (m->control_pressed) { out.close();  m->mothurRemove(tempOutFile); fileHandle.close();  delete reading; return 0; }
43                 
44                         fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
45         
46                         map<string,int>::iterator itA = nameMap->find(firstName);
47                         map<string,int>::iterator itB = nameMap->find(secondName);
48                         if(itA == nameMap->end()){  m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1);  }
49                         if(itB == nameMap->end()){  m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1);  }
50
51                         if (distance == -1) { distance = 1000000; }
52                 
53                         if((distance < cutoff) && (itA != itB)){
54                                 if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
55                                         refRow = itA->second;
56                                         refCol = itB->second;
57                                         
58                                         //making it square
59                                         out << itA->second << '\t' << itB->second << '\t' << distance << endl;
60                                         out << itB->second << '\t' << itA->second << '\t' << distance << endl;
61                                 }
62                                 else if(refRow == itA->second && refCol == itB->second){        lt = 0;         } //you are square
63                                 else if(refRow == itB->second && refCol == itA->second){        lt = 0;         }  //you are square
64                                 else{   //making it square
65                                         out << itA->second << '\t' << itB->second << '\t' << distance << endl;
66                                         out << itB->second << '\t' << itA->second << '\t' << distance << endl;
67                                 }
68                                 
69                                 reading->update(itA->second * nseqs / 2);
70                         }
71                         m->gobble(fileHandle);
72                 }
73                 out.close();
74                 fileHandle.close();
75         
76                 string squareFile;
77                 if(lt == 0){  // oops, it was square
78                         squareFile = filename;
79                 }else{ squareFile = tempOutFile; }
80                 
81                 //sort file by first column so the distances for each row are together
82                 string outfile = m->getRootName(squareFile) + "sorted.dist.temp";
83                 
84                 //use the unix sort 
85                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
86                         string command = "sort -n " + squareFile + " -o " + outfile;
87                         system(command.c_str());
88                 #else //sort using windows sort
89                         string command = "sort " + squareFile + " /O " + outfile;
90                         system(command.c_str());
91                 #endif
92                 
93                 if (m->control_pressed) { m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; }
94
95                 //output to new file distance for each row and save positions in file where new row begins
96                 ifstream in;
97                 m->openInputFile(outfile, in);
98                 
99                 distFile = outfile + ".rowFormatted";
100                 m->openOutputFile(distFile, out);
101                 
102                 rowPos.resize(nseqs, -1);
103                 int currentRow;
104                 int first, second;
105                 float dist;
106                 map<int, float> rowMap;
107                 map<int, float>::iterator itRow;
108                 
109                 //get first currentRow
110                 in >> first;
111                 currentRow = first;
112                 
113                 string firstString = toString(first);
114                 for(int k = 0; k < firstString.length(); k++)  {   in.putback(firstString[k]);  }
115                 
116                 while(!in.eof()) {
117                         
118                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(distFile); m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; }
119                         
120                         in >> first >> second >> dist; m->gobble(in);
121                         
122                         if (first != currentRow) {
123                                 //save position in file of each new row
124                                 rowPos[currentRow] = out.tellp();
125                                 
126                                 out << currentRow << '\t' << rowMap.size() << '\t';
127                                 
128                                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
129                                         out << itRow->first << '\t' << itRow->second << '\t';
130                                 }
131                                 out << endl;
132                                 
133                                 currentRow = first;
134                                 rowMap.clear();
135                                 
136                                 //save row you just read
137                                 if (dist < cutoff) {
138                                         rowMap[second] = dist;
139                                 }
140                         }else{
141                                 if (dist < cutoff) {
142                                         rowMap[second] = dist;
143                                 }
144                         }
145                 }
146                 
147                 //print last Row
148                 //save position in file of each new row
149                 rowPos[currentRow] = out.tellp();
150                 
151                 out << currentRow << '\t' << rowMap.size() << '\t';
152                 
153                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
154                         out << itRow->first << '\t' << itRow->second << '\t';
155                 }
156                 out << endl;
157                 
158                 
159                 in.close();
160                 out.close();
161                 
162                 if (m->control_pressed) {  m->mothurRemove(distFile); m->mothurRemove(tempOutFile); m->mothurRemove(outfile);  delete reading; return 0; }
163                 
164                 m->mothurRemove(tempOutFile);
165                 m->mothurRemove(outfile);
166                 
167                 reading->finish();
168                 
169                 delete reading;
170                 list->setLabel("0");
171                 
172                 if (m->control_pressed) {  m->mothurRemove(distFile);  return 0; }
173
174                 return 1;
175
176         }
177         catch(exception& e) {
178                 m->errorOut(e, "FormatColumnMatrix", "read");
179                 exit(1);
180         }
181 }
182
183 /***********************************************************************/
184 FormatColumnMatrix::~FormatColumnMatrix(){}
185 /***********************************************************************/
186
187
188