git.donarmstrong.com Git - mothur.git/blob - formatcolumn.cpp
created mothurOut class to handle logfiles
[mothur.git] / formatcolumn.cpp
1 /*
2  *  formatcolumn.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 1/13/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "formatcolumn.h"
11 #include "progress.hpp"
12
13 /***********************************************************************/
14 FormatColumnMatrix::FormatColumnMatrix(string df) : filename(df){
15         openInputFile(filename, fileHandle);
16 }
17 /***********************************************************************/
18
19 void FormatColumnMatrix::read(NameAssignment* nameMap){
20         try {           
21
22                 string firstName, secondName;
23                 float distance;
24                 int nseqs = nameMap->size();
25
26                 list = new ListVector(nameMap->getListVector());
27         
28                 Progress* reading = new Progress("Formatting matrix:     ", nseqs * nseqs);
29
30                 int lt = 1;
31                 int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
32                 int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix
33
34                 //need to see if this is a square or a triangular matrix...
35                 
36                 ofstream out;
37                 string tempOutFile = filename + ".temp";
38                 openOutputFile(tempOutFile, out);
39         
40                 while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
41                 
42                         fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
43         
44                         map<string,int>::iterator itA = nameMap->find(firstName);
45                         map<string,int>::iterator itB = nameMap->find(secondName);
46                         if(itA == nameMap->end()){      cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);   }
47                         if(itB == nameMap->end()){      cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
48
49                         if (distance == -1) { distance = 1000000; }
50                 
51                         if((distance < cutoff) && (itA != itB)){
52                                 if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
53                                         refRow = itA->second;
54                                         refCol = itB->second;
55                                         
56                                         //making it square
57                                         out << itA->second << '\t' << itB->second << '\t' << distance << endl;
58                                         out << itB->second << '\t' << itA->second << '\t' << distance << endl;
59                                 }
60                                 else if(refRow == itA->second && refCol == itB->second){        lt = 0;         } //you are square
61                                 else if(refRow == itB->second && refCol == itA->second){        lt = 0;         }  //you are square
62                                 else{   //making it square
63                                         out << itA->second << '\t' << itB->second << '\t' << distance << endl;
64                                         out << itB->second << '\t' << itA->second << '\t' << distance << endl;
65                                 }
66                                 
67                                 reading->update(itA->second * nseqs / 2);
68                         }
69                         gobble(fileHandle);
70                 }
71                 out.close();
72                 fileHandle.close();
73         
74                 string squareFile;
75                 if(lt == 0){  // oops, it was square
76                         squareFile = filename;
77                 }else{ squareFile = tempOutFile; }
78                 
79                 //sort file by first column so the distances for each row are together
80                 string outfile = getRootName(squareFile) + "sorted.dist.temp";
81                 
82                 //use the unix sort 
83                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
84                         string command = "sort -n " + squareFile + " -o " + outfile;
85                         system(command.c_str());
86                 #else //sort using windows sort
87                         string command = "sort " + squareFile + " /O " + outfile;
88                         system(command.c_str());
89                 #endif
90                 
91
92                 //output to new file distance for each row and save positions in file where new row begins
93                 ifstream in;
94                 openInputFile(outfile, in);
95                 
96                 distFile = outfile + ".rowFormatted";
97                 openOutputFile(distFile, out);
98                 
99                 rowPos.resize(nseqs, -1);
100                 int currentRow;
101                 int first, second;
102                 float dist;
103                 map<int, float> rowMap;
104                 map<int, float>::iterator itRow;
105                 
106                 //get first currentRow
107                 in >> first;
108                 currentRow = first;
109                 
110                 string firstString = toString(first);
111                 for(int k = 0; k < firstString.length(); k++)  {   in.putback(firstString[k]);  }
112                 
113                 while(!in.eof()) {
114                         in >> first >> second >> dist; gobble(in);
115                         
116                         if (first != currentRow) {
117                                 //save position in file of each new row
118                                 rowPos[currentRow] = out.tellp();
119                                 
120                                 out << currentRow << '\t' << rowMap.size() << '\t';
121                                 
122                                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
123                                         out << itRow->first << '\t' << itRow->second << '\t';
124                                 }
125                                 out << endl;
126                                 
127                                 currentRow = first;
128                                 rowMap.clear();
129                                 
130                                 //save row you just read
131                                 if (dist < cutoff) {
132                                         rowMap[second] = dist;
133                                 }
134                         }else{
135                                 if (dist < cutoff) {
136                                         rowMap[second] = dist;
137                                 }
138                         }
139                 }
140                 
141                 //print last Row
142                 //save position in file of each new row
143                 rowPos[currentRow] = out.tellp();
144                 
145                 out << currentRow << '\t' << rowMap.size() << '\t';
146                 
147                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
148                         out << itRow->first << '\t' << itRow->second << '\t';
149                 }
150                 out << endl;
151                 
152                 
153                 in.close();
154                 out.close();
155                 
156                 
157                 remove(tempOutFile.c_str());
158                 remove(outfile.c_str());
159                 
160                 reading->finish();
161                 list->setLabel("0");
162
163         }
164         catch(exception& e) {
165                 m->errorOut(e, "FormatColumnMatrix", "read");
166                 exit(1);
167         }
168 }
169
170 /***********************************************************************/
171 FormatColumnMatrix::~FormatColumnMatrix(){}
172 /***********************************************************************/
173
174
175