]> git.donarmstrong.com Git - mothur.git/blob - formatcolumn.cpp
added formatmatrix, formatcolumn, and formatphylip classes. Used these classes in...
[mothur.git] / formatcolumn.cpp
1 /*
2  *  formatcolumn.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 1/13/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "formatcolumn.h"
11 #include "progress.hpp"
12
13 /***********************************************************************/
14 FormatColumnMatrix::FormatColumnMatrix(string df) : filename(df){
15         openInputFile(filename, fileHandle);
16 }
17 /***********************************************************************/
18
19 void FormatColumnMatrix::read(NameAssignment* nameMap){
20         try {           
21
22                 string firstName, secondName;
23                 float distance;
24                 int nseqs = nameMap->size();
25
26                 list = new ListVector(nameMap->getListVector());
27         
28                 Progress* reading = new Progress("Formatting matrix:     ", nseqs * nseqs);
29
30                 int lt = 1;
31                 int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
32                 int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix
33
34                 //need to see if this is a square or a triangular matrix...
35                 
36                 ofstream out;
37                 string tempOutFile = filename + ".temp";
38                 openOutputFile(tempOutFile, out);
39         
40                 while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
41                 
42                         fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
43         
44                         map<string,int>::iterator itA = nameMap->find(firstName);
45                         map<string,int>::iterator itB = nameMap->find(secondName);
46                         if(itA == nameMap->end()){      cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);   }
47                         if(itB == nameMap->end()){      cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
48
49                         if (distance == -1) { distance = 1000000; }
50                         
51                         if(distance < cutoff && itA != itB){
52                                                         
53                                 if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
54                                         refRow = itA->second;
55                                         refCol = itB->second;
56                                         
57                                         //making it square
58                                         out << itA->second << '\t' << itB->second << '\t' << distance << endl;
59                                         out << itB->second << '\t' << itA->second << '\t' << distance << endl;
60                                 }
61                                 else if(refRow == itA->second && refCol == itB->second){        lt = 0;         } //you are square
62                                 else if(refRow == itB->second && refCol == itA->second){        lt = 0;         }  //you are square
63                                 else{   //making it square
64                                         out << itA->second << '\t' << itB->second << '\t' << distance << endl;
65                                         out << itB->second << '\t' << itA->second << '\t' << distance << endl;
66                                 }
67                                 
68                                 reading->update(itA->second * nseqs / 2);
69                         }
70                         gobble(fileHandle);
71                 }
72                 out.close();
73                 fileHandle.close();
74                 
75                 string squareFile;
76                 if(lt == 0){  // oops, it was square
77                         squareFile = filename;
78                 }else{ squareFile = tempOutFile; }
79                 
80                 //sort file by first column so the distances for each row are together
81                 string outfile = getRootName(squareFile) + "sorted.dist.temp";
82                 
83                 //use the unix sort 
84                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
85                         string command = "sort -n " + squareFile + " -o " + outfile;
86                         system(command.c_str());
87                 #else //sort using windows sort
88                         string command = "sort " + squareFile + " /O " + outfile;
89                         system(command.c_str());
90                 #endif
91                 
92
93                 //output to new file distance for each row and save positions in file where new row begins
94                 ifstream in;
95                 openInputFile(outfile, in);
96                 
97                 distFile = outfile + ".rowFormatted";
98                 openOutputFile(distFile, out);
99                 
100                 rowPos.resize(nseqs, -1);
101                 int currentRow;
102                 int first, second;
103                 float dist;
104                 map<int, float> rowMap;
105                 map<int, float>::iterator itRow;
106                 
107                 //get first currentRow
108                 in >> first;
109                 currentRow = first;
110                 
111                 string firstString = toString(first);
112                 for(int k = 0; k < firstString.length(); k++)  {   in.putback(firstString[k]);  }
113                 
114                 while(!in.eof()) {
115                         in >> first >> second >> dist; gobble(in);
116                         
117                         if (first != currentRow) {
118                                 //save position in file of each new row
119                                 rowPos[currentRow] = out.tellp();
120                                 
121                                 out << currentRow << '\t' << rowMap.size() << '\t';
122                                 
123                                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
124                                         out << itRow->first << '\t' << itRow->second << '\t';
125                                 }
126                                 out << endl;
127                                 
128                                 currentRow = first;
129                                 rowMap.clear();
130                                 
131                                 //save row you just read
132                                 rowMap[second] = dist;
133
134                         }else{
135                                 rowMap[second] = dist;
136                         }
137                 }
138                 
139                 //print last Row
140                 //save position in file of each new row
141                 rowPos[currentRow] = out.tellp();
142                 
143                 out << currentRow << '\t' << rowMap.size() << '\t';
144                 
145                 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
146                         out << itRow->first << '\t' << itRow->second << '\t';
147                 }
148                 out << endl;
149                 
150                 
151                 in.close();
152                 out.close();
153                 
154                 
155                 remove(tempOutFile.c_str());
156                 remove(outfile.c_str());
157                 
158                 reading->finish();
159                 list->setLabel("0");
160
161         }
162         catch(exception& e) {
163                 errorOut(e, "FormatColumnMatrix", "read");
164                 exit(1);
165         }
166 }
167
168 /***********************************************************************/
169 FormatColumnMatrix::~FormatColumnMatrix(){}
170 /***********************************************************************/
171
172
173