5 * Created by westcott on 1/13/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "formatcolumn.h"
11 #include "progress.hpp"
13 /***********************************************************************/
// Constructs the formatter for the file at path `df`: the path is kept in
// `filename` and the file is opened for reading on `fileHandle` through
// m->openInputFile, which is the stream read() later consumes.
14 FormatColumnMatrix::FormatColumnMatrix(string df) : filename(df){
15 m->openInputFile(filename, fileHandle);
17 /***********************************************************************/
// Converts the "firstName secondName distance" records on `fileHandle` into a
// row-indexed distance file (`distFile`).
//
// Steps, as far as this function shows them:
//   1. Read the records, translate both names to the integer ids stored in
//      nameMap, and write every kept pair in both orientations to
//      "<filename>.temp". If the input turns out to be square already, the
//      input file itself is used for the next step instead.
//   2. Sort that file with the platform `sort` command (`sort -n` on Unix-like
//      systems) so the entries of one row end up adjacent.
//   3. Re-read the sorted pairs and write one record per row to `distFile`,
//      storing in rowPos[row] the output offset at which that row starts.
//
// @param nameMap  maps each sequence name to an integer id; also supplies the
//                 sequence count and the ListVector copied into `list`.
// @return 0 on every interrupt path (m->control_pressed).
// Side effects: a sequence name missing from nameMap is reported and then
// terminates the process with exit(1); temporary files are created and removed.
19 int FormatColumnMatrix::read(NameAssignment* nameMap){
22 string firstName, secondName;
// Sequence count; sizes both the progress range and rowPos below.
24 int nseqs = nameMap->size();
// NOTE(review): allocated with new; where `list` is released is not shown in this function - confirm ownership.
26 list = new ListVector(nameMap->getListVector());
// NOTE(review): nseqs * nseqs is evaluated in int and can overflow for large sequence counts.
28 Progress* reading = new Progress("Formatting matrix: ", nseqs * nseqs);
31 int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
32 int refCol = 0; //shows up later - Cell(refCol,refRow). If it does, then its a square matrix
34 //need to see if this is a square or a triangular matrix...
// Pairs are mirrored into this temp file while the input is still assumed to be triangular.
37 string tempOutFile = filename + ".temp";
38 m->openOutputFile(tempOutFile, out);
// Runs until the input stream fails/ends or the file is recognised as square (lt set to 0).
40 while(fileHandle && lt == 1){ //let's assume it's a triangular matrix...
// On interrupt: close and discard the partial temp file, close the input, free the progress object and bail out.
42 if (m->control_pressed) { out.close(); m->mothurRemove(tempOutFile); fileHandle.close(); delete reading; return 0; }
44 fileHandle >> firstName >> secondName >> distance; // get the row and column names and distance
// Translate both names to their integer ids; a name missing from nameMap is fatal (exit(1)).
46 map<string,int>::iterator itA = nameMap->find(firstName);
47 map<string,int>::iterator itB = nameMap->find(secondName);
48 if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); }
49 if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); }
// A distance of -1 is replaced by 1000000, which passes the cutoff test below only if cutoff is larger than that.
51 if (distance == -1) { distance = 1000000; }
// Keep only pairs below the cutoff whose two names resolve to different map entries.
53 if((distance < cutoff) && (itA != itB)){
54 if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol...
// Each pair is written in both orientations so the distance appears under both rows.
59 out << itA->second << '\t' << itB->second << '\t' << distance << endl;
60 out << itB->second << '\t' << itA->second << '\t' << distance << endl;
// Meeting the reference cell again, in either orientation, marks the input as square and ends the loop via lt.
62 else if(refRow == itA->second && refCol == itB->second){ lt = 0; } //you are square
63 else if(refRow == itB->second && refCol == itA->second){ lt = 0; } //you are square
64 else{ //making it square
65 out << itA->second << '\t' << itB->second << '\t' << distance << endl;
66 out << itB->second << '\t' << itA->second << '\t' << distance << endl;
// Progress is reported from the id of the first name of the record just read.
69 reading->update(itA->second * nseqs / 2);
71 m->gobble(fileHandle);
// Pick the file to sort: the input itself if it was square, otherwise the mirrored temp file.
// NOTE(review): the input file holds sequence names while the temp file holds integer ids, yet both go through the same sort and row pass below - confirm the square case is handled.
77 if(lt == 0){ // oops, it was square
78 squareFile = filename;
79 }else{ squareFile = tempOutFile; }
81 //sort file by first column so the distances for each row are together
82 string outfile = m->getRootName(squareFile) + "sorted.dist.temp";
// NOTE(review): the command is built by plain concatenation, so paths containing spaces or shell metacharacters are passed unquoted, and the result of system() is not checked.
85 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
86 string command = "sort -n " + squareFile + " -o " + outfile;
87 system(command.c_str());
88 #else //sort using windows sort
89 string command = "sort " + squareFile + " /O " + outfile;
90 system(command.c_str());
// On interrupt after sorting: remove both intermediate files before returning.
93 if (m->control_pressed) { m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; }
95 //output to new file distance for each row and save positions in file where new row begins
97 m->openInputFile(outfile, in);
99 distFile = outfile + ".rowFormatted";
100 m->openOutputFile(distFile, out);
// Newly created rowPos slots start at -1; they are overwritten below only for rows that get written.
102 rowPos.resize(nseqs, -1);
// Entries of the row currently being collected: column id -> distance, iterated in ascending column id.
106 map<int, float> rowMap;
107 map<int, float>::iterator itRow;
109 //get first currentRow
// The text of `first` is pushed back onto the stream, presumably so the record-reading code below sees the whole first record again.
// NOTE(review): putback() hands characters back in reverse order of insertion and more than one putback is not guaranteed to succeed - verify this for multi-digit ids.
113 string firstString = toString(first);
114 for(int k = 0; k < firstString.length(); k++) { in.putback(firstString[k]); }
// On interrupt during the row pass: close both streams and remove every file produced so far.
118 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(distFile); m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; }
120 in >> first >> second >> dist; m->gobble(in);
// A change of row id closes the collected row: store its start offset, then write "row count col dist col dist ...".
122 if (first != currentRow) {
123 //save position in file of each new row
124 rowPos[currentRow] = out.tellp();
126 out << currentRow << '\t' << rowMap.size() << '\t';
128 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
129 out << itRow->first << '\t' << itRow->second << '\t';
136 //save row you just read
138 rowMap[second] = dist;
// Same row as before: just add this column's distance to the row being collected.
142 rowMap[second] = dist;
// Write out the last collected row in the same format as above.
148 //save position in file of each new row
149 rowPos[currentRow] = out.tellp();
151 out << currentRow << '\t' << rowMap.size() << '\t';
153 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
154 out << itRow->first << '\t' << itRow->second << '\t';
162 if (m->control_pressed) { m->mothurRemove(distFile); m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; }
// The intermediate pair files are no longer needed once distFile has been written.
164 m->mothurRemove(tempOutFile);
165 m->mothurRemove(outfile);
172 if (m->control_pressed) { m->mothurRemove(distFile); return 0; }
// Any std::exception raised above is reported through m->errorOut with this class and method name.
177 catch(exception& e) {
178 m->errorOut(e, "FormatColumnMatrix", "read");
182 /***********************************************************************/
// CountTable overload: converts the "firstName secondName distance" records on
// `fileHandle` into a row-indexed distance file (`distFile`).
//
// Steps, as far as this function shows them:
//   1. Read the records, translate both names to integers with nameMap->get(),
//      and write every kept pair in both orientations to "<filename>.temp".
//      If the input turns out to be square already, the input file itself is
//      used for the next step instead.
//   2. Sort that file with the platform `sort` command (`sort -n` on Unix-like
//      systems) so the entries of one row end up adjacent.
//   3. Re-read the sorted pairs and write one record per row to `distFile`,
//      storing in rowPos[row] the output offset at which that row starts.
//
// @param nameMap  count table used to translate sequence names to integers;
//                 also supplies the sequence count and the ListVector copied
//                 into `list`.
// @return 0 on every interrupt path (m->control_pressed).
// Side effects: temporary files are created and removed.
184 int FormatColumnMatrix::read(CountTable* nameMap){
187 string firstName, secondName;
// Sequence count; sizes both the progress range and rowPos below.
189 int nseqs = nameMap->size();
// NOTE(review): allocated with new; where `list` is released is not shown in this function - confirm ownership.
191 list = new ListVector(nameMap->getListVector());
// NOTE(review): nseqs * nseqs is evaluated in int and can overflow for large sequence counts.
193 Progress* reading = new Progress("Formatting matrix: ", nseqs * nseqs);
196 int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
197 int refCol = 0; //shows up later - Cell(refCol,refRow). If it does, then its a square matrix
199 //need to see if this is a square or a triangular matrix...
// Pairs are mirrored into this temp file while the input is still assumed to be triangular.
202 string tempOutFile = filename + ".temp";
203 m->openOutputFile(tempOutFile, out);
// Runs until the input stream fails/ends or the file is recognised as square (lt set to 0).
205 while(fileHandle && lt == 1){ //let's assume it's a triangular matrix...
// On interrupt: close and discard the partial temp file, close the input, free the progress object and bail out.
207 if (m->control_pressed) { out.close(); m->mothurRemove(tempOutFile); fileHandle.close(); delete reading; return 0; }
209 fileHandle >> firstName >> secondName >> distance; // get the row and column names and distance
// Translate both names to the integers returned by nameMap->get().
// NOTE(review): no not-found check is visible here; confirm how CountTable::get reports an unknown name.
211 int itA = nameMap->get(firstName);
212 int itB = nameMap->get(secondName);
// A distance of -1 is replaced by 1000000, which passes the cutoff test below only if cutoff is larger than that.
214 if (distance == -1) { distance = 1000000; }
// Keep only pairs below the cutoff whose two names translate to different integers.
216 if((distance < cutoff) && (itA != itB)){
217 if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol...
// Each pair is written in both orientations so the distance appears under both rows.
222 out << itA << '\t' << itB << '\t' << distance << endl;
223 out << itB << '\t' << itA << '\t' << distance << endl;
// Meeting the reference cell again, in either orientation, marks the input as square and ends the loop via lt.
225 else if(refRow == itA && refCol == itB){ lt = 0; } //you are square
226 else if(refRow == itB && refCol == itA){ lt = 0; } //you are square
227 else{ //making it square
228 out << itA << '\t' << itB << '\t' << distance << endl;
229 out << itB << '\t' << itA << '\t' << distance << endl;
// Progress is reported from the integer of the first name of the record just read.
232 reading->update(itA * nseqs / 2);
234 m->gobble(fileHandle);
// Pick the file to sort: the input itself if it was square, otherwise the mirrored temp file.
// NOTE(review): the input file holds sequence names while the temp file holds integers, yet both go through the same sort and row pass below - confirm the square case is handled.
240 if(lt == 0){ // oops, it was square
241 squareFile = filename;
242 }else{ squareFile = tempOutFile; }
244 //sort file by first column so the distances for each row are together
245 string outfile = m->getRootName(squareFile) + "sorted.dist.temp";
// NOTE(review): the command is built by plain concatenation, so paths containing spaces or shell metacharacters are passed unquoted, and the result of system() is not checked.
248 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
249 string command = "sort -n " + squareFile + " -o " + outfile;
250 system(command.c_str());
251 #else //sort using windows sort
252 string command = "sort " + squareFile + " /O " + outfile;
253 system(command.c_str());
// On interrupt after sorting: remove both intermediate files before returning.
256 if (m->control_pressed) { m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; }
258 //output to new file distance for each row and save positions in file where new row begins
260 m->openInputFile(outfile, in);
262 distFile = outfile + ".rowFormatted";
263 m->openOutputFile(distFile, out);
// Newly created rowPos slots start at -1; they are overwritten below only for rows that get written.
265 rowPos.resize(nseqs, -1);
// Entries of the row currently being collected: column id -> distance, iterated in ascending column id.
269 map<int, float> rowMap;
270 map<int, float>::iterator itRow;
272 //get first currentRow
// The text of `first` is pushed back onto the stream, presumably so the record-reading code below sees the whole first record again.
// NOTE(review): putback() hands characters back in reverse order of insertion and more than one putback is not guaranteed to succeed - verify this for multi-digit ids.
276 string firstString = toString(first);
277 for(int k = 0; k < firstString.length(); k++) { in.putback(firstString[k]); }
// On interrupt during the row pass: close both streams and remove every file produced so far.
281 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(distFile); m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; }
283 in >> first >> second >> dist; m->gobble(in);
// A change of row id closes the collected row: store its start offset, then write "row count col dist col dist ...".
285 if (first != currentRow) {
286 //save position in file of each new row
287 rowPos[currentRow] = out.tellp();
289 out << currentRow << '\t' << rowMap.size() << '\t';
291 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
292 out << itRow->first << '\t' << itRow->second << '\t';
299 //save row you just read
301 rowMap[second] = dist;
// Same row as before: just add this column's distance to the row being collected.
305 rowMap[second] = dist;
// Write out the last collected row in the same format as above.
311 //save position in file of each new row
312 rowPos[currentRow] = out.tellp();
314 out << currentRow << '\t' << rowMap.size() << '\t';
316 for (itRow = rowMap.begin(); itRow != rowMap.end(); itRow++) {
317 out << itRow->first << '\t' << itRow->second << '\t';
325 if (m->control_pressed) { m->mothurRemove(distFile); m->mothurRemove(tempOutFile); m->mothurRemove(outfile); delete reading; return 0; }
// The intermediate pair files are no longer needed once distFile has been written.
327 m->mothurRemove(tempOutFile);
328 m->mothurRemove(outfile);
335 if (m->control_pressed) { m->mothurRemove(distFile); return 0; }
// Any std::exception raised above is reported through m->errorOut with this class and method name.
340 catch(exception& e) {
341 m->errorOut(e, "FormatColumnMatrix", "read");
346 /***********************************************************************/
// Destructor with an empty body: nothing is released explicitly here.
// NOTE(review): read() assigns `list` from a `new ListVector(...)`; it is not deleted here, so confirm who owns it.
347 FormatColumnMatrix::~FormatColumnMatrix(){}
348 /***********************************************************************/