]> git.donarmstrong.com Git - mothur.git/blob - readcolumn.cpp
59c3bd5520929f40086279d744c666ab18fe6d8b
[mothur.git] / readcolumn.cpp
1 /*
2  *  readcolumn.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 4/21/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "readcolumn.h"
11 #include "progress.hpp"
12
13 /***********************************************************************/
14
15 ReadColumnMatrix::ReadColumnMatrix(string df) : distFile(df){
16         
17         successOpen = openInputFile(distFile, fileHandle);
18         
19 }
20
21 /***********************************************************************/
22
23 void ReadColumnMatrix::read(NameAssignment* nameMap){
24         try {           
25         
26                         string firstName, secondName;
27                         float distance;
28                         int nseqs = nameMap->size();
29
30                         list = new ListVector(nameMap->getListVector());
31                 
32                         Progress* reading = new Progress("Reading matrix:     ", nseqs * nseqs);
33         
34                         int lt = 1;
35                         int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
36                         int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix
37         
38                         //need to see if this is a square or a triangular matrix...
39                         while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
40                         
41                                 fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
42                 
43                                 if(nameMap->count(firstName)==0){
44                                         cerr << "AError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
45                                 }
46                                 if(nameMap->count(secondName)==0){
47                                         cerr << "AError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
48                                 }
49                                 
50                                 if (distance == -1) { distance = 1000000; }
51                                 
52                                 if(distance < cutoff && nameMap->get(firstName) != nameMap->get(secondName)){
53                                         if(nameMap->get(firstName) > nameMap->get(secondName)){
54                                                 PCell value(nameMap->get(firstName), nameMap->get(secondName), distance);
55                                 
56                                                 if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
57                                                         refRow = nameMap->get(firstName);
58                                                         refCol = nameMap->get(secondName);
59                                                         D->addCell(value);
60                                                 }
61                                                 else if(refRow == nameMap->get(firstName) && refCol == nameMap->get(secondName)){
62                                                         lt = 0;
63                                                 }
64                                                 else{
65                                                         D->addCell(value);
66                                                 }
67                                         }
68                                         else if(nameMap->get(firstName) < nameMap->get(secondName)){
69                                                 PCell value(nameMap->get(secondName), nameMap->get(firstName), distance);
70                                 
71                                                 if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
72                                                         refRow = nameMap->get(firstName);
73                                                         refCol = nameMap->get(secondName);
74                                                         D->addCell(value);
75                                                 }
76                                                 else if(refRow == nameMap->get(secondName) && refCol == nameMap->get(firstName)){
77                                                         lt = 0;
78                                                 }
79                                                 else{
80                                                         D->addCell(value);
81                                                 }
82                                         }
83                                         reading->update(nameMap->get(firstName) * nseqs);
84                                 }
85                                 gobble(fileHandle);
86                         }
87
88                         if(lt == 0){  // oops, it was square
89                                 fileHandle.close();  //let's start over
90                                 D->clear();  //let's start over
91                            
92                                 openInputFile(distFile, fileHandle);  //let's start over
93
94                                 while(fileHandle){
95                                         fileHandle >> firstName >> secondName >> distance;
96                         
97                                         if(nameMap->count(firstName)==0){
98                                                 cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
99                                         }
100                                         if(nameMap->count(secondName)==0){
101                                                 cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
102                                         }
103                                         
104                                         if (distance == -1) { distance = 1000000; }
105                                         
106                                         if(distance < cutoff && nameMap->get(firstName) > nameMap->get(secondName)){
107                                                 PCell value(nameMap->get(firstName), nameMap->get(secondName), distance);
108                                                 D->addCell(value);
109                                                 reading->update(nameMap->get(firstName) * nseqs);
110                                         }
111                         
112                                         gobble(fileHandle);
113                                 }
114                         }
115                 //      else if(lt == 0){
116                 //              while(fileHandle){
117                 //                      fileHandle >> firstName >> secondName >> distance;
118                 //                      
119                 //                      if(nameMap->count(firstName)==0){
120                 //                              cerr << "CError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
121                 //                      }
122                 //                      if(nameMap->count(secondName)==0){
123                 //                              cerr << "CError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
124                 //                      }
125                 //                      if (distance == -1) { distance = 1000000; }
126                 
127                 //                      if(distance < cutoff && (*nameMap)[firstName].second < (*nameMap)[secondName].second){
128                 ////                            cout << (*nameMap)[secondName] << ' ' << (*nameMap)[firstName] << ' ' << distance << endl;
129                 //                              D->addCell(Cell((*nameMap)[secondName].second, (*nameMap)[firstName].second, distance));
130                 //                              reading->update((*nameMap)[secondName].second * nseqs);
131                 //                      }
132                 //
133                 //                      gobble(fileHandle);
134                 //              }
135                 //      }       
136                         reading->finish();
137                         fileHandle.close();
138         
139                         list->setLabel("0");
140         
141         }
142         catch(exception& e) {
143                 cout << "Standard Error: " << e.what() << " has occurred in the ReadColumnMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
144                 exit(1);
145         }
146         catch(...) {
147                 cout << "An unknown error has occurred in the ReadColumnMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
148                 exit(1);
149         }
150
151 }
152
153 /***********************************************************************/
154
155 ReadColumnMatrix::~ReadColumnMatrix(){
156         //delete D;
157         //delete list;
158 }
159
160