]> git.donarmstrong.com Git - mothur.git/blob - readmatrix.cpp
72dd799ec6d2e419dd843db6288de313fa0b4078
[mothur.git] / readmatrix.cpp
1 /*
2  *  readmatrix.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 8/13/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 using namespace std;
11
12 #include <string>
13 #include <map>
14 #include "utilities.hpp"
15 #include "sparsematrix.hpp"
16 #include "progress.hpp"
17 #include "listvector.hpp"
18 #include "rabundvector.hpp"
19 #include <exception>
20
21 #include "readmatrix.hpp"
22
23
24 /***********************************************************************/
25
26 ReadPhylipMatrix::ReadPhylipMatrix(string distFile){
27         
28         successOpen = openInputFile(distFile, fileHandle);
29         
30 }
31
32 /***********************************************************************/
33
34 void ReadPhylipMatrix::read(NameAssignment* nameMap){
35         try {
36         
37                         float distance;
38                         int square, nseqs;
39                         string name;
40                         vector<string> matrixNames;
41         
42                         fileHandle >> nseqs >> name;
43
44                         matrixNames.push_back(name);
45
46                         if(nameMap == NULL){
47                                 list = new ListVector(nseqs);
48                                 list->set(0, name);
49                         }
50                         else{
51                                 list = new ListVector(nameMap->getListVector());
52                                 if(nameMap->count(name)==0){    cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
53                         }
54         
55                         char d;
56                         while((d=fileHandle.get()) != EOF){
57                 
58                                 if(isalnum(d)){
59                                         square = 1;
60                                         fileHandle.putback(d);
61                                         for(int i=0;i<nseqs;i++){
62                                                 fileHandle >> distance;
63                                         }
64                                         break;
65                                 }
66                                 if(d == '\n'){
67                                         square = 0;
68                                         break;
69                                 }
70                         }
71         
72                         Progress* reading;
73         
74                         if(square == 0){
75
76                                 reading = new Progress("Reading matrix:    ", nseqs * (nseqs - 1) / 2);
77                 
78                                 int     index = 0;
79                 
80                                 for(int i=1;i<nseqs;i++){
81                                         fileHandle >> name;
82                                         matrixNames.push_back(name);
83         
84                                         //there's A LOT of repeated code throughout this method...
85                                         if(nameMap == NULL){
86                                                 list->set(i, name);
87                                         
88                                                 for(int j=0;j<i;j++){
89                                                         fileHandle >> distance;
90                                                 
91                                                         if(distance < cutoff){
92                                                                 PCell value(i, j, distance);
93                                                                 D->addCell(value);
94                                                         }
95                                                         index++;
96                                                         reading->update(index);
97                                                 }
98                                 
99                                         }
100                                         else{
101                                                 if(nameMap->count(name)==0){    cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
102                                 
103                                                 for(int j=0;j<i;j++){
104                                                         fileHandle >> distance;
105                                                 
106                                                         if(distance < cutoff){
107                                                                 PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
108                                                                 D->addCell(value);
109                                                         }
110                                                         index++;
111                                                         reading->update(index);
112                                                 }
113                                         }
114                                 }
115                         }
116                         else{
117
118                                 reading = new Progress("Reading matrix:    ", nseqs * nseqs);
119                         
120                                 int index = nseqs;
121                 
122                                 for(int i=1;i<nseqs;i++){
123                                         fileHandle >> name;             
124                                         matrixNames.push_back(name);
125         
126                                         if(nameMap == NULL){
127                                                 list->set(i, name);
128                                                 for(int j=0;j<nseqs;j++){
129                                                         fileHandle >> distance;
130                                         
131                                                         if(distance < cutoff && j < i){
132                                                                 PCell value(i, j, distance);
133                                                                 D->addCell(value);
134                                                         }
135                                                         index++;
136                                                         reading->update(index);
137                                                 }
138                                         
139                                         }
140                                         else{
141                                                 if(nameMap->count(name)==0){    cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
142                                 
143                                                 for(int j=0;j<nseqs;j++){
144                                                         fileHandle >> distance;
145                                         
146                                                         if(distance < cutoff && j < i){
147                                                                 PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
148                                                                 D->addCell(value);
149                                                         }
150                                                         index++;
151                                                         reading->update(index);
152                                                 }
153                                         }
154                                 }
155                         }
156                         reading->finish();
157                         delete reading;
158
159                         list->setLabel("0");
160                         fileHandle.close();
161
162                         if(nameMap != NULL){
163                                 for(int i=0;i<matrixNames.size();i++){
164                                         nameMap->erase(matrixNames[i]);
165                                 }
166                                 if(nameMap->size() > 0){
167                                         //should probably tell them what is missing if we missed something
168                                         cout << "missed something" << '\t' << nameMap->size() << endl;
169                                 }
170                         }
171
172                 }
173         catch(exception& e) {
174                 cout << "Standard Error: " << e.what() << " has occurred in the ReadPhylipMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
175                 exit(1);
176         }
177         catch(...) {
178                 cout << "An unknown error has occurred in the ReadPhylipMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
179                 exit(1);
180         }
181 }
182
183 /***********************************************************************/
184
185 ReadPhylipMatrix::~ReadPhylipMatrix(){
186         delete D;
187         delete list;
188 }
189
190 /***********************************************************************/
191
192 ReadColumnMatrix::ReadColumnMatrix(string df) : distFile(df){
193         
194         successOpen = openInputFile(distFile, fileHandle);
195         
196 }
197
198 /***********************************************************************/
199
200 void ReadColumnMatrix::read(NameAssignment* nameMap){
201         try {           
202         
203                         string firstName, secondName;
204                         float distance;
205                         int nseqs = nameMap->size();
206
207                         list = new ListVector(nameMap->getListVector());
208                 
209                         Progress* reading = new Progress("Reading matrix:    ", nseqs * nseqs);
210         
211                         int lt = 1;
212                         int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
213                         int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix
214         
215                         //need to see if this is a square or a triangular matrix...
216                         while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
217                         
218                                 fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
219                 
220                                 if(nameMap->count(firstName)==0){
221                                         cerr << "AError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
222                                 }
223                                 if(nameMap->count(secondName)==0){
224                                         cerr << "AError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
225                                 }
226                 
227                                 if(distance < cutoff && nameMap->get(firstName) != nameMap->get(secondName)){
228                                         if(nameMap->get(firstName) > nameMap->get(secondName)){
229                                                 PCell value(nameMap->get(firstName), nameMap->get(secondName), distance);
230                                 
231                                                 if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
232                                                         refRow = nameMap->get(firstName);
233                                                         refCol = nameMap->get(secondName);
234                                                         D->addCell(value);
235                                                 }
236                                                 else if(refRow == nameMap->get(firstName) && refCol == nameMap->get(secondName)){
237                                                         lt = 0;
238                                                 }
239                                                 else{
240                                                         D->addCell(value);
241                                                 }
242                                         }
243                                         else if(nameMap->get(firstName) < nameMap->get(secondName)){
244                                                 PCell value(nameMap->get(secondName), nameMap->get(firstName), distance);
245                                 
246                                                 if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
247                                                         refRow = nameMap->get(firstName);
248                                                         refCol = nameMap->get(secondName);
249                                                         D->addCell(value);
250                                                 }
251                                                 else if(refRow == nameMap->get(secondName) && refCol == nameMap->get(firstName)){
252                                                         lt = 0;
253                                                 }
254                                                 else{
255                                                         D->addCell(value);
256                                                 }
257                                         }
258                                         reading->update(nameMap->get(firstName) * nseqs);
259                                 }
260                                 gobble(fileHandle);
261                         }
262
263                         if(lt == 0){  // oops, it was square
264                                 fileHandle.close();  //let's start over
265                                 D->clear();  //let's start over
266                            
267                                 openInputFile(distFile, fileHandle);  //let's start over
268
269                                 while(fileHandle){
270                                         fileHandle >> firstName >> secondName >> distance;
271                         
272                                         if(nameMap->count(firstName)==0){
273                                                 cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
274                                         }
275                                         if(nameMap->count(secondName)==0){
276                                                 cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
277                                         }
278                         
279                                         if(distance < cutoff && nameMap->get(firstName) > nameMap->get(secondName)){
280                                                 PCell value(nameMap->get(firstName), nameMap->get(secondName), distance);
281                                                 D->addCell(value);
282                                                 reading->update(nameMap->get(firstName) * nseqs);
283                                         }
284                         
285                                         gobble(fileHandle);
286                                 }
287                         }
288                 //      else if(lt == 0){
289                 //              while(fileHandle){
290                 //                      fileHandle >> firstName >> secondName >> distance;
291                 //                      
292                 //                      if(nameMap->count(firstName)==0){
293                 //                              cerr << "CError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
294                 //                      }
295                 //                      if(nameMap->count(secondName)==0){
296                 //                              cerr << "CError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
297                 //                      }
298                 //                      
299                 //                      if(distance < cutoff && (*nameMap)[firstName].second < (*nameMap)[secondName].second){
300                 ////                            cout << (*nameMap)[secondName] << ' ' << (*nameMap)[firstName] << ' ' << distance << endl;
301                 //                              D->addCell(Cell((*nameMap)[secondName].second, (*nameMap)[firstName].second, distance));
302                 //                              reading->update((*nameMap)[secondName].second * nseqs);
303                 //                      }
304                 //
305                 //                      gobble(fileHandle);
306                 //              }
307                 //      }       
308                         reading->finish();
309                         fileHandle.close();
310         
311                         list->setLabel("0");
312         
313         }
314         catch(exception& e) {
315                 cout << "Standard Error: " << e.what() << " has occurred in the ReadColumnMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
316                 exit(1);
317         }
318         catch(...) {
319                 cout << "An unknown error has occurred in the ReadColumnMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
320                 exit(1);
321         }
322
323 }
324
325 /***********************************************************************/
326
327 ReadColumnMatrix::~ReadColumnMatrix(){
328         delete D;
329         delete list;
330 }
331
332
333 /***********************************************************************/
334
335 ReadPhilFile::ReadPhilFile(string pf): philFile(pf){
336         
337         successOpen = openInputFile(philFile, fileHandle);
338         
339 }
340
341 /***********************************************************************/
342 //This function reads the list, rabund or sabund files to be used by collect and rarefact command.
343 void ReadPhilFile::read(GlobalData* globaldata){
344         try {
345                 if (globaldata->getOrderFile() == "") {
346                         //you have two inputs because in the next if statement if you only have one then it moves ahead in the same file.  
347                         //So when you run the collect or summary commands you miss a line.
348                         input = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund.
349                         inputSabund = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund or shared.
350                 }else {//there is an orderfile
351                         input = new InputData(philFile, globaldata->getOrderFile(), globaldata->getFormat());
352                 }
353                 globaldata->ginput = input;     //saving to be used by collector and rarefact commands.
354                 
355                 if ((globaldata->getFormat() == "list") || (globaldata->getFormat() == "rabund") || (globaldata->getFormat() == "sabund")) {//you are reading a list, rabund or sabund file for collect, rarefaction or summary.
356                         order = input->getOrderVector();
357                         globaldata->gorder = order;     //saving to be used by collect and rarefact commands.
358                         sabund = inputSabund->getSAbundVector(); 
359                         globaldata->sabund = sabund; //saving to be used by summary command.
360                 }else if (globaldata->getFormat() == "shared") {
361                         SharedList = input->getSharedListVector(); //you are reading for collect.shared, rarefaction.shared, summary.shared, parselist command, or shared commands.
362                         globaldata->gSharedList = SharedList;
363                 }
364         }
365         catch(exception& e) {
366                 cout << "Standard Error: " << e.what() << " has occurred in the ReadPhilFile class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
367                 exit(1);
368         }
369         catch(...) {
370                 cout << "An unknown error has occurred in the ReadPhilFile class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
371                 exit(1);
372         }
373 }
374
375 /***********************************************************************/
376
377 ReadPhilFile::~ReadPhilFile(){
378 //      delete input;
379 //      delete order;
380 }
381
382 /***********************************************************************/
383