]> git.donarmstrong.com Git - mothur.git/blob - readmatrix.cpp
fixed bug with get.oturep and fixed problem with errorcheckor that did not allow...
[mothur.git] / readmatrix.cpp
1 /*
2  *  readmatrix.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 8/13/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 using namespace std;
11
12 #include "sparsematrix.hpp"
13 #include "progress.hpp"
14 #include "listvector.hpp"
15 #include "rabundvector.hpp"
16 #include "readmatrix.hpp"
17
18
19 /***********************************************************************/
20
21 ReadPhylipMatrix::ReadPhylipMatrix(string distFile){
22         
23         successOpen = openInputFile(distFile, fileHandle);
24         
25 }
26
27 /***********************************************************************/
28
29 void ReadPhylipMatrix::read(NameAssignment* nameMap){
30         try {
31         
32                         float distance;
33                         int square, nseqs;
34                         string name;
35                         vector<string> matrixNames;
36         
37                         fileHandle >> nseqs >> name;
38
39                         matrixNames.push_back(name);
40
41                         if(nameMap == NULL){
42                                 list = new ListVector(nseqs);
43                                 list->set(0, name);
44                         }
45                         else{
46                                 list = new ListVector(nameMap->getListVector());
47                                 if(nameMap->count(name)==0){    cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
48                         }
49         
50                         char d;
51                         while((d=fileHandle.get()) != EOF){
52                 
53                                 if(isalnum(d)){
54                                         square = 1;
55                                         fileHandle.putback(d);
56                                         for(int i=0;i<nseqs;i++){
57                                                 fileHandle >> distance;
58                                         }
59                                         break;
60                                 }
61                                 if(d == '\n'){
62                                         square = 0;
63                                         break;
64                                 }
65                         }
66         
67                         Progress* reading;
68         
69                         if(square == 0){
70
71                                 reading = new Progress("Reading matrix:     ", nseqs * (nseqs - 1) / 2);
72                 
73                                 int     index = 0;
74                 
75                                 for(int i=1;i<nseqs;i++){
76                                         fileHandle >> name;
77                                         matrixNames.push_back(name);
78         
79                                         //there's A LOT of repeated code throughout this method...
80                                         if(nameMap == NULL){
81                                                 list->set(i, name);
82                                         
83                                                 for(int j=0;j<i;j++){
84                                                         fileHandle >> distance;
85                                                 
86                                                         if(distance < cutoff){
87                                                                 PCell value(i, j, distance);
88                                                                 D->addCell(value);
89                                                         }
90                                                         index++;
91                                                         reading->update(index);
92                                                 }
93                                 
94                                         }
95                                         else{
96                                                 if(nameMap->count(name)==0){    cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
97                                 
98                                                 for(int j=0;j<i;j++){
99                                                         fileHandle >> distance;
100                                                 
101                                                         if(distance < cutoff){
102                                                                 PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
103                                                                 D->addCell(value);
104                                                         }
105                                                         index++;
106                                                         reading->update(index);
107                                                 }
108                                         }
109                                 }
110                         }
111                         else{
112
113                                 reading = new Progress("Reading matrix:     ", nseqs * nseqs);
114                         
115                                 int index = nseqs;
116                 
117                                 for(int i=1;i<nseqs;i++){
118                                         fileHandle >> name;             
119                                         matrixNames.push_back(name);
120         
121                                         if(nameMap == NULL){
122                                                 list->set(i, name);
123                                                 for(int j=0;j<nseqs;j++){
124                                                         fileHandle >> distance;
125                                         
126                                                         if(distance < cutoff && j < i){
127                                                                 PCell value(i, j, distance);
128                                                                 D->addCell(value);
129                                                         }
130                                                         index++;
131                                                         reading->update(index);
132                                                 }
133                                         
134                                         }
135                                         else{
136                                                 if(nameMap->count(name)==0){    cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
137                                 
138                                                 for(int j=0;j<nseqs;j++){
139                                                         fileHandle >> distance;
140                                         
141                                                         if(distance < cutoff && j < i){
142                                                                 PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
143                                                                 D->addCell(value);
144                                                         }
145                                                         index++;
146                                                         reading->update(index);
147                                                 }
148                                         }
149                                 }
150                         }
151                         reading->finish();
152                         delete reading;
153
154                         list->setLabel("0");
155                         fileHandle.close();
156
157                         if(nameMap != NULL){
158                                 for(int i=0;i<matrixNames.size();i++){
159                                         nameMap->erase(matrixNames[i]);
160                                 }
161                                 if(nameMap->size() > 0){
162                                         //should probably tell them what is missing if we missed something
163                                         cout << "missed something" << '\t' << nameMap->size() << endl;
164                                 }
165                         }
166
167                 }
168         catch(exception& e) {
169                 cout << "Standard Error: " << e.what() << " has occurred in the ReadPhylipMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
170                 exit(1);
171         }
172         catch(...) {
173                 cout << "An unknown error has occurred in the ReadPhylipMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
174                 exit(1);
175         }
176 }
177
178 /***********************************************************************/
179
180 ReadPhylipMatrix::~ReadPhylipMatrix(){
181         delete D;
182         delete list;
183 }
184
185 /***********************************************************************/
186
187 ReadColumnMatrix::ReadColumnMatrix(string df) : distFile(df){
188         
189         successOpen = openInputFile(distFile, fileHandle);
190         
191 }
192
193 /***********************************************************************/
194
195 void ReadColumnMatrix::read(NameAssignment* nameMap){
196         try {           
197         
198                         string firstName, secondName;
199                         float distance;
200                         int nseqs = nameMap->size();
201
202                         list = new ListVector(nameMap->getListVector());
203                 
204                         Progress* reading = new Progress("Reading matrix:     ", nseqs * nseqs);
205         
206                         int lt = 1;
207                         int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
208                         int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix
209         
210                         //need to see if this is a square or a triangular matrix...
211                         while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
212                         
213                                 fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
214                 
215                                 if(nameMap->count(firstName)==0){
216                                         cerr << "AError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
217                                 }
218                                 if(nameMap->count(secondName)==0){
219                                         cerr << "AError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
220                                 }
221                 
222                                 if(distance < cutoff && nameMap->get(firstName) != nameMap->get(secondName)){
223                                         if(nameMap->get(firstName) > nameMap->get(secondName)){
224                                                 PCell value(nameMap->get(firstName), nameMap->get(secondName), distance);
225                                 
226                                                 if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
227                                                         refRow = nameMap->get(firstName);
228                                                         refCol = nameMap->get(secondName);
229                                                         D->addCell(value);
230                                                 }
231                                                 else if(refRow == nameMap->get(firstName) && refCol == nameMap->get(secondName)){
232                                                         lt = 0;
233                                                 }
234                                                 else{
235                                                         D->addCell(value);
236                                                 }
237                                         }
238                                         else if(nameMap->get(firstName) < nameMap->get(secondName)){
239                                                 PCell value(nameMap->get(secondName), nameMap->get(firstName), distance);
240                                 
241                                                 if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
242                                                         refRow = nameMap->get(firstName);
243                                                         refCol = nameMap->get(secondName);
244                                                         D->addCell(value);
245                                                 }
246                                                 else if(refRow == nameMap->get(secondName) && refCol == nameMap->get(firstName)){
247                                                         lt = 0;
248                                                 }
249                                                 else{
250                                                         D->addCell(value);
251                                                 }
252                                         }
253                                         reading->update(nameMap->get(firstName) * nseqs);
254                                 }
255                                 gobble(fileHandle);
256                         }
257
258                         if(lt == 0){  // oops, it was square
259                                 fileHandle.close();  //let's start over
260                                 D->clear();  //let's start over
261                            
262                                 openInputFile(distFile, fileHandle);  //let's start over
263
264                                 while(fileHandle){
265                                         fileHandle >> firstName >> secondName >> distance;
266                         
267                                         if(nameMap->count(firstName)==0){
268                                                 cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
269                                         }
270                                         if(nameMap->count(secondName)==0){
271                                                 cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
272                                         }
273                         
274                                         if(distance < cutoff && nameMap->get(firstName) > nameMap->get(secondName)){
275                                                 PCell value(nameMap->get(firstName), nameMap->get(secondName), distance);
276                                                 D->addCell(value);
277                                                 reading->update(nameMap->get(firstName) * nseqs);
278                                         }
279                         
280                                         gobble(fileHandle);
281                                 }
282                         }
283                 //      else if(lt == 0){
284                 //              while(fileHandle){
285                 //                      fileHandle >> firstName >> secondName >> distance;
286                 //                      
287                 //                      if(nameMap->count(firstName)==0){
288                 //                              cerr << "CError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
289                 //                      }
290                 //                      if(nameMap->count(secondName)==0){
291                 //                              cerr << "CError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
292                 //                      }
293                 //                      
294                 //                      if(distance < cutoff && (*nameMap)[firstName].second < (*nameMap)[secondName].second){
295                 ////                            cout << (*nameMap)[secondName] << ' ' << (*nameMap)[firstName] << ' ' << distance << endl;
296                 //                              D->addCell(Cell((*nameMap)[secondName].second, (*nameMap)[firstName].second, distance));
297                 //                              reading->update((*nameMap)[secondName].second * nseqs);
298                 //                      }
299                 //
300                 //                      gobble(fileHandle);
301                 //              }
302                 //      }       
303                         reading->finish();
304                         fileHandle.close();
305         
306                         list->setLabel("0");
307         
308         }
309         catch(exception& e) {
310                 cout << "Standard Error: " << e.what() << " has occurred in the ReadColumnMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
311                 exit(1);
312         }
313         catch(...) {
314                 cout << "An unknown error has occurred in the ReadColumnMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
315                 exit(1);
316         }
317
318 }
319
320 /***********************************************************************/
321
322 ReadColumnMatrix::~ReadColumnMatrix(){
323         delete D;
324         delete list;
325 }
326
327
328 /***********************************************************************/
329
330 ReadPhilFile::ReadPhilFile(string pf): philFile(pf){
331         
332         successOpen = openInputFile(philFile, fileHandle);
333         
334 }
335
336 /***********************************************************************/
337 //This function reads the list, rabund or sabund files to be used by collect and rarefact command.
338 void ReadPhilFile::read(GlobalData* globaldata){
339         try {
340                 if (globaldata->getOrderFile() == "") {
341                         //you have two inputs because in the next if statement if you only have one then it moves ahead in the same file.  
342                         //So when you run the collect or summary commands you miss a line.
343                         input = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund.
344                         inputList = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund.
345                         inputSabund = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund or shared.
346                 }else {//there is an orderfile
347                         input = new InputData(philFile, globaldata->getOrderFile(), globaldata->getFormat());
348                 }
349                 
350                 //memory leak prevention
351                 //if (globaldata->ginput != NULL) { delete globaldata->ginput;  }
352                 globaldata->ginput = input;     //saving to be used by collector and rarefact commands.
353                 
354                 if ((globaldata->getFormat() == "list") || (globaldata->getFormat() == "rabund") || (globaldata->getFormat() == "sabund")) {//you are reading a list, rabund or sabund file for collect, rarefaction or summary.
355                         order = input->getOrderVector();
356                         //memory leak prevention
357                         //if (globaldata->gorder != NULL) { delete globaldata->gorder;  }
358                         globaldata->gorder = order;     //saving to be used by collect and rarefact commands.
359                         sabund = inputSabund->getSAbundVector(); 
360                         globaldata->sabund = sabund; //saving to be used by summary command.
361                         list = inputList->getListVector();
362                         globaldata->gListVector = list;
363                 }else if (globaldata->getFormat() == "shared") {
364                         SharedList = input->getSharedListVector(); //you are reading for collect.shared, rarefaction.shared, summary.shared, parselist command, or shared commands.
365                         //memory leak prevention
366                         //if (globaldata->gSharedList != NULL) { delete globaldata->gSharedList;  }
367                         globaldata->gSharedList = SharedList;
368                 }
369         }
370         catch(exception& e) {
371                 cout << "Standard Error: " << e.what() << " has occurred in the ReadPhilFile class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
372                 exit(1);
373         }
374         catch(...) {
375                 cout << "An unknown error has occurred in the ReadPhilFile class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
376                 exit(1);
377         }
378 }
379
380 /***********************************************************************/
381
382 ReadPhilFile::~ReadPhilFile(){
383 //      delete input;
384 //      delete order;
385 }
386
387 /***********************************************************************/
388