]> git.donarmstrong.com Git - mothur.git/blob - readmatrix.cpp
put back no command and worked on libshuff
[mothur.git] / readmatrix.cpp
1 /*
2  *  readmatrix.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 8/13/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 using namespace std;
11
12 #include "utilities.hpp"
13 #include "sparsematrix.hpp"
14 #include "progress.hpp"
15 #include "listvector.hpp"
16 #include "rabundvector.hpp"
17 #include "readmatrix.hpp"
18
19
20 /***********************************************************************/
21
22 ReadPhylipMatrix::ReadPhylipMatrix(string distFile){
23         
24         successOpen = openInputFile(distFile, fileHandle);
25         
26 }
27
28 /***********************************************************************/
29
30 void ReadPhylipMatrix::read(NameAssignment* nameMap){
31         try {
32         
33                         float distance;
34                         int square, nseqs;
35                         string name;
36                         vector<string> matrixNames;
37         
38                         fileHandle >> nseqs >> name;
39
40                         matrixNames.push_back(name);
41
42                         if(nameMap == NULL){
43                                 list = new ListVector(nseqs);
44                                 list->set(0, name);
45                         }
46                         else{
47                                 list = new ListVector(nameMap->getListVector());
48                                 if(nameMap->count(name)==0){    cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
49                         }
50         
51                         char d;
52                         while((d=fileHandle.get()) != EOF){
53                 
54                                 if(isalnum(d)){
55                                         square = 1;
56                                         fileHandle.putback(d);
57                                         for(int i=0;i<nseqs;i++){
58                                                 fileHandle >> distance;
59                                         }
60                                         break;
61                                 }
62                                 if(d == '\n'){
63                                         square = 0;
64                                         break;
65                                 }
66                         }
67         
68                         Progress* reading;
69         
70                         if(square == 0){
71
72                                 reading = new Progress("Reading matrix:     ", nseqs * (nseqs - 1) / 2);
73                 
74                                 int     index = 0;
75                 
76                                 for(int i=1;i<nseqs;i++){
77                                         fileHandle >> name;
78                                         matrixNames.push_back(name);
79         
80                                         //there's A LOT of repeated code throughout this method...
81                                         if(nameMap == NULL){
82                                                 list->set(i, name);
83                                         
84                                                 for(int j=0;j<i;j++){
85                                                         fileHandle >> distance;
86                                                 
87                                                         if(distance < cutoff){
88                                                                 PCell value(i, j, distance);
89                                                                 D->addCell(value);
90                                                         }
91                                                         index++;
92                                                         reading->update(index);
93                                                 }
94                                 
95                                         }
96                                         else{
97                                                 if(nameMap->count(name)==0){    cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
98                                 
99                                                 for(int j=0;j<i;j++){
100                                                         fileHandle >> distance;
101                                                 
102                                                         if(distance < cutoff){
103                                                                 PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
104                                                                 D->addCell(value);
105                                                         }
106                                                         index++;
107                                                         reading->update(index);
108                                                 }
109                                         }
110                                 }
111                         }
112                         else{
113
114                                 reading = new Progress("Reading matrix:     ", nseqs * nseqs);
115                         
116                                 int index = nseqs;
117                 
118                                 for(int i=1;i<nseqs;i++){
119                                         fileHandle >> name;             
120                                         matrixNames.push_back(name);
121         
122                                         if(nameMap == NULL){
123                                                 list->set(i, name);
124                                                 for(int j=0;j<nseqs;j++){
125                                                         fileHandle >> distance;
126                                         
127                                                         if(distance < cutoff && j < i){
128                                                                 PCell value(i, j, distance);
129                                                                 D->addCell(value);
130                                                         }
131                                                         index++;
132                                                         reading->update(index);
133                                                 }
134                                         
135                                         }
136                                         else{
137                                                 if(nameMap->count(name)==0){    cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; }
138                                 
139                                                 for(int j=0;j<nseqs;j++){
140                                                         fileHandle >> distance;
141                                         
142                                                         if(distance < cutoff && j < i){
143                                                                 PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance);
144                                                                 D->addCell(value);
145                                                         }
146                                                         index++;
147                                                         reading->update(index);
148                                                 }
149                                         }
150                                 }
151                         }
152                         reading->finish();
153                         delete reading;
154
155                         list->setLabel("0");
156                         fileHandle.close();
157
158                         if(nameMap != NULL){
159                                 for(int i=0;i<matrixNames.size();i++){
160                                         nameMap->erase(matrixNames[i]);
161                                 }
162                                 if(nameMap->size() > 0){
163                                         //should probably tell them what is missing if we missed something
164                                         cout << "missed something" << '\t' << nameMap->size() << endl;
165                                 }
166                         }
167
168                 }
169         catch(exception& e) {
170                 cout << "Standard Error: " << e.what() << " has occurred in the ReadPhylipMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
171                 exit(1);
172         }
173         catch(...) {
174                 cout << "An unknown error has occurred in the ReadPhylipMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
175                 exit(1);
176         }
177 }
178
179 /***********************************************************************/
180
181 ReadPhylipMatrix::~ReadPhylipMatrix(){
182         delete D;
183         delete list;
184 }
185
186 /***********************************************************************/
187
188 ReadColumnMatrix::ReadColumnMatrix(string df) : distFile(df){
189         
190         successOpen = openInputFile(distFile, fileHandle);
191         
192 }
193
194 /***********************************************************************/
195
196 void ReadColumnMatrix::read(NameAssignment* nameMap){
197         try {           
198         
199                         string firstName, secondName;
200                         float distance;
201                         int nseqs = nameMap->size();
202
203                         list = new ListVector(nameMap->getListVector());
204                 
205                         Progress* reading = new Progress("Reading matrix:     ", nseqs * nseqs);
206         
207                         int lt = 1;
208                         int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
209                         int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix
210         
211                         //need to see if this is a square or a triangular matrix...
212                         while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
213                         
214                                 fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
215                 
216                                 if(nameMap->count(firstName)==0){
217                                         cerr << "AError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
218                                 }
219                                 if(nameMap->count(secondName)==0){
220                                         cerr << "AError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
221                                 }
222                 
223                                 if(distance < cutoff && nameMap->get(firstName) != nameMap->get(secondName)){
224                                         if(nameMap->get(firstName) > nameMap->get(secondName)){
225                                                 PCell value(nameMap->get(firstName), nameMap->get(secondName), distance);
226                                 
227                                                 if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
228                                                         refRow = nameMap->get(firstName);
229                                                         refCol = nameMap->get(secondName);
230                                                         D->addCell(value);
231                                                 }
232                                                 else if(refRow == nameMap->get(firstName) && refCol == nameMap->get(secondName)){
233                                                         lt = 0;
234                                                 }
235                                                 else{
236                                                         D->addCell(value);
237                                                 }
238                                         }
239                                         else if(nameMap->get(firstName) < nameMap->get(secondName)){
240                                                 PCell value(nameMap->get(secondName), nameMap->get(firstName), distance);
241                                 
242                                                 if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
243                                                         refRow = nameMap->get(firstName);
244                                                         refCol = nameMap->get(secondName);
245                                                         D->addCell(value);
246                                                 }
247                                                 else if(refRow == nameMap->get(secondName) && refCol == nameMap->get(firstName)){
248                                                         lt = 0;
249                                                 }
250                                                 else{
251                                                         D->addCell(value);
252                                                 }
253                                         }
254                                         reading->update(nameMap->get(firstName) * nseqs);
255                                 }
256                                 gobble(fileHandle);
257                         }
258
259                         if(lt == 0){  // oops, it was square
260                                 fileHandle.close();  //let's start over
261                                 D->clear();  //let's start over
262                            
263                                 openInputFile(distFile, fileHandle);  //let's start over
264
265                                 while(fileHandle){
266                                         fileHandle >> firstName >> secondName >> distance;
267                         
268                                         if(nameMap->count(firstName)==0){
269                                                 cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
270                                         }
271                                         if(nameMap->count(secondName)==0){
272                                                 cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
273                                         }
274                         
275                                         if(distance < cutoff && nameMap->get(firstName) > nameMap->get(secondName)){
276                                                 PCell value(nameMap->get(firstName), nameMap->get(secondName), distance);
277                                                 D->addCell(value);
278                                                 reading->update(nameMap->get(firstName) * nseqs);
279                                         }
280                         
281                                         gobble(fileHandle);
282                                 }
283                         }
284                 //      else if(lt == 0){
285                 //              while(fileHandle){
286                 //                      fileHandle >> firstName >> secondName >> distance;
287                 //                      
288                 //                      if(nameMap->count(firstName)==0){
289                 //                              cerr << "CError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
290                 //                      }
291                 //                      if(nameMap->count(secondName)==0){
292                 //                              cerr << "CError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
293                 //                      }
294                 //                      
295                 //                      if(distance < cutoff && (*nameMap)[firstName].second < (*nameMap)[secondName].second){
296                 ////                            cout << (*nameMap)[secondName] << ' ' << (*nameMap)[firstName] << ' ' << distance << endl;
297                 //                              D->addCell(Cell((*nameMap)[secondName].second, (*nameMap)[firstName].second, distance));
298                 //                              reading->update((*nameMap)[secondName].second * nseqs);
299                 //                      }
300                 //
301                 //                      gobble(fileHandle);
302                 //              }
303                 //      }       
304                         reading->finish();
305                         fileHandle.close();
306         
307                         list->setLabel("0");
308         
309         }
310         catch(exception& e) {
311                 cout << "Standard Error: " << e.what() << " has occurred in the ReadColumnMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
312                 exit(1);
313         }
314         catch(...) {
315                 cout << "An unknown error has occurred in the ReadColumnMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
316                 exit(1);
317         }
318
319 }
320
321 /***********************************************************************/
322
323 ReadColumnMatrix::~ReadColumnMatrix(){
324         delete D;
325         delete list;
326 }
327
328
329 /***********************************************************************/
330
331 ReadPhilFile::ReadPhilFile(string pf): philFile(pf){
332         
333         successOpen = openInputFile(philFile, fileHandle);
334         
335 }
336
337 /***********************************************************************/
338 //This function reads the list, rabund or sabund files to be used by collect and rarefact command.
339 void ReadPhilFile::read(GlobalData* globaldata){
340         try {
341                 if (globaldata->getOrderFile() == "") {
342                         //you have two inputs because in the next if statement if you only have one then it moves ahead in the same file.  
343                         //So when you run the collect or summary commands you miss a line.
344                         input = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund.
345                         inputSabund = new InputData(philFile, globaldata->getFormat()); //format tells you whether philFile is list, rabund, sabund or shared.
346                 }else {//there is an orderfile
347                         input = new InputData(philFile, globaldata->getOrderFile(), globaldata->getFormat());
348                 }
349                 
350                 //memory leak prevention
351                 //if (globaldata->ginput != NULL) { delete globaldata->ginput;  }
352                 globaldata->ginput = input;     //saving to be used by collector and rarefact commands.
353                 
354                 if ((globaldata->getFormat() == "list") || (globaldata->getFormat() == "rabund") || (globaldata->getFormat() == "sabund")) {//you are reading a list, rabund or sabund file for collect, rarefaction or summary.
355                         order = input->getOrderVector();
356                         //memory leak prevention
357                         //if (globaldata->gorder != NULL) { delete globaldata->gorder;  }
358                         globaldata->gorder = order;     //saving to be used by collect and rarefact commands.
359                         sabund = inputSabund->getSAbundVector(); 
360                         globaldata->sabund = sabund; //saving to be used by summary command.
361                 }else if (globaldata->getFormat() == "shared") {
362                         SharedList = input->getSharedListVector(); //you are reading for collect.shared, rarefaction.shared, summary.shared, parselist command, or shared commands.
363                         //memory leak prevention
364                         //if (globaldata->gSharedList != NULL) { delete globaldata->gSharedList;  }
365                         globaldata->gSharedList = SharedList;
366                 }
367         }
368         catch(exception& e) {
369                 cout << "Standard Error: " << e.what() << " has occurred in the ReadPhilFile class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
370                 exit(1);
371         }
372         catch(...) {
373                 cout << "An unknown error has occurred in the ReadPhilFile class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
374                 exit(1);
375         }
376 }
377
378 /***********************************************************************/
379
380 ReadPhilFile::~ReadPhilFile(){
381 //      delete input;
382 //      delete order;
383 }
384
385 /***********************************************************************/
386