]> git.donarmstrong.com Git - mothur.git/blobdiff - readcolumn.cpp
pat's mods to morisitahorn and pre.cluster
[mothur.git] / readcolumn.cpp
index 59c3bd5520929f40086279d744c666ab18fe6d8b..6b8892b6eb1068563e8cf69e30b0274e07e0b15a 100644 (file)
-/*
- *  readcolumn.cpp
- *  Mothur
- *
- *  Created by Sarah Westcott on 4/21/09.
- *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
- *
- */
-
-#include "readcolumn.h"
-#include "progress.hpp"
-
-/***********************************************************************/
-
-ReadColumnMatrix::ReadColumnMatrix(string df) : distFile(df){
-       // Opens distFile on fileHandle right away; whatever openInputFile returns is kept in successOpen.
-       successOpen = openInputFile(distFile, fileHandle);
-       
-}
-
-/***********************************************************************/
-
-void ReadColumnMatrix::read(NameAssignment* nameMap){
-       try {           
-       // Loads "firstName secondName distance" records from fileHandle into D and builds list from nameMap.
-                       string firstName, secondName;
-                       float distance;
-                       int nseqs = nameMap->size();
-
-                       list = new ListVector(nameMap->getListVector());
-               // NOTE(review): reading is allocated with new and is never deleted in this function.
-                       Progress* reading = new Progress("Reading matrix:     ", nseqs * nseqs);
-       
-                       int lt = 1;
-                       int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if its transpose
-                       int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then it's a square matrix
-       
-                       //need to see if this is a square or a triangular matrix...
-                       while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
-                       
-                               fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
-               // NOTE(review): an unknown name is only reported on cerr; the nameMap->get() calls below still run for it.
-                               if(nameMap->count(firstName)==0){
-                                       cerr << "AError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
-                               }
-                               if(nameMap->count(secondName)==0){
-                                       cerr << "AError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
-                               }
-                               // a distance of -1 is replaced by 1000000 before the cutoff comparison
-                               if (distance == -1) { distance = 1000000; }
-                               // only pairs below cutoff whose two names map to different indices are considered
-                               if(distance < cutoff && nameMap->get(firstName) != nameMap->get(secondName)){
-                                       if(nameMap->get(firstName) > nameMap->get(secondName)){
-                                               PCell value(nameMap->get(firstName), nameMap->get(secondName), distance);
-                               // refRow == refCol (both still 0) means no reference cell has been stored yet
-                                               if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
-                                                       refRow = nameMap->get(firstName);
-                                                       refCol = nameMap->get(secondName);
-                                                       D->addCell(value);
-                                               }
-                                               else if(refRow == nameMap->get(firstName) && refCol == nameMap->get(secondName)){
-                                                       lt = 0;
-                                               }
-                                               else{
-                                                       D->addCell(value);
-                                               }
-                                       }
-                                       else if(nameMap->get(firstName) < nameMap->get(secondName)){
-                                               PCell value(nameMap->get(secondName), nameMap->get(firstName), distance);
-                               // the cell is stored with the larger index first, but refRow/refCol keep the order read from the file
-                                               if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
-                                                       refRow = nameMap->get(firstName);
-                                                       refCol = nameMap->get(secondName);
-                                                       D->addCell(value);
-                                               }
-                                               else if(refRow == nameMap->get(secondName) && refCol == nameMap->get(firstName)){
-                                                       lt = 0;
-                                               }
-                                               else{
-                                                       D->addCell(value);
-                                               }
-                                       }
-                                       reading->update(nameMap->get(firstName) * nseqs);
-                               }
-                               gobble(fileHandle);
-                       }
-
-                       if(lt == 0){  // oops, it was square
-                               fileHandle.close();  //let's start over
-                               D->clear();  //let's start over
-                          // NOTE(review): the value returned by this second openInputFile call is not checked
-                               openInputFile(distFile, fileHandle);  //let's start over
-
-                               while(fileHandle){
-                                       fileHandle >> firstName >> secondName >> distance;
-                       // same lookup warnings as in the first pass, tagged "BError"
-                                       if(nameMap->count(firstName)==0){
-                                               cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
-                                       }
-                                       if(nameMap->count(secondName)==0){
-                                               cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
-                                       }
-                                       
-                                       if (distance == -1) { distance = 1000000; }
-                                       // second pass keeps only the records whose first index is larger than the second
-                                       if(distance < cutoff && nameMap->get(firstName) > nameMap->get(secondName)){
-                                               PCell value(nameMap->get(firstName), nameMap->get(secondName), distance);
-                                               D->addCell(value);
-                                               reading->update(nameMap->get(firstName) * nseqs);
-                                       }
-                       
-                                       gobble(fileHandle);
-                               }
-                       }
-               //      else if(lt == 0){
-               //              while(fileHandle){
-               //                      fileHandle >> firstName >> secondName >> distance;
-               //                      
-               //                      if(nameMap->count(firstName)==0){
-               //                              cerr << "CError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
-               //                      }
-               //                      if(nameMap->count(secondName)==0){
-               //                              cerr << "CError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
-               //                      }
-               //                      if (distance == -1) { distance = 1000000; }
-               
-               //                      if(distance < cutoff && (*nameMap)[firstName].second < (*nameMap)[secondName].second){
-               ////                            cout << (*nameMap)[secondName] << ' ' << (*nameMap)[firstName] << ' ' << distance << endl;
-               //                              D->addCell(Cell((*nameMap)[secondName].second, (*nameMap)[firstName].second, distance));
-               //                              reading->update((*nameMap)[secondName].second * nseqs);
-               //                      }
-               //
-               //                      gobble(fileHandle);
-               //              }
-               //      }       
-                       reading->finish();
-                       fileHandle.close();
-       
-                       list->setLabel("0");
-       
-       }
-       catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ReadColumnMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the ReadColumnMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-
-}
-
-/***********************************************************************/
-
-ReadColumnMatrix::~ReadColumnMatrix(){
-       //delete D;     (disabled: D is not freed here -- NOTE(review): presumably owned by the caller, confirm)
-       //delete list;  (disabled: list is not freed here either)
-}
-
-
+/*\r
+ *  readcolumn.cpp\r
+ *  Mothur\r
+ *\r
+ *  Created by Sarah Westcott on 4/21/09.\r
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.\r
+ *\r
+ */\r
+\r
+#include "readcolumn.h"\r
+#include "progress.hpp"\r
+\r
+/***********************************************************************/\r
+\r
+/**\r
+ * Creates a reader for the distance file named by df (kept in distFile) and\r
+ * opens it right away; the value returned by openInputFile is stored in\r
+ * successOpen.\r
+ */\r
+ReadColumnMatrix::ReadColumnMatrix(string df)\r
+       : distFile(df)\r
+{\r
+       successOpen = openInputFile(distFile, fileHandle);\r
+}\r
+\r
+/***********************************************************************/\r
+\r
+/**\r
+ * Reads "firstName secondName distance" records from the distance file into\r
+ * the matrix D and builds list from nameMap.\r
+ *\r
+ * The file is first read on the assumption that every pair appears once\r
+ * (triangular matrix).  The first cell stored is remembered in refRow/refCol;\r
+ * if a later record refers to that same cell, in either orientation, the file\r
+ * is treated as square: D is cleared and the file is read again, keeping only\r
+ * the records whose first index is larger than the second.\r
+ *\r
+ * Records whose distance is not below cutoff are skipped; a distance of -1 is\r
+ * replaced by 1000000 before that comparison.  Reading stops at end of file\r
+ * or at the first record that cannot be parsed.\r
+ *\r
+ * @param nameMap  maps each sequence name to its index.  A name found in the\r
+ *                 distance file but not in nameMap is fatal: a message is\r
+ *                 written to cerr and the program exits with status 1.\r
+ */\r
+void ReadColumnMatrix::read(NameAssignment* nameMap){\r
+       try {\r
+\r
+               string firstName, secondName;\r
+               float distance = 0;\r
+               int nseqs = nameMap->size();\r
+\r
+               list = new ListVector(nameMap->getListVector());\r
+\r
+               Progress* reading = new Progress("Reading matrix:     ", nseqs * nseqs);\r
+\r
+               int lt = 1;\r
+               int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if its transpose\r
+               int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then it's a square matrix\r
+\r
+               //need to see if this is a square or a triangular matrix...\r
+\r
+               while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...\r
+\r
+                       // get the row and column names and distance; stop at end of file or on a\r
+                       // malformed record instead of processing values left over from the last one\r
+                       if(!(fileHandle >> firstName >> secondName >> distance)){ break; }\r
+\r
+                       map<string,int>::iterator itA = nameMap->find(firstName);\r
+                       map<string,int>::iterator itB = nameMap->find(secondName);\r
+\r
+                       if(itA == nameMap->end()){\r
+                               cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);\r
+                       }\r
+                       if(itB == nameMap->end()){\r
+                               cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);\r
+                       }\r
+\r
+                       if (distance == -1) { distance = 1000000; }\r
+\r
+                       if(distance < cutoff && itA != itB){\r
+                               if(itA->second > itB->second){\r
+                                       PCell value(itA->second, itB->second, distance);\r
+\r
+                                       if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...\r
+                                               refRow = itA->second;\r
+                                               refCol = itB->second;\r
+                                               D->addCell(value);\r
+                                       }\r
+                                       else if(refRow == itA->second && refCol == itB->second){\r
+                                               lt = 0;         // the reference cell showed up again\r
+                                       }\r
+                                       else{\r
+                                               D->addCell(value);\r
+                                       }\r
+                               }\r
+                               else if(itA->second < itB->second){\r
+                                       PCell value(itB->second, itA->second, distance);\r
+\r
+                                       if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...\r
+                                               // remember the reference the way the cell is stored (row > col);\r
+                                               // both re-occurrence tests compare against that orientation, so\r
+                                               // storing it as read (row < col) could never match again\r
+                                               refRow = itB->second;\r
+                                               refCol = itA->second;\r
+                                               D->addCell(value);\r
+                                       }\r
+                                       else if(refRow == itB->second && refCol == itA->second){\r
+                                               lt = 0;         // the reference cell showed up again\r
+                                       }\r
+                                       else{\r
+                                               D->addCell(value);\r
+                                       }\r
+                               }\r
+                               reading->update(itA->second * nseqs);\r
+                       }\r
+                       gobble(fileHandle);\r
+               }\r
+\r
+               if(lt == 0){  // oops, it was square\r
+                       fileHandle.close();  //let's start over\r
+                       D->clear();  //let's start over\r
+\r
+                       openInputFile(distFile, fileHandle);  //let's start over\r
+\r
+                       while(fileHandle){\r
+                               if(!(fileHandle >> firstName >> secondName >> distance)){ break; }\r
+\r
+                               map<string,int>::iterator itA = nameMap->find(firstName);\r
+                               map<string,int>::iterator itB = nameMap->find(secondName);\r
+\r
+                               // fatal, as in the first pass: itA/itB are dereferenced below and\r
+                               // must not be end()\r
+                               if(itA == nameMap->end()){\r
+                                       cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1);\r
+                               }\r
+                               if(itB == nameMap->end()){\r
+                                       cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);\r
+                               }\r
+\r
+                               if (distance == -1) { distance = 1000000; }\r
+\r
+                               // keep one triangle only: records whose first index is the larger one\r
+                               if(distance < cutoff && itA->second > itB->second){\r
+                                       PCell value(itA->second, itB->second, distance);\r
+                                       D->addCell(value);\r
+                                       reading->update(itA->second * nseqs);\r
+                               }\r
+\r
+                               gobble(fileHandle);\r
+                       }\r
+               }\r
+\r
+               reading->finish();\r
+               delete reading;         // allocated above and not stored anywhere else\r
+               fileHandle.close();\r
+\r
+               list->setLabel("0");\r
+\r
+       }\r
+       catch(exception& e) {\r
+               errorOut(e, "ReadColumnMatrix", "read");\r
+               exit(1);\r
+       }\r
+}\r
+\r
+/***********************************************************************/\r
+\r
+/**\r
+ * Releases nothing: the deletes of D and list are disabled, so neither object\r
+ * is freed here.\r
+ * NOTE(review): presumably the caller takes ownership of both -- confirm.\r
+ */\r
+ReadColumnMatrix::~ReadColumnMatrix()\r
+{\r
+       // delete D;     (disabled)\r
+       // delete list;  (disabled)\r
+}\r
+\r
+\r