]> git.donarmstrong.com Git - mothur.git/blobdiff - readcolumn.cpp
added hcluster command and fixed some bugs, namely one with smart distancing.
[mothur.git] / readcolumn.cpp
index 29c967e0cbed51e375d21e0e755f14b7d1c7bd0d..fccc2cd06f8b045e48e4516229fc58d4ebe3e30d 100644 (file)
-/*
- *  readcolumn.cpp
- *  Mothur
- *
- *  Created by Sarah Westcott on 4/21/09.
- *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
- *
- */
-
-#include "readcolumn.h"
-#include "progress.hpp"
-
-/***********************************************************************/
-
-// Stores the distance file path and immediately tries to open it; whatever
-// openInputFile returns is kept in successOpen.
-ReadColumnMatrix::ReadColumnMatrix(string df)
-       : distFile(df)
-{
-       successOpen = openInputFile(distFile, fileHandle);
-}
-
-/***********************************************************************/
-
-void ReadColumnMatrix::read(NameAssignment* nameMap){
-       try {           
-
-               string firstName, secondName;
-               float distance;
-               int nseqs = nameMap->size();
-
-               list = new ListVector(nameMap->getListVector());
-       
-               Progress* reading = new Progress("Reading matrix:     ", nseqs * nseqs);
-
-               int lt = 1;
-               int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose
-               int refCol = 0; //shows up later - Cell(refCol,refRow).  If it does, then its a square matrix
-
-               //need to see if this is a square or a triangular matrix...
-       
-               while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...
-               
-                       fileHandle >> firstName >> secondName >> distance;      // get the row and column names and distance
-       
-                       map<string,int>::iterator itA = nameMap->find(firstName);
-                       map<string,int>::iterator itB = nameMap->find(secondName);
-                       
-                       if(itA == nameMap->end()){
-                               cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
-                       }
-                       if(itB == nameMap->end()){
-                               cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
-                       }
-
-                       if (distance == -1) { distance = 1000000; }
-                       
-                       if(distance < cutoff && itA != itB){
-                               if(itA->second > itB->second){
-                                       PCell value(itA->second, itB->second, distance);
-                       
-                                       if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
-                                               refRow = itA->second;
-                                               refCol = itB->second;
-                                               D->addCell(value);
-                                       }
-                                       else if(refRow == itA->second && refCol == itB->second){
-                                               lt = 0;
-                                       }
-                                       else{
-                                               D->addCell(value);
-                                       }
-                               }
-                               else if(itA->second < itB->second){
-                                       PCell value(itB->second, itA->second, distance);
-                       
-                                       if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...
-                                               refRow = itA->second;
-                                               refCol = itB->second;
-                                               D->addCell(value);
-                                       }
-                                       else if(refRow == itB->second && refCol == itA->second){
-                                               lt = 0;
-                                       }
-                                       else{
-                                               D->addCell(value);
-                                       }
-                               }
-                               reading->update(itA->second * nseqs);
-                       }
-                       gobble(fileHandle);
-               }
-
-               if(lt == 0){  // oops, it was square
-                       fileHandle.close();  //let's start over
-                       D->clear();  //let's start over
-                  
-                       openInputFile(distFile, fileHandle);  //let's start over
-
-                       while(fileHandle){
-                               fileHandle >> firstName >> secondName >> distance;
-               
-                               map<string,int>::iterator itA = nameMap->find(firstName);
-                               map<string,int>::iterator itB = nameMap->find(secondName);
-                               
-                               if(itA == nameMap->end()){
-                                       cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n";
-                               }
-                               if(itB == nameMap->end()){
-                                       cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n";
-                               }
-                               
-                               if (distance == -1) { distance = 1000000; }
-                               
-                               if(distance < cutoff && itA->second > itB->second){
-                                       PCell value(itA->second, itB->second, distance);
-                                       D->addCell(value);
-                                       reading->update(itA->second * nseqs);
-                               }
-               
-                               gobble(fileHandle);
-                       }
-               }
-
-               reading->finish();
-               fileHandle.close();
-
-               list->setLabel("0");
-
-       }
-       catch(exception& e) {
-               errorOut(e, "ReadColumnMatrix", "read");
-               exit(1);
-       }
-}
-
-/***********************************************************************/
-
-// Releases nothing: the deletes of D and list are left disabled below.
-// NOTE(review): presumably ownership of both passes to the caller - confirm.
-ReadColumnMatrix::~ReadColumnMatrix()
-{
-       // delete D;     (disabled)
-       // delete list;  (disabled)
-}
-
-
+/*\r
+ *  readcolumn.cpp\r
+ *  Mothur\r
+ *\r
+ *  Created by Sarah Westcott on 4/21/09.\r
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.\r
+ *\r
+ */\r
+\r
+#include "readcolumn.h"\r
+#include "progress.hpp"\r
+\r
+/***********************************************************************/\r
+\r
+// Stores the distance file path and immediately tries to open it; whatever\r
+// openInputFile returns is kept in successOpen.\r
+ReadColumnMatrix::ReadColumnMatrix(string df)\r
+       : distFile(df)\r
+{\r
+       successOpen = openInputFile(distFile, fileHandle);\r
+}\r
+\r
+/***********************************************************************/\r
+\r
+// Reads a column-formatted distance file (records of "nameA nameB distance")\r
+// into D and builds list from nameMap.\r
+//\r
+// The first pass assumes each pair is listed once (triangular). The first cell\r
+// stored is remembered as a reference in row > col form; if that same cell\r
+// shows up again the file lists both orientations (square), so D is cleared\r
+// and the file is re-read keeping only records whose first index is larger.\r
+//\r
+// A distance of -1 is replaced by 1000000, and only distances below cutoff are\r
+// stored. A record naming a sequence that is absent from nameMap is reported\r
+// on cerr and skipped rather than dereferencing the end() iterator.\r
+//\r
+// nameMap is dereferenced unconditionally and must not be null.\r
+void ReadColumnMatrix::read(NameAssignment* nameMap){\r
+       try {\r
+\r
+               string firstName, secondName;\r
+               float distance;\r
+               int nseqs = nameMap->size();\r
+\r
+               list = new ListVector(nameMap->getListVector());\r
+\r
+               Progress* reading = new Progress("Reading matrix:     ", nseqs * nseqs);\r
+\r
+               int lt = 1;     // 1 while the file still looks triangular, 0 once it proves square\r
+               int refRow = 0; // reference cell, always kept as refRow > refCol once loaded;\r
+               int refCol = 0; // refRow == refCol means "no reference cell recorded yet"\r
+\r
+               //need to see if this is a square or a triangular matrix...\r
+\r
+               while(fileHandle && lt == 1){  //let's assume it's a triangular matrix...\r
+\r
+                       // stop at EOF or on a malformed record instead of reusing stale values\r
+                       if(!(fileHandle >> firstName >> secondName >> distance)){ break; }\r
+\r
+                       map<string,int>::iterator itA = nameMap->find(firstName);\r
+                       map<string,int>::iterator itB = nameMap->find(secondName);\r
+\r
+                       bool namesKnown = true;\r
+                       if(itA == nameMap->end()){\r
+                               cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n";\r
+                               namesKnown = false;\r
+                       }\r
+                       if(itB == nameMap->end()){\r
+                               cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n";\r
+                               namesKnown = false;\r
+                       }\r
+\r
+                       if (distance == -1) { distance = 1000000; }\r
+\r
+                       if(namesKnown && distance < cutoff && itA != itB){\r
+                               // normalise to lower-triangle coordinates: row holds the larger index\r
+                               int row = itA->second;\r
+                               int col = itB->second;\r
+                               if(row < col){\r
+                                       int swapTmp = row;\r
+                                       row = col;\r
+                                       col = swapTmp;\r
+                               }\r
+\r
+                               if(row != col){\r
+                                       PCell value(row, col, distance);\r
+\r
+                                       if(refRow == refCol){           // in other words, if we haven't loaded refRow and refCol...\r
+                                               refRow = row;\r
+                                               refCol = col;\r
+                                               D->addCell(value);\r
+                                       }\r
+                                       else if(refRow == row && refCol == col){\r
+                                               lt = 0;         // the reference cell appeared a second time: square matrix\r
+                                       }\r
+                                       else{\r
+                                               D->addCell(value);\r
+                                       }\r
+                               }\r
+                               reading->update(itA->second * nseqs);\r
+                       }\r
+                       gobble(fileHandle);\r
+               }\r
+\r
+               if(lt == 0){  // oops, it was square\r
+                       fileHandle.close();  //let's start over\r
+                       fileHandle.clear();  // drop any eof/fail flags left by the first pass before reopening\r
+                       D->clear();  //let's start over\r
+\r
+                       openInputFile(distFile, fileHandle);  //let's start over\r
+\r
+                       while(fileHandle){\r
+                               if(!(fileHandle >> firstName >> secondName >> distance)){ break; }\r
+\r
+                               map<string,int>::iterator itA = nameMap->find(firstName);\r
+                               map<string,int>::iterator itB = nameMap->find(secondName);\r
+\r
+                               bool namesKnown = true;\r
+                               if(itA == nameMap->end()){\r
+                                       cerr << "BError: Sequence '" << firstName << "' was not found in the names file, please correct\n";\r
+                                       namesKnown = false;\r
+                               }\r
+                               if(itB == nameMap->end()){\r
+                                       cerr << "BError: Sequence '" << secondName << "' was not found in the names file, please correct\n";\r
+                                       namesKnown = false;\r
+                               }\r
+\r
+                               if (distance == -1) { distance = 1000000; }\r
+\r
+                               // keep only the lower-triangle orientation of each pair\r
+                               if(namesKnown && distance < cutoff && itA->second > itB->second){\r
+                                       PCell value(itA->second, itB->second, distance);\r
+                                       D->addCell(value);\r
+                                       reading->update(itA->second * nseqs);\r
+                               }\r
+\r
+                               gobble(fileHandle);\r
+                       }\r
+               }\r
+\r
+               reading->finish();\r
+               delete reading;         // allocated above with new; nothing else holds it\r
+               fileHandle.close();\r
+\r
+               list->setLabel("0");\r
+\r
+       }\r
+       catch(exception& e) {\r
+               errorOut(e, "ReadColumnMatrix", "read");\r
+               exit(1);\r
+       }\r
+}\r
+\r
+/***********************************************************************/\r
+\r
+// Releases nothing: the deletes of D and list are left disabled below.\r
+// NOTE(review): presumably ownership of both passes to the caller - confirm.\r
+ReadColumnMatrix::~ReadColumnMatrix()\r
+{\r
+       // delete D;     (disabled)\r
+       // delete list;  (disabled)\r
+}\r
+\r
+\r