]> git.donarmstrong.com Git - mothur.git/blobdiff - clusterclassic.cpp
moved mothur's source into a folder to make grabbing just the source easier on github
[mothur.git] / clusterclassic.cpp
diff --git a/clusterclassic.cpp b/clusterclassic.cpp
deleted file mode 100644 (file)
index 1ce81c4..0000000
+++ /dev/null
@@ -1,450 +0,0 @@
-/*
- *  clusterclassic.cpp
- *  Mothur
- *
- *  Created by westcott on 10/29/10.
- *  Copyright 2010 Schloss Lab. All rights reserved.
- *
- */
-
-#include "clusterclassic.h"
-#include "progress.hpp"
-
-/***********************************************************************/
-ClusterClassic::ClusterClassic(float c, string f, bool s) : method(f), smallDist(1e6), nseqs(0), sim(s) {
-       try {
-               mapWanted = false;  //set to true by mgcluster to speed up overlap merge
-       
-               //save so you can modify as it changes in average neighbor
-               cutoff = c;
-               aboveCutoff = cutoff + 10000.0;
-               m = MothurOut::getInstance();
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClusterClassic", "ClusterClassic");
-               exit(1);
-       }
-}
-/***********************************************************************/
-int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) {
-       try {
-               double distance;
-               int square;
-               string name;
-               vector<string> matrixNames;
-               
-               ifstream fileHandle;
-               m->openInputFile(filename, fileHandle);
-               
-               fileHandle >> nseqs >> name;
-
-               matrixNames.push_back(name);
-
-               if(nameMap == NULL){
-                               list = new ListVector(nseqs);
-                               list->set(0, name);
-               }
-               else{
-                               list = new ListVector(nameMap->getListVector());
-                               if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
-               }
-               
-               //initialize distance matrix to cutoff
-               dMatrix.resize(nseqs);
-               //colDist temp(0, 0, aboveCutoff);
-               //rowSmallDists.resize(nseqs, temp);
-               for (int i = 1; i < nseqs; i++) {                       
-                       dMatrix[i].resize(i, aboveCutoff);              
-               }                                                                                               
-                                                                                                               
-               
-               char d;
-               while((d=fileHandle.get()) != EOF){
-
-                               if(isalnum(d)){
-                                               square = 1;
-                                               fileHandle.putback(d);
-                                               for(int i=0;i<nseqs;i++){
-                                                               fileHandle >> distance;
-                                               }
-                                               break;
-                               }
-                               if(d == '\n'){
-                                               square = 0;
-                                               break;
-                               }
-               }
-
-               Progress* reading;
-
-               if(square == 0){
-
-                               reading = new Progress("Reading matrix:     ", nseqs * (nseqs - 1) / 2);
-
-                               int        index = 0;
-
-                               for(int i=1;i<nseqs;i++){
-                                               if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
-                                               
-                                               fileHandle >> name;
-                                               matrixNames.push_back(name);
-               
-
-                                               //there's A LOT of repeated code throughout this method...
-                                               if(nameMap == NULL){
-                                                               list->set(i, name);
-                                               
-                                                               for(int j=0;j<i;j++){
-                                                               
-                                                                               if (m->control_pressed) { delete reading; fileHandle.close(); return 0;  }
-                                                                               
-                                                                               fileHandle >> distance;
-                                                       
-                                                                               if (distance == -1) { distance = 1000000; }
-                                                                               else if (sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
-                                                               
-                                                                               //if(distance < cutoff){
-                                                                                       dMatrix[i][j] = distance;
-                                                                                       if (distance < smallDist) { smallDist = distance; }
-                                                                                       //if (rowSmallDists[i].dist > distance) {  rowSmallDists[i].dist = distance; rowSmallDists[i].col = j; rowSmallDists[i].row = i; }
-                                                                                       //if (rowSmallDists[j].dist > distance) {  rowSmallDists[j].dist = distance; rowSmallDists[j].col = i; rowSmallDists[j].row = j; }
-                                                                               //}
-                                                                               index++;
-                                                                               reading->update(index);
-                                                               }
-                               
-                                               }
-                                               else{
-                                                               if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
-                               
-                                                               for(int j=0;j<i;j++){
-                                                                               fileHandle >> distance;
-                                                                               
-                                                                               if (m->control_pressed) { delete reading; fileHandle.close(); return 0;  }
-                               
-                                                                               if (distance == -1) { distance = 1000000; }
-                                                                               else if (sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
-                                                                               
-                                                                               //if(distance < cutoff){
-                                                                                       if (distance < smallDist) { smallDist = distance; }
-                                                                                       
-                                                                                       int row = nameMap->get(matrixNames[i]);
-                                                                                       int col = nameMap->get(matrixNames[j]);
-                                                                                       
-                                                                                       if (row < col) {  dMatrix[col][row] = distance; }
-                                                                                       else { dMatrix[row][col] = distance; }
-                                                                                       
-                                                                                       //if (rowSmallDists[row].dist > distance) {  rowSmallDists[row].dist = distance; rowSmallDists[row].col = col; rowSmallDists[row].row = row; }
-                                                                                       //if (rowSmallDists[col].dist > distance) {  rowSmallDists[col].dist = distance; rowSmallDists[col].col = row; rowSmallDists[col].row = col; }
-                                                                               //}
-                                                                               index++;
-                                                                               reading->update(index);
-                                                               }
-                                               }
-                               }
-               }
-               else{
-
-                               reading = new Progress("Reading matrix:     ", nseqs * nseqs);
-               
-                               int index = nseqs;
-
-                               for(int i=1;i<nseqs;i++){
-                                               fileHandle >> name;                
-                                               matrixNames.push_back(name);
-                                               
-                                               if(nameMap == NULL){
-                                                               list->set(i, name);
-                                                               for(int j=0;j<nseqs;j++){
-                                                                               fileHandle >> distance;
-                                                                               
-                                                                               if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
-                                                                               
-                                                                               if (distance == -1) { distance = 1000000; }
-                                                                               else if (sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
-                                                                               
-                                                                               if(j < i){
-                                                                                       if (distance < smallDist) { smallDist = distance; }
-                                                                                       
-                                                                                       dMatrix[i][j] = distance;
-                                                                                       //if (rowSmallDists[i].dist > distance) {  rowSmallDists[i].dist = distance; rowSmallDists[i].col = j; rowSmallDists[i].row = i; }
-                                                                                       //if (rowSmallDists[j].dist > distance) {  rowSmallDists[j].dist = distance; rowSmallDists[j].col = i; rowSmallDists[j].row = j; }
-                                                                               }
-                                                                               index++;
-                                                                               reading->update(index);
-                                                               }
-                                               
-                                               }
-                                               else{
-                                                               if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
-                               
-                                                               for(int j=0;j<nseqs;j++){
-                                                                               fileHandle >> distance;
-                                                                               
-                                                                               if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
-                                                                               
-                                                                          if (distance == -1) { distance = 1000000; }
-                                                                               else if (sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.                                                        
-                                                                               
-                                                                               if(j < i){
-                                                                                       if (distance < smallDist) { smallDist = distance; }
-                                                                                       
-                                                                                       int row = nameMap->get(matrixNames[i]);
-                                                                                       int col = nameMap->get(matrixNames[j]);
-                                                                                       
-                                                                                       if (row < col) {  dMatrix[col][row] = distance; }
-                                                                                       else { dMatrix[row][col] = distance; }
-                                                                                       
-                                                                                       //if (rowSmallDists[row].dist > distance) {  rowSmallDists[row].dist = distance; rowSmallDists[row].col = col; rowSmallDists[row].row = row; }
-                                                                                       //if (rowSmallDists[col].dist > distance) {  rowSmallDists[col].dist = distance; rowSmallDists[col].col = row; rowSmallDists[col].row = col; }
-                                                                               }
-                                                                               index++;
-                                                                               reading->update(index);
-                                                               }
-                                               }
-                               }
-               }
-               
-               if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
-               
-               reading->finish();
-               delete reading;
-
-               list->setLabel("0");
-               rabund = new RAbundVector(list->getRAbundVector());
-               
-               fileHandle.close();
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClusterClassic", "readPhylipFile");
-               exit(1);
-       }
-
-}
-/***********************************************************************/
-//sets smallCol and smallRow, returns distance
-double ClusterClassic::getSmallCell() {
-       try {
-                       
-               smallDist = aboveCutoff;
-               smallRow = 1;
-               smallCol = 0;
-               
-               vector<colDist> mins;
-               
-               for(int i=1;i<nseqs;i++){
-                       for(int j=0;j<i;j++){ 
-                               if (dMatrix[i][j] < smallDist) {
-                                       mins.clear();
-                                       colDist temp(i, j, dMatrix[i][j]);
-                                       mins.push_back(temp); 
-                                       smallDist = dMatrix[i][j];
-                               }else if (dMatrix[i][j] == smallDist) {
-                                       colDist temp(i, j, dMatrix[i][j]);
-                                       mins.push_back(temp); 
-
-                               }
-                       }
-               }
-               
-               if (mins.size() > 0) {
-                       int zrand = 0;
-                       if (mins.size() > 1) {
-                               //pick random number between 0 and mins.size()
-                               zrand = (int)((float)(rand()) / (RAND_MAX / (mins.size()-1) + 1));
-                       }
-                       
-                       smallRow = mins[zrand].row;
-                       smallCol = mins[zrand].col;
-               
-               }
-       //cout << smallRow << '\t' << smallCol << '\t' << smallDist << endl;
-               //eliminate smallCell
-               if (smallRow < smallCol) { dMatrix[smallCol][smallRow] = aboveCutoff; }
-               else { dMatrix[smallRow][smallCol] = aboveCutoff; }
-               
-               return smallDist;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClusterClassic", "getSmallCell");
-               exit(1);
-       }
-}
-/***********************************************************************/
-void ClusterClassic::clusterBins(){
-       try {
-       //      cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol);
-
-               rabund->set(smallRow, rabund->get(smallRow)+rabund->get(smallCol));     
-               rabund->set(smallCol, 0);       
-               /*for (int i = smallCol+1; i < rabund->size(); i++) {
-                       rabund->set((i-1), rabund->get(i));
-               }
-               rabund->resize((rabund->size()-1));*/
-               rabund->setLabel(toString(smallDist));
-
-       //      cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClusterClassic", "clusterBins");
-               exit(1);
-       }
-}
-/***********************************************************************/
-void ClusterClassic::clusterNames(){
-       try {
-       //      cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(smallRow) << '\t' << list->get(smallCol);
-               if (mapWanted) {  updateMap();  }
-               
-               list->set(smallRow, list->get(smallRow)+','+list->get(smallCol));
-               list->set(smallCol, "");        
-               /*for (int i = smallCol+1; i < list->size(); i++) {
-                       list->set((i-1), list->get(i));
-               }
-               list->resize((list->size()-1));*/
-               list->setLabel(toString(smallDist));
-       
-       //      cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
-    }
-       catch(exception& e) {
-               m->errorOut(e, "ClusterClassic", "clusterNames");
-               exit(1);
-       }
-}
-/***********************************************************************/
-void ClusterClassic::update(double& cutOFF){
-       try {
-//print();             
-               getSmallCell();
-               
-               int r, c;
-               r = smallRow; c = smallCol;
-                               
-               for(int i=0;i<nseqs;i++){
-                       if(i != r && i != c){
-                               double distRow, distCol, newDist;
-                               if (i > r) { distRow = dMatrix[i][r]; }
-                               else { distRow =  dMatrix[r][i]; }
-
-                               if (i > c) { distCol = dMatrix[i][c]; dMatrix[i][c] = aboveCutoff; } //like removeCell
-                               else { distCol =  dMatrix[c][i]; dMatrix[c][i] = aboveCutoff; }
-                               
-                               if(method == "furthest"){
-                                       newDist = max(distRow, distCol);
-                               }
-                               else if (method == "average"){
-                                       int rowBin = rabund->get(r);
-                                       int colBin = rabund->get(c);
-                                       newDist = (colBin * distCol + rowBin * distRow) / (rowBin + colBin);
-                               }
-                               else if (method == "weighted"){
-                                       newDist = (distCol + distRow) / 2.0;
-                               }
-                               else if (method == "nearest"){
-                                       newDist = min(distRow, distCol);
-                               }
-                               //cout << "newDist = " << newDist << endl;      
-                               if (i > r) { dMatrix[i][r] = newDist; }
-                               else { dMatrix[r][i] = newDist; }
-                               
-                       }
-               }
-                       
-               clusterBins();
-               clusterNames();
-               
-               //resize each row
-               /*for(int i=0;i<nseqs;i++){
-                       for(int j=c+1;j<dMatrix[i].size();j++){
-                               dMatrix[i][j-1]=dMatrix[i][j];
-                       }
-               }                       
-               
-               //resize each col
-               for(int i=c+1;i<nseqs;i++){
-                       for(int j=0;j < dMatrix[i-1].size();j++){
-                               dMatrix[i-1][j]=dMatrix[i][j];
-                       }
-               }       
-               
-               nseqs--;
-               dMatrix.pop_back();*/
-
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClusterClassic", "update");
-               exit(1);
-       }
-}
-/***********************************************************************/
-void ClusterClassic::setMapWanted(bool f)  {  
-       try {
-               mapWanted = f;
-               
-               //initialize map
-               for (int i = 0; i < list->getNumBins(); i++) {
-                       
-                       //parse bin 
-                       string names = list->get(i);
-                       while (names.find_first_of(',') != -1) { 
-                               //get name from bin
-                               string name = names.substr(0,names.find_first_of(','));
-                               //save name and bin number
-                               seq2Bin[name] = i;
-                               names = names.substr(names.find_first_of(',')+1, names.length());
-                       }
-                       
-                       //get last name
-                       seq2Bin[names] = i;
-               }
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClusterClassic", "setMapWanted");
-               exit(1);
-       }
-}
-/***********************************************************************/
-void ClusterClassic::updateMap() {
-try {
-               //update location of seqs in smallRow since they move to smallCol now
-               string names = list->get(smallRow);
-               while (names.find_first_of(',') != -1) { 
-                       //get name from bin
-                       string name = names.substr(0,names.find_first_of(','));
-                       //save name and bin number
-                       seq2Bin[name] = smallCol;
-                       names = names.substr(names.find_first_of(',')+1, names.length());
-               }
-                       
-               //get last name
-               seq2Bin[names] = smallCol;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClusterClassic", "updateMap");
-               exit(1);
-       }
-}
-/***********************************************************************/
-void ClusterClassic::print() {
-try {
-               //update location of seqs in smallRow since they move to smallCol now
-               for (int i = 0; i < dMatrix.size(); i++) {
-                       m->mothurOut("row = " + toString(i) + "\t");
-                       for (int j = 0; j < dMatrix[i].size(); j++) {
-                               m->mothurOut(toString(dMatrix[i][j]) + "\t");
-                       }
-                       m->mothurOutEndLine();
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClusterClassic", "updateMap");
-               exit(1);
-       }
-}
-/***********************************************************************/
-