]> git.donarmstrong.com Git - mothur.git/commitdiff
added cluster.classic command
authorwestcott <westcott>
Fri, 29 Oct 2010 18:13:12 +0000 (18:13 +0000)
committerwestcott <westcott>
Fri, 29 Oct 2010 18:13:12 +0000 (18:13 +0000)
Mothur.xcodeproj/project.pbxproj
clusterclassic.cpp [new file with mode: 0644]
clusterclassic.h [new file with mode: 0644]
clusterdoturcommand.cpp [new file with mode: 0644]
clusterdoturcommand.h [new file with mode: 0644]
commandfactory.cpp
mothur
mothurout.cpp
mothurout.h
subsamplecommand.cpp [new file with mode: 0644]
subsamplecommand.h [new file with mode: 0644]

index 4a328a3bca837ad2b90ef16b0722aa5bdad21d30..855dad891d3b9b4e3b0422e57a3a5be488aa3c12 100644 (file)
@@ -45,6 +45,8 @@
                A72B3A7C118B4D1B004B9F8D /* phylodiversity.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversity.cpp; sourceTree = "<group>"; };
                A72C66A1125B356F0058C2F9 /* pipelinepdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pipelinepdscommand.h; sourceTree = "<group>"; };
                A72C66A2125B356F0058C2F9 /* pipelinepdscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pipelinepdscommand.cpp; sourceTree = "<group>"; };
+               A72E295D127ADCE300905B79 /* clusterclassic.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clusterclassic.h; sourceTree = "<group>"; };
+               A72E295E127ADCE300905B79 /* clusterclassic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clusterclassic.cpp; sourceTree = "<group>"; };
                A732505E11E49EF100484B90 /* sffinfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sffinfocommand.h; sourceTree = "<group>"; };
                A732505F11E49EF100484B90 /* sffinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sffinfocommand.cpp; sourceTree = "<group>"; };
                A73953DA11987ED100B0B160 /* chopseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chopseqscommand.h; sourceTree = "<group>"; };
                A74A9C03124B72DB000D5D25 /* clusterfragmentscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clusterfragmentscommand.h; sourceTree = "<group>"; };
                A74A9C04124B72DB000D5D25 /* clusterfragmentscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clusterfragmentscommand.cpp; sourceTree = "<group>"; };
                A7639F8D1175DF35008F5578 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = "<group>"; };
+               A7653CB112789EFD009D6C09 /* subsamplecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = subsamplecommand.h; sourceTree = "<group>"; };
+               A7653CB212789EFD009D6C09 /* subsamplecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = subsamplecommand.cpp; sourceTree = "<group>"; };
+               A7653D111278B09C009D6C09 /* clusterdoturcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clusterdoturcommand.h; sourceTree = "<group>"; };
+               A7653D121278B09C009D6C09 /* clusterdoturcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clusterdoturcommand.cpp; sourceTree = "<group>"; };
                A76714DD126DE45A003F359A /* deuniqueseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniqueseqscommand.h; sourceTree = "<group>"; };
                A76714DE126DE45A003F359A /* deuniqueseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniqueseqscommand.cpp; sourceTree = "<group>"; };
                A768D95D1248FEAA008AB1D0 /* mothur */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.executable"; path = mothur; sourceTree = "<group>"; };
                                A7DA1FFF113FECD400BF472F /* blastalign.cpp */,
                                A7DA201F113FECD400BF472F /* cluster.cpp */,
                                A7DA2020113FECD400BF472F /* cluster.hpp */,
+                               A72E295D127ADCE300905B79 /* clusterclassic.h */,
+                               A72E295E127ADCE300905B79 /* clusterclassic.cpp */,
                                A7CB593B11402EF90010EB83 /* calculators */,
                                A7CB594A11402FB40010EB83 /* chimeras */,
                                A7CB594511402F6E0010EB83 /* classifers */,
                                A7D215C911996C6E00F13F13 /* clearcutcommand.cpp */,
                                A7DA2022113FECD400BF472F /* clustercommand.h */,
                                A7DA2021113FECD400BF472F /* clustercommand.cpp */,
+                               A7653D111278B09C009D6C09 /* clusterdoturcommand.h */,
+                               A7653D121278B09C009D6C09 /* clusterdoturcommand.cpp */,
                                A74A9C03124B72DB000D5D25 /* clusterfragmentscommand.h */,
                                A74A9C04124B72DB000D5D25 /* clusterfragmentscommand.cpp */,
                                A71D924311AEB42400D00CBC /* clustersplitcommand.h */,
                                A71D924411AEB42400D00CBC /* splitabundcommand.cpp */,
                                A7F139481247C3CB0033324C /* splitgroupscommand.h */,
                                A7F139491247C3CB0033324C /* splitgroupscommand.cpp */,
+                               A7653CB112789EFD009D6C09 /* subsamplecommand.h */,
+                               A7653CB212789EFD009D6C09 /* subsamplecommand.cpp */,
                                A7DA2155113FECD400BF472F /* summarycommand.h */,
                                A7DA2154113FECD400BF472F /* summarycommand.cpp */,
                                A7DA2159113FECD400BF472F /* summarysharedcommand.h */,
                        };
                        buildConfigurationList = 1DEB919308733D9F0010E9CD /* Build configuration list for PBXProject "Mothur" */;
                        compatibilityVersion = "Xcode 3.0";
-                       developmentRegion = English;
                        hasScannedForEncodings = 1;
                        knownRegions = (
                                English,
diff --git a/clusterclassic.cpp b/clusterclassic.cpp
new file mode 100644 (file)
index 0000000..7a701aa
--- /dev/null
@@ -0,0 +1,476 @@
+/*
+ *  clusterclassic.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 10/29/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "clusterclassic.h"
+#include "progress.hpp"
+
+/***********************************************************************/
+ClusterClassic::ClusterClassic(float c, string f) : method(f), smallDist(1e6), nseqs(0) {
+       try {
+               mapWanted = false;  //set to true by mgcluster to speed up overlap merge
+       
+               //save so you can modify as it changes in average neighbor
+               cutoff = c;
+               m = MothurOut::getInstance();
+               globaldata = GlobalData::getInstance();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterClassic", "ClusterClassic");
+               exit(1);
+       }
+}
+/***********************************************************************/
+int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) {
+       try {
+               double distance;
+               int square;
+               string name;
+               vector<string> matrixNames;
+               
+               ifstream fileHandle;
+               m->openInputFile(filename, fileHandle);
+               
+               fileHandle >> nseqs >> name;
+
+               matrixNames.push_back(name);
+
+               if(nameMap == NULL){
+                               list = new ListVector(nseqs);
+                               list->set(0, name);
+               }
+               else{
+                               list = new ListVector(nameMap->getListVector());
+                               if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
+               }
+               
+               //initialize distance matrix to cutoff
+               dMatrix.resize(nseqs);
+               colDist temp(0, 0, cutoff);
+               rowSmallDists.resize(nseqs, temp);
+               for (int i = 1; i < nseqs; i++) {                       
+                       dMatrix[i].resize(i, cutoff);           
+               }                                                                                               
+                                                                                                               
+               
+               char d;
+               while((d=fileHandle.get()) != EOF){
+
+                               if(isalnum(d)){
+                                               square = 1;
+                                               fileHandle.putback(d);
+                                               for(int i=0;i<nseqs;i++){
+                                                               fileHandle >> distance;
+                                               }
+                                               break;
+                               }
+                               if(d == '\n'){
+                                               square = 0;
+                                               break;
+                               }
+               }
+
+               Progress* reading;
+
+               if(square == 0){
+
+                               reading = new Progress("Reading matrix:     ", nseqs * (nseqs - 1) / 2);
+
+                               int        index = 0;
+
+                               for(int i=1;i<nseqs;i++){
+                                               if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
+                                               
+                                               fileHandle >> name;
+                                               matrixNames.push_back(name);
+               
+
+                                               //there's A LOT of repeated code throughout this method...
+                                               if(nameMap == NULL){
+                                                               list->set(i, name);
+                                               
+                                                               for(int j=0;j<i;j++){
+                                                               
+                                                                               if (m->control_pressed) { delete reading; fileHandle.close(); return 0;  }
+                                                                               
+                                                                               fileHandle >> distance;
+                                                       
+                                                                               if (distance == -1) { distance = 1000000; }
+                                                                               else if (globaldata->sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
+                                                               
+                                                                               if(distance < cutoff){
+                                                                                       dMatrix[i][j] = distance;
+                                                                                       if (distance < smallDist) { smallDist = distance; }
+                                                                                       if (rowSmallDists[i].dist > distance) {  rowSmallDists[i].dist = distance; rowSmallDists[i].col = j; rowSmallDists[i].row = i; }
+                                                                                       if (rowSmallDists[j].dist > distance) {  rowSmallDists[j].dist = distance; rowSmallDists[j].col = i; rowSmallDists[j].row = j; }
+                                                                               }
+                                                                               index++;
+                                                                               reading->update(index);
+                                                               }
+                               
+                                               }
+                                               else{
+                                                               if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
+                               
+                                                               for(int j=0;j<i;j++){
+                                                                               fileHandle >> distance;
+                                                                               
+                                                                               if (m->control_pressed) { delete reading; fileHandle.close(); return 0;  }
+                               
+                                                                               if (distance == -1) { distance = 1000000; }
+                                                                               else if (globaldata->sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
+                                                                               
+                                                                               if(distance < cutoff){
+                                                                                       if (distance < smallDist) { smallDist = distance; }
+                                                                                       
+                                                                                       int row = nameMap->get(matrixNames[i]);
+                                                                                       int col = nameMap->get(matrixNames[j]);
+                                                                                       
+                                                                                       if (row < col) {  dMatrix[col][row] = distance; }
+                                                                                       else { dMatrix[row][col] = distance; }
+                                                                                       
+                                                                                       if (rowSmallDists[row].dist > distance) {  rowSmallDists[row].dist = distance; rowSmallDists[row].col = col; rowSmallDists[row].row = row; }
+                                                                                       if (rowSmallDists[col].dist > distance) {  rowSmallDists[col].dist = distance; rowSmallDists[col].col = row; rowSmallDists[col].row = col; }
+                                                                               }
+                                                                               index++;
+                                                                               reading->update(index);
+                                                               }
+                                               }
+                               }
+               }
+               else{
+
+                               reading = new Progress("Reading matrix:     ", nseqs * nseqs);
+               
+                               int index = nseqs;
+
+                               for(int i=1;i<nseqs;i++){
+                                               fileHandle >> name;                
+                                               matrixNames.push_back(name);
+                                               
+                                               if(nameMap == NULL){
+                                                               list->set(i, name);
+                                                               for(int j=0;j<nseqs;j++){
+                                                                               fileHandle >> distance;
+                                                                               
+                                                                               if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
+                                                                               
+                                                                               if (distance == -1) { distance = 1000000; }
+                                                                               else if (globaldata->sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.
+                                                                               
+                                                                               if(distance < cutoff && j < i){
+                                                                                       if (distance < smallDist) { smallDist = distance; }
+                                                                                       
+                                                                                       dMatrix[i][j] = distance;
+                                                                                       if (rowSmallDists[i].dist > distance) {  rowSmallDists[i].dist = distance; rowSmallDists[i].col = j; rowSmallDists[i].row = i; }
+                                                                                       if (rowSmallDists[j].dist > distance) {  rowSmallDists[j].dist = distance; rowSmallDists[j].col = i; rowSmallDists[j].row = j; }
+                                                                               }
+                                                                               index++;
+                                                                               reading->update(index);
+                                                               }
+                                               
+                                               }
+                                               else{
+                                                               if(nameMap->count(name)==0){        m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
+                               
+                                                               for(int j=0;j<nseqs;j++){
+                                                                               fileHandle >> distance;
+                                                                               
+                                                                               if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
+                                                                               
+                                                                          if (distance == -1) { distance = 1000000; }
+                                                                               else if (globaldata->sim) { distance = 1.0 - distance;  }  //user has entered a sim matrix that we need to convert.                                                        
+                                                                               
+                                                                               if(distance < cutoff && j < i){
+                                                                                       if (distance < smallDist) { smallDist = distance; }
+                                                                                       
+                                                                                       int row = nameMap->get(matrixNames[i]);
+                                                                                       int col = nameMap->get(matrixNames[j]);
+                                                                                       
+                                                                                       if (row < col) {  dMatrix[col][row] = distance; }
+                                                                                       else { dMatrix[row][col] = distance; }
+                                                                                       
+                                                                                       if (rowSmallDists[row].dist > distance) {  rowSmallDists[row].dist = distance; rowSmallDists[row].col = col; rowSmallDists[row].row = row; }
+                                                                                       if (rowSmallDists[col].dist > distance) {  rowSmallDists[col].dist = distance; rowSmallDists[col].col = row; rowSmallDists[col].row = col; }
+                                                                               }
+                                                                               index++;
+                                                                               reading->update(index);
+                                                               }
+                                               }
+                               }
+               }
+               
+               if (m->control_pressed) {  fileHandle.close();  delete reading; return 0; }
+               
+               reading->finish();
+               delete reading;
+
+               list->setLabel("0");
+               rabund = new RAbundVector(list->getRAbundVector());
+               
+               fileHandle.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterClassic", "readPhylipFile");
+               exit(1);
+       }
+
+}
+/***********************************************************************/
+//sets smallCol and smallRow, returns distance
+double ClusterClassic::getSmallCell() {
+       try {
+               
+               smallDist = cutoff;
+               smallRow = 1;
+               smallCol = 0;
+               
+               vector<colDist> mins;
+               
+               for(int i=0;i<nseqs;i++){ 
+
+                       if (rowSmallDists[i].dist < smallDist) {
+                               mins.clear();
+                               mins.push_back(rowSmallDists[i]); 
+                               smallDist = rowSmallDists[i].dist;
+                       }else if (rowSmallDists[i].dist == smallDist) {
+                               mins.push_back(rowSmallDists[i]);
+                       }
+               }
+               
+               if (mins.size() > 0) {
+                       int zrand = 0;
+                       if (mins.size() > 1) {
+                               //pick random number between 0 and mins.size()
+                               zrand = (int)((float)(rand()) / (RAND_MAX / (mins.size()-1) + 1));
+                       }
+                       
+                       smallRow = mins[zrand].row;
+                       smallCol = mins[zrand].col;
+               
+               }
+       //cout << smallRow << '\t' << smallCol << '\t' << smallDist << endl;
+       
+               if (smallRow < smallCol) { dMatrix[smallCol][smallRow] = cutoff; }
+               else { dMatrix[smallRow][smallCol] = cutoff; }
+               
+               return smallDist;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterClassic", "getSmallCell");
+               exit(1);
+       }
+}
+/***********************************************************************/
+void ClusterClassic::clusterBins(){
+       try {
+       //      cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol);
+
+               rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));     
+               rabund->set(smallRow, 0);       
+               rabund->setLabel(toString(smallDist));
+
+       //      cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterClassic", "clusterBins");
+               exit(1);
+       }
+}
+/***********************************************************************/
+void ClusterClassic::clusterNames(){
+       try {
+       //      cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(smallRow) << '\t' << list->get(smallCol);
+               if (mapWanted) {  updateMap();  }
+               
+               list->set(smallCol, list->get(smallRow)+','+list->get(smallCol));
+               list->set(smallRow, "");        
+               list->setLabel(toString(smallDist));
+       
+       //      cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterClassic", "clusterNames");
+               exit(1);
+       }
+}
+/***********************************************************************/
+void ClusterClassic::update(double& cutOFF){
+       try {
+//cout << "before update " << endl;
+//print();             
+               getSmallCell();
+               
+               int r, c;
+               //because we only store lt, we need to make sure we grab the right location
+               if (smallRow < smallCol) { c = smallRow; r = smallCol; } //smallRow is really our column value
+               else { r = smallRow; c = smallCol; } //smallRow is the row value
+               
+               //reset rows smallest distance
+               rowSmallDists[r].dist = cutoff; rowSmallDists[r].row = 0; rowSmallDists[r].col = 0;
+               rowSmallDists[c].dist = cutoff; rowSmallDists[c].row = 0; rowSmallDists[c].col = 0;
+               
+               //if your rows smallest distance is from smallRow or smallCol, reset
+               for(int i=0;i<nseqs;i++){
+                       if ((rowSmallDists[i].row == r) || (rowSmallDists[i].row == c) || (rowSmallDists[i].col == r) || (rowSmallDists[i].col == c)) {
+                               rowSmallDists[i].dist = cutoff; rowSmallDists[i].row = 0; rowSmallDists[i].col = 0;
+                       }
+               }
+               
+               for(int i=0;i<nseqs;i++){
+                       if(i != r && i != c){
+                               double distRow, distCol, newDist;
+                               if (i > r) { distRow = dMatrix[i][r]; }
+                               else { distRow =  dMatrix[r][i]; }
+                               
+                               if (i > c) { distCol = dMatrix[i][c]; dMatrix[i][c] = cutoff; } //like removeCell
+                               else { distCol =  dMatrix[c][i]; dMatrix[c][i] = cutoff; }
+                                       
+                               if(method == "furthest"){
+                                       newDist = max(distRow, distCol);
+                               }
+                               else if (method == "average"){
+                                       if ((distRow == cutoff) && (distCol == cutoff)) { //you are merging with a value above cutoff
+                                               newDist = cutoff; //eliminate value
+                                       }else if ((distRow == cutoff) && (distCol != cutoff)) { //you are merging with a value above cutoff
+                                               newDist = cutoff; //eliminate value
+                                               if (cutOFF > distCol) { cutOFF = distCol; }
+                                       }else if ((distRow != cutoff) && (distCol == cutoff)) { //you are merging with a value above cutoff
+                                               newDist = cutoff; //eliminate value
+                                               if (cutOFF > distRow) { cutOFF = distRow; }
+                                       }else {
+                                               int rowBin = rabund->get(r);
+                                               int colBin = rabund->get(c);
+                                               newDist = (colBin * distCol + rowBin * distRow) / (rowBin + colBin);
+                                       }
+                               }
+                               else if (method == "weighted"){
+                                       if ((distRow == cutoff) && (distCol == cutoff)) { //you are merging with a value above cutoff
+                                               newDist = cutoff; //eliminate value
+                                       }else if ((distRow == cutoff) && (distCol != cutoff)) { //you are merging with a value above cutoff
+                                               newDist = cutoff; //eliminate value
+                                               if (cutOFF > distCol) { cutOFF = distCol; }
+                                       }else if ((distRow != cutoff) && (distCol == cutoff)) { //you are merging with a value above cutoff
+                                               newDist = cutoff; //eliminate value
+                                               if (cutOFF > distRow) { cutOFF = distRow; }
+                                       }else {
+                                               newDist = (distCol + distRow) / 2.0;
+                                       }
+                               }
+                               else if (method == "nearest"){
+                                       newDist = min(distRow, distCol);
+                               }
+                               
+                               if (i > r) { dMatrix[i][r] = newDist; }
+                               else { dMatrix[r][i] = newDist; }
+                               
+                               if (newDist < rowSmallDists[i].dist) {  rowSmallDists[i].dist = newDist; rowSmallDists[i].col = r; rowSmallDists[i].row = i;  }
+                       }
+                       //cout << "rowsmall = " << i << '\t' << rowSmallDists[i].dist << endl;  
+               }
+                       
+               clusterBins();
+               clusterNames();
+       
+               //find new small for 2 rows we just merged
+               colDist temp(0,0,100.0);
+               rowSmallDists[r] = temp;
+
+               for (int i = 0; i < dMatrix[r].size(); i++) {
+                       if (dMatrix[r][i] < rowSmallDists[r].dist) { rowSmallDists[r].dist = dMatrix[r][i]; rowSmallDists[r].col = r; rowSmallDists[r].row = i; }
+               }
+               for (int i = dMatrix[r].size()+1; i < dMatrix.size(); i++) {
+                       if (dMatrix[i][dMatrix[r].size()] < rowSmallDists[r].dist) { rowSmallDists[r].dist = dMatrix[i][dMatrix[r].size()]; rowSmallDists[r].col = r; rowSmallDists[r].row = i; }
+               }
+               
+               rowSmallDists[c] = temp;
+               for (int i = 0; i < dMatrix[c].size(); i++) {
+                       if (dMatrix[c][i] < rowSmallDists[c].dist) { rowSmallDists[c].dist = dMatrix[c][i]; rowSmallDists[c].col = c; rowSmallDists[c].row = i; }
+               }
+               for (int i = dMatrix[c].size()+1; i < dMatrix.size(); i++) {
+                       if (dMatrix[i][dMatrix[c].size()] < rowSmallDists[c].dist) { rowSmallDists[c].dist = dMatrix[i][dMatrix[c].size()]; rowSmallDists[c].col = c; rowSmallDists[c].row = i; }
+               }
+               
+               //cout << "after update " << endl;
+               //print();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterClassic", "update");
+               exit(1);
+       }
+}
+/***********************************************************************/
+void ClusterClassic::setMapWanted(bool f)  {  
+       try {
+               mapWanted = f;
+               
+               //initialize map
+               for (int i = 0; i < list->getNumBins(); i++) {
+                       
+                       //parse bin 
+                       string names = list->get(i);
+                       while (names.find_first_of(',') != -1) { 
+                               //get name from bin
+                               string name = names.substr(0,names.find_first_of(','));
+                               //save name and bin number
+                               seq2Bin[name] = i;
+                               names = names.substr(names.find_first_of(',')+1, names.length());
+                       }
+                       
+                       //get last name
+                       seq2Bin[names] = i;
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterClassic", "setMapWanted");
+               exit(1);
+       }
+}
+/***********************************************************************/
+void ClusterClassic::updateMap() {
+try {
+               //update location of seqs in smallRow since they move to smallCol now
+               string names = list->get(smallRow);
+               while (names.find_first_of(',') != -1) { 
+                       //get name from bin
+                       string name = names.substr(0,names.find_first_of(','));
+                       //save name and bin number
+                       seq2Bin[name] = smallCol;
+                       names = names.substr(names.find_first_of(',')+1, names.length());
+               }
+                       
+               //get last name
+               seq2Bin[names] = smallCol;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterClassic", "updateMap");
+               exit(1);
+       }
+}
+/***********************************************************************/
+void ClusterClassic::print() {
+try {
+               //update location of seqs in smallRow since they move to smallCol now
+               for (int i = 0; i < dMatrix.size(); i++) {
+                       cout << "row = " << i << '\t';
+                       for (int j = 0; j < dMatrix[i].size(); j++) {
+                               cout << dMatrix[i][j] << '\t';
+                       }
+                       cout << endl;
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterClassic", "updateMap");
+               exit(1);
+       }
+}
+/***********************************************************************/
+
diff --git a/clusterclassic.h b/clusterclassic.h
new file mode 100644 (file)
index 0000000..37d9b1b
--- /dev/null
@@ -0,0 +1,68 @@
+#ifndef CLUSTER_H
+#define CLUSTER_H
+
+
+#include "mothur.h"
+#include "mothurout.h"
+#include "listvector.hpp"
+#include "globaldata.hpp"
+#include "rabundvector.hpp"
+
+/*
+ *  clusterclassic.h
+ *  Mothur
+ *
+ *  Created by westcott on 10/29/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+class ClusterClassic {
+       
+public:
+       ClusterClassic(float, string);
+       int readPhylipFile(string, NameAssignment*);
+       void update(double&);
+       double getSmallDist() { return smallDist; }     
+       int getNSeqs() { return nseqs; }        
+       ListVector* getListVector() { return list; }
+       RAbundVector* getRAbundVector() { return rabund; }              
+       string getTag();
+       void setMapWanted(bool m);  
+       map<string, int> getSeqtoBin()  {  return seq2Bin;      }
+
+private:       
+       double getSmallCell();
+       void clusterBins();
+       void clusterNames();
+       void updateMap();
+       void print();
+       
+       struct colDist {
+               int col;
+               int row;
+               double dist;
+               colDist(int i, int r, double d) : row(r), col(i), dist(d) {}
+       };
+       
+       RAbundVector* rabund;
+       ListVector* list;
+       vector< vector<double> > dMatrix;       
+       vector<colDist> rowSmallDists;
+       
+       int smallRow;
+       int smallCol, nseqs;
+       double smallDist;
+       bool mapWanted;
+       float cutoff;
+       map<string, int> seq2Bin;
+       string method;
+       
+       MothurOut* m;
+       GlobalData* globaldata;
+};
+
+#endif
+
+
diff --git a/clusterdoturcommand.cpp b/clusterdoturcommand.cpp
new file mode 100644 (file)
index 0000000..1c0c78d
--- /dev/null
@@ -0,0 +1,310 @@
+/*
+ *  clusterdoturcommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 10/27/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "clusterdoturcommand.h"
+#include "clusterclassic.h"
+
+//**********************************************************************************************************************
+vector<string> ClusterDoturCommand::getValidParameters(){      
+       try {
+               string AlignArray[] =  {"phylip","cutoff","precision","method","outputdir","inputdir"};
+               vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterDoturCommand", "getValidParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+ClusterDoturCommand::ClusterDoturCommand(){    
+       try {
+               abort = true;
+               //initialize outputTypes
+               vector<string> tempOutNames;
+               outputTypes["list"] = tempOutNames;
+               outputTypes["rabund"] = tempOutNames;
+               outputTypes["sabund"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterDoturCommand", "ClusterCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> ClusterDoturCommand::getRequiredParameters(){   
+       try {
+               string Array[] =  {"phylip"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterDoturCommand", "getRequiredParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> ClusterDoturCommand::getRequiredFiles(){        
+       try {
+               vector<string> myArray; 
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterDoturCommand", "getRequiredFiles");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
+ClusterDoturCommand::ClusterDoturCommand(string option)  {
+       try{
+               
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string Array[] =  {"phylip","cutoff","precision","method","outputdir","inputdir"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+               
+                       //check to make sure all parameters are valid for command
+                       map<string,string>::iterator it;
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
+                                       abort = true;
+                               }
+                       }
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("phylip");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
+                               }
+                               
+                               it = parameters.find("name");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["name"] = inputDir + it->second;             }
+                               }
+
+                       }
+                       
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["list"] = tempOutNames;
+                       outputTypes["rabund"] = tempOutNames;
+                       outputTypes["sabund"] = tempOutNames;
+               
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
+                       
+                       //check for required parameters
+                       phylipfile = validParameter.validFile(parameters, "phylip", true);
+                       if (phylipfile == "not open") { abort = true; }
+                       else if (phylipfile == "not found") { phylipfile = ""; m->mothurOut("When executing the cluster.dotur command you must enter a phylip file."); m->mothurOutEndLine(); abort = true; }   
+
+               
+                       //check for optional parameter and set defaults
+                       namefile = validParameter.validFile(parameters, "name", true);
+                       if (namefile == "not open") { abort = true; }   
+                       else if (namefile == "not found") { namefile = ""; }
+                       
+                       string temp;
+                       temp = validParameter.validFile(parameters, "precision", false);
+                       if (temp == "not found") { temp = "100"; }
+                       //saves precision legnth for formatting below
+                       length = temp.length();
+                       convert(temp, precision); 
+                       
+                       temp = validParameter.validFile(parameters, "cutoff", false);
+                       if (temp == "not found") { temp = "10"; }
+                       convert(temp, cutoff); 
+                       cutoff += (5 / (precision * 10.0));  
+                       
+                       temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
+                       hard = m->isTrue(temp);
+                       
+                       method = validParameter.validFile(parameters, "method", false);
+                       if (method == "not found") { method = "furthest"; }
+                       
+                       if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "weighted")) { 
+                               if (method == "furthest") { tag = "fn"; }
+                               else if (method == "nearest") { tag = "nn"; }
+                               else if (method == "average") { tag = "an"; }
+                               else if (method == "weighted") { tag = "wn"; }
+                       }else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest, average, weighted."); m->mothurOutEndLine(); abort = true; }
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterDoturCommand", "ClusterCommand");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+void ClusterDoturCommand::help(){
+       try {
+               m->mothurOut("The cluster.classic command clusters using the algorithm from dotur. \n");
+               m->mothurOut("The cluster.classic command parameter options are phylip, name, method, cuttoff, hard, precision. Phylip is required.\n");
+               m->mothurOut("The cluster.classic command should be in the following format: \n");
+               m->mothurOut("cluster.classic(phylip=yourDistanceMatrix, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n");
+               m->mothurOut("The acceptable cluster methods are furthest, nearest, weighted and average.  If no method is provided then furthest is assumed.\n\n");    
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterDoturCommand", "help");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+ClusterDoturCommand::~ClusterDoturCommand(){}
+
+//**********************************************************************************************************************
+
+int ClusterDoturCommand::execute(){
+       try {
+       
+               if (abort == true) {    return 0;       }
+               
+               if(namefile != ""){     
+                       nameMap = new NameAssignment(namefile);
+                       nameMap->readMap();
+               }else{
+                       nameMap = NULL;
+               }
+               
+               //reads phylip file storing data in 2D vector, also fills list and rabund
+               ClusterClassic* cluster = new ClusterClassic(cutoff, method);
+               cluster->readPhylipFile(phylipfile, nameMap);
+               
+               if (m->control_pressed) { delete cluster; delete list; delete rabund; return 0; }
+               
+               list = cluster->getListVector();
+               rabund = cluster->getRAbundVector();
+                                               
+               if (outputDir == "") { outputDir += m->hasPath(phylipfile); }
+               fileroot = outputDir + m->getRootName(m->getSimpleName(phylipfile));
+                       
+               m->openOutputFile(fileroot+ tag + ".sabund",    sabundFile);
+               m->openOutputFile(fileroot+ tag + ".rabund",    rabundFile);
+               m->openOutputFile(fileroot+ tag + ".list",              listFile);
+                               
+               outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
+               outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
+               outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
+               
+               float previousDist = 0.00000;
+               float rndPreviousDist = 0.00000;
+               oldRAbund = *rabund;
+               oldList = *list;
+
+               double saveCutoff = cutoff;
+               
+               int estart = time(NULL);
+       
+               while (cluster->getSmallDist() < cutoff && cluster->getNSeqs() > 0){
+               
+                       if (m->control_pressed) { delete cluster; delete list; delete rabund; sabundFile.close();rabundFile.close();listFile.close();  for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());         } outputTypes.clear();  return 0;  }
+               
+                       cluster->update(cutoff);
+       
+                       float dist = cluster->getSmallDist();
+                       float rndDist;
+                       if (hard) {
+                               rndDist = m->ceilDist(dist, precision); 
+                       }else{
+                               rndDist = m->roundDist(dist, precision); 
+                       }
+
+                       if(previousDist <= 0.0000 && dist != previousDist){
+                               printData("unique");
+                       }
+                       else if(rndDist != rndPreviousDist){
+                               printData(toString(rndPreviousDist,  length-1));
+                       }
+               
+                       previousDist = dist;
+                       rndPreviousDist = rndDist;
+                       oldRAbund = *rabund;
+                       oldList = *list;
+               }
+       
+               if(previousDist <= 0.0000){
+                       printData("unique");
+               }
+               else if(rndPreviousDist<cutoff){
+                       printData(toString(rndPreviousDist, length-1));
+               }
+                                       
+               sabundFile.close();
+               rabundFile.close();
+               listFile.close();
+               
+               delete cluster; delete nameMap; delete list; delete rabund;
+       
+               if (saveCutoff != cutoff) { 
+                       if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
+                       else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
+                       m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
+               }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+
+               m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
+
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterDoturCommand", "execute");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+void ClusterDoturCommand::printData(string label){
+       try {
+       
+               oldRAbund.setLabel(label);
+               oldRAbund.print(rabundFile);
+               oldRAbund.getSAbundVector().print(sabundFile);
+               
+               oldRAbund.getSAbundVector().print(cout);
+               
+               oldList.setLabel(label);
+               oldList.print(listFile);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterDoturCommand", "printData");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
diff --git a/clusterdoturcommand.h b/clusterdoturcommand.h
new file mode 100644 (file)
index 0000000..da2f4a7
--- /dev/null
@@ -0,0 +1,52 @@
+#ifndef CLUSTERDOTURCOMMAND_H
+#define CLUSTERDOTURCOMMAND_H
+
+/*
+ *  clusterdoturcommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 10/27/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+#include "nameassignment.hpp"
+#include "globaldata.hpp"
+#include "rabundvector.hpp"
+#include "sabundvector.hpp"
+#include "listvector.hpp"
+
+
+class ClusterDoturCommand : public Command {
+       
+public:
+       ClusterDoturCommand(string);
+       ClusterDoturCommand();
+       ~ClusterDoturCommand();
+       vector<string> getRequiredParameters();
+       vector<string> getValidParameters();
+       vector<string> getRequiredFiles();
+       map<string, vector<string> > getOutputFiles() { return outputTypes; }
+       int execute();  
+       void help();
+       
+private:
+       bool abort, hard;
+       string method, fileroot, tag, outputDir, phylipfile, namefile;
+       double cutoff;
+       int precision, length;
+       ofstream sabundFile, rabundFile, listFile;
+       NameAssignment* nameMap;
+       ListVector* list;
+       RAbundVector* rabund;
+       RAbundVector oldRAbund;
+       ListVector oldList;
+       
+       void printData(string label);
+       vector<string> outputNames;
+       map<string, vector<string> > outputTypes;
+};
+
+#endif
+
index 224ad217dd103dcb419466978e384cd40941f5b3..927f3742216529f54ea12a769bae752501b7fe33 100644 (file)
@@ -95,6 +95,7 @@
 #include "pipelinepdscommand.h"
 #include "deuniqueseqscommand.h"
 #include "pairwiseseqscommand.h"
+#include "clusterdoturcommand.h"
 
 
 /*******************************************************/
@@ -195,6 +196,7 @@ CommandFactory::CommandFactory(){
        commands["remove.lineage"]              = "remove.lineage";
        commands["fastq.info"]                  = "fastq.info";
        commands["deunique.seqs"]               = "deunique.seqs";
+       commands["cluster.classic"]             = "cluster.classic";
        commands["pairwise.seqs"]               = "MPIEnabled";
        commands["pipeline.pds"]                = "MPIEnabled";
        commands["classify.seqs"]               = "MPIEnabled"; 
@@ -341,6 +343,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "pipeline.pds")                  {       command = new PipelineCommand(optionString);                            }
                else if(commandName == "deunique.seqs")                 {       command = new DeUniqueSeqsCommand(optionString);                        }
                else if(commandName == "pairwise.seqs")                 {       command = new PairwiseSeqsCommand(optionString);                        }
+               else if(commandName == "cluster.classic")               {       command = new ClusterDoturCommand(optionString);                        }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -454,6 +457,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
                else if(commandName == "fastq.info")                    {       pipecommand = new ParseFastaQCommand(optionString);                             }
                else if(commandName == "deunique.seqs")                 {       pipecommand = new DeUniqueSeqsCommand(optionString);                    }
                else if(commandName == "pairwise.seqs")                 {       pipecommand = new PairwiseSeqsCommand(optionString);                    }
+               else if(commandName == "cluster.classic")               {       pipecommand = new ClusterDoturCommand(optionString);                    }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -555,6 +559,7 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "fastq.info")                    {       shellcommand = new ParseFastaQCommand();                        }
                else if(commandName == "deunique.seqs")                 {       shellcommand = new DeUniqueSeqsCommand();                       }
                else if(commandName == "pairwise.seqs")                 {       shellcommand = new PairwiseSeqsCommand();                       }
+               else if(commandName == "cluster.classic")               {       shellcommand = new ClusterDoturCommand();                       }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
diff --git a/mothur b/mothur
index ff0d8092bad351f1c68308e2f87fdc18d23ea12e..dfb46e652663bde20b1e7e22471e0a183df5e165 100755 (executable)
Binary files a/mothur and b/mothur differ
index cd9e3212bfb3e4759ed798895fcfb004784e31c5..de6da8c217a36a17368be48c21a7116d36a43108 100644 (file)
@@ -1563,6 +1563,7 @@ bool MothurOut::checkReleaseVersion(ifstream& file, string version) {
                exit(1);
        }
 }
+
 /**************************************************************************************************/
 
 
index 9921a40dccec386ad1266e9954ed72686f7be130..efe1294245d915a66dc76e0baea439b8cf9caa1a 100644 (file)
@@ -12,6 +12,7 @@
 
 #include "mothur.h"
 
+
 /***********************************************/
 
 class MothurOut {
@@ -88,7 +89,6 @@ class MothurOut {
                int control_pressed;
                bool executing;
                
-
        private:
                static MothurOut* _uniqueInstance;
                MothurOut( const MothurOut& ); // Disable copy constructor
diff --git a/subsamplecommand.cpp b/subsamplecommand.cpp
new file mode 100644 (file)
index 0000000..04f6c7c
--- /dev/null
@@ -0,0 +1,359 @@
+/*
+ *  subsamplecommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 10/27/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "subsamplecommand.h"
+
+//**********************************************************************************************************************
+vector<string> SubSampleCommand::getValidParameters(){ 
+       try {
+               string Array[] =  {"groups","label","outputdir","inputdir"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SubSampleCommand", "getValidParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+SubSampleCommand::SubSampleCommand(){  
+       try {
+               abort = true;
+               //initialize outputTypes
+               vector<string> tempOutNames;
+               outputTypes["shared"] = tempOutNames;
+               outputTypes["list"] = tempOutNames;
+               outputTypes["rabund"] = tempOutNames;
+               outputTypes["sabund"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SubSampleCommand", "GetRelAbundCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> SubSampleCommand::getRequiredParameters(){      
+       try {
+               vector<string> myArray;
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SubSampleCommand", "getRequiredParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> SubSampleCommand::getRequiredFiles(){   
+       try {
+               string Array[] =  {"shared","list","rabund","sabund","or"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SubSampleCommand", "getRequiredFiles");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+SubSampleCommand::SubSampleCommand(string option) {
+       try {
+               globaldata = GlobalData::getInstance();
+               abort = false;
+               allLines = 1;
+               labels.clear();
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string AlignArray[] =  {"groups","label","outputdir","inputdir"};
+                       vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["shared"] = tempOutNames;
+                       outputTypes["list"] = tempOutNames;
+                       outputTypes["rabund"] = tempOutNames;
+                       outputTypes["sabund"] = tempOutNames;
+                                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = ""; 
+                               outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it       
+                       }
+                       
+                       //make sure the user has already run the read.otu command
+                       if ((globaldata->getSharedFile() == "") && (globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { m->mothurOut("You must read a list, sabund, rabund or shared file before you can use the sub.sample command."); m->mothurOutEndLine(); abort = true; }
+
+                       //check for optional parameter and set defaults
+                       // ...at some point should added some additional type checking...
+                       label = validParameter.validFile(parameters, "label", false);                   
+                       if (label == "not found") { label = ""; }
+                       else { 
+                               if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }
+                       
+                       //if the user has not specified any labels use the ones from read.otu
+                       if (label == "") {  
+                               allLines = globaldata->allLines; 
+                               labels = globaldata->labels; 
+                       }
+                       
+                       groups = validParameter.validFile(parameters, "groups", false);                 
+                       if (groups == "not found") { groups = ""; pickedGroups = false; }
+                       else { 
+                               pickedGroups = true;
+                               m->splitAtDash(groups, Groups);
+                               globaldata->Groups = Groups;
+                       }
+                       
+               }
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SubSampleCommand", "SubSampleCommand");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+void SubSampleCommand::help(){
+       try {
+               m->mothurOut("The get.relabund command can only be executed after a successful read.otu command of a list and group or shared file.\n");
+               m->mothurOut("The get.relabund command parameters are groups, scale and label.  No parameters are required.\n");
+               m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
+               m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
+               m->mothurOut("The scale parameter allows you to select what scale you would like to use. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n");
+               m->mothurOut("The get.relabund command should be in the following format: get.relabund(groups=yourGroups, label=yourLabels).\n");
+               m->mothurOut("Example get.relabund(groups=A-B-C, scale=averagegroup).\n");
+               m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
+               m->mothurOut("The get.relabund command outputs a .relabund file.\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SubSampleCommand", "help");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+SubSampleCommand::~SubSampleCommand(){
+}
+
+//**********************************************************************************************************************
+
+int SubSampleCommand::execute(){
+       try {
+       
+               if (abort == true) { return 0; }
+               
+               string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "subsample" +  m->getExtension(globaldata->inputFileName);
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+               
+               string format = globaldata->getFormat();
+               
+               read = new ReadOTUFile(globaldata->inputFileName);      
+               read->read(&*globaldata); 
+               input = globaldata->ginput;
+               
+               if (format == "sharedfile") {
+                       lookup = input->getSharedRAbundVectors();
+                       outputTypes["shared"].push_back(outputFileName);
+                       getSubSampleShared(lookup, out);
+               }else if (format == "list") { 
+                       list = globaldata->glist;
+                       outputTypes["list"].push_back(outputFileName);
+                       //getSubSamplesList();
+               }else if (format == "rabund") { 
+                       rabund = globaldata->rabund;
+                       outputTypes["rabund"].push_back(outputFileName);
+                       //getSubSamplesRabund();
+               
+               }else if (format == "sabund") { 
+                       sabund = globaldata->sabund;
+                       outputTypes["sabund"].push_back(outputFileName);
+                       //getSubSamplesSabund();
+               }
+               
+               out.close();
+                                       
+               //reset groups parameter
+               delete input; globaldata->ginput = NULL;
+               delete read;
+               
+               if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;}
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); 
+               m->mothurOutEndLine();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SubSampleCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SubSampleCommand::getSubSampleShared(vector<SharedRAbundVector*>& thislookup, ofstream& filename) {
+       try {
+       
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+
+               string lastLabel = lookup[0]->getLabel();
+       
+               //as long as you are not at the end of the file or done wih the lines you want
+               while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+                       if (m->control_pressed) {  return 0;  }
+       
+                       if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
+
+                               m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                               
+                               //process lookup
+                               
+                                                               
+                                                                                                                               
+                               processedLabels.insert(lookup[0]->getLabel());
+                               userLabels.erase(lookup[0]->getLabel());
+                       }
+                       
+                       if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = lookup[0]->getLabel();
+               
+                               for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
+               
+                               lookup = input->getSharedRAbundVectors(lastLabel);
+                               m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                               
+                               //process lookup                                
+                               processedLabels.insert(lookup[0]->getLabel());
+                               userLabels.erase(lookup[0]->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               lookup[0]->setLabel(saveLabel);
+                       }
+                       
+                       lastLabel = lookup[0]->getLabel();
+                       //prevent memory leak
+                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
+                                               
+                       //get next line to process
+                       lookup = input->getSharedRAbundVectors();                               
+               }
+               
+               
+               if (m->control_pressed) {  return 0;  }
+
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+       
+               //run last label if you need to
+               if (needToRun == true)  {
+                       for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
+                       lookup = input->getSharedRAbundVectors(lastLabel);
+                       
+                       m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+                       
+                       //process lookup
+                       
+                       
+                       
+                       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
+               }
+       
+               //reset groups parameter
+               globaldata->Groups.clear();  
+                               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SubSampleCommand", "getSubSampleShared");
+               exit(1);
+       }
+}
+
+
+//**********************************************************************************************************************
+int SubSampleCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
+       try {
+               
+               vector<SharedRAbundVector*> newLookup;
+               for (int i = 0; i < thislookup.size(); i++) {
+                       SharedRAbundVector* temp = new SharedRAbundVector();
+                       temp->setLabel(thislookup[i]->getLabel());
+                       temp->setGroup(thislookup[i]->getGroup());
+                       newLookup.push_back(temp);
+               }
+               
+               //for each bin
+               for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
+                       if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+               
+                       //look at each sharedRabund and make sure they are not all zero
+                       bool allZero = true;
+                       for (int j = 0; j < thislookup.size(); j++) {
+                               if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
+                       }
+                       
+                       //if they are not all zero add this bin
+                       if (!allZero) {
+                               for (int j = 0; j < thislookup.size(); j++) {
+                                       newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
+                               }
+                       }
+               }
+
+               for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
+
+               thislookup = newLookup;
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SubSampleCommand", "eliminateZeroOTUS");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+
diff --git a/subsamplecommand.h b/subsamplecommand.h
new file mode 100644 (file)
index 0000000..98085ee
--- /dev/null
@@ -0,0 +1,53 @@
+#ifndef SUBSAMPLECOMMAND_H
+#define SUBSAMPLECOMMAND_H
+
+/*
+ *  subsamplecommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 10/27/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+#include "command.hpp"
+#include "inputdata.h"
+#include "readotu.h"
+#include "sharedrabundvector.h"
+
+class GlobalData;
+
+class SubSampleCommand : public Command {
+
+public:
+       SubSampleCommand(string);
+       SubSampleCommand();
+       ~SubSampleCommand();
+       vector<string> getRequiredParameters();
+       vector<string> getValidParameters();
+       vector<string> getRequiredFiles();
+       map<string, vector<string> > getOutputFiles() { return outputTypes; }
+       int execute();
+       void help();
+       
+private:
+       GlobalData* globaldata;
+       ReadOTUFile* read;
+       InputData* input;
+       vector<SharedRAbundVector*> lookup;
+       ListVector* list;
+       RAbundVector* rabund;
+       SAbundVector* sabund;
+       
+       bool abort, allLines, pickedGroups;
+       set<string> labels; //holds labels to be used
+       string groups, label, outputDir;
+       vector<string> Groups, outputNames;
+       map<string, vector<string> > outputTypes;
+       
+       int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
+       int getSubSampleShared(vector<SharedRAbundVector*>&, ofstream&);
+};
+
+#endif
+