From: westcott Date: Fri, 29 Oct 2010 18:13:12 +0000 (+0000) Subject: added cluster.classic command X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=6cf9539162b0fb0b5c0b99673e999df3cd717c55 added cluster.classic command --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 4a328a3..855dad8 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -45,6 +45,8 @@ A72B3A7C118B4D1B004B9F8D /* phylodiversity.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversity.cpp; sourceTree = ""; }; A72C66A1125B356F0058C2F9 /* pipelinepdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pipelinepdscommand.h; sourceTree = ""; }; A72C66A2125B356F0058C2F9 /* pipelinepdscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pipelinepdscommand.cpp; sourceTree = ""; }; + A72E295D127ADCE300905B79 /* clusterclassic.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clusterclassic.h; sourceTree = ""; }; + A72E295E127ADCE300905B79 /* clusterclassic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clusterclassic.cpp; sourceTree = ""; }; A732505E11E49EF100484B90 /* sffinfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sffinfocommand.h; sourceTree = ""; }; A732505F11E49EF100484B90 /* sffinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sffinfocommand.cpp; sourceTree = ""; }; A73953DA11987ED100B0B160 /* chopseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chopseqscommand.h; sourceTree = ""; }; @@ -56,6 +58,10 @@ A74A9C03124B72DB000D5D25 /* clusterfragmentscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clusterfragmentscommand.h; sourceTree = ""; }; A74A9C04124B72DB000D5D25 /* clusterfragmentscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clusterfragmentscommand.cpp; sourceTree = ""; }; A7639F8D1175DF35008F5578 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = ""; }; + A7653CB112789EFD009D6C09 /* subsamplecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = subsamplecommand.h; sourceTree = ""; }; + A7653CB212789EFD009D6C09 /* subsamplecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = subsamplecommand.cpp; sourceTree = ""; }; + A7653D111278B09C009D6C09 /* clusterdoturcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clusterdoturcommand.h; sourceTree = ""; }; + A7653D121278B09C009D6C09 /* clusterdoturcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clusterdoturcommand.cpp; sourceTree = ""; }; A76714DD126DE45A003F359A /* deuniqueseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniqueseqscommand.h; sourceTree = ""; }; A76714DE126DE45A003F359A /* deuniqueseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniqueseqscommand.cpp; sourceTree = ""; }; A768D95D1248FEAA008AB1D0 /* mothur */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.executable"; path = mothur; sourceTree = ""; }; @@ -524,6 +530,8 @@ A7DA1FFF113FECD400BF472F /* blastalign.cpp */, A7DA201F113FECD400BF472F /* cluster.cpp */, A7DA2020113FECD400BF472F /* cluster.hpp */, + A72E295D127ADCE300905B79 /* clusterclassic.h */, + A72E295E127ADCE300905B79 /* clusterclassic.cpp */, A7CB593B11402EF90010EB83 /* calculators */, A7CB594A11402FB40010EB83 /* chimeras */, A7CB594511402F6E0010EB83 /* classifers */, @@ -762,6 +770,8 @@ A7D215C911996C6E00F13F13 /* clearcutcommand.cpp */, A7DA2022113FECD400BF472F /* clustercommand.h */, A7DA2021113FECD400BF472F /* clustercommand.cpp */, + A7653D111278B09C009D6C09 /* clusterdoturcommand.h */, + A7653D121278B09C009D6C09 /* clusterdoturcommand.cpp */, A74A9C03124B72DB000D5D25 /* clusterfragmentscommand.h */, A74A9C04124B72DB000D5D25 /* clusterfragmentscommand.cpp */, A71D924311AEB42400D00CBC /* clustersplitcommand.h */, @@ -890,6 +900,8 @@ A71D924411AEB42400D00CBC /* splitabundcommand.cpp */, A7F139481247C3CB0033324C /* splitgroupscommand.h */, A7F139491247C3CB0033324C /* splitgroupscommand.cpp */, + A7653CB112789EFD009D6C09 /* subsamplecommand.h */, + A7653CB212789EFD009D6C09 /* subsamplecommand.cpp */, A7DA2155113FECD400BF472F /* summarycommand.h */, A7DA2154113FECD400BF472F /* summarycommand.cpp */, A7DA2159113FECD400BF472F /* summarysharedcommand.h */, @@ -1112,7 +1124,6 @@ }; buildConfigurationList = 1DEB919308733D9F0010E9CD /* Build configuration list for PBXProject "Mothur" */; compatibilityVersion = "Xcode 3.0"; - developmentRegion = English; hasScannedForEncodings = 1; knownRegions = ( English, diff --git a/clusterclassic.cpp b/clusterclassic.cpp new file mode 100644 index 0000000..7a701aa --- /dev/null +++ b/clusterclassic.cpp @@ -0,0 +1,476 @@ +/* + * clusterclassic.cpp + * Mothur + * + * Created by westcott on 10/29/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "clusterclassic.h" +#include "progress.hpp" + +/***********************************************************************/ +ClusterClassic::ClusterClassic(float c, string f) : method(f), smallDist(1e6), nseqs(0) { + try { + mapWanted = false; //set to true by mgcluster to speed up overlap merge + + //save so you can modify as it changes in average neighbor + cutoff = c; + m = MothurOut::getInstance(); + globaldata = GlobalData::getInstance(); + } + catch(exception& e) { + m->errorOut(e, "ClusterClassic", "ClusterClassic"); + exit(1); + } +} +/***********************************************************************/ +int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { + try { + double distance; + int square; + string name; + vector matrixNames; + + ifstream fileHandle; + m->openInputFile(filename, fileHandle); + + fileHandle >> nseqs >> name; + + matrixNames.push_back(name); + + if(nameMap == NULL){ + list = new ListVector(nseqs); + list->set(0, name); + } + else{ + list = new ListVector(nameMap->getListVector()); + if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); } + } + + //initialize distance matrix to cutoff + dMatrix.resize(nseqs); + colDist temp(0, 0, cutoff); + rowSmallDists.resize(nseqs, temp); + for (int i = 1; i < nseqs; i++) { + dMatrix[i].resize(i, cutoff); + } + + + char d; + while((d=fileHandle.get()) != EOF){ + + if(isalnum(d)){ + square = 1; + fileHandle.putback(d); + for(int i=0;i> distance; + } + break; + } + if(d == '\n'){ + square = 0; + break; + } + } + + Progress* reading; + + if(square == 0){ + + reading = new Progress("Reading matrix: ", nseqs * (nseqs - 1) / 2); + + int index = 0; + + for(int i=1;icontrol_pressed) { fileHandle.close(); delete reading; return 0; } + + fileHandle >> name; + matrixNames.push_back(name); + + + //there's A LOT of repeated code throughout this method... + if(nameMap == NULL){ + list->set(i, name); + + for(int j=0;jcontrol_pressed) { delete reading; fileHandle.close(); return 0; } + + fileHandle >> distance; + + if (distance == -1) { distance = 1000000; } + else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + if(distance < cutoff){ + dMatrix[i][j] = distance; + if (distance < smallDist) { smallDist = distance; } + if (rowSmallDists[i].dist > distance) { rowSmallDists[i].dist = distance; rowSmallDists[i].col = j; rowSmallDists[i].row = i; } + if (rowSmallDists[j].dist > distance) { rowSmallDists[j].dist = distance; rowSmallDists[j].col = i; rowSmallDists[j].row = j; } + } + index++; + reading->update(index); + } + + } + else{ + if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); } + + for(int j=0;j> distance; + + if (m->control_pressed) { delete reading; fileHandle.close(); return 0; } + + if (distance == -1) { distance = 1000000; } + else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + if(distance < cutoff){ + if (distance < smallDist) { smallDist = distance; } + + int row = nameMap->get(matrixNames[i]); + int col = nameMap->get(matrixNames[j]); + + if (row < col) { dMatrix[col][row] = distance; } + else { dMatrix[row][col] = distance; } + + if (rowSmallDists[row].dist > distance) { rowSmallDists[row].dist = distance; rowSmallDists[row].col = col; rowSmallDists[row].row = row; } + if (rowSmallDists[col].dist > distance) { rowSmallDists[col].dist = distance; rowSmallDists[col].col = row; rowSmallDists[col].row = col; } + } + index++; + reading->update(index); + } + } + } + } + else{ + + reading = new Progress("Reading matrix: ", nseqs * nseqs); + + int index = nseqs; + + for(int i=1;i> name; + matrixNames.push_back(name); + + if(nameMap == NULL){ + list->set(i, name); + for(int j=0;j> distance; + + if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } + + if (distance == -1) { distance = 1000000; } + else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + if(distance < cutoff && j < i){ + if (distance < smallDist) { smallDist = distance; } + + dMatrix[i][j] = distance; + if (rowSmallDists[i].dist > distance) { rowSmallDists[i].dist = distance; rowSmallDists[i].col = j; rowSmallDists[i].row = i; } + if (rowSmallDists[j].dist > distance) { rowSmallDists[j].dist = distance; rowSmallDists[j].col = i; rowSmallDists[j].row = j; } + } + index++; + reading->update(index); + } + + } + else{ + if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); } + + for(int j=0;j> distance; + + if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } + + if (distance == -1) { distance = 1000000; } + else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + if(distance < cutoff && j < i){ + if (distance < smallDist) { smallDist = distance; } + + int row = nameMap->get(matrixNames[i]); + int col = nameMap->get(matrixNames[j]); + + if (row < col) { dMatrix[col][row] = distance; } + else { dMatrix[row][col] = distance; } + + if (rowSmallDists[row].dist > distance) { rowSmallDists[row].dist = distance; rowSmallDists[row].col = col; rowSmallDists[row].row = row; } + if (rowSmallDists[col].dist > distance) { rowSmallDists[col].dist = distance; rowSmallDists[col].col = row; rowSmallDists[col].row = col; } + } + index++; + reading->update(index); + } + } + } + } + + if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } + + reading->finish(); + delete reading; + + list->setLabel("0"); + rabund = new RAbundVector(list->getRAbundVector()); + + fileHandle.close(); + } + catch(exception& e) { + m->errorOut(e, "ClusterClassic", "readPhylipFile"); + exit(1); + } + +} +/***********************************************************************/ +//sets smallCol and smallRow, returns distance +double ClusterClassic::getSmallCell() { + try { + + smallDist = cutoff; + smallRow = 1; + smallCol = 0; + + vector mins; + + for(int i=0;i 0) { + int zrand = 0; + if (mins.size() > 1) { + //pick random number between 0 and mins.size() + zrand = (int)((float)(rand()) / (RAND_MAX / (mins.size()-1) + 1)); + } + + smallRow = mins[zrand].row; + smallCol = mins[zrand].col; + + } + //cout << smallRow << '\t' << smallCol << '\t' << smallDist << endl; + + if (smallRow < smallCol) { dMatrix[smallCol][smallRow] = cutoff; } + else { dMatrix[smallRow][smallCol] = cutoff; } + + return smallDist; + + } + catch(exception& e) { + m->errorOut(e, "ClusterClassic", "getSmallCell"); + exit(1); + } +} +/***********************************************************************/ +void ClusterClassic::clusterBins(){ + try { + // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol); + + rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol)); + rabund->set(smallRow, 0); + rabund->setLabel(toString(smallDist)); + + // cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl; + } + catch(exception& e) { + m->errorOut(e, "ClusterClassic", "clusterBins"); + exit(1); + } +} +/***********************************************************************/ +void ClusterClassic::clusterNames(){ + try { + // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(smallRow) << '\t' << list->get(smallCol); + if (mapWanted) { updateMap(); } + + list->set(smallCol, list->get(smallRow)+','+list->get(smallCol)); + list->set(smallRow, ""); + list->setLabel(toString(smallDist)); + + // cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl; + } + catch(exception& e) { + m->errorOut(e, "ClusterClassic", "clusterNames"); + exit(1); + } +} +/***********************************************************************/ +void ClusterClassic::update(double& cutOFF){ + try { +//cout << "before update " << endl; +//print(); + getSmallCell(); + + int r, c; + //because we only store lt, we need to make sure we grab the right location + if (smallRow < smallCol) { c = smallRow; r = smallCol; } //smallRow is really our column value + else { r = smallRow; c = smallCol; } //smallRow is the row value + + //reset rows smallest distance + rowSmallDists[r].dist = cutoff; rowSmallDists[r].row = 0; rowSmallDists[r].col = 0; + rowSmallDists[c].dist = cutoff; rowSmallDists[c].row = 0; rowSmallDists[c].col = 0; + + //if your rows smallest distance is from smallRow or smallCol, reset + for(int i=0;i r) { distRow = dMatrix[i][r]; } + else { distRow = dMatrix[r][i]; } + + if (i > c) { distCol = dMatrix[i][c]; dMatrix[i][c] = cutoff; } //like removeCell + else { distCol = dMatrix[c][i]; dMatrix[c][i] = cutoff; } + + if(method == "furthest"){ + newDist = max(distRow, distCol); + } + else if (method == "average"){ + if ((distRow == cutoff) && (distCol == cutoff)) { //you are merging with a value above cutoff + newDist = cutoff; //eliminate value + }else if ((distRow == cutoff) && (distCol != cutoff)) { //you are merging with a value above cutoff + newDist = cutoff; //eliminate value + if (cutOFF > distCol) { cutOFF = distCol; } + }else if ((distRow != cutoff) && (distCol == cutoff)) { //you are merging with a value above cutoff + newDist = cutoff; //eliminate value + if (cutOFF > distRow) { cutOFF = distRow; } + }else { + int rowBin = rabund->get(r); + int colBin = rabund->get(c); + newDist = (colBin * distCol + rowBin * distRow) / (rowBin + colBin); + } + } + else if (method == "weighted"){ + if ((distRow == cutoff) && (distCol == cutoff)) { //you are merging with a value above cutoff + newDist = cutoff; //eliminate value + }else if ((distRow == cutoff) && (distCol != cutoff)) { //you are merging with a value above cutoff + newDist = cutoff; //eliminate value + if (cutOFF > distCol) { cutOFF = distCol; } + }else if ((distRow != cutoff) && (distCol == cutoff)) { //you are merging with a value above cutoff + newDist = cutoff; //eliminate value + if (cutOFF > distRow) { cutOFF = distRow; } + }else { + newDist = (distCol + distRow) / 2.0; + } + } + else if (method == "nearest"){ + newDist = min(distRow, distCol); + } + + if (i > r) { dMatrix[i][r] = newDist; } + else { dMatrix[r][i] = newDist; } + + if (newDist < rowSmallDists[i].dist) { rowSmallDists[i].dist = newDist; rowSmallDists[i].col = r; rowSmallDists[i].row = i; } + } + //cout << "rowsmall = " << i << '\t' << rowSmallDists[i].dist << endl; + } + + clusterBins(); + clusterNames(); + + //find new small for 2 rows we just merged + colDist temp(0,0,100.0); + rowSmallDists[r] = temp; + + for (int i = 0; i < dMatrix[r].size(); i++) { + if (dMatrix[r][i] < rowSmallDists[r].dist) { rowSmallDists[r].dist = dMatrix[r][i]; rowSmallDists[r].col = r; rowSmallDists[r].row = i; } + } + for (int i = dMatrix[r].size()+1; i < dMatrix.size(); i++) { + if (dMatrix[i][dMatrix[r].size()] < rowSmallDists[r].dist) { rowSmallDists[r].dist = dMatrix[i][dMatrix[r].size()]; rowSmallDists[r].col = r; rowSmallDists[r].row = i; } + } + + rowSmallDists[c] = temp; + for (int i = 0; i < dMatrix[c].size(); i++) { + if (dMatrix[c][i] < rowSmallDists[c].dist) { rowSmallDists[c].dist = dMatrix[c][i]; rowSmallDists[c].col = c; rowSmallDists[c].row = i; } + } + for (int i = dMatrix[c].size()+1; i < dMatrix.size(); i++) { + if (dMatrix[i][dMatrix[c].size()] < rowSmallDists[c].dist) { rowSmallDists[c].dist = dMatrix[i][dMatrix[c].size()]; rowSmallDists[c].col = c; rowSmallDists[c].row = i; } + } + + //cout << "after update " << endl; + //print(); + } + catch(exception& e) { + m->errorOut(e, "ClusterClassic", "update"); + exit(1); + } +} +/***********************************************************************/ +void ClusterClassic::setMapWanted(bool f) { + try { + mapWanted = f; + + //initialize map + for (int i = 0; i < list->getNumBins(); i++) { + + //parse bin + string names = list->get(i); + while (names.find_first_of(',') != -1) { + //get name from bin + string name = names.substr(0,names.find_first_of(',')); + //save name and bin number + seq2Bin[name] = i; + names = names.substr(names.find_first_of(',')+1, names.length()); + } + + //get last name + seq2Bin[names] = i; + } + + } + catch(exception& e) { + m->errorOut(e, "ClusterClassic", "setMapWanted"); + exit(1); + } +} +/***********************************************************************/ +void ClusterClassic::updateMap() { +try { + //update location of seqs in smallRow since they move to smallCol now + string names = list->get(smallRow); + while (names.find_first_of(',') != -1) { + //get name from bin + string name = names.substr(0,names.find_first_of(',')); + //save name and bin number + seq2Bin[name] = smallCol; + names = names.substr(names.find_first_of(',')+1, names.length()); + } + + //get last name + seq2Bin[names] = smallCol; + + } + catch(exception& e) { + m->errorOut(e, "ClusterClassic", "updateMap"); + exit(1); + } +} +/***********************************************************************/ +void ClusterClassic::print() { +try { + //update location of seqs in smallRow since they move to smallCol now + for (int i = 0; i < dMatrix.size(); i++) { + cout << "row = " << i << '\t'; + for (int j = 0; j < dMatrix[i].size(); j++) { + cout << dMatrix[i][j] << '\t'; + } + cout << endl; + } + } + catch(exception& e) { + m->errorOut(e, "ClusterClassic", "updateMap"); + exit(1); + } +} +/***********************************************************************/ + diff --git a/clusterclassic.h b/clusterclassic.h new file mode 100644 index 0000000..37d9b1b --- /dev/null +++ b/clusterclassic.h @@ -0,0 +1,68 @@ +#ifndef CLUSTER_H +#define CLUSTER_H + + +#include "mothur.h" +#include "mothurout.h" +#include "listvector.hpp" +#include "globaldata.hpp" +#include "rabundvector.hpp" + +/* + * clusterclassic.h + * Mothur + * + * Created by westcott on 10/29/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + + +class ClusterClassic { + +public: + ClusterClassic(float, string); + int readPhylipFile(string, NameAssignment*); + void update(double&); + double getSmallDist() { return smallDist; } + int getNSeqs() { return nseqs; } + ListVector* getListVector() { return list; } + RAbundVector* getRAbundVector() { return rabund; } + string getTag(); + void setMapWanted(bool m); + map getSeqtoBin() { return seq2Bin; } + +private: + double getSmallCell(); + void clusterBins(); + void clusterNames(); + void updateMap(); + void print(); + + struct colDist { + int col; + int row; + double dist; + colDist(int i, int r, double d) : row(r), col(i), dist(d) {} + }; + + RAbundVector* rabund; + ListVector* list; + vector< vector > dMatrix; + vector rowSmallDists; + + int smallRow; + int smallCol, nseqs; + double smallDist; + bool mapWanted; + float cutoff; + map seq2Bin; + string method; + + MothurOut* m; + GlobalData* globaldata; +}; + +#endif + + diff --git a/clusterdoturcommand.cpp b/clusterdoturcommand.cpp new file mode 100644 index 0000000..1c0c78d --- /dev/null +++ b/clusterdoturcommand.cpp @@ -0,0 +1,310 @@ +/* + * clusterdoturcommand.cpp + * Mothur + * + * Created by westcott on 10/27/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "clusterdoturcommand.h" +#include "clusterclassic.h" + +//********************************************************************************************************************** +vector ClusterDoturCommand::getValidParameters(){ + try { + string AlignArray[] = {"phylip","cutoff","precision","method","outputdir","inputdir"}; + vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "ClusterDoturCommand", "getValidParameters"); + exit(1); + } +} +//********************************************************************************************************************** +ClusterDoturCommand::ClusterDoturCommand(){ + try { + abort = true; + //initialize outputTypes + vector tempOutNames; + outputTypes["list"] = tempOutNames; + outputTypes["rabund"] = tempOutNames; + outputTypes["sabund"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "ClusterDoturCommand", "ClusterCommand"); + exit(1); + } +} +//********************************************************************************************************************** +vector ClusterDoturCommand::getRequiredParameters(){ + try { + string Array[] = {"phylip"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "ClusterDoturCommand", "getRequiredParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector ClusterDoturCommand::getRequiredFiles(){ + try { + vector myArray; + return myArray; + } + catch(exception& e) { + m->errorOut(e, "ClusterDoturCommand", "getRequiredFiles"); + exit(1); + } +} +//********************************************************************************************************************** +//This function checks to make sure the cluster command has no errors and then clusters based on the method chosen. +ClusterDoturCommand::ClusterDoturCommand(string option) { + try{ + + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"phylip","cutoff","precision","method","outputdir","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + + //check to make sure all parameters are valid for command + map::iterator it; + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { + abort = true; + } + } + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("phylip"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["phylip"] = inputDir + it->second; } + } + + it = parameters.find("name"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["name"] = inputDir + it->second; } + } + + } + + //initialize outputTypes + vector tempOutNames; + outputTypes["list"] = tempOutNames; + outputTypes["rabund"] = tempOutNames; + outputTypes["sabund"] = tempOutNames; + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + + //check for required parameters + phylipfile = validParameter.validFile(parameters, "phylip", true); + if (phylipfile == "not open") { abort = true; } + else if (phylipfile == "not found") { phylipfile = ""; m->mothurOut("When executing the cluster.dotur command you must enter a phylip file."); m->mothurOutEndLine(); abort = true; } + + + //check for optional parameter and set defaults + namefile = validParameter.validFile(parameters, "name", true); + if (namefile == "not open") { abort = true; } + else if (namefile == "not found") { namefile = ""; } + + string temp; + temp = validParameter.validFile(parameters, "precision", false); + if (temp == "not found") { temp = "100"; } + //saves precision legnth for formatting below + length = temp.length(); + convert(temp, precision); + + temp = validParameter.validFile(parameters, "cutoff", false); + if (temp == "not found") { temp = "10"; } + convert(temp, cutoff); + cutoff += (5 / (precision * 10.0)); + + temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "F"; } + hard = m->isTrue(temp); + + method = validParameter.validFile(parameters, "method", false); + if (method == "not found") { method = "furthest"; } + + if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "weighted")) { + if (method == "furthest") { tag = "fn"; } + else if (method == "nearest") { tag = "nn"; } + else if (method == "average") { tag = "an"; } + else if (method == "weighted") { tag = "wn"; } + }else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest, average, weighted."); m->mothurOutEndLine(); abort = true; } + } + } + catch(exception& e) { + m->errorOut(e, "ClusterDoturCommand", "ClusterCommand"); + exit(1); + } +} + +//********************************************************************************************************************** + +void ClusterDoturCommand::help(){ + try { + m->mothurOut("The cluster.classic command clusters using the algorithm from dotur. \n"); + m->mothurOut("The cluster.classic command parameter options are phylip, name, method, cuttoff, hard, precision. Phylip is required.\n"); + m->mothurOut("The cluster.classic command should be in the following format: \n"); + m->mothurOut("cluster.classic(phylip=yourDistanceMatrix, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n"); + m->mothurOut("The acceptable cluster methods are furthest, nearest, weighted and average. If no method is provided then furthest is assumed.\n\n"); + + } + catch(exception& e) { + m->errorOut(e, "ClusterDoturCommand", "help"); + exit(1); + } +} + +//********************************************************************************************************************** + +ClusterDoturCommand::~ClusterDoturCommand(){} + +//********************************************************************************************************************** + +int ClusterDoturCommand::execute(){ + try { + + if (abort == true) { return 0; } + + if(namefile != ""){ + nameMap = new NameAssignment(namefile); + nameMap->readMap(); + }else{ + nameMap = NULL; + } + + //reads phylip file storing data in 2D vector, also fills list and rabund + ClusterClassic* cluster = new ClusterClassic(cutoff, method); + cluster->readPhylipFile(phylipfile, nameMap); + + if (m->control_pressed) { delete cluster; delete list; delete rabund; return 0; } + + list = cluster->getListVector(); + rabund = cluster->getRAbundVector(); + + if (outputDir == "") { outputDir += m->hasPath(phylipfile); } + fileroot = outputDir + m->getRootName(m->getSimpleName(phylipfile)); + + m->openOutputFile(fileroot+ tag + ".sabund", sabundFile); + m->openOutputFile(fileroot+ tag + ".rabund", rabundFile); + m->openOutputFile(fileroot+ tag + ".list", listFile); + + outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund"); + outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund"); + outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list"); + + float previousDist = 0.00000; + float rndPreviousDist = 0.00000; + oldRAbund = *rabund; + oldList = *list; + + double saveCutoff = cutoff; + + int estart = time(NULL); + + while (cluster->getSmallDist() < cutoff && cluster->getNSeqs() > 0){ + + if (m->control_pressed) { delete cluster; delete list; delete rabund; sabundFile.close();rabundFile.close();listFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); return 0; } + + cluster->update(cutoff); + + float dist = cluster->getSmallDist(); + float rndDist; + if (hard) { + rndDist = m->ceilDist(dist, precision); + }else{ + rndDist = m->roundDist(dist, precision); + } + + if(previousDist <= 0.0000 && dist != previousDist){ + printData("unique"); + } + else if(rndDist != rndPreviousDist){ + printData(toString(rndPreviousDist, length-1)); + } + + previousDist = dist; + rndPreviousDist = rndDist; + oldRAbund = *rabund; + oldList = *list; + } + + if(previousDist <= 0.0000){ + printData("unique"); + } + else if(rndPreviousDistceilDist(saveCutoff, precision); } + else { saveCutoff = m->roundDist(saveCutoff, precision); } + m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); + } + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); + + m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "ClusterDoturCommand", "execute"); + exit(1); + } +} + +//********************************************************************************************************************** + +void ClusterDoturCommand::printData(string label){ + try { + + oldRAbund.setLabel(label); + oldRAbund.print(rabundFile); + oldRAbund.getSAbundVector().print(sabundFile); + + oldRAbund.getSAbundVector().print(cout); + + oldList.setLabel(label); + oldList.print(listFile); + } + catch(exception& e) { + m->errorOut(e, "ClusterDoturCommand", "printData"); + exit(1); + } +} +//********************************************************************************************************************** diff --git a/clusterdoturcommand.h b/clusterdoturcommand.h new file mode 100644 index 0000000..da2f4a7 --- /dev/null +++ b/clusterdoturcommand.h @@ -0,0 +1,52 @@ +#ifndef CLUSTERDOTURCOMMAND_H +#define CLUSTERDOTURCOMMAND_H + +/* + * clusterdoturcommand.h + * Mothur + * + * Created by westcott on 10/27/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "command.hpp" +#include "nameassignment.hpp" +#include "globaldata.hpp" +#include "rabundvector.hpp" +#include "sabundvector.hpp" +#include "listvector.hpp" + + +class ClusterDoturCommand : public Command { + +public: + ClusterDoturCommand(string); + ClusterDoturCommand(); + ~ClusterDoturCommand(); + vector getRequiredParameters(); + vector getValidParameters(); + vector getRequiredFiles(); + map > getOutputFiles() { return outputTypes; } + int execute(); + void help(); + +private: + bool abort, hard; + string method, fileroot, tag, outputDir, phylipfile, namefile; + double cutoff; + int precision, length; + ofstream sabundFile, rabundFile, listFile; + NameAssignment* nameMap; + ListVector* list; + RAbundVector* rabund; + RAbundVector oldRAbund; + ListVector oldList; + + void printData(string label); + vector outputNames; + map > outputTypes; +}; + +#endif + diff --git a/commandfactory.cpp b/commandfactory.cpp index 224ad21..927f374 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -95,6 +95,7 @@ #include "pipelinepdscommand.h" #include "deuniqueseqscommand.h" #include "pairwiseseqscommand.h" +#include "clusterdoturcommand.h" /*******************************************************/ @@ -195,6 +196,7 @@ CommandFactory::CommandFactory(){ commands["remove.lineage"] = "remove.lineage"; commands["fastq.info"] = "fastq.info"; commands["deunique.seqs"] = "deunique.seqs"; + commands["cluster.classic"] = "cluster.classic"; commands["pairwise.seqs"] = "MPIEnabled"; commands["pipeline.pds"] = "MPIEnabled"; commands["classify.seqs"] = "MPIEnabled"; @@ -341,6 +343,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "pipeline.pds") { command = new PipelineCommand(optionString); } else if(commandName == "deunique.seqs") { command = new DeUniqueSeqsCommand(optionString); } else if(commandName == "pairwise.seqs") { command = new PairwiseSeqsCommand(optionString); } + else if(commandName == "cluster.classic") { command = new ClusterDoturCommand(optionString); } else { command = new NoCommand(optionString); } return command; @@ -454,6 +457,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "fastq.info") { pipecommand = new ParseFastaQCommand(optionString); } else if(commandName == "deunique.seqs") { pipecommand = new DeUniqueSeqsCommand(optionString); } else if(commandName == "pairwise.seqs") { pipecommand = new PairwiseSeqsCommand(optionString); } + else if(commandName == "cluster.classic") { pipecommand = new ClusterDoturCommand(optionString); } else { pipecommand = new NoCommand(optionString); } return pipecommand; @@ -555,6 +559,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "fastq.info") { shellcommand = new ParseFastaQCommand(); } else if(commandName == "deunique.seqs") { shellcommand = new DeUniqueSeqsCommand(); } else if(commandName == "pairwise.seqs") { shellcommand = new PairwiseSeqsCommand(); } + else if(commandName == "cluster.classic") { shellcommand = new ClusterDoturCommand(); } else { shellcommand = new NoCommand(); } return shellcommand; diff --git a/mothur b/mothur index ff0d809..dfb46e6 100755 Binary files a/mothur and b/mothur differ diff --git a/mothurout.cpp b/mothurout.cpp index cd9e321..de6da8c 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -1563,6 +1563,7 @@ bool MothurOut::checkReleaseVersion(ifstream& file, string version) { exit(1); } } + /**************************************************************************************************/ diff --git a/mothurout.h b/mothurout.h index 9921a40..efe1294 100644 --- a/mothurout.h +++ b/mothurout.h @@ -12,6 +12,7 @@ #include "mothur.h" + /***********************************************/ class MothurOut { @@ -88,7 +89,6 @@ class MothurOut { int control_pressed; bool executing; - private: static MothurOut* _uniqueInstance; MothurOut( const MothurOut& ); // Disable copy constructor diff --git a/subsamplecommand.cpp b/subsamplecommand.cpp new file mode 100644 index 0000000..04f6c7c --- /dev/null +++ b/subsamplecommand.cpp @@ -0,0 +1,359 @@ +/* + * subsamplecommand.cpp + * Mothur + * + * Created by westcott on 10/27/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "subsamplecommand.h" + +//********************************************************************************************************************** +vector SubSampleCommand::getValidParameters(){ + try { + string Array[] = {"groups","label","outputdir","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "SubSampleCommand", "getValidParameters"); + exit(1); + } +} +//********************************************************************************************************************** +SubSampleCommand::SubSampleCommand(){ + try { + abort = true; + //initialize outputTypes + vector tempOutNames; + outputTypes["shared"] = tempOutNames; + outputTypes["list"] = tempOutNames; + outputTypes["rabund"] = tempOutNames; + outputTypes["sabund"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "SubSampleCommand", "GetRelAbundCommand"); + exit(1); + } +} +//********************************************************************************************************************** +vector SubSampleCommand::getRequiredParameters(){ + try { + vector myArray; + return myArray; + } + catch(exception& e) { + m->errorOut(e, "SubSampleCommand", "getRequiredParameters"); + exit(1); + } +} +//********************************************************************************************************************** +vector SubSampleCommand::getRequiredFiles(){ + try { + string Array[] = {"shared","list","rabund","sabund","or"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "SubSampleCommand", "getRequiredFiles"); + exit(1); + } +} +//********************************************************************************************************************** +SubSampleCommand::SubSampleCommand(string option) { + try { + globaldata = GlobalData::getInstance(); + abort = false; + allLines = 1; + labels.clear(); + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string AlignArray[] = {"groups","label","outputdir","inputdir"}; + vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + + //check to make sure all parameters are valid for command + for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //initialize outputTypes + vector tempOutNames; + outputTypes["shared"] = tempOutNames; + outputTypes["list"] = tempOutNames; + outputTypes["rabund"] = tempOutNames; + outputTypes["sabund"] = tempOutNames; + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ + outputDir = ""; + outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it + } + + //make sure the user has already run the read.otu command + if ((globaldata->getSharedFile() == "") && (globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { m->mothurOut("You must read a list, sabund, rabund or shared file before you can use the sub.sample command."); m->mothurOutEndLine(); abort = true; } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + label = validParameter.validFile(parameters, "label", false); + if (label == "not found") { label = ""; } + else { + if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } + else { allLines = 1; } + } + + //if the user has not specified any labels use the ones from read.otu + if (label == "") { + allLines = globaldata->allLines; + labels = globaldata->labels; + } + + groups = validParameter.validFile(parameters, "groups", false); + if (groups == "not found") { groups = ""; pickedGroups = false; } + else { + pickedGroups = true; + m->splitAtDash(groups, Groups); + globaldata->Groups = Groups; + } + + } + + } + catch(exception& e) { + m->errorOut(e, "SubSampleCommand", "SubSampleCommand"); + exit(1); + } +} + +//********************************************************************************************************************** + +void SubSampleCommand::help(){ + try { + m->mothurOut("The get.relabund command can only be executed after a successful read.otu command of a list and group or shared file.\n"); + m->mothurOut("The get.relabund command parameters are groups, scale and label. No parameters are required.\n"); + m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n"); + m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n"); + m->mothurOut("The scale parameter allows you to select what scale you would like to use. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n"); + m->mothurOut("The get.relabund command should be in the following format: get.relabund(groups=yourGroups, label=yourLabels).\n"); + m->mothurOut("Example get.relabund(groups=A-B-C, scale=averagegroup).\n"); + m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n"); + m->mothurOut("The get.relabund command outputs a .relabund file.\n"); + m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n"); + + } + catch(exception& e) { + m->errorOut(e, "SubSampleCommand", "help"); + exit(1); + } +} + +//********************************************************************************************************************** + +SubSampleCommand::~SubSampleCommand(){ +} + +//********************************************************************************************************************** + +int SubSampleCommand::execute(){ + try { + + if (abort == true) { return 0; } + + string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "subsample" + m->getExtension(globaldata->inputFileName); + ofstream out; + m->openOutputFile(outputFileName, out); + out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); + + string format = globaldata->getFormat(); + + read = new ReadOTUFile(globaldata->inputFileName); + read->read(&*globaldata); + input = globaldata->ginput; + + if (format == "sharedfile") { + lookup = input->getSharedRAbundVectors(); + outputTypes["shared"].push_back(outputFileName); + getSubSampleShared(lookup, out); + }else if (format == "list") { + list = globaldata->glist; + outputTypes["list"].push_back(outputFileName); + //getSubSamplesList(); + }else if (format == "rabund") { + rabund = globaldata->rabund; + outputTypes["rabund"].push_back(outputFileName); + //getSubSamplesRabund(); + + }else if (format == "sabund") { + sabund = globaldata->sabund; + outputTypes["sabund"].push_back(outputFileName); + //getSubSamplesSabund(); + } + + out.close(); + + //reset groups parameter + delete input; globaldata->ginput = NULL; + delete read; + + if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;} + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); + m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SubSampleCommand", "execute"); + exit(1); + } +} +//********************************************************************************************************************** +int SubSampleCommand::getSubSampleShared(vector& thislookup, ofstream& filename) { + try { + + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. + set processedLabels; + set userLabels = labels; + + string lastLabel = lookup[0]->getLabel(); + + //as long as you are not at the end of the file or done wih the lines you want + while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { + if (m->control_pressed) { return 0; } + + if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + //process lookup + + + + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + } + + if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { + string saveLabel = lookup[0]->getLabel(); + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + + lookup = input->getSharedRAbundVectors(lastLabel); + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + //process lookup + processedLabels.insert(lookup[0]->getLabel()); + userLabels.erase(lookup[0]->getLabel()); + + //restore real lastlabel to save below + lookup[0]->setLabel(saveLabel); + } + + lastLabel = lookup[0]->getLabel(); + //prevent memory leak + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } + + //get next line to process + lookup = input->getSharedRAbundVectors(); + } + + + if (m->control_pressed) { return 0; } + + //output error messages about any remaining user labels + set::iterator it; + bool needToRun = false; + for (it = userLabels.begin(); it != userLabels.end(); it++) { + m->mothurOut("Your file does not include the label " + *it); + if (processedLabels.count(lastLabel) != 1) { + m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); + needToRun = true; + }else { + m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); + } + } + + //run last label if you need to + if (needToRun == true) { + for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } + lookup = input->getSharedRAbundVectors(lastLabel); + + m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); + + //process lookup + + + + for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } + } + + //reset groups parameter + globaldata->Groups.clear(); + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "SubSampleCommand", "getSubSampleShared"); + exit(1); + } +} + + +//********************************************************************************************************************** +int SubSampleCommand::eliminateZeroOTUS(vector& thislookup) { + try { + + vector newLookup; + for (int i = 0; i < thislookup.size(); i++) { + SharedRAbundVector* temp = new SharedRAbundVector(); + temp->setLabel(thislookup[i]->getLabel()); + temp->setGroup(thislookup[i]->getGroup()); + newLookup.push_back(temp); + } + + //for each bin + for (int i = 0; i < thislookup[0]->getNumBins(); i++) { + if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } + + //look at each sharedRabund and make sure they are not all zero + bool allZero = true; + for (int j = 0; j < thislookup.size(); j++) { + if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; } + } + + //if they are not all zero add this bin + if (!allZero) { + for (int j = 0; j < thislookup.size(); j++) { + newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup()); + } + } + } + + for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; } + + thislookup = newLookup; + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "SubSampleCommand", "eliminateZeroOTUS"); + exit(1); + } +} + +//********************************************************************************************************************** + + diff --git a/subsamplecommand.h b/subsamplecommand.h new file mode 100644 index 0000000..98085ee --- /dev/null +++ b/subsamplecommand.h @@ -0,0 +1,53 @@ +#ifndef SUBSAMPLECOMMAND_H +#define SUBSAMPLECOMMAND_H + +/* + * subsamplecommand.h + * Mothur + * + * Created by westcott on 10/27/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "command.hpp" +#include "inputdata.h" +#include "readotu.h" +#include "sharedrabundvector.h" + +class GlobalData; + +class SubSampleCommand : public Command { + +public: + SubSampleCommand(string); + SubSampleCommand(); + ~SubSampleCommand(); + vector getRequiredParameters(); + vector getValidParameters(); + vector getRequiredFiles(); + map > getOutputFiles() { return outputTypes; } + int execute(); + void help(); + +private: + GlobalData* globaldata; + ReadOTUFile* read; + InputData* input; + vector lookup; + ListVector* list; + RAbundVector* rabund; + SAbundVector* sabund; + + bool abort, allLines, pickedGroups; + set labels; //holds labels to be used + string groups, label, outputDir; + vector Groups, outputNames; + map > outputTypes; + + int eliminateZeroOTUS(vector&); + int getSubSampleShared(vector&, ofstream&); +}; + +#endif +