A72B3A7C118B4D1B004B9F8D /* phylodiversity.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversity.cpp; sourceTree = "<group>"; };
A72C66A1125B356F0058C2F9 /* pipelinepdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pipelinepdscommand.h; sourceTree = "<group>"; };
A72C66A2125B356F0058C2F9 /* pipelinepdscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pipelinepdscommand.cpp; sourceTree = "<group>"; };
+ A72E295D127ADCE300905B79 /* clusterclassic.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clusterclassic.h; sourceTree = "<group>"; };
+ A72E295E127ADCE300905B79 /* clusterclassic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clusterclassic.cpp; sourceTree = "<group>"; };
A732505E11E49EF100484B90 /* sffinfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sffinfocommand.h; sourceTree = "<group>"; };
A732505F11E49EF100484B90 /* sffinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sffinfocommand.cpp; sourceTree = "<group>"; };
A73953DA11987ED100B0B160 /* chopseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chopseqscommand.h; sourceTree = "<group>"; };
A74A9C03124B72DB000D5D25 /* clusterfragmentscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clusterfragmentscommand.h; sourceTree = "<group>"; };
A74A9C04124B72DB000D5D25 /* clusterfragmentscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clusterfragmentscommand.cpp; sourceTree = "<group>"; };
A7639F8D1175DF35008F5578 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = "<group>"; };
+ A7653CB112789EFD009D6C09 /* subsamplecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = subsamplecommand.h; sourceTree = "<group>"; };
+ A7653CB212789EFD009D6C09 /* subsamplecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = subsamplecommand.cpp; sourceTree = "<group>"; };
+ A7653D111278B09C009D6C09 /* clusterdoturcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clusterdoturcommand.h; sourceTree = "<group>"; };
+ A7653D121278B09C009D6C09 /* clusterdoturcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clusterdoturcommand.cpp; sourceTree = "<group>"; };
A76714DD126DE45A003F359A /* deuniqueseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniqueseqscommand.h; sourceTree = "<group>"; };
A76714DE126DE45A003F359A /* deuniqueseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniqueseqscommand.cpp; sourceTree = "<group>"; };
A768D95D1248FEAA008AB1D0 /* mothur */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.executable"; path = mothur; sourceTree = "<group>"; };
A7DA1FFF113FECD400BF472F /* blastalign.cpp */,
A7DA201F113FECD400BF472F /* cluster.cpp */,
A7DA2020113FECD400BF472F /* cluster.hpp */,
+ A72E295D127ADCE300905B79 /* clusterclassic.h */,
+ A72E295E127ADCE300905B79 /* clusterclassic.cpp */,
A7CB593B11402EF90010EB83 /* calculators */,
A7CB594A11402FB40010EB83 /* chimeras */,
A7CB594511402F6E0010EB83 /* classifers */,
A7D215C911996C6E00F13F13 /* clearcutcommand.cpp */,
A7DA2022113FECD400BF472F /* clustercommand.h */,
A7DA2021113FECD400BF472F /* clustercommand.cpp */,
+ A7653D111278B09C009D6C09 /* clusterdoturcommand.h */,
+ A7653D121278B09C009D6C09 /* clusterdoturcommand.cpp */,
A74A9C03124B72DB000D5D25 /* clusterfragmentscommand.h */,
A74A9C04124B72DB000D5D25 /* clusterfragmentscommand.cpp */,
A71D924311AEB42400D00CBC /* clustersplitcommand.h */,
A71D924411AEB42400D00CBC /* splitabundcommand.cpp */,
A7F139481247C3CB0033324C /* splitgroupscommand.h */,
A7F139491247C3CB0033324C /* splitgroupscommand.cpp */,
+ A7653CB112789EFD009D6C09 /* subsamplecommand.h */,
+ A7653CB212789EFD009D6C09 /* subsamplecommand.cpp */,
A7DA2155113FECD400BF472F /* summarycommand.h */,
A7DA2154113FECD400BF472F /* summarycommand.cpp */,
A7DA2159113FECD400BF472F /* summarysharedcommand.h */,
};
buildConfigurationList = 1DEB919308733D9F0010E9CD /* Build configuration list for PBXProject "Mothur" */;
compatibilityVersion = "Xcode 3.0";
- developmentRegion = English;
hasScannedForEncodings = 1;
knownRegions = (
English,
--- /dev/null
+/*
+ * clusterclassic.cpp
+ * Mothur
+ *
+ * Created by westcott on 10/29/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "clusterclassic.h"
+#include "progress.hpp"
+
+/***********************************************************************/
+ClusterClassic::ClusterClassic(float c, string f) : method(f), smallDist(1e6), nseqs(0) {
+ try {
+ mapWanted = false; //set to true by mgcluster to speed up overlap merge
+
+ //save so you can modify as it changes in average neighbor
+ cutoff = c;
+ m = MothurOut::getInstance();
+ globaldata = GlobalData::getInstance();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterClassic", "ClusterClassic");
+ exit(1);
+ }
+}
+/***********************************************************************/
+int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) {
+ try {
+ double distance;
+ int square;
+ string name;
+ vector<string> matrixNames;
+
+ ifstream fileHandle;
+ m->openInputFile(filename, fileHandle);
+
+ fileHandle >> nseqs >> name;
+
+ matrixNames.push_back(name);
+
+ if(nameMap == NULL){
+ list = new ListVector(nseqs);
+ list->set(0, name);
+ }
+ else{
+ list = new ListVector(nameMap->getListVector());
+ if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
+ }
+
+ //initialize distance matrix to cutoff
+ dMatrix.resize(nseqs);
+ colDist temp(0, 0, cutoff);
+ rowSmallDists.resize(nseqs, temp);
+ for (int i = 1; i < nseqs; i++) {
+ dMatrix[i].resize(i, cutoff);
+ }
+
+
+ char d;
+ while((d=fileHandle.get()) != EOF){
+
+ if(isalnum(d)){
+ square = 1;
+ fileHandle.putback(d);
+ for(int i=0;i<nseqs;i++){
+ fileHandle >> distance;
+ }
+ break;
+ }
+ if(d == '\n'){
+ square = 0;
+ break;
+ }
+ }
+
+ Progress* reading;
+
+ if(square == 0){
+
+ reading = new Progress("Reading matrix: ", nseqs * (nseqs - 1) / 2);
+
+ int index = 0;
+
+ for(int i=1;i<nseqs;i++){
+ if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
+
+ fileHandle >> name;
+ matrixNames.push_back(name);
+
+
+ //there's A LOT of repeated code throughout this method...
+ if(nameMap == NULL){
+ list->set(i, name);
+
+ for(int j=0;j<i;j++){
+
+ if (m->control_pressed) { delete reading; fileHandle.close(); return 0; }
+
+ fileHandle >> distance;
+
+ if (distance == -1) { distance = 1000000; }
+ else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
+
+ if(distance < cutoff){
+ dMatrix[i][j] = distance;
+ if (distance < smallDist) { smallDist = distance; }
+ if (rowSmallDists[i].dist > distance) { rowSmallDists[i].dist = distance; rowSmallDists[i].col = j; rowSmallDists[i].row = i; }
+ if (rowSmallDists[j].dist > distance) { rowSmallDists[j].dist = distance; rowSmallDists[j].col = i; rowSmallDists[j].row = j; }
+ }
+ index++;
+ reading->update(index);
+ }
+
+ }
+ else{
+ if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
+
+ for(int j=0;j<i;j++){
+ fileHandle >> distance;
+
+ if (m->control_pressed) { delete reading; fileHandle.close(); return 0; }
+
+ if (distance == -1) { distance = 1000000; }
+ else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
+
+ if(distance < cutoff){
+ if (distance < smallDist) { smallDist = distance; }
+
+ int row = nameMap->get(matrixNames[i]);
+ int col = nameMap->get(matrixNames[j]);
+
+ if (row < col) { dMatrix[col][row] = distance; }
+ else { dMatrix[row][col] = distance; }
+
+ if (rowSmallDists[row].dist > distance) { rowSmallDists[row].dist = distance; rowSmallDists[row].col = col; rowSmallDists[row].row = row; }
+ if (rowSmallDists[col].dist > distance) { rowSmallDists[col].dist = distance; rowSmallDists[col].col = row; rowSmallDists[col].row = col; }
+ }
+ index++;
+ reading->update(index);
+ }
+ }
+ }
+ }
+ else{
+
+ reading = new Progress("Reading matrix: ", nseqs * nseqs);
+
+ int index = nseqs;
+
+ for(int i=1;i<nseqs;i++){
+ fileHandle >> name;
+ matrixNames.push_back(name);
+
+ if(nameMap == NULL){
+ list->set(i, name);
+ for(int j=0;j<nseqs;j++){
+ fileHandle >> distance;
+
+ if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
+
+ if (distance == -1) { distance = 1000000; }
+ else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
+
+ if(distance < cutoff && j < i){
+ if (distance < smallDist) { smallDist = distance; }
+
+ dMatrix[i][j] = distance;
+ if (rowSmallDists[i].dist > distance) { rowSmallDists[i].dist = distance; rowSmallDists[i].col = j; rowSmallDists[i].row = i; }
+ if (rowSmallDists[j].dist > distance) { rowSmallDists[j].dist = distance; rowSmallDists[j].col = i; rowSmallDists[j].row = j; }
+ }
+ index++;
+ reading->update(index);
+ }
+
+ }
+ else{
+ if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
+
+ for(int j=0;j<nseqs;j++){
+ fileHandle >> distance;
+
+ if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
+
+ if (distance == -1) { distance = 1000000; }
+ else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
+
+ if(distance < cutoff && j < i){
+ if (distance < smallDist) { smallDist = distance; }
+
+ int row = nameMap->get(matrixNames[i]);
+ int col = nameMap->get(matrixNames[j]);
+
+ if (row < col) { dMatrix[col][row] = distance; }
+ else { dMatrix[row][col] = distance; }
+
+ if (rowSmallDists[row].dist > distance) { rowSmallDists[row].dist = distance; rowSmallDists[row].col = col; rowSmallDists[row].row = row; }
+ if (rowSmallDists[col].dist > distance) { rowSmallDists[col].dist = distance; rowSmallDists[col].col = row; rowSmallDists[col].row = col; }
+ }
+ index++;
+ reading->update(index);
+ }
+ }
+ }
+ }
+
+ if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
+
+ reading->finish();
+ delete reading;
+
+ list->setLabel("0");
+ rabund = new RAbundVector(list->getRAbundVector());
+
+ fileHandle.close();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterClassic", "readPhylipFile");
+ exit(1);
+ }
+
+}
+/***********************************************************************/
+//sets smallCol and smallRow, returns distance
+double ClusterClassic::getSmallCell() {
+ try {
+
+ smallDist = cutoff;
+ smallRow = 1;
+ smallCol = 0;
+
+ vector<colDist> mins;
+
+ for(int i=0;i<nseqs;i++){
+
+ if (rowSmallDists[i].dist < smallDist) {
+ mins.clear();
+ mins.push_back(rowSmallDists[i]);
+ smallDist = rowSmallDists[i].dist;
+ }else if (rowSmallDists[i].dist == smallDist) {
+ mins.push_back(rowSmallDists[i]);
+ }
+ }
+
+ if (mins.size() > 0) {
+ int zrand = 0;
+ if (mins.size() > 1) {
+ //pick random number between 0 and mins.size()
+ zrand = (int)((float)(rand()) / (RAND_MAX / (mins.size()-1) + 1));
+ }
+
+ smallRow = mins[zrand].row;
+ smallCol = mins[zrand].col;
+
+ }
+ //cout << smallRow << '\t' << smallCol << '\t' << smallDist << endl;
+
+ if (smallRow < smallCol) { dMatrix[smallCol][smallRow] = cutoff; }
+ else { dMatrix[smallRow][smallCol] = cutoff; }
+
+ return smallDist;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterClassic", "getSmallCell");
+ exit(1);
+ }
+}
+/***********************************************************************/
+void ClusterClassic::clusterBins(){
+ try {
+ // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol);
+
+ rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));
+ rabund->set(smallRow, 0);
+ rabund->setLabel(toString(smallDist));
+
+ // cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterClassic", "clusterBins");
+ exit(1);
+ }
+}
+/***********************************************************************/
+void ClusterClassic::clusterNames(){
+ try {
+ // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(smallRow) << '\t' << list->get(smallCol);
+ if (mapWanted) { updateMap(); }
+
+ list->set(smallCol, list->get(smallRow)+','+list->get(smallCol));
+ list->set(smallRow, "");
+ list->setLabel(toString(smallDist));
+
+ // cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterClassic", "clusterNames");
+ exit(1);
+ }
+}
+/***********************************************************************/
+void ClusterClassic::update(double& cutOFF){
+ try {
+//cout << "before update " << endl;
+//print();
+ getSmallCell();
+
+ int r, c;
+ //because we only store lt, we need to make sure we grab the right location
+ if (smallRow < smallCol) { c = smallRow; r = smallCol; } //smallRow is really our column value
+ else { r = smallRow; c = smallCol; } //smallRow is the row value
+
+ //reset rows smallest distance
+ rowSmallDists[r].dist = cutoff; rowSmallDists[r].row = 0; rowSmallDists[r].col = 0;
+ rowSmallDists[c].dist = cutoff; rowSmallDists[c].row = 0; rowSmallDists[c].col = 0;
+
+ //if your rows smallest distance is from smallRow or smallCol, reset
+ for(int i=0;i<nseqs;i++){
+ if ((rowSmallDists[i].row == r) || (rowSmallDists[i].row == c) || (rowSmallDists[i].col == r) || (rowSmallDists[i].col == c)) {
+ rowSmallDists[i].dist = cutoff; rowSmallDists[i].row = 0; rowSmallDists[i].col = 0;
+ }
+ }
+
+ for(int i=0;i<nseqs;i++){
+ if(i != r && i != c){
+ double distRow, distCol, newDist;
+ if (i > r) { distRow = dMatrix[i][r]; }
+ else { distRow = dMatrix[r][i]; }
+
+ if (i > c) { distCol = dMatrix[i][c]; dMatrix[i][c] = cutoff; } //like removeCell
+ else { distCol = dMatrix[c][i]; dMatrix[c][i] = cutoff; }
+
+ if(method == "furthest"){
+ newDist = max(distRow, distCol);
+ }
+ else if (method == "average"){
+ if ((distRow == cutoff) && (distCol == cutoff)) { //you are merging with a value above cutoff
+ newDist = cutoff; //eliminate value
+ }else if ((distRow == cutoff) && (distCol != cutoff)) { //you are merging with a value above cutoff
+ newDist = cutoff; //eliminate value
+ if (cutOFF > distCol) { cutOFF = distCol; }
+ }else if ((distRow != cutoff) && (distCol == cutoff)) { //you are merging with a value above cutoff
+ newDist = cutoff; //eliminate value
+ if (cutOFF > distRow) { cutOFF = distRow; }
+ }else {
+ int rowBin = rabund->get(r);
+ int colBin = rabund->get(c);
+ newDist = (colBin * distCol + rowBin * distRow) / (rowBin + colBin);
+ }
+ }
+ else if (method == "weighted"){
+ if ((distRow == cutoff) && (distCol == cutoff)) { //you are merging with a value above cutoff
+ newDist = cutoff; //eliminate value
+ }else if ((distRow == cutoff) && (distCol != cutoff)) { //you are merging with a value above cutoff
+ newDist = cutoff; //eliminate value
+ if (cutOFF > distCol) { cutOFF = distCol; }
+ }else if ((distRow != cutoff) && (distCol == cutoff)) { //you are merging with a value above cutoff
+ newDist = cutoff; //eliminate value
+ if (cutOFF > distRow) { cutOFF = distRow; }
+ }else {
+ newDist = (distCol + distRow) / 2.0;
+ }
+ }
+ else if (method == "nearest"){
+ newDist = min(distRow, distCol);
+ }
+
+ if (i > r) { dMatrix[i][r] = newDist; }
+ else { dMatrix[r][i] = newDist; }
+
+ if (newDist < rowSmallDists[i].dist) { rowSmallDists[i].dist = newDist; rowSmallDists[i].col = r; rowSmallDists[i].row = i; }
+ }
+ //cout << "rowsmall = " << i << '\t' << rowSmallDists[i].dist << endl;
+ }
+
+ clusterBins();
+ clusterNames();
+
+ //find new small for 2 rows we just merged
+ colDist temp(0,0,100.0);
+ rowSmallDists[r] = temp;
+
+ for (int i = 0; i < dMatrix[r].size(); i++) {
+ if (dMatrix[r][i] < rowSmallDists[r].dist) { rowSmallDists[r].dist = dMatrix[r][i]; rowSmallDists[r].col = r; rowSmallDists[r].row = i; }
+ }
+ for (int i = dMatrix[r].size()+1; i < dMatrix.size(); i++) {
+ if (dMatrix[i][dMatrix[r].size()] < rowSmallDists[r].dist) { rowSmallDists[r].dist = dMatrix[i][dMatrix[r].size()]; rowSmallDists[r].col = r; rowSmallDists[r].row = i; }
+ }
+
+ rowSmallDists[c] = temp;
+ for (int i = 0; i < dMatrix[c].size(); i++) {
+ if (dMatrix[c][i] < rowSmallDists[c].dist) { rowSmallDists[c].dist = dMatrix[c][i]; rowSmallDists[c].col = c; rowSmallDists[c].row = i; }
+ }
+ for (int i = dMatrix[c].size()+1; i < dMatrix.size(); i++) {
+ if (dMatrix[i][dMatrix[c].size()] < rowSmallDists[c].dist) { rowSmallDists[c].dist = dMatrix[i][dMatrix[c].size()]; rowSmallDists[c].col = c; rowSmallDists[c].row = i; }
+ }
+
+ //cout << "after update " << endl;
+ //print();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterClassic", "update");
+ exit(1);
+ }
+}
+/***********************************************************************/
+void ClusterClassic::setMapWanted(bool f) {
+ try {
+ mapWanted = f;
+
+ //initialize map
+ for (int i = 0; i < list->getNumBins(); i++) {
+
+ //parse bin
+ string names = list->get(i);
+ while (names.find_first_of(',') != -1) {
+ //get name from bin
+ string name = names.substr(0,names.find_first_of(','));
+ //save name and bin number
+ seq2Bin[name] = i;
+ names = names.substr(names.find_first_of(',')+1, names.length());
+ }
+
+ //get last name
+ seq2Bin[names] = i;
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterClassic", "setMapWanted");
+ exit(1);
+ }
+}
+/***********************************************************************/
+void ClusterClassic::updateMap() {
+try {
+ //update location of seqs in smallRow since they move to smallCol now
+ string names = list->get(smallRow);
+ while (names.find_first_of(',') != -1) {
+ //get name from bin
+ string name = names.substr(0,names.find_first_of(','));
+ //save name and bin number
+ seq2Bin[name] = smallCol;
+ names = names.substr(names.find_first_of(',')+1, names.length());
+ }
+
+ //get last name
+ seq2Bin[names] = smallCol;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterClassic", "updateMap");
+ exit(1);
+ }
+}
+/***********************************************************************/
+void ClusterClassic::print() {
+try {
+ //update location of seqs in smallRow since they move to smallCol now
+ for (int i = 0; i < dMatrix.size(); i++) {
+ cout << "row = " << i << '\t';
+ for (int j = 0; j < dMatrix[i].size(); j++) {
+ cout << dMatrix[i][j] << '\t';
+ }
+ cout << endl;
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterClassic", "updateMap");
+ exit(1);
+ }
+}
+/***********************************************************************/
+
--- /dev/null
+#ifndef CLUSTER_H
+#define CLUSTER_H
+
+
+#include "mothur.h"
+#include "mothurout.h"
+#include "listvector.hpp"
+#include "globaldata.hpp"
+#include "rabundvector.hpp"
+
+/*
+ * clusterclassic.h
+ * Mothur
+ *
+ * Created by westcott on 10/29/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+class ClusterClassic {
+
+public:
+ ClusterClassic(float, string);
+ int readPhylipFile(string, NameAssignment*);
+ void update(double&);
+ double getSmallDist() { return smallDist; }
+ int getNSeqs() { return nseqs; }
+ ListVector* getListVector() { return list; }
+ RAbundVector* getRAbundVector() { return rabund; }
+ string getTag();
+ void setMapWanted(bool m);
+ map<string, int> getSeqtoBin() { return seq2Bin; }
+
+private:
+ double getSmallCell();
+ void clusterBins();
+ void clusterNames();
+ void updateMap();
+ void print();
+
+ struct colDist {
+ int col;
+ int row;
+ double dist;
+ colDist(int i, int r, double d) : row(r), col(i), dist(d) {}
+ };
+
+ RAbundVector* rabund;
+ ListVector* list;
+ vector< vector<double> > dMatrix;
+ vector<colDist> rowSmallDists;
+
+ int smallRow;
+ int smallCol, nseqs;
+ double smallDist;
+ bool mapWanted;
+ float cutoff;
+ map<string, int> seq2Bin;
+ string method;
+
+ MothurOut* m;
+ GlobalData* globaldata;
+};
+
+#endif
+
+
--- /dev/null
+/*
+ * clusterdoturcommand.cpp
+ * Mothur
+ *
+ * Created by westcott on 10/27/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "clusterdoturcommand.h"
+#include "clusterclassic.h"
+
+//**********************************************************************************************************************
+vector<string> ClusterDoturCommand::getValidParameters(){
+ try {
+ string AlignArray[] = {"phylip","cutoff","precision","method","outputdir","inputdir"};
+ vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterDoturCommand", "getValidParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+ClusterDoturCommand::ClusterDoturCommand(){
+ try {
+ abort = true;
+ //initialize outputTypes
+ vector<string> tempOutNames;
+ outputTypes["list"] = tempOutNames;
+ outputTypes["rabund"] = tempOutNames;
+ outputTypes["sabund"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterDoturCommand", "ClusterCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<string> ClusterDoturCommand::getRequiredParameters(){
+ try {
+ string Array[] = {"phylip"};
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterDoturCommand", "getRequiredParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<string> ClusterDoturCommand::getRequiredFiles(){
+ try {
+ vector<string> myArray;
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterDoturCommand", "getRequiredFiles");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+//This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
+ClusterDoturCommand::ClusterDoturCommand(string option) {
+ try{
+
+ abort = false;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; }
+
+ else {
+ //valid paramters for this command
+ string Array[] = {"phylip","cutoff","precision","method","outputdir","inputdir"};
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+
+ //check to make sure all parameters are valid for command
+ map<string,string>::iterator it;
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
+ abort = true;
+ }
+ }
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+ string path;
+ it = parameters.find("phylip");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["phylip"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("name");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["name"] = inputDir + it->second; }
+ }
+
+ }
+
+ //initialize outputTypes
+ vector<string> tempOutNames;
+ outputTypes["list"] = tempOutNames;
+ outputTypes["rabund"] = tempOutNames;
+ outputTypes["sabund"] = tempOutNames;
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
+
+ //check for required parameters
+ phylipfile = validParameter.validFile(parameters, "phylip", true);
+ if (phylipfile == "not open") { abort = true; }
+ else if (phylipfile == "not found") { phylipfile = ""; m->mothurOut("When executing the cluster.dotur command you must enter a phylip file."); m->mothurOutEndLine(); abort = true; }
+
+
+ //check for optional parameter and set defaults
+ namefile = validParameter.validFile(parameters, "name", true);
+ if (namefile == "not open") { abort = true; }
+ else if (namefile == "not found") { namefile = ""; }
+
+ string temp;
+ temp = validParameter.validFile(parameters, "precision", false);
+ if (temp == "not found") { temp = "100"; }
+ //saves precision legnth for formatting below
+ length = temp.length();
+ convert(temp, precision);
+
+ temp = validParameter.validFile(parameters, "cutoff", false);
+ if (temp == "not found") { temp = "10"; }
+ convert(temp, cutoff);
+ cutoff += (5 / (precision * 10.0));
+
+ temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "F"; }
+ hard = m->isTrue(temp);
+
+ method = validParameter.validFile(parameters, "method", false);
+ if (method == "not found") { method = "furthest"; }
+
+ if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "weighted")) {
+ if (method == "furthest") { tag = "fn"; }
+ else if (method == "nearest") { tag = "nn"; }
+ else if (method == "average") { tag = "an"; }
+ else if (method == "weighted") { tag = "wn"; }
+ }else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest, average, weighted."); m->mothurOutEndLine(); abort = true; }
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterDoturCommand", "ClusterCommand");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+void ClusterDoturCommand::help(){
+ try {
+ m->mothurOut("The cluster.classic command clusters using the algorithm from dotur. \n");
+ m->mothurOut("The cluster.classic command parameter options are phylip, name, method, cuttoff, hard, precision. Phylip is required.\n");
+ m->mothurOut("The cluster.classic command should be in the following format: \n");
+ m->mothurOut("cluster.classic(phylip=yourDistanceMatrix, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n");
+ m->mothurOut("The acceptable cluster methods are furthest, nearest, weighted and average. If no method is provided then furthest is assumed.\n\n");
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterDoturCommand", "help");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+ClusterDoturCommand::~ClusterDoturCommand(){}
+
+//**********************************************************************************************************************
+
+int ClusterDoturCommand::execute(){
+ try {
+
+ if (abort == true) { return 0; }
+
+ if(namefile != ""){
+ nameMap = new NameAssignment(namefile);
+ nameMap->readMap();
+ }else{
+ nameMap = NULL;
+ }
+
+ //reads phylip file storing data in 2D vector, also fills list and rabund
+ ClusterClassic* cluster = new ClusterClassic(cutoff, method);
+ cluster->readPhylipFile(phylipfile, nameMap);
+
+ if (m->control_pressed) { delete cluster; delete list; delete rabund; return 0; }
+
+ list = cluster->getListVector();
+ rabund = cluster->getRAbundVector();
+
+ if (outputDir == "") { outputDir += m->hasPath(phylipfile); }
+ fileroot = outputDir + m->getRootName(m->getSimpleName(phylipfile));
+
+ m->openOutputFile(fileroot+ tag + ".sabund", sabundFile);
+ m->openOutputFile(fileroot+ tag + ".rabund", rabundFile);
+ m->openOutputFile(fileroot+ tag + ".list", listFile);
+
+ outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
+ outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
+ outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
+
+ float previousDist = 0.00000;
+ float rndPreviousDist = 0.00000;
+ oldRAbund = *rabund;
+ oldList = *list;
+
+ double saveCutoff = cutoff;
+
+ int estart = time(NULL);
+
+ while (cluster->getSmallDist() < cutoff && cluster->getNSeqs() > 0){
+
+ if (m->control_pressed) { delete cluster; delete list; delete rabund; sabundFile.close();rabundFile.close();listFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); return 0; }
+
+ cluster->update(cutoff);
+
+ float dist = cluster->getSmallDist();
+ float rndDist;
+ if (hard) {
+ rndDist = m->ceilDist(dist, precision);
+ }else{
+ rndDist = m->roundDist(dist, precision);
+ }
+
+ if(previousDist <= 0.0000 && dist != previousDist){
+ printData("unique");
+ }
+ else if(rndDist != rndPreviousDist){
+ printData(toString(rndPreviousDist, length-1));
+ }
+
+ previousDist = dist;
+ rndPreviousDist = rndDist;
+ oldRAbund = *rabund;
+ oldList = *list;
+ }
+
+ if(previousDist <= 0.0000){
+ printData("unique");
+ }
+ else if(rndPreviousDist<cutoff){
+ printData(toString(rndPreviousDist, length-1));
+ }
+
+ sabundFile.close();
+ rabundFile.close();
+ listFile.close();
+
+ delete cluster; delete nameMap; delete list; delete rabund;
+
+ if (saveCutoff != cutoff) {
+ if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
+ else { saveCutoff = m->roundDist(saveCutoff, precision); }
+ m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();
+ }
+
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
+ m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterDoturCommand", "execute");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+void ClusterDoturCommand::printData(string label){
+ try {
+
+ oldRAbund.setLabel(label);
+ oldRAbund.print(rabundFile);
+ oldRAbund.getSAbundVector().print(sabundFile);
+
+ oldRAbund.getSAbundVector().print(cout);
+
+ oldList.setLabel(label);
+ oldList.print(listFile);
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterDoturCommand", "printData");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
--- /dev/null
+#ifndef CLUSTERDOTURCOMMAND_H
+#define CLUSTERDOTURCOMMAND_H
+
+/*
+ * clusterdoturcommand.h
+ * Mothur
+ *
+ * Created by westcott on 10/27/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+#include "nameassignment.hpp"
+#include "globaldata.hpp"
+#include "rabundvector.hpp"
+#include "sabundvector.hpp"
+#include "listvector.hpp"
+
+
+class ClusterDoturCommand : public Command {
+
+public:
+ ClusterDoturCommand(string);
+ ClusterDoturCommand();
+ ~ClusterDoturCommand();
+ vector<string> getRequiredParameters();
+ vector<string> getValidParameters();
+ vector<string> getRequiredFiles();
+ map<string, vector<string> > getOutputFiles() { return outputTypes; }
+ int execute();
+ void help();
+
+private:
+ bool abort, hard;
+ string method, fileroot, tag, outputDir, phylipfile, namefile;
+ double cutoff;
+ int precision, length;
+ ofstream sabundFile, rabundFile, listFile;
+ NameAssignment* nameMap;
+ ListVector* list;
+ RAbundVector* rabund;
+ RAbundVector oldRAbund;
+ ListVector oldList;
+
+ void printData(string label);
+ vector<string> outputNames;
+ map<string, vector<string> > outputTypes;
+};
+
+#endif
+
#include "pipelinepdscommand.h"
#include "deuniqueseqscommand.h"
#include "pairwiseseqscommand.h"
+#include "clusterdoturcommand.h"
/*******************************************************/
commands["remove.lineage"] = "remove.lineage";
commands["fastq.info"] = "fastq.info";
commands["deunique.seqs"] = "deunique.seqs";
+ commands["cluster.classic"] = "cluster.classic";
commands["pairwise.seqs"] = "MPIEnabled";
commands["pipeline.pds"] = "MPIEnabled";
commands["classify.seqs"] = "MPIEnabled";
else if(commandName == "pipeline.pds") { command = new PipelineCommand(optionString); }
else if(commandName == "deunique.seqs") { command = new DeUniqueSeqsCommand(optionString); }
else if(commandName == "pairwise.seqs") { command = new PairwiseSeqsCommand(optionString); }
+ else if(commandName == "cluster.classic") { command = new ClusterDoturCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
else if(commandName == "fastq.info") { pipecommand = new ParseFastaQCommand(optionString); }
else if(commandName == "deunique.seqs") { pipecommand = new DeUniqueSeqsCommand(optionString); }
else if(commandName == "pairwise.seqs") { pipecommand = new PairwiseSeqsCommand(optionString); }
+ else if(commandName == "cluster.classic") { pipecommand = new ClusterDoturCommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
return pipecommand;
else if(commandName == "fastq.info") { shellcommand = new ParseFastaQCommand(); }
else if(commandName == "deunique.seqs") { shellcommand = new DeUniqueSeqsCommand(); }
else if(commandName == "pairwise.seqs") { shellcommand = new PairwiseSeqsCommand(); }
+ else if(commandName == "cluster.classic") { shellcommand = new ClusterDoturCommand(); }
else { shellcommand = new NoCommand(); }
return shellcommand;
exit(1);
}
}
+
/**************************************************************************************************/
#include "mothur.h"
+
/***********************************************/
class MothurOut {
int control_pressed;
bool executing;
-
private:
static MothurOut* _uniqueInstance;
MothurOut( const MothurOut& ); // Disable copy constructor
--- /dev/null
+/*
+ * subsamplecommand.cpp
+ * Mothur
+ *
+ * Created by westcott on 10/27/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "subsamplecommand.h"
+
+//**********************************************************************************************************************
+vector<string> SubSampleCommand::getValidParameters(){
+ try {
+ string Array[] = {"groups","label","outputdir","inputdir"};
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SubSampleCommand", "getValidParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+SubSampleCommand::SubSampleCommand(){
+ try {
+ abort = true;
+ //initialize outputTypes
+ vector<string> tempOutNames;
+ outputTypes["shared"] = tempOutNames;
+ outputTypes["list"] = tempOutNames;
+ outputTypes["rabund"] = tempOutNames;
+ outputTypes["sabund"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SubSampleCommand", "GetRelAbundCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<string> SubSampleCommand::getRequiredParameters(){
+ try {
+ vector<string> myArray;
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SubSampleCommand", "getRequiredParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<string> SubSampleCommand::getRequiredFiles(){
+ try {
+ string Array[] = {"shared","list","rabund","sabund","or"};
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SubSampleCommand", "getRequiredFiles");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+SubSampleCommand::SubSampleCommand(string option) {
+ try {
+ globaldata = GlobalData::getInstance();
+ abort = false;
+ allLines = 1;
+ labels.clear();
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; }
+
+ else {
+ //valid paramters for this command
+ string AlignArray[] = {"groups","label","outputdir","inputdir"};
+ vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+
+ //check to make sure all parameters are valid for command
+ for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ //initialize outputTypes
+ vector<string> tempOutNames;
+ outputTypes["shared"] = tempOutNames;
+ outputTypes["list"] = tempOutNames;
+ outputTypes["rabund"] = tempOutNames;
+ outputTypes["sabund"] = tempOutNames;
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = "";
+ outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it
+ }
+
+ //make sure the user has already run the read.otu command
+ if ((globaldata->getSharedFile() == "") && (globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { m->mothurOut("You must read a list, sabund, rabund or shared file before you can use the sub.sample command."); m->mothurOutEndLine(); abort = true; }
+
+ //check for optional parameter and set defaults
+ // ...at some point should added some additional type checking...
+ label = validParameter.validFile(parameters, "label", false);
+ if (label == "not found") { label = ""; }
+ else {
+ if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
+ else { allLines = 1; }
+ }
+
+ //if the user has not specified any labels use the ones from read.otu
+ if (label == "") {
+ allLines = globaldata->allLines;
+ labels = globaldata->labels;
+ }
+
+ groups = validParameter.validFile(parameters, "groups", false);
+ if (groups == "not found") { groups = ""; pickedGroups = false; }
+ else {
+ pickedGroups = true;
+ m->splitAtDash(groups, Groups);
+ globaldata->Groups = Groups;
+ }
+
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SubSampleCommand", "SubSampleCommand");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+void SubSampleCommand::help(){
+ try {
+ m->mothurOut("The get.relabund command can only be executed after a successful read.otu command of a list and group or shared file.\n");
+ m->mothurOut("The get.relabund command parameters are groups, scale and label. No parameters are required.\n");
+ m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
+ m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
+ m->mothurOut("The scale parameter allows you to select what scale you would like to use. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n");
+ m->mothurOut("The get.relabund command should be in the following format: get.relabund(groups=yourGroups, label=yourLabels).\n");
+ m->mothurOut("Example get.relabund(groups=A-B-C, scale=averagegroup).\n");
+ m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
+ m->mothurOut("The get.relabund command outputs a .relabund file.\n");
+ m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SubSampleCommand", "help");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+SubSampleCommand::~SubSampleCommand(){
+}
+
+//**********************************************************************************************************************
+
+int SubSampleCommand::execute(){
+ try {
+
+ if (abort == true) { return 0; }
+
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "subsample" + m->getExtension(globaldata->inputFileName);
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+
+ string format = globaldata->getFormat();
+
+ read = new ReadOTUFile(globaldata->inputFileName);
+ read->read(&*globaldata);
+ input = globaldata->ginput;
+
+ if (format == "sharedfile") {
+ lookup = input->getSharedRAbundVectors();
+ outputTypes["shared"].push_back(outputFileName);
+ getSubSampleShared(lookup, out);
+ }else if (format == "list") {
+ list = globaldata->glist;
+ outputTypes["list"].push_back(outputFileName);
+ //getSubSamplesList();
+ }else if (format == "rabund") {
+ rabund = globaldata->rabund;
+ outputTypes["rabund"].push_back(outputFileName);
+ //getSubSamplesRabund();
+
+ }else if (format == "sabund") {
+ sabund = globaldata->sabund;
+ outputTypes["sabund"].push_back(outputFileName);
+ //getSubSamplesSabund();
+ }
+
+ out.close();
+
+ //reset groups parameter
+ delete input; globaldata->ginput = NULL;
+ delete read;
+
+ if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;}
+
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName);
+ m->mothurOutEndLine();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SubSampleCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int SubSampleCommand::getSubSampleShared(vector<SharedRAbundVector*>& thislookup, ofstream& filename) {
+ try {
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ string lastLabel = lookup[0]->getLabel();
+
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+ if (m->control_pressed) { return 0; }
+
+ if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ //process lookup
+
+
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+ }
+
+ if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = lookup[0]->getLabel();
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+
+ lookup = input->getSharedRAbundVectors(lastLabel);
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ //process lookup
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+
+ //restore real lastlabel to save below
+ lookup[0]->setLabel(saveLabel);
+ }
+
+ lastLabel = lookup[0]->getLabel();
+ //prevent memory leak
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
+
+ //get next line to process
+ lookup = input->getSharedRAbundVectors();
+ }
+
+
+ if (m->control_pressed) { return 0; }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+ lookup = input->getSharedRAbundVectors(lastLabel);
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ //process lookup
+
+
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ }
+
+ //reset groups parameter
+ globaldata->Groups.clear();
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SubSampleCommand", "getSubSampleShared");
+ exit(1);
+ }
+}
+
+
+//**********************************************************************************************************************
+int SubSampleCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
+ try {
+
+ vector<SharedRAbundVector*> newLookup;
+ for (int i = 0; i < thislookup.size(); i++) {
+ SharedRAbundVector* temp = new SharedRAbundVector();
+ temp->setLabel(thislookup[i]->getLabel());
+ temp->setGroup(thislookup[i]->getGroup());
+ newLookup.push_back(temp);
+ }
+
+ //for each bin
+ for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
+ if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
+
+ //look at each sharedRabund and make sure they are not all zero
+ bool allZero = true;
+ for (int j = 0; j < thislookup.size(); j++) {
+ if (thislookup[j]->getAbundance(i) != 0) { allZero = false; break; }
+ }
+
+ //if they are not all zero add this bin
+ if (!allZero) {
+ for (int j = 0; j < thislookup.size(); j++) {
+ newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
+ }
+ }
+ }
+
+ for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
+
+ thislookup = newLookup;
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SubSampleCommand", "eliminateZeroOTUS");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+
--- /dev/null
+#ifndef SUBSAMPLECOMMAND_H
+#define SUBSAMPLECOMMAND_H
+
+/*
+ * subsamplecommand.h
+ * Mothur
+ *
+ * Created by westcott on 10/27/10.
+ * Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+#include "inputdata.h"
+#include "readotu.h"
+#include "sharedrabundvector.h"
+
+class GlobalData;
+
+class SubSampleCommand : public Command {
+
+public:
+ SubSampleCommand(string);
+ SubSampleCommand();
+ ~SubSampleCommand();
+ vector<string> getRequiredParameters();
+ vector<string> getValidParameters();
+ vector<string> getRequiredFiles();
+ map<string, vector<string> > getOutputFiles() { return outputTypes; }
+ int execute();
+ void help();
+
+private:
+ GlobalData* globaldata;
+ ReadOTUFile* read;
+ InputData* input;
+ vector<SharedRAbundVector*> lookup;
+ ListVector* list;
+ RAbundVector* rabund;
+ SAbundVector* sabund;
+
+ bool abort, allLines, pickedGroups;
+ set<string> labels; //holds labels to be used
+ string groups, label, outputDir;
+ vector<string> Groups, outputNames;
+ map<string, vector<string> > outputTypes;
+
+ int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
+ int getSubSampleShared(vector<SharedRAbundVector*>&, ofstream&);
+};
+
+#endif
+