]> git.donarmstrong.com Git - mothur.git/commitdiff
finished chimera.slayer adding trim parameter, added persample parameter to sub.sampl...
authorwestcott <westcott>
Fri, 7 Jan 2011 18:18:59 +0000 (18:18 +0000)
committerwestcott <westcott>
Fri, 7 Jan 2011 18:18:59 +0000 (18:18 +0000)
Mothur.xcodeproj/project.pbxproj
chimeraslayer.cpp
chimeraslayer.h
chimeraslayercommand.cpp
linearalgebra.cpp [new file with mode: 0644]
linearalgebra.h [new file with mode: 0644]
pcoacommand.cpp
pcoacommand.h
subsamplecommand.cpp
subsamplecommand.h

index e7e81830d7733593c273015e4d4d4a938b5752c6..20b0e4898f2e58c2fc781931725fffab60cf569e 100644 (file)
                A7E9B98D12D37EC400DA6239 /* weighted.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87C12D37EC400DA6239 /* weighted.cpp */; };
                A7E9B98E12D37EC400DA6239 /* weightedlinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87E12D37EC400DA6239 /* weightedlinkage.cpp */; };
                A7E9B98F12D37EC400DA6239 /* whittaker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B87F12D37EC400DA6239 /* whittaker.cpp */; };
+               A7FC480E12D788F20055BC5C /* linearalgebra.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7FC480D12D788F20055BC5C /* linearalgebra.cpp */; };
 /* End PBXBuildFile section */
 
 /* Begin PBXCopyFilesBuildPhase section */
                A7E9B87E12D37EC400DA6239 /* weightedlinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = weightedlinkage.cpp; sourceTree = SOURCE_ROOT; };
                A7E9B87F12D37EC400DA6239 /* whittaker.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = whittaker.cpp; sourceTree = "<group>"; };
                A7E9B88012D37EC400DA6239 /* whittaker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = whittaker.h; sourceTree = "<group>"; };
+               A7FC480C12D788F20055BC5C /* linearalgebra.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = linearalgebra.h; sourceTree = "<group>"; };
+               A7FC480D12D788F20055BC5C /* linearalgebra.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = linearalgebra.cpp; sourceTree = "<group>"; };
                C6A0FF2C0290799A04C91782 /* mothur.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = mothur.1; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
                                A7E9B72D12D37EC400DA6239 /* inputdata.cpp */,
                                A7E9B73912D37EC400DA6239 /* libshuff.cpp */,
                                A7E9B73A12D37EC400DA6239 /* libshuff.h */,
+                               A7FC480C12D788F20055BC5C /* linearalgebra.h */,
+                               A7FC480D12D788F20055BC5C /* linearalgebra.cpp */,
                                A7E9BA5612D39BD800DA6239 /* metastats */,
                                A7E9B75B12D37EC400DA6239 /* mothur.cpp */,
                                A7E9B75C12D37EC400DA6239 /* mothur.h */,
                                A7E9B98E12D37EC400DA6239 /* weightedlinkage.cpp in Sources */,
                                A7E9B98F12D37EC400DA6239 /* whittaker.cpp in Sources */,
                                A70332B712D3A13400761E33 /* makefile in Sources */,
+                               A7FC480E12D788F20055BC5C /* linearalgebra.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index 020b8a6efc1edd7a581ba938be968908024cba4a..38f7abe49bfc7266e6eb4acd123ff057b8780c42 100644 (file)
@@ -419,7 +419,7 @@ void ChimeraSlayer::printHeader(ostream& out) {
 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
        try {
                Sequence* trim = NULL;
-               if (trimChimera) { trim = new Sequence(trimQuery->getName(), trimQuery->getAligned()); }
+               if (trimChimera) { trim = trimQuery; }
                
                if (chimeraFlags == "yes") {
                        string chimeraFlag = "no";
@@ -472,7 +472,7 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
                string outputString = "";
                
                Sequence* trim = NULL;
-               if (trimChimera) { trim = new Sequence(trimQuery->getName(), trimQuery->getAligned()); }
+               if (trimChimera) { trim = trimQuery; }
                
                if (chimeraFlags == "yes") {
                        string chimeraFlag = "no";
@@ -546,7 +546,7 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
 //***************************************************************************************************************
 int ChimeraSlayer::getChimeras(Sequence* query) {
        try {
-               trimQuery = query;
+               if (trimChimera) { trimQuery = new Sequence(query->getName(), query->getAligned());  }
                
                chimeraFlags = "no";
 
index 1e03d9281e7a194f34bde73065c1916f66bec469..87439a327e9e83acf1db63375a3f1173e7829b3a 100644 (file)
@@ -15,8 +15,6 @@
 #include "maligner.h"
 #include "slayer.h"
 
-
-
 //***********************************************************************/
 //This class was modeled after the chimeraSlayer written by the Broad Institute
 /***********************************************************************/
index 39483b1f37c4a9c547a95b07b66abfc432edb732..bd9ff4d083a4b20fc652bd2b3f6960cba3ce235e 100644 (file)
@@ -690,6 +690,7 @@ int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fil
                                        
                                        if (trim) {  
                                                string outputString = ">" + trimmed->getName() + "\n" + trimmed->getAligned() + "\n";
+                                               delete trimmed;
                                                
                                                //write to accnos file
                                                int length = outputString.length();
diff --git a/linearalgebra.cpp b/linearalgebra.cpp
new file mode 100644 (file)
index 0000000..e015b5f
--- /dev/null
@@ -0,0 +1,238 @@
+/*
+ *  linearalgebra.cpp
+ *  mothur
+ *
+ *  Created by westcott on 1/7/11.
+ *  Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "linearalgebra.h"
+
+/*********************************************************************************************************************************/
+
+inline double SIGN(const double a, const double b)
+{
+    return b>=0 ? (a>=0 ? a:-a) : (a>=0 ? -a:a);
+}
+/*********************************************************************************************************************************/
+
+vector<vector<double> > LinearAlgebra::matrix_mult(vector<vector<double> > first, vector<vector<double> > second){
+       try {
+               vector<vector<double> > product;
+               
+               int first_rows = first.size();
+               int first_cols = first[0].size();
+               int second_cols = second[0].size();
+               
+               product.resize(first_rows);
+               for(int i=0;i<first_rows;i++){
+                       product[i].resize(second_cols);
+               }
+               
+               for(int i=0;i<first_rows;i++){
+                       for(int j=0;j<second_cols;j++){
+                               
+                               if (m->control_pressed) { return product; }
+                                       
+                               product[i][j] = 0.0;
+                               for(int k=0;k<first_cols;k++){
+                                       product[i][j] += first[i][k] * second[k][j];
+                               }
+                       }
+               }
+               
+               return product;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "matrix_mult");
+               exit(1);
+       }
+       
+}
+
+/*********************************************************************************************************************************/
+
+//  This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
+
+int LinearAlgebra::tred2(vector<vector<double> >& a, vector<double>& d, vector<double>& e){
+       try {
+               double scale, hh, h, g, f;
+               
+               int n = a.size();
+               
+               d.resize(n);
+               e.resize(n);
+               
+               for(int i=n-1;i>0;i--){
+                       int l=i-1;
+                       h = scale = 0.0000;
+                       if(l>0){
+                               for(int k=0;k<l+1;k++){
+                                       scale += fabs(a[i][k]);
+                               }
+                               if(scale == 0.0){
+                                       e[i] = a[i][l];
+                               }
+                               else{
+                                       for(int k=0;k<l+1;k++){
+                                               a[i][k] /= scale;
+                                               h += a[i][k] * a[i][k];
+                                       }
+                                       f = a[i][l];
+                                       g = (f >= 0.0 ? -sqrt(h) : sqrt(h));
+                                       e[i] = scale * g;
+                                       h -= f * g;
+                                       a[i][l] = f - g;
+                                       f = 0.0;
+                                       for(int j=0;j<l+1;j++){
+                                               a[j][i] = a[i][j] / h;
+                                               g = 0.0;
+                                               for(int k=0;k<j+1;k++){
+                                                       g += a[j][k] * a[i][k];
+                                               }
+                                               for(int k=j+1;k<l+1;k++){
+                                                       g += a[k][j] * a[i][k];
+                                               }
+                                               e[j] = g / h;
+                                               f += e[j] * a[i][j];
+                                       }
+                                       hh = f / (h + h);
+                                       for(int j=0;j<l+1;j++){
+                                               f = a[i][j];
+                                               e[j] = g = e[j] - hh * f;
+                                               for(int k=0;k<j+1;k++){
+                                                       a[j][k] -= (f * e[k] + g * a[i][k]);
+                                               }
+                                       }
+                               }
+                       }
+                       else{
+                               e[i] = a[i][l];
+                       }
+                       
+                       d[i] = h;
+               }
+               
+               d[0] = 0.0000;
+               e[0] = 0.0000;
+               
+               for(int i=0;i<n;i++){
+                       int l = i;
+                       if(d[i] != 0.0){
+                               for(int j=0;j<l;j++){
+                                       g = 0.0000;
+                                       for(int k=0;k<l;k++){
+                                               g += a[i][k] * a[k][j];
+                                       }
+                                       for(int k=0;k<l;k++){
+                                               a[k][j] -= g * a[k][i];
+                                       }
+                               }
+                       }
+                       d[i] = a[i][i];
+                       a[i][i] = 1.0000;
+                       for(int j=0;j<l;j++){
+                               a[j][i] = a[i][j] = 0.0;
+                       }
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "tred2");
+               exit(1);
+       }
+       
+}
+/*********************************************************************************************************************************/
+
+double LinearAlgebra::pythag(double a, double b)       {       return(pow(a*a+b*b,0.5));       }
+
+/*********************************************************************************************************************************/
+
+//  This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
+
+int LinearAlgebra::qtli(vector<double>& d, vector<double>& e, vector<vector<double> >& z) {
+       try {
+               int myM, i, iter;
+               double s, r, p, g, f, dd, c, b;
+               
+               int n = d.size();
+               for(int i=1;i<=n;i++){
+                       e[i-1] = e[i];
+               }
+               e[n-1] = 0.0000;
+               
+               for(int l=0;l<n;l++){
+                       iter = 0;
+                       do {
+                               for(myM=l;myM<n-1;myM++){
+                                       dd = fabs(d[myM]) + fabs(d[myM+1]);
+                                       if(fabs(e[myM])+dd == dd) break;
+                               }
+                               if(myM != l){
+                                       if(iter++ == 30) cerr << "Too many iterations in tqli\n";
+                                       g = (d[l+1]-d[l]) / (2.0 * e[l]);
+                                       r = pythag(g, 1.0);
+                                       g = d[myM] - d[l] + e[l] / (g + SIGN(r,g));
+                                       s = c = 1.0;
+                                       p = 0.0000;
+                                       for(i=myM-1;i>=l;i--){
+                                               f = s * e[i];
+                                               b = c * e[i];
+                                               e[i+1] = (r=pythag(f,g));
+                                               if(r==0.0){
+                                                       d[i+1] -= p;
+                                                       e[myM] = 0.0000;
+                                                       break;
+                                               }
+                                               s = f / r;
+                                               c = g / r;
+                                               g = d[i+1] - p;
+                                               r = (d[i] - g) * s + 2.0 * c * b;
+                                               d[i+1] = g + ( p = s * r);
+                                               g = c * r - b;
+                                               for(int k=0;k<n;k++){
+                                                       f = z[k][i+1];
+                                                       z[k][i+1] = s * z[k][i] + c * f;
+                                                       z[k][i] = c * z[k][i] - s * f;
+                                               }
+                                       }
+                                       if(r == 0.00 && i >= l) continue;
+                                       d[l] -= p;
+                                       e[l] = g;
+                                       e[myM] = 0.0;
+                               }
+                       } while (myM != l);
+               }
+               
+               int k;
+               for(int i=0;i<n;i++){
+                       p=d[k=i];
+                       for(int j=i;j<n;j++){
+                               if(d[j] >= p){
+                                       p=d[k=j];
+                               }
+                       }
+                       if(k!=i){
+                               d[k]=d[i];
+                               d[i]=p;
+                               for(int j=0;j<n;j++){
+                                       p=z[j][i];
+                                       z[j][i] = z[j][k];
+                                       z[j][k] = p;
+                               }
+                       }
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "LinearAlgebra", "qtli");
+               exit(1);
+       }
+}
+/*********************************************************************************************************************************/
+
+
diff --git a/linearalgebra.h b/linearalgebra.h
new file mode 100644 (file)
index 0000000..2acf5cb
--- /dev/null
@@ -0,0 +1,33 @@
+#ifndef LINEARALGEBRA
+#define LINEARALGEBRA
+
+/*
+ *  linearalgebra.h
+ *  mothur
+ *
+ *  Created by westcott on 1/7/11.
+ *  Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "mothurout.h"
+
+class LinearAlgebra {
+       
+public:
+       LinearAlgebra() { m = MothurOut::getInstance(); }
+       ~LinearAlgebra() {}
+       
+       vector<vector<double> > matrix_mult(vector<vector<double> >, vector<vector<double> >);
+       int tred2(vector<vector<double> >&, vector<double>&, vector<double>&);
+       int qtli(vector<double>&, vector<double>&, vector<vector<double> >&);
+       
+       
+private:
+       MothurOut* m;
+       
+       double pythag(double, double);
+};
+
+#endif
+
index cb2e758be2ac7aa6ca3afcd5079d2143b59e3ead..9375144bcb342faf364a38eee6eca63226099659 100644 (file)
@@ -171,14 +171,13 @@ int PCOACommand::execute(){
                vector<double> e;
                vector<vector<double> > G = D;
                vector<vector<double> > copy_G;
-               //int rank = D.size();
-               
+                               
                m->mothurOut("\nProcessing...\n\n");
                
                for(int count=0;count<2;count++){
-                       recenter(offset, D, G);         if (m->control_pressed) { return 0; }
-                       tred2(G, d, e);                         if (m->control_pressed) { return 0; }
-                       qtli(d, e, G);                          if (m->control_pressed) { return 0; }
+                       recenter(offset, D, G);                                 if (m->control_pressed) { return 0; }
+                       linearCalc.tred2(G, d, e);                              if (m->control_pressed) { return 0; }
+                       linearCalc.qtli(d, e, G);                               if (m->control_pressed) { return 0; }
                        offset = d[d.size()-1];
                        if(offset > 0.0) break;
                } 
@@ -327,13 +326,6 @@ double PCOACommand::calcPearson(vector< vector<double> >& euclidDists, vector< v
 }
 /*********************************************************************************************************************************/
 
-inline double SIGN(const double a, const double b)
-{
-    return b>=0 ? (a>=0 ? a:-a) : (a>=0 ? -a:a);
-}
-
-/*********************************************************************************************************************************/
-
 void PCOACommand::get_comment(istream& f, char begin, char end){
        try {
                char d=f.get();
@@ -444,39 +436,6 @@ void PCOACommand::read(string fname, vector<string>& names, vector<vector<double
 
 /*********************************************************************************************************************************/
 
-double PCOACommand::pythag(double a, double b) {       return(pow(a*a+b*b,0.5));       }
-
-/*********************************************************************************************************************************/
-
-void PCOACommand::matrix_mult(vector<vector<double> > first, vector<vector<double> > second, vector<vector<double> >& product){
-       try {
-               int first_rows = first.size();
-               int first_cols = first[0].size();
-               int second_cols = second[0].size();
-               
-               product.resize(first_rows);
-               for(int i=0;i<first_rows;i++){
-                       product[i].resize(second_cols);
-               }
-               
-               for(int i=0;i<first_rows;i++){
-                       for(int j=0;j<second_cols;j++){
-                               product[i][j] = 0.0;
-                               for(int k=0;k<first_cols;k++){
-                                       product[i][j] += first[i][k] * second[k][j];
-                               }
-                       }
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "PCOACommand", "matrix_mult");
-               exit(1);
-       }
-
-}
-
-/*********************************************************************************************************************************/
-
 void PCOACommand::recenter(double offset, vector<vector<double> > D, vector<vector<double> >& G){
        try {
                int rank = D.size();
@@ -499,8 +458,8 @@ void PCOACommand::recenter(double offset, vector<vector<double> > D, vector<vect
                        }
                }
                
-               matrix_mult(C,A,A);
-               matrix_mult(A,C,G);
+               A = linearCalc.matrix_mult(C,A);
+               G = linearCalc.matrix_mult(A,C);
        }
        catch(exception& e) {
                m->errorOut(e, "PCOACommand", "recenter");
@@ -511,182 +470,6 @@ void PCOACommand::recenter(double offset, vector<vector<double> > D, vector<vect
 
 /*********************************************************************************************************************************/
 
-//  This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
-
-void PCOACommand::tred2(vector<vector<double> >& a, vector<double>& d, vector<double>& e){
-       try {
-               double scale, hh, h, g, f;
-               
-               int n = a.size();
-               
-               d.resize(n);
-               e.resize(n);
-               
-               for(int i=n-1;i>0;i--){
-                       int l=i-1;
-                       h = scale = 0.0000;
-                       if(l>0){
-                               for(int k=0;k<l+1;k++){
-                                       scale += fabs(a[i][k]);
-                               }
-                               if(scale == 0.0){
-                                       e[i] = a[i][l];
-                               }
-                               else{
-                                       for(int k=0;k<l+1;k++){
-                                               a[i][k] /= scale;
-                                               h += a[i][k] * a[i][k];
-                                       }
-                                       f = a[i][l];
-                                       g = (f >= 0.0 ? -sqrt(h) : sqrt(h));
-                                       e[i] = scale * g;
-                                       h -= f * g;
-                                       a[i][l] = f - g;
-                                       f = 0.0;
-                                       for(int j=0;j<l+1;j++){
-                                               a[j][i] = a[i][j] / h;
-                                               g = 0.0;
-                                               for(int k=0;k<j+1;k++){
-                                                       g += a[j][k] * a[i][k];
-                                               }
-                                               for(int k=j+1;k<l+1;k++){
-                                                       g += a[k][j] * a[i][k];
-                                               }
-                                               e[j] = g / h;
-                                               f += e[j] * a[i][j];
-                                       }
-                                       hh = f / (h + h);
-                                       for(int j=0;j<l+1;j++){
-                                               f = a[i][j];
-                                               e[j] = g = e[j] - hh * f;
-                                               for(int k=0;k<j+1;k++){
-                                                       a[j][k] -= (f * e[k] + g * a[i][k]);
-                                               }
-                                       }
-                               }
-                       }
-                       else{
-                               e[i] = a[i][l];
-                       }
-                       
-                       d[i] = h;
-               }
-               
-               d[0] = 0.0000;
-               e[0] = 0.0000;
-               
-               for(int i=0;i<n;i++){
-                       int l = i;
-                       if(d[i] != 0.0){
-                               for(int j=0;j<l;j++){
-                                       g = 0.0000;
-                                       for(int k=0;k<l;k++){
-                                               g += a[i][k] * a[k][j];
-                                       }
-                                       for(int k=0;k<l;k++){
-                                               a[k][j] -= g * a[k][i];
-                                       }
-                               }
-                       }
-                       d[i] = a[i][i];
-                       a[i][i] = 1.0000;
-                       for(int j=0;j<l;j++){
-                               a[j][i] = a[i][j] = 0.0;
-                       }
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "PCOACommand", "tred2");
-               exit(1);
-       }
-
-}
-
-/*********************************************************************************************************************************/
-
-//  This function is taken from Numerical Recipes in C++ by Press et al., 2nd edition, pg. 479
-
-void PCOACommand::qtli(vector<double>& d, vector<double>& e, vector<vector<double> >& z) {
-       try {
-               int m, i, iter;
-               double s, r, p, g, f, dd, c, b;
-               
-               int n = d.size();
-               for(int i=1;i<=n;i++){
-                       e[i-1] = e[i];
-               }
-               e[n-1] = 0.0000;
-               
-               for(int l=0;l<n;l++){
-                       iter = 0;
-                       do {
-                               for(m=l;m<n-1;m++){
-                                       dd = fabs(d[m]) + fabs(d[m+1]);
-                                       if(fabs(e[m])+dd == dd) break;
-                               }
-                               if(m != l){
-                                       if(iter++ == 30) cerr << "Too many iterations in tqli\n";
-                                       g = (d[l+1]-d[l]) / (2.0 * e[l]);
-                                       r = pythag(g, 1.0);
-                                       g = d[m] - d[l] + e[l] / (g + SIGN(r,g));
-                                       s = c = 1.0;
-                                       p = 0.0000;
-                                       for(i=m-1;i>=l;i--){
-                                               f = s * e[i];
-                                               b = c * e[i];
-                                               e[i+1] = (r=pythag(f,g));
-                                               if(r==0.0){
-                                                       d[i+1] -= p;
-                                                       e[m] = 0.0000;
-                                                       break;
-                                               }
-                                               s = f / r;
-                                               c = g / r;
-                                               g = d[i+1] - p;
-                                               r = (d[i] - g) * s + 2.0 * c * b;
-                                               d[i+1] = g + ( p = s * r);
-                                               g = c * r - b;
-                                               for(int k=0;k<n;k++){
-                                                       f = z[k][i+1];
-                                                       z[k][i+1] = s * z[k][i] + c * f;
-                                                       z[k][i] = c * z[k][i] - s * f;
-                                               }
-                                       }
-                                       if(r == 0.00 && i >= l) continue;
-                                       d[l] -= p;
-                                       e[l] = g;
-                                       e[m] = 0.0;
-                               }
-                       } while (m != l);
-               }
-               
-               int k;
-               for(int i=0;i<n;i++){
-                       p=d[k=i];
-                       for(int j=i;j<n;j++){
-                               if(d[j] >= p){
-                                       p=d[k=j];
-                               }
-                       }
-                       if(k!=i){
-                               d[k]=d[i];
-                               d[i]=p;
-                               for(int j=0;j<n;j++){
-                                       p=z[j][i];
-                                       z[j][i] = z[j][k];
-                                       z[j][k] = p;
-                               }
-                       }
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "PCOACommand", "qtli");
-               exit(1);
-       }
-}
-
-/*********************************************************************************************************************************/
-
 void PCOACommand::output(string fnameRoot, vector<string> name_list, vector<vector<double> >& G, vector<double> d) {
        try {
                int rank = name_list.size();
index ea1738b9113f967af45f52e0978f4d48dcd1b23f..8bb7c1bdc586798b70fb3ec3f601bf13b5a16d9e 100644 (file)
@@ -11,6 +11,7 @@
  */
  
 #include "command.hpp"
+#include "linearalgebra.h"
 
 
 /*****************************************************************/
@@ -34,15 +35,12 @@ private:
        float cutoff, precision;
        vector<string> outputNames;
        map<string, vector<string> > outputTypes;
+       LinearAlgebra linearCalc;
        
        void get_comment(istream&, char, char);
        int read_phylip(istream&, int, vector<string>&, vector<vector<double> >&);
        void read(string, vector<string>&, vector<vector<double> >&);
-       double pythag(double, double);
-       void matrix_mult(vector<vector<double> >, vector<vector<double> >, vector<vector<double> >&);
        void recenter(double, vector<vector<double> >, vector<vector<double> >&);
-       void tred2(vector<vector<double> >&, vector<double>&, vector<double>&);
-       void qtli(vector<double>&, vector<double>&, vector<vector<double> >&);
        void output(string, vector<string>, vector<vector<double> >&, vector<double>);
        vector< vector<double> > calculateEuclidianDistance(vector<vector<double> >&, int);
        double calcPearson(vector<vector<double> >&, vector<vector<double> >&);
index b8bb7ad44462d64e661d1f9b640fbff87dc872d2..2a3d138d77497d545a04c259f55dae41d3191413 100644 (file)
@@ -13,7 +13,7 @@
 //**********************************************************************************************************************
 vector<string> SubSampleCommand::getValidParameters(){ 
        try {
-               string Array[] =  {"fasta", "group", "list","shared","rabund", "name","sabund","size","groups","label","outputdir","inputdir"};
+               string Array[] =  {"fasta", "group", "list","shared","rabund","persample", "name","sabund","size","groups","label","outputdir","inputdir"};
                vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                return myArray;
        }
@@ -77,7 +77,7 @@ SubSampleCommand::SubSampleCommand(string option) {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"fasta", "group", "list","shared","rabund", "sabund","name","size","groups","label","outputdir","inputdir"};
+                       string Array[] =  {"fasta", "group", "list","shared","rabund","persample", "sabund","name","size","groups","label","outputdir","inputdir"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -216,6 +216,11 @@ SubSampleCommand::SubSampleCommand(string option) {
                        string temp = validParameter.validFile(parameters, "size", false);              if (temp == "not found"){       temp = "0";             }
                        convert(temp, size);  
                        
+                       temp = validParameter.validFile(parameters, "persample", false);                if (temp == "not found"){       temp = "f";             }
+                       persample = m->isTrue(temp);
+                       
+                       if (groupfile == "") { persample = false; }
+                       
                        if ((namefile != "") && (fastafile == "")) { m->mothurOut("You may only use a namefile with a fastafile."); m->mothurOutEndLine(); abort = true; }
                        
                        if ((fastafile == "") && (listfile == "") && (sabundfile == "") && (rabundfile == "") && (sharedfile == "")) {
@@ -244,12 +249,14 @@ SubSampleCommand::SubSampleCommand(string option) {
 void SubSampleCommand::help(){
        try {
                m->mothurOut("The sub.sample command is designed to be used as a way to normalize your data, or create a smaller set from your original set.\n");
-               m->mothurOut("The sub.sample command parameters are fasta, name, list, group, rabund, sabund, shared, groups, size and label.  You must provide a fasta, list, sabund, rabund or shared file as an input file.\n");
+               m->mothurOut("The sub.sample command parameters are fasta, name, list, group, rabund, sabund, shared, groups, size, persample and label.  You must provide a fasta, list, sabund, rabund or shared file as an input file.\n");
                m->mothurOut("The namefile is only used with the fasta file, not with the listfile, because the list file should contain all sequences.\n");
                m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
                m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
                m->mothurOut("The size parameter allows you indicate the size of your subsample.\n");
-               m->mothurOut("The size parameter is not set: with shared file size=number of seqs in smallest sample, with all other files, 10% of number of seqs.\n");
+               m->mothurOut("The persample parameter allows you indicate you want to select subsample of the same size from each of your groups, default=false. It is only used with the list and fasta files if a groupfile is given.\n");
+               m->mothurOut("persample=false will select a random set of sequences of the size you select, but the number of seqs from each group may differ.\n");
+               m->mothurOut("The size parameter is not set: with shared file size=number of seqs in smallest sample, with all other files if a groupfile is given and persample=true, then size=number of seqs in smallest sample, otherwise size=10% of number of seqs.\n");
                m->mothurOut("The sub.sample command should be in the following format: sub.sample(list=yourListFile, group=yourGroupFile, groups=yourGroups, label=yourLabels).\n");
                m->mothurOut("Example sub.sample(list=abrecovery.fn.list, group=abrecovery.groups, groups=B-C, size=20).\n");
                m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
@@ -340,72 +347,117 @@ int SubSampleCommand::getSubSampleFasta() {
                outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
                
                //make sure that if your picked groups size is not too big
-               
-               if (pickedGroups) {
-                       int total = 0;
-                       for(int i = 0; i < Groups.size(); i++) {
-                               total += groupMap->getNumSeqs(Groups[i]);
+               int thisSize = names.size();
+               if (persample) { 
+                       if (size == 0) { //user has not set size, set size = smallest samples size
+                               size = groupMap->getNumSeqs(Groups[0]);
+                               for (int i = 1; i < Groups.size(); i++) {
+                                       int thisSize = groupMap->getNumSeqs(Groups[i]);
+                                       
+                                       if (thisSize < size) {  size = thisSize;        }
+                               }
+                       }else { //make sure size is not too large
+                               int smallestSize = groupMap->getNumSeqs(Groups[0]);
+                               for (int i = 1; i < Groups.size(); i++) {
+                                       int thisSize = groupMap->getNumSeqs(Groups[i]);
+                                       
+                                       if (thisSize < smallestSize) {  smallestSize = thisSize;        }
+                               }
+                               if (smallestSize < size) { size = smallestSize; m->mothurOut("You have selected a size that is larger than your smallest sample, using your samllest sample size, " + toString(smallestSize) + "."); m->mothurOutEndLine(); }
+                       }
+                       
+                       m->mothurOut("Sampling " + toString(size) + " from each group."); m->mothurOutEndLine();                        
+               }else {
+                       if (pickedGroups) {
+                               int total = 0;
+                               for(int i = 0; i < Groups.size(); i++) {
+                                       total += groupMap->getNumSeqs(Groups[i]);
+                               }
+                               
+                               if (size == 0) { //user has not set size, set size = 10% samples size
+                                       size = int (total * 0.10);
+                               }
+                               
+                               if (total < size) { 
+                                       if (size != 0) { 
+                                               m->mothurOut("Your size is too large for the number of groups you selected. Adjusting to " + toString(int (total * 0.10)) + "."); m->mothurOutEndLine();
+                                       }
+                                       size = int (total * 0.10);
+                               }
+                               
+                               m->mothurOut("Sampling " + toString(size) + " from " + toString(total) + "."); m->mothurOutEndLine();
                        }
                        
                        if (size == 0) { //user has not set size, set size = 10% samples size
-                               size = int (total * 0.10);
+                               size = int (names.size() * 0.10);
                        }
                        
-                       if (total < size) { 
-                               if (size != 0) { 
-                                       m->mothurOut("Your size is too large for the number of groups you selected. Adjusting to " + toString(int (total * 0.10)) + "."); m->mothurOutEndLine();
-                               }
-                               size = int (total * 0.10);
+                       if (size > thisSize) { m->mothurOut("Your fasta file only contains " + toString(thisSize) + " sequences. Setting size to " + toString(thisSize) + "."); m->mothurOutEndLine();
+                               size = thisSize;
                        }
                        
-                       m->mothurOut("Sampling " + toString(size) + " from " + toString(total) + "."); m->mothurOutEndLine();
-               }
-               
-               if (size == 0) { //user has not set size, set size = 10% samples size
-                       size = int (names.size() * 0.10);
-               }
-               
-               int thisSize = names.size();
-               if (size > thisSize) { m->mothurOut("Your fasta file only contains " + toString(thisSize) + " sequences. Setting size to " + toString(thisSize) + "."); m->mothurOutEndLine();
-                       size = thisSize;
+                       if (!pickedGroups) { m->mothurOut("Sampling " + toString(size) + " from " + toString(thisSize) + "."); m->mothurOutEndLine(); }
+
                }
-               
                random_shuffle(names.begin(), names.end());
                
-               if (!pickedGroups) { m->mothurOut("Sampling " + toString(size) + " from " + toString(thisSize) + "."); m->mothurOutEndLine(); }
-               
-               //randomly select a subset of those names to include in the subsample
                set<string> subset; //dont want repeat sequence names added
-               for (int j = 0; j < size; j++) {
-                       
-                       if (m->control_pressed) { return 0; }
-                       
-                       //get random sequence to add, making sure we have not already added it
-                       bool done = false;
-                       int myrand;
-                       while (!done) {
-                               myrand = (int)((float)(rand()) / (RAND_MAX / (thisSize-1) + 1));
+               if (persample) {
+                       for (int i = 0; i < Groups.size(); i++) {
                                
-                               if (subset.count(names[myrand]) == 0)  { 
+                               //randomly select a subset of those names from this group to include in the subsample
+                               for (int j = 0; j < size; j++) {
                                        
-                                       if (groupfile != "") { //if there is a groupfile given fill in group info
-                                               string group = groupMap->getGroup(names[myrand]);
-                                               if (group == "not found") { m->mothurOut("[ERROR]: " + names[myrand] + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
+                                       if (m->control_pressed) { return 0; }
+                                       
+                                       //get random sequence to add, making sure we have not already added it
+                                       bool done = false;
+                                       int myrand;
+                                       while (!done) {
+                                               myrand = (int)((float)(rand()) / (RAND_MAX / (thisSize-1) + 1));
                                                
-                                               if (pickedGroups) { //if hte user picked groups, we only want to keep the names of sequences from those groups
-                                                       if (m->inUsersGroups(group, Groups)) {
+                                               if (subset.count(names[myrand]) == 0)  { 
+                                                       
+                                                       string group = groupMap->getGroup(names[myrand]);
+                                                       if (group == "not found") { m->mothurOut("[ERROR]: " + names[myrand] + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
+                                                               
+                                                       if (group == Groups[i]) {       subset.insert(names[myrand]); break;    }
+                                               }
+                                       }
+                               }
+                       }
+               }else {
+                       //randomly select a subset of those names to include in the subsample
+                       for (int j = 0; j < size; j++) {
+                               
+                               if (m->control_pressed) { return 0; }
+                               
+                               //get random sequence to add, making sure we have not already added it
+                               bool done = false;
+                               int myrand;
+                               while (!done) {
+                                       myrand = (int)((float)(rand()) / (RAND_MAX / (thisSize-1) + 1));
+                                       
+                                       if (subset.count(names[myrand]) == 0)  { 
+                                               
+                                               if (groupfile != "") { //if there is a groupfile given fill in group info
+                                                       string group = groupMap->getGroup(names[myrand]);
+                                                       if (group == "not found") { m->mothurOut("[ERROR]: " + names[myrand] + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
+                                                       
+                                                       if (pickedGroups) { //if hte user picked groups, we only want to keep the names of sequences from those groups
+                                                               if (m->inUsersGroups(group, Groups)) {
+                                                                       subset.insert(names[myrand]); break;
+                                                               }
+                                                       }else{
                                                                subset.insert(names[myrand]); break;
                                                        }
-                                               }else{
+                                               }else{ //save everyone, group
                                                        subset.insert(names[myrand]); break;
-                                               }
-                                       }else{ //save everyone, group
-                                               subset.insert(names[myrand]); break;
-                                       }                                       
+                                               }                                       
+                                       }
                                }
-                       }
-               }       
-               
+                       }       
+               }
                //read through fasta file outputting only the names on the subsample list
                ifstream in;
                m->openInputFile(fastafile, in);
@@ -794,37 +846,59 @@ int SubSampleCommand::getSubSampleList() {
                }
                
                //make sure that if your picked groups size is not too big
-               if (pickedGroups) {
-                       int total = 0;
-                       for(int i = 0; i < Groups.size(); i++) {
-                               total += groupMap->getNumSeqs(Groups[i]);
-                       }
-                       
-                       if (size == 0) { //user has not set size, set size = 10% samples size
-                               size = int (total * 0.10);
-                       }
-                       
-                       if (total < size) { 
-                               m->mothurOut("Your size is too large for the number of groups you selected. Adjusting to " + toString(int (total * 0.10)) + "."); m->mothurOutEndLine();
-                               size = int (total * 0.10);
+               if (persample) {
+                       if (size == 0) { //user has not set size, set size = smallest samples size
+                               size = groupMap->getNumSeqs(Groups[0]);
+                               for (int i = 1; i < Groups.size(); i++) {
+                                       int thisSize = groupMap->getNumSeqs(Groups[i]);
+                                       
+                                       if (thisSize < size) {  size = thisSize;        }
+                               }
+                       }else { //make sure size is not too large
+                               int smallestSize = groupMap->getNumSeqs(Groups[0]);
+                               for (int i = 1; i < Groups.size(); i++) {
+                                       int thisSize = groupMap->getNumSeqs(Groups[i]);
+                                       
+                                       if (thisSize < smallestSize) {  smallestSize = thisSize;        }
+                               }
+                               if (smallestSize < size) { size = smallestSize; m->mothurOut("You have selected a size that is larger than your smallest sample, using your samllest sample size, " + toString(smallestSize) + "."); m->mothurOutEndLine(); }
                        }
                        
-                       m->mothurOut("Sampling " + toString(size) + " from " + toString(total) + "."); m->mothurOutEndLine();
+                       m->mothurOut("Sampling " + toString(size) + " from each group."); m->mothurOutEndLine();        
                }else{
-               
-                       if (size == 0) { //user has not set size, set size = 10% samples size
-                               size = int (list->getNumSeqs() * 0.10);
-                       }
-                       
-                       int thisSize = list->getNumSeqs();
-                       if (size > thisSize) { m->mothurOut("Your list file only contains " + toString(thisSize) + " sequences. Setting size to " + toString(thisSize) + "."); m->mothurOutEndLine();
-                               size = thisSize;
+                       if (pickedGroups) {
+                               int total = 0;
+                               for(int i = 0; i < Groups.size(); i++) {
+                                       total += groupMap->getNumSeqs(Groups[i]);
+                               }
+                               
+                               if (size == 0) { //user has not set size, set size = 10% samples size
+                                       size = int (total * 0.10);
+                               }
+                               
+                               if (total < size) { 
+                                       m->mothurOut("Your size is too large for the number of groups you selected. Adjusting to " + toString(int (total * 0.10)) + "."); m->mothurOutEndLine();
+                                       size = int (total * 0.10);
+                               }
+                               
+                               m->mothurOut("Sampling " + toString(size) + " from " + toString(total) + "."); m->mothurOutEndLine();
+                       }else{
+                               
+                               if (size == 0) { //user has not set size, set size = 10% samples size
+                                       size = int (list->getNumSeqs() * 0.10);
+                               }
+                               
+                               int thisSize = list->getNumSeqs();
+                               if (size > thisSize) { m->mothurOut("Your list file only contains " + toString(thisSize) + " sequences. Setting size to " + toString(thisSize) + "."); m->mothurOutEndLine();
+                                       size = thisSize;
+                               }
+                               
+                               m->mothurOut("Sampling " + toString(size) + " from " + toString(list->getNumSeqs()) + "."); m->mothurOutEndLine();
                        }
-                       
-                       m->mothurOut("Sampling " + toString(size) + " from " + toString(list->getNumSeqs()) + "."); m->mothurOutEndLine();
                }
                
                
+               //fill names
                for (int i = 0; i < list->getNumBins(); i++) {
                        string binnames = list->get(i);
                        
@@ -872,22 +946,43 @@ int SubSampleCommand::getSubSampleList() {
                }
                
                random_shuffle(names.begin(), names.end());
-               
+                       
                //randomly select a subset of those names to include in the subsample
                set<string> subset; //dont want repeat sequence names added
-               for (int j = 0; j < size; j++) {
-                       
-                       if (m->control_pressed) { break; }
-                       
-                       //get random sequence to add, making sure we have not already added it
-                       bool done = false;
-                       int myrand;
-                       while (!done) {
-                               myrand = (int)((float)(rand()) / (RAND_MAX / (names.size()-1) + 1));
+               if (persample) {
+                       for (int i = 0; i < Groups.size(); i++) {
                                
-                               if (subset.count(names[myrand]) == 0)  { subset.insert(names[myrand]); break;   }
+                               for (int j = 0; j < size; j++) {
+                                       
+                                       if (m->control_pressed) { break; }
+                                       
+                                       //get random sequence to add, making sure we have not already added it
+                                       bool done = false;
+                                       int myrand;
+                                       while (!done) {
+                                               myrand = (int)((float)(rand()) / (RAND_MAX / (names.size()-1) + 1));
+                                               
+                                               if (subset.count(names[myrand]) == 0) { //you are not already added
+                                                       if (groupMap->getGroup(names[myrand]) == Groups[i])  { subset.insert(names[myrand]); break;     }
+                                               }
+                                       }
+                               }
                        }
-               }       
+               }else{
+                       for (int j = 0; j < size; j++) {
+                               
+                               if (m->control_pressed) { break; }
+                               
+                               //get random sequence to add, making sure we have not already added it
+                               bool done = false;
+                               int myrand;
+                               while (!done) {
+                                       myrand = (int)((float)(rand()) / (RAND_MAX / (names.size()-1) + 1));
+                                       
+                                       if (subset.count(names[myrand]) == 0)  { subset.insert(names[myrand]); break;   }
+                               }
+                       }       
+               }
                
                if (groupfile != "") { 
                        //write out new groupfile
index b70050653fd5ded5a9b25b555efea08ae79b9565..b00defdd9d63e797675d8a7809e32fe1d92d6a63 100644 (file)
@@ -34,7 +34,7 @@ public:
 private:
        GlobalData* globaldata;
        
-       bool abort, pickedGroups, allLines;
+       bool abort, pickedGroups, allLines, persample;
        string listfile, groupfile, sharedfile, rabundfile, sabundfile, fastafile, namefile;
        set<string> labels; //holds labels to be used
        string groups, label, outputDir;