]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed metastats, added resize to cluster.classic, added code to kill children if...
authorwestcott <westcott>
Fri, 5 Nov 2010 20:02:24 +0000 (20:02 +0000)
committerwestcott <westcott>
Fri, 5 Nov 2010 20:02:24 +0000 (20:02 +0000)
29 files changed:
Mothur.xcodeproj/project.pbxproj
clusterclassic.cpp
clusterdoturcommand.cpp
commandfactory.cpp
datavector.hpp
getseqscommand.cpp
getseqscommand.h
metastats.h
metastats2.c
mothur
ordervector.cpp
ordervector.hpp
qualityscores.cpp
rabundvector.cpp
rabundvector.hpp
removeseqscommand.cpp
removeseqscommand.h
sabundvector.cpp
sabundvector.hpp
sharedordervector.cpp
sharedordervector.h
sharedrabundfloatvector.cpp
sharedrabundfloatvector.h
sharedrabundvector.cpp
sharedrabundvector.h
sharedsabundvector.cpp
sharedsabundvector.h
subsamplecommand.cpp
subsamplecommand.h

index 1c282d96b81dd6ee49eb177225076ffa91ef8894..855dad891d3b9b4e3b0422e57a3a5be488aa3c12 100644 (file)
                        };
                        buildConfigurationList = 1DEB919308733D9F0010E9CD /* Build configuration list for PBXProject "Mothur" */;
                        compatibilityVersion = "Xcode 3.0";
-                       developmentRegion = English;
                        hasScannedForEncodings = 1;
                        knownRegions = (
                                English,
index 41c1647bccf6196efb54b31333a7d5298feaaeed..0e48690dc57059bb2ab1af1df3da5df0ea71efa4 100644 (file)
@@ -226,7 +226,7 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) {
 //sets smallCol and smallRow, returns distance
 double ClusterClassic::getSmallCell() {
        try {
-               
+                       
                smallDist = aboveCutoff;
                smallRow = 1;
                smallCol = 0;
@@ -279,6 +279,10 @@ void ClusterClassic::clusterBins(){
 
                rabund->set(smallRow, rabund->get(smallRow)+rabund->get(smallCol));     
                rabund->set(smallCol, 0);       
+               for (int i = smallCol+1; i < rabund->size(); i++) {
+                       rabund->set((i-1), rabund->get(i));
+               }
+               rabund->resize((rabund->size()-1));
                rabund->setLabel(toString(smallDist));
 
        //      cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
@@ -296,6 +300,10 @@ void ClusterClassic::clusterNames(){
                
                list->set(smallRow, list->get(smallRow)+','+list->get(smallCol));
                list->set(smallCol, "");        
+               for (int i = smallCol+1; i < list->size(); i++) {
+                       list->set((i-1), list->get(i));
+               }
+               list->resize((list->size()-1));
                list->setLabel(toString(smallDist));
        
        //      cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
@@ -308,36 +316,21 @@ void ClusterClassic::clusterNames(){
 /***********************************************************************/
 void ClusterClassic::update(double& cutOFF){
        try {
-//cout << "before update " << endl;
 //print();             
                getSmallCell();
                
                int r, c;
                r = smallRow; c = smallCol;
-               //because we only store lt, we need to make sure we grab the right location
-               //if (smallRow < smallCol) { c = smallRow; r = smallCol; } //smallRow is really our column value
-               //else { r = smallRow; c = smallCol; } //smallRow is the row value
-               
-               //reset rows smallest distance
-               //rowSmallDists[r].dist = aboveCutoff; rowSmallDists[r].row = 0; rowSmallDists[r].col = 0;
-               //rowSmallDists[c].dist = aboveCutoff; rowSmallDists[c].row = 0; rowSmallDists[c].col = 0;
-               
-               //if your rows smallest distance is from smallRow or smallCol, reset
-               //for(int i=0;i<nseqs;i++){
-                       //if ((rowSmallDists[i].row == r) || (rowSmallDists[i].row == c) || (rowSmallDists[i].col == r) || (rowSmallDists[i].col == c)) {
-                       //      rowSmallDists[i].dist = aboveCutoff; rowSmallDists[i].row = 0; rowSmallDists[i].col = 0;
-                       //}
-               //}
-               
+                               
                for(int i=0;i<nseqs;i++){
                        if(i != r && i != c){
                                double distRow, distCol, newDist;
                                if (i > r) { distRow = dMatrix[i][r]; }
                                else { distRow =  dMatrix[r][i]; }
-                               
+
                                if (i > c) { distCol = dMatrix[i][c]; dMatrix[i][c] = aboveCutoff; } //like removeCell
                                else { distCol =  dMatrix[c][i]; dMatrix[c][i] = aboveCutoff; }
-                                       
+                               
                                if(method == "furthest"){
                                        newDist = max(distRow, distCol);
                                }
@@ -352,39 +345,33 @@ void ClusterClassic::update(double& cutOFF){
                                else if (method == "nearest"){
                                        newDist = min(distRow, distCol);
                                }
-                               
+                               //cout << "newDist = " << newDist << endl;      
                                if (i > r) { dMatrix[i][r] = newDist; }
                                else { dMatrix[r][i] = newDist; }
                                
-                               //if (newDist < rowSmallDists[i].dist) {  rowSmallDists[i].dist = newDist; rowSmallDists[i].col = r; rowSmallDists[i].row = i;  }
                        }
-                       //cout << "rowsmall = " << i << '\t' << rowSmallDists[i].dist << endl;  
                }
                        
                clusterBins();
                clusterNames();
-       
-               //find new small for 2 rows we just merged
-               //colDist temp(0,0,100.0);
-               //rowSmallDists[r] = temp;
-
-               //for (int i = 0; i < dMatrix[r].size(); i++) {
-               //      if (dMatrix[r][i] < rowSmallDists[r].dist) { rowSmallDists[r].dist = dMatrix[r][i]; rowSmallDists[r].col = r; rowSmallDists[r].row = i; }
-               //}
-               //for (int i = dMatrix[r].size()+1; i < dMatrix.size(); i++) {
-               //      if (dMatrix[i][dMatrix[r].size()] < rowSmallDists[r].dist) { rowSmallDists[r].dist = dMatrix[i][dMatrix[r].size()]; rowSmallDists[r].col = r; rowSmallDists[r].row = i; }
-               //}
                
-               //rowSmallDists[c] = temp;
-               //for (int i = 0; i < dMatrix[c].size(); i++) {
-               //      if (dMatrix[c][i] < rowSmallDists[c].dist) { rowSmallDists[c].dist = dMatrix[c][i]; rowSmallDists[c].col = c; rowSmallDists[c].row = i; }
-               //}
-               //for (int i = dMatrix[c].size()+1; i < dMatrix.size(); i++) {
-               //      if (dMatrix[i][dMatrix[c].size()] < rowSmallDists[c].dist) { rowSmallDists[c].dist = dMatrix[i][dMatrix[c].size()]; rowSmallDists[c].col = c; rowSmallDists[c].row = i; }
-               //}
+               //resize each row
+               for(int i=0;i<nseqs;i++){
+                       for(int j=c+1;j<dMatrix[i].size();j++){
+                               dMatrix[i][j-1]=dMatrix[i][j];
+                       }
+               }                       
+               
+               //resize each col
+               for(int i=c+1;i<nseqs;i++){
+                       for(int j=0;j < dMatrix[i-1].size();j++){
+                               dMatrix[i-1][j]=dMatrix[i][j];
+                       }
+               }       
                
-               //cout << "after update " << endl;
-               //print();
+               nseqs--;
+               dMatrix.pop_back();
+
        }
        catch(exception& e) {
                m->errorOut(e, "ClusterClassic", "update");
index 4987d4e9305b86496e9d7655a62d27e6a1c779a2..56fbd5c0d6c2b8e9380959bd6ba97435b28fcd70 100644 (file)
@@ -227,7 +227,7 @@ int ClusterDoturCommand::execute(){
                
                int estart = time(NULL);
        
-               while ((cluster->getSmallDist() < cutoff) && (cluster->getNSeqs() > 0)){
+               while ((cluster->getSmallDist() < cutoff) && (cluster->getNSeqs() > 1)){
                        if (m->control_pressed) { delete cluster; delete list; delete rabund; sabundFile.close();rabundFile.close();listFile.close();  for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());         } outputTypes.clear();  return 0;  }
                
                        cluster->update(cutoff);
index 927f3742216529f54ea12a769bae752501b7fe33..3ac378120919198ff3a91780e27a881c275364d4 100644 (file)
@@ -96,6 +96,7 @@
 #include "deuniqueseqscommand.h"
 #include "pairwiseseqscommand.h"
 #include "clusterdoturcommand.h"
+#include "subsamplecommand.h"
 
 
 /*******************************************************/
@@ -197,6 +198,7 @@ CommandFactory::CommandFactory(){
        commands["fastq.info"]                  = "fastq.info";
        commands["deunique.seqs"]               = "deunique.seqs";
        commands["cluster.classic"]             = "cluster.classic";
+       commands["sub.sample"]                  = "sub.sample";
        commands["pairwise.seqs"]               = "MPIEnabled";
        commands["pipeline.pds"]                = "MPIEnabled";
        commands["classify.seqs"]               = "MPIEnabled"; 
@@ -344,6 +346,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "deunique.seqs")                 {       command = new DeUniqueSeqsCommand(optionString);                        }
                else if(commandName == "pairwise.seqs")                 {       command = new PairwiseSeqsCommand(optionString);                        }
                else if(commandName == "cluster.classic")               {       command = new ClusterDoturCommand(optionString);                        }
+               else if(commandName == "sub.sample")                    {       command = new SubSampleCommand(optionString);                           }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -458,6 +461,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
                else if(commandName == "deunique.seqs")                 {       pipecommand = new DeUniqueSeqsCommand(optionString);                    }
                else if(commandName == "pairwise.seqs")                 {       pipecommand = new PairwiseSeqsCommand(optionString);                    }
                else if(commandName == "cluster.classic")               {       pipecommand = new ClusterDoturCommand(optionString);                    }
+               else if(commandName == "sub.sample")                    {       pipecommand = new SubSampleCommand(optionString);                               }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -560,6 +564,7 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "deunique.seqs")                 {       shellcommand = new DeUniqueSeqsCommand();                       }
                else if(commandName == "pairwise.seqs")                 {       shellcommand = new PairwiseSeqsCommand();                       }
                else if(commandName == "cluster.classic")               {       shellcommand = new ClusterDoturCommand();                       }
+               else if(commandName == "sub.sample")                    {       shellcommand = new SubSampleCommand();                          }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
index 0b09e0dc4c914f7a5d26461c092ab3cb250bc071..a5f96eb7f497691d3f2e0a58e976b158cc27d831 100644 (file)
@@ -33,6 +33,7 @@ public:
        virtual void resize(int) = 0;
        virtual int size()      = 0;
        virtual void print(ostream&) = 0;
+       virtual void clear() = 0;
        
        void setLabel(string l)         {       label = l;                      }
        string getLabel()                       {       return label;           }
index 4b93d1fdeea1aac9048013d755ceda466e5f37a1..9fbfb932e4cafd672374b0a31dc7d250151b095a 100644 (file)
@@ -14,7 +14,7 @@
 //**********************************************************************************************************************
 vector<string> GetSeqsCommand::getValidParameters(){   
        try {
-               string Array[] =  {"fasta","name", "group", "alignreport", "accnos", "dups", "list","taxonomy","outputdir","inputdir"};
+               string Array[] =  {"fasta","name", "group", "qfile","alignreport", "accnos", "dups", "list","taxonomy","outputdir","inputdir"};
                vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                return myArray;
        }
@@ -35,6 +35,7 @@ GetSeqsCommand::GetSeqsCommand(){
                outputTypes["group"] = tempOutNames;
                outputTypes["alignreport"] = tempOutNames;
                outputTypes["list"] = tempOutNames;
+               outputTypes["qfile"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
@@ -74,7 +75,7 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"fasta","name", "group", "alignreport", "accnos", "dups", "list","taxonomy","outputdir","inputdir"};
+                       string Array[] =  {"fasta","name", "group", "alignreport", "qfile", "accnos", "dups", "list","taxonomy","outputdir","inputdir"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -96,6 +97,7 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                        outputTypes["group"] = tempOutNames;
                        outputTypes["alignreport"] = tempOutNames;
                        outputTypes["list"] = tempOutNames;
+                       outputTypes["qfile"] = tempOutNames;
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
@@ -160,6 +162,14 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                }
+                               
+                               it = parameters.find("qfile");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
+                               }
                        }
 
                        
@@ -192,11 +202,15 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                        if (taxfile == "not open") { abort = true; }
                        else if (taxfile == "not found") {  taxfile = "";  }
                        
+                       qualfile = validParameter.validFile(parameters, "qfile", true);
+                       if (qualfile == "not open") { abort = true; }
+                       else if (qualfile == "not found") {  qualfile = "";  }
+                       
                        string usedDups = "true";
                        string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "false"; usedDups = ""; }
                        dups = m->isTrue(temp);
                        
-                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
+                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
                
                        if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }                       
 
@@ -212,9 +226,9 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
 
 void GetSeqsCommand::help(){
        try {
-               m->mothurOut("The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy or alignreport file.\n");
+               m->mothurOut("The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n");
                m->mothurOut("It outputs a file containing only the sequences in the .accnos file.\n");
-               m->mothurOut("The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, alignreport and dups.  You must provide accnos and at least one of the other parameters.\n");
+               m->mothurOut("The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos and at least one of the other parameters.\n");
                m->mothurOut("The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n");
                m->mothurOut("The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n");
                m->mothurOut("Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n");
@@ -245,6 +259,7 @@ int GetSeqsCommand::execute(){
                if (alignfile != "")            {               readAlign();    }
                if (listfile != "")                     {               readList();             }
                if (taxfile != "")                      {               readTax();              }
+               if (qualfile != "")                     {               readQual();             }
                
                if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());  } return 0; }
                
@@ -313,6 +328,71 @@ int GetSeqsCommand::readFasta(){
        }
 }
 //**********************************************************************************************************************
+int GetSeqsCommand::readQual(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" +  m->getExtension(qualfile);
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               
+               ifstream in;
+               m->openInputFile(qualfile, in);
+               string name;
+               
+               bool wroteSomething = false;
+               
+               
+               while(!in.eof()){       
+                       string saveName = "";
+                       string name = "";
+                       string scores = "";
+                       
+                       in >> name; 
+                               
+                       if (name.length() != 0) { 
+                               saveName = name.substr(1);
+                               while (!in.eof())       {       
+                                       char c = in.get(); 
+                                       if (c == 10 || c == 13){        break;  }
+                                       else { name += c; }     
+                               } 
+                               m->gobble(in);
+                       }
+                       
+                       while(in){
+                               char letter= in.get();
+                               if(letter == '>'){      in.putback(letter);     break;  }
+                               else{ scores += letter; }
+                       }
+                       
+                       m->gobble(in);
+                       
+                       if (names.count(saveName) != 0) {
+                               wroteSomething = true;
+                                               
+                               out << name << endl << scores;
+                       }
+                       
+                       m->gobble(in);
+               }
+               in.close();
+               out.close();
+               
+               
+               if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
+               outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetSeqsCommand", "readQual");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 int GetSeqsCommand::readList(){
        try {
                string thisOutputDir = outputDir;
index ea93b5a1c4e0427cc43157a5d77f995bb3f2308b..b249072e41aba8b7ff0501c1fc946b90c48dfd22 100644 (file)
@@ -29,7 +29,7 @@ class GetSeqsCommand : public Command {
        private:
                set<string> names;
                vector<string> outputNames;
-               string accnosfile, fastafile, namefile, groupfile, alignfile, listfile, taxfile, outputDir;
+               string accnosfile, fastafile, namefile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
                bool abort, dups;
                map<string, vector<string> > outputTypes;
                
@@ -40,6 +40,7 @@ class GetSeqsCommand : public Command {
                int readAccnos();
                int readList();
                int readTax();
+               int readQual();
                
 };
 
index 2f3cc62ac380f9eb211ad9a86d63d84116387f03..63fb0ee31dc85842b1c629fae51e56697d870fdf 100644 (file)
@@ -31,7 +31,7 @@ void calc_twosample_ts(double *Pmatrix,int *g,int *nc,int *nr,double
                       *Ts,double *Tinitial,double *counter1);
 void meanvar(double *pmatrix,int *g,int *nr,int *nc,double *storage);
 void start(double *Imatrix,int *g,int *nr,int *nc,double *testing,
-                       double storage[][9]);
+                       double**);
 
 int metastat_main (char*, int, int, double, int, double**, int);
 
index 136c0426f1609964322343157c148ff8971cc4dc..b8db243e9f04a05d84e23e2651e62429a4f98387 100644 (file)
@@ -22,18 +22,23 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
   }
  
   // Initialize the matrices
-  size = row*col;
-//printf("size = %d\n.", size);
-  double matrix[row][col];
-  double pmatrix[size],pmatrix2[size],permuted[size];  
-  double storage[row][9];
-//printf("here\n.", size);  
-  for (i=0;i<row;i++){
-       for (j =0;j<9;j++){
-      storage[i][j]=0;                 
-       }       
-  }
-   
+       size = row*col;
+       
+       double ** matrix, * pmatrix, * permuted, ** storage;
+       matrix = malloc(row*sizeof(double *));
+       storage =  malloc(row*sizeof(double *));
+       for (i = 0; i<row; i++){
+               matrix[i] = malloc(col*sizeof(double));
+       }
+       for (i = 0; i<row;i++){
+               storage[i] = malloc(9*sizeof(double));
+       }
+       
+       pmatrix = (double *) malloc(size*sizeof(double *));
+       permuted = (double *) malloc(size*sizeof(double *));
+       
+               
+       
   for(i=0; i<row; i++){
     for(j=0; j<col;j++){
       matrix[i][j]=data[i][j];
@@ -125,9 +130,12 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
   
   permutes = &B;
   ptt_size = B*row;
-  
+
   //changing ptt_size to row
-  double permuted_ttests[row], pvalues[row], tinitial[row];
+  double * permuted_ttests, * pvalues, * tinitial;
+  permuted_ttests = (double *) malloc(size*sizeof(double *));
+  pvalues = (double *) malloc(size*sizeof(double *));
+  tinitial = (double *) malloc(size*sizeof(double *));
   
   for(i=0;i<row;i++){
     permuted_ttests[i]=0;}
@@ -137,13 +145,17 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
     tinitial[i]=0; }
   
   // Find the initial values for the matrix.
   start(pmatrix,gvalue,nr,nc,tinitial,storage);
   
   // Start the calculations.
  
   if ( (col==2) || ((g-1)<8) || ((col-g+1) < 8) ){  
 
-  double fish[row], fish2[row];
+  double * fish, *fish2;
+  fish = (double *) malloc(size*sizeof(double *));
+  fish2 = (double *) malloc(size*sizeof(double *));
+         
   for(i=0;i<row;i++){
        fish[i]=0;
        fish2[i]=0;}
@@ -173,7 +185,8 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
        //   f21   f22
        
        int *nr, *nc, *ldtabl, *work;
-       int nrow=2, ncol=2, ldtable=2, workspace=100000;
+       int nrow=2, ncol=2, ldtable=2;
+       int workspace=(row*sizeof(double *)+size*sizeof(double *));
        double *expect, *prc, *emin,*prt,*pre;
        double e=0, prc1=0, emin1=0, prt1=0, pre1=0;
 
@@ -187,7 +200,7 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
        emin=&emin1;
        prt=&prt1;
        pre=&pre1;
-       
+
        fexact(nr,nc,data, ldtabl,expect,prc,emin,prt,pre,work);
        
        if (*pre>.999999999){
@@ -198,11 +211,14 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
     }
   }
   else{
-        
+printf("here before testp\n");          
   testp(permuted_ttests, permutes, permuted,pmatrix, nc, nr, gvalue,tinitial,pvalues);
         
               // Checks to make sure the matrix isn't sparse.
-  double sparse[row], sparse2[row];
+  double * sparse, * sparse2;
+  sparse = (double *) malloc(size*sizeof(double *));
+  sparse2 = (double *) malloc(size*sizeof(double *));
+         
   for(i=0;i<row;i++){
        sparse[i]=0;
        sparse2[i]=0;}
@@ -256,7 +272,7 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
        emin=&emin1;
        prt=&prt1;
        pre=&pre1;
-       
+printf("here before fexact2\n");               
        fexact(nr,nc,data, ldtabl,expect,prc,emin,prt,pre,work);
        
        if (*pre>.999999999){
@@ -271,8 +287,12 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
   }
   
   // Calculates the mean of counts (not normalized)
-  double temp[row][2];
-  
+  double ** temp;
+  temp = malloc(row*sizeof(double *));
+  for (i = 0; i<row; i++){
+        temp[i] = malloc(col*sizeof(double));
+  }
+printf("here\n");        
   for(j=0;j<row;j++){
        for(i=0;i<2;i++){
                temp[j][i]=0;
@@ -512,42 +532,45 @@ void meanvar(double *pmatrix,int *g,int *nr,int *nc,double *store){
 }
 
 void start(double *Imatrix,int *g,int *nr,int *nc,double *initial,
-               double storage[][9]){
+               double** storage){
   int i, a = *nr;
   a*=4;
-  
+       
   double store[a], tool[a], C1[*nr][3], C2[*nr][3];
   double nrows,ncols,gvalue, xbardiff=0, denom=0;
-  
   nrows = (double) *nr;
   ncols = (double) *nc;
   gvalue= (double) *g;
-  
+   
   meanvar(Imatrix,g,nr,nc,store);
-  
+   
   for(i=0;i<=a-1;i++){
     tool[i]=store[i];
   }
-  for (i=0; i<*nr;i++){
+
+  for (i=0; i<*nr;i++){ 
     C1[i][0]=tool[i]; //mean group 1
     storage[i][0]=C1[i][0];
     C1[i][1]=tool[i+*nr+*nr]; // var group 1
     storage[i][1]=C1[i][1];
     C1[i][2]=C1[i][1]/(gvalue-1);
     storage[i][2]=sqrt(C1[i][2]);
-    
+    printf("here2\n"); 
     C2[i][0]=tool[i+*nr]; // mean group 2
-    storage[i][4]=C2[i][0];    
+    storage[i][4]=C2[i][0]; 
     C2[i][1]=tool[i+*nr+*nr+*nr]; // var group 2 
-    storage[i][5]=C2[i][1];        
-    C2[i][2]=C2[i][1]/(ncols-gvalue+1);
+    storage[i][5]=C2[i][1];  
+       C2[i][2]=C2[i][1]/(ncols-gvalue+1);
     storage[i][6]=sqrt(C2[i][2]);   
+        
   }
   for (i=0; i<*nr; i++){
     xbardiff = C1[i][0]-C2[i][0];
     denom = sqrt(C1[i][2]+C2[i][2]);
     initial[i]=fabs(xbardiff/denom);
-  }                                                                                    
+  }     
 }
 
 
diff --git a/mothur b/mothur
index 6d8214658dae4debb9669f7f138585e9d4ae7408..cd2cdd584c4f689d352be463a832e46296d7badb 100755 (executable)
Binary files a/mothur and b/mothur differ
index 27c1e934fa8a70c18e84235365a78231bf8ce6e9..a6dff32386beb1cfeae07a1aa9f01decee5ea0f1 100644 (file)
@@ -73,7 +73,14 @@ int OrderVector::getMaxRank(){
        if(needToUpdate == 1){  updateStats();  }
        return maxRank;
 }
+/***********************************************************************/
 
+void OrderVector::clear(){
+       numBins = 0;
+       maxRank = 0;
+       numSeqs = 0;
+       data.clear();
+}
 /***********************************************************************/
 
 
index d652bbf52e49e207f135f22c617bb58f27b30f5d..0139a6404b833707909cb425885bb3be683becff 100644 (file)
@@ -35,6 +35,7 @@ public:
        void push_back(int);
        void resize(int);
        int size();
+       void clear();
        void print(string, ostream&);
        vector<int>::iterator begin();
        vector<int>::iterator end();
index fa78b69d653cb841c33833d6249bab6b6ecaf18c..f76d9826ee0224cb63d3520a30a0f7b100a5cecf 100644 (file)
@@ -45,7 +45,7 @@ QualityScores::QualityScores(ifstream& qFile, int l){
                else {
                        seqName = seqName.substr(1); 
                }
-
+               
                //m->getline(qFile, line);
                //istringstream qualStream(line);
        
@@ -57,6 +57,40 @@ QualityScores::QualityScores(ifstream& qFile, int l){
                
                //seqLength = qScores.size();   
                
+               /*while(!in.eof()){     
+                       string saveName = "";
+                       string name = "";
+                       string scores = "";
+                       
+                       in >> name; 
+                       //cout << name << endl;         
+                       if (name.length() != 0) { 
+                               saveName = name.substr(1);
+                               while (!in.eof())       {       
+                                       char c = in.get(); 
+                                       if (c == 10 || c == 13){        break;  }
+                                       else { name += c; }     
+                               } 
+                               m->gobble(in);
+                       }
+                       
+                       while(in){
+                               char letter= in.get();
+                               if(letter == '>'){      in.putback(letter);     break;  }
+                               else{ scores += letter; }
+                       }
+                       
+                //istringstream iss (scores,istringstream::in);
+                
+                //int count = 0; int tempScore;
+                //while (iss) { iss >> tempScore; count++; }
+                //cout << saveName << '\t' << count << endl;   
+                       
+                       m->gobble(in);
+               }*/             
+               
+               
+               
                for(int i=0;i<seqLength;i++){
                        qFile >> score;
                        qScores.push_back(score);
@@ -70,6 +104,7 @@ QualityScores::QualityScores(ifstream& qFile, int l){
        }                                                       
 
 }
+/**************************************************************************************************/
 
 /**************************************************************************************************/
 
index 43dbfacc78b845b5d247ba44484b464fd612d590..6cbaa0d896ad7d47105873c67cdc6d003329b83a 100644 (file)
@@ -114,7 +114,15 @@ int RAbundVector::get(int index){
        return data[index];
        
 }
+/***********************************************************************/
 
+void RAbundVector::clear(){
+       numBins = 0;
+       maxRank = 0;
+       numSeqs = 0;
+       data.clear();
+       
+}
 /***********************************************************************/
 
 void RAbundVector::push_back(int binSize){
index fab02a921a602524e7516bdcbd7efc64326cbbea..95165645bb4d82c88e6faf60007350798bbce929 100644 (file)
@@ -41,6 +41,7 @@ public:
        int sum();
        int sum(int);
        int numNZ();
+       void clear();
        vector<int>::reverse_iterator rbegin();
        vector<int>::reverse_iterator rend();
        
index f1804ed8b826d00b4993a708804733335bb1b194..384a360353abd1a801e80f1e580054d17deac67d 100644 (file)
@@ -14,7 +14,7 @@
 //**********************************************************************************************************************
 vector<string> RemoveSeqsCommand::getValidParameters(){        
        try {
-               string Array[] =  {"fasta","name", "group", "alignreport", "accnos", "list","taxonomy","outputdir","inputdir", "dups" };
+               string Array[] =  {"fasta","name", "group", "alignreport", "accnos", "qfile","list","taxonomy","outputdir","inputdir", "dups" };
                vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                return myArray;
        }
@@ -35,6 +35,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(){
                outputTypes["group"] = tempOutNames;
                outputTypes["alignreport"] = tempOutNames;
                outputTypes["list"] = tempOutNames;
+               outputTypes["qfile"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
@@ -74,7 +75,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"fasta","name", "group", "alignreport", "accnos", "list","taxonomy","outputdir","inputdir", "dups" };
+                       string Array[] =  {"fasta","name", "group", "alignreport", "accnos", "qfile", "list","taxonomy","outputdir","inputdir", "dups" };
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -96,6 +97,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                        outputTypes["group"] = tempOutNames;
                        outputTypes["alignreport"] = tempOutNames;
                        outputTypes["list"] = tempOutNames;
+                       outputTypes["qfile"] = tempOutNames;
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
@@ -160,6 +162,14 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                }
+                               
+                               it = parameters.find("qfile");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
+                               }
                        }
 
                        
@@ -191,6 +201,10 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                        taxfile = validParameter.validFile(parameters, "taxonomy", true);
                        if (taxfile == "not open") { abort = true; }
                        else if (taxfile == "not found") {  taxfile = "";  }
+                       
+                       qualfile = validParameter.validFile(parameters, "qfile", true);
+                       if (qualfile == "not open") { abort = true; }
+                       else if (qualfile == "not found") {  qualfile = "";  }                  
 
                        
                        string usedDups = "true";
@@ -201,7 +215,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                        }
                        dups = m->isTrue(temp);
                        
-                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, alignreport or list."); m->mothurOutEndLine(); abort = true; }
+                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
                        
                        if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }                       
                }
@@ -216,9 +230,9 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
 
 void RemoveSeqsCommand::help(){
        try {
-               m->mothurOut("The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, list, taxonomy or alignreport file.\n");
+               m->mothurOut("The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n");
                m->mothurOut("It outputs a file containing the sequences NOT in the .accnos file.\n");
-               m->mothurOut("The remove.seqs command parameters are accnos, fasta, name, group, list, taxonomy, alignreport and dups.  You must provide accnos and at least one of the file parameters.\n");
+               m->mothurOut("The remove.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos and at least one of the file parameters.\n");
                m->mothurOut("The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n");
                m->mothurOut("The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n");
                m->mothurOut("Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n");
@@ -249,6 +263,7 @@ int RemoveSeqsCommand::execute(){
                if (alignfile != "")            {               readAlign();    }
                if (listfile != "")                     {               readList();             }
                if (taxfile != "")                      {               readTax();              }
+               if (qualfile != "")                     {               readQual();             }
                
                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
                
@@ -304,7 +319,7 @@ int RemoveSeqsCommand::readFasta(){
                out.close();
                
                if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
-               outputTypes["fasta"].push_back(outputFileName); 
+               outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
                
                return 0;
                
@@ -315,6 +330,71 @@ int RemoveSeqsCommand::readFasta(){
        }
 }
 //**********************************************************************************************************************
+int RemoveSeqsCommand::readQual(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" +  m->getExtension(qualfile);
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               
+               ifstream in;
+               m->openInputFile(qualfile, in);
+               string name;
+               
+               bool wroteSomething = false;
+               
+               
+               while(!in.eof()){       
+                       string saveName = "";
+                       string name = "";
+                       string scores = "";
+                       
+                       in >> name; 
+                       
+                       if (name.length() != 0) { 
+                               saveName = name.substr(1);
+                               while (!in.eof())       {       
+                                       char c = in.get(); 
+                                       if (c == 10 || c == 13){        break;  }
+                                       else { name += c; }     
+                               } 
+                               m->gobble(in);
+                       }
+                       
+                       while(in){
+                               char letter= in.get();
+                               if(letter == '>'){      in.putback(letter);     break;  }
+                               else{ scores += letter; }
+                       }
+                       
+                       m->gobble(in);
+                       
+                       if (names.count(saveName) == 0) {
+                               wroteSomething = true;
+                               
+                               out << name << endl << scores;
+                       }
+                       
+                       m->gobble(in);
+               }
+               in.close();
+               out.close();
+               
+               
+               if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
+               outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveSeqsCommand", "readQual");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 int RemoveSeqsCommand::readList(){
        try {
                string thisOutputDir = outputDir;
@@ -375,7 +455,7 @@ int RemoveSeqsCommand::readList(){
                out.close();
                
                if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
-               outputTypes["list"].push_back(outputFileName); 
+               outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
                                
                return 0;
 
@@ -461,7 +541,7 @@ int RemoveSeqsCommand::readName(){
                out.close();
 
                if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
-               outputTypes["name"].push_back(outputFileName);
+               outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
                                
                return 0;
        }
@@ -505,7 +585,7 @@ int RemoveSeqsCommand::readGroup(){
                out.close();
                
                if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
-               outputTypes["group"].push_back(outputFileName); 
+               outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
                
                return 0;
        }
@@ -547,7 +627,7 @@ int RemoveSeqsCommand::readTax(){
                out.close();
                
                if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
-               outputTypes["taxonomy"].push_back(outputFileName);
+               outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
                
                return 0;
        }
@@ -613,7 +693,7 @@ int RemoveSeqsCommand::readAlign(){
                out.close();
                
                if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
-               outputTypes["alignreport"].push_back(outputFileName);
+               outputTypes["alignreport"].push_back(outputFileName); outputNames.push_back(outputFileName);
                
                return 0;
                
index 8321212fff48bb240c54f1a1df4516b393f7216f..de1e3d9d6aff8595a098cf8bf2fd6c1d5c79ec9f 100644 (file)
@@ -28,7 +28,7 @@ class RemoveSeqsCommand : public Command {
                
        private:
                set<string> names;
-               string accnosfile, fastafile, namefile, groupfile, alignfile, listfile, taxfile, outputDir;
+               string accnosfile, fastafile, namefile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
                bool abort, dups;
                vector<string> outputNames;
                map<string, vector<string> > outputTypes;
@@ -40,6 +40,7 @@ class RemoveSeqsCommand : public Command {
                void readAccnos();
                int readList();
                int readTax();
+               int readQual();
                
 };
 
index 6e4401bab964db2755e7ce1bb9f39c67703bf864..1bceec210779221ce0dfce33886b177e5250efd8 100644 (file)
@@ -153,7 +153,13 @@ void SAbundVector::print(string prefix, ostream& output){
        }
        output << endl;
 }
-
+/***********************************************************************/
+void SAbundVector::clear(){
+       numBins = 0;
+       maxRank = 0;
+       numSeqs = 0;
+       data.clear();   
+}
 /***********************************************************************/
 void SAbundVector::print(ostream& output){
        try {
index 769a35dd3a14c85a160ac81532b4d7aa3cd1914a..561294c319e94562ac810d8b924aec2ed73015ce 100644 (file)
@@ -40,6 +40,7 @@ public:
        int sum();
        void resize(int);
        int size();
+       void clear();
 
        void print(ostream&);
        void print(string, ostream&);
index 3944212ea99e688cea1e04af5bd559044869505d..15ee7fc4e414988f86bca49ccd38070a2d96ef22 100644 (file)
@@ -171,7 +171,14 @@ void SharedOrderVector::print(ostream& output){
        }
 }
 
+/***********************************************************************/
 
+void SharedOrderVector::clear(){
+       numBins = 0;
+       maxRank = 0;
+       numSeqs = 0;
+       data.clear();
+}
 /***********************************************************************/
 
 void SharedOrderVector::resize(int){
index 3568450d3059e231c310e18e7073b434516f7c07..6aa8ba1c1ed0dc6ef49f1aa5d8becac971852126 100644 (file)
@@ -24,6 +24,7 @@ struct individual {
                bool operator()(const individual& i1, const individual& i2) {
                return (i1.abundance > i2.abundance);
                }
+       individual() { group = ""; bin = 0; abundance = 0; }
 };
 
 struct individualFloat {
@@ -33,6 +34,7 @@ struct individualFloat {
                bool operator()(const individual& i1, const individual& i2) {
                return (i1.abundance > i2.abundance);
                }
+       individualFloat() { group = ""; bin = 0; abundance = 0.0; }
 };
 
 
@@ -66,6 +68,7 @@ public:
        vector<individual>::iterator end();
        void push_back(int, int, string);  //OTU, abundance, group  MUST CALL UPDATE STATS AFTER PUSHBACK!!!
        void updateStats();
+       void clear();
 
        int getNumBins();
        int getNumSeqs();
index 84b1cf56d60fc4f046218f2e5422e32fcad4ea21..52ffbb89b3137cf724ac9ae1adc501f85c2dbe4e 100644 (file)
@@ -131,11 +131,20 @@ void SharedRAbundFloatVector::set(int binNumber, float newBinSize, string groupn
                numSeqs += (newBinSize - oldBinSize);
        }
        catch(exception& e) {
-               m->errorOut(e, "SharedRAbundVector", "set");
+               m->errorOut(e, "SharedRAbundFloatVector", "set");
                exit(1);
        }
 }
+/***********************************************************************/
 
+void SharedRAbundFloatVector::clear(){
+       numBins = 0;
+       maxRank = 0;
+       numSeqs = 0;
+       data.clear();
+       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
+       lookup.clear();
+}
 /***********************************************************************/
 float SharedRAbundFloatVector::getAbundance(int index){
        return data[index].abundance;   
index f826d9ce4b525e61909324c3481dd6a7bd847146..a3407f48ec39aabed942dff22b91d4691c992539 100644 (file)
@@ -51,6 +51,7 @@ public:
        void push_back(float, string);  //abundance, groupname
        void pop_back();
        void resize(int);
+       void clear();
        int size();
        
        void print(ostream&);
index 9328cbd2b3da8cef5a0067bedced912ddbea399b..9276b7bfdc0ae441a883f42ddd0aabcddf1f04bd 100644 (file)
@@ -205,6 +205,16 @@ vector <individual> SharedRAbundVector::getData(){
 }
 /***********************************************************************/
 
+void SharedRAbundVector::clear(){
+       numBins = 0;
+       maxRank = 0;
+       numSeqs = 0;
+       data.clear();
+       for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
+       lookup.clear();
+}
+/***********************************************************************/
+
 void SharedRAbundVector::push_back(int binSize, string groupName){
        try {
                individual newGuy;
@@ -486,15 +496,15 @@ SharedOrderVector SharedRAbundVector::getSharedOrderVector() {
 OrderVector SharedRAbundVector::getOrderVector(map<string,int>* nameMap = NULL) {
        try {
                OrderVector ov;
-       
-               for(int i=0;i<data.size();i++){
+               for(int i=0;i<numBins;i++){
                        for(int j=0;j<data[i].abundance;j++){
                                ov.push_back(i);
                        }
                }
                random_shuffle(ov.begin(), ov.end());
-
+               
                ov.setLabel(label);     
+
                return ov;
        }
        catch(exception& e) {
index 8f4227eebe0c21823a6f25d22ed3e29dbee79ab3..6584d1d2e9198cb5fed6ebc3d43e9d35f608da00 100644 (file)
@@ -55,6 +55,7 @@ public:
        void pop_back();
        void resize(int);
        int size();
+       void clear();
        vector<individual>::reverse_iterator rbegin();
        vector<individual>::reverse_iterator rend();
        
index 4aea5890a341ccde6b110c2db37098f99c90aeee..1df608b75974d4460dfdb724c22e984ae2676789 100644 (file)
@@ -234,6 +234,15 @@ SharedOrderVector SharedSAbundVector::getSharedOrderVector() {
                exit(1);
        }
 }
+/***********************************************************************/
+
+void SharedSAbundVector::clear(){
+       numBins = 0;
+       maxRank = 0;
+       numSeqs = 0;
+       data.clear();
+}
+
 /***********************************************************************/
 OrderVector SharedSAbundVector::getOrderVector(map<string,int>* hold = NULL){
        try {
index 29b374523bf975653ff3b8943f2d967685a4644b..cd78a2e1a6722cc3015f90b7521d77ffbc6ae65b 100644 (file)
@@ -45,6 +45,7 @@ public:
        void pop_back();
        void resize(int);
        int size();
+       void clear();
 
        void print(ostream&);
                
index 04f6c7cac931576b696f3dc9d369e171b1b9d525..036341ff48f9de13d6684fe9632da08742ee7d82 100644 (file)
@@ -12,7 +12,7 @@
 //**********************************************************************************************************************
 vector<string> SubSampleCommand::getValidParameters(){ 
        try {
-               string Array[] =  {"groups","label","outputdir","inputdir"};
+               string Array[] =  {"fasta", "group", "list","shared","rabund", "name","sabund","size","groups","label","outputdir","inputdir"};
                vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                return myArray;
        }
@@ -31,6 +31,9 @@ SubSampleCommand::SubSampleCommand(){
                outputTypes["list"] = tempOutNames;
                outputTypes["rabund"] = tempOutNames;
                outputTypes["sabund"] = tempOutNames;
+               outputTypes["fasta"] = tempOutNames;
+               outputTypes["name"] = tempOutNames;
+               outputTypes["group"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "SubSampleCommand", "GetRelAbundCommand");
@@ -40,7 +43,8 @@ SubSampleCommand::SubSampleCommand(){
 //**********************************************************************************************************************
 vector<string> SubSampleCommand::getRequiredParameters(){      
        try {
-               vector<string> myArray;
+               string Array[] =  {"fasta","list","shared","rabund", "sabund","or"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                return myArray;
        }
        catch(exception& e) {
@@ -51,8 +55,7 @@ vector<string> SubSampleCommand::getRequiredParameters(){
 //**********************************************************************************************************************
 vector<string> SubSampleCommand::getRequiredFiles(){   
        try {
-               string Array[] =  {"shared","list","rabund","sabund","or"};
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               vector<string> myArray;
                return myArray;
        }
        catch(exception& e) {
@@ -73,8 +76,8 @@ SubSampleCommand::SubSampleCommand(string option) {
                
                else {
                        //valid paramters for this command
-                       string AlignArray[] =  {"groups","label","outputdir","inputdir"};
-                       vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+                       string Array[] =  {"fasta", "group", "list","shared","rabund", "sabund","name","size","groups","label","outputdir","inputdir"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
                        map<string,string> parameters = parser.getParameters();
@@ -82,7 +85,8 @@ SubSampleCommand::SubSampleCommand(string option) {
                        ValidParameters validParameter;
                        
                        //check to make sure all parameters are valid for command
-                       for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                       map<string,string>::iterator it;
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
                        
@@ -92,16 +96,105 @@ SubSampleCommand::SubSampleCommand(string option) {
                        outputTypes["list"] = tempOutNames;
                        outputTypes["rabund"] = tempOutNames;
                        outputTypes["sabund"] = tempOutNames;
+                       outputTypes["fasta"] = tempOutNames;
+                       outputTypes["name"] = tempOutNames;
+                       outputTypes["group"] = tempOutNames;
                                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
-                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
-                               outputDir = ""; 
-                               outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it       
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("list");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["list"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("fasta");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("shared");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["shared"] = inputDir + it->second;           }
+                               }
+                               
+                               it = parameters.find("group");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("sabund");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["sabund"] = inputDir + it->second;           }
+                               }
+                               
+                               it = parameters.find("rabund");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["rabund"] = inputDir + it->second;           }
+                               }
+                               
+                               it = parameters.find("name");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["name"] = inputDir + it->second;             }
+                               }
                        }
                        
-                       //make sure the user has already run the read.otu command
-                       if ((globaldata->getSharedFile() == "") && (globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { m->mothurOut("You must read a list, sabund, rabund or shared file before you can use the sub.sample command."); m->mothurOutEndLine(); abort = true; }
-
+                       //check for required parameters
+                       listfile = validParameter.validFile(parameters, "list", true);
+                       if (listfile == "not open") { listfile = ""; abort = true; }
+                       else if (listfile == "not found") { listfile = ""; }    
+                       
+                       sabundfile = validParameter.validFile(parameters, "sabund", true);
+                       if (sabundfile == "not open") { sabundfile = ""; abort = true; }        
+                       else if (sabundfile == "not found") { sabundfile = ""; }
+                       
+                       rabundfile = validParameter.validFile(parameters, "rabund", true);
+                       if (rabundfile == "not open") { rabundfile = ""; abort = true; }        
+                       else if (rabundfile == "not found") { rabundfile = ""; }
+                       
+                       fastafile = validParameter.validFile(parameters, "fasta", true);
+                       if (fastafile == "not open") { fastafile = ""; abort = true; }  
+                       else if (fastafile == "not found") { fastafile = ""; }
+                       
+                       sharedfile = validParameter.validFile(parameters, "shared", true);
+                       if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
+                       else if (sharedfile == "not found") { sharedfile = ""; }
+                       
+                       namefile = validParameter.validFile(parameters, "name", true);
+                       if (namefile == "not open") { namefile = ""; abort = true; }    
+                       else if (namefile == "not found") { namefile = ""; }
+                       
+                       groupfile = validParameter.validFile(parameters, "group", true);
+                       if (groupfile == "not open") { groupfile = ""; abort = true; }  
+                       else if (groupfile == "not found") { groupfile = ""; }
+                       
+                       
                        //check for optional parameter and set defaults
                        // ...at some point should added some additional type checking...
                        label = validParameter.validFile(parameters, "label", false);                   
@@ -111,12 +204,6 @@ SubSampleCommand::SubSampleCommand(string option) {
                                else { allLines = 1;  }
                        }
                        
-                       //if the user has not specified any labels use the ones from read.otu
-                       if (label == "") {  
-                               allLines = globaldata->allLines; 
-                               labels = globaldata->labels; 
-                       }
-                       
                        groups = validParameter.validFile(parameters, "groups", false);                 
                        if (groups == "not found") { groups = ""; pickedGroups = false; }
                        else { 
@@ -125,6 +212,20 @@ SubSampleCommand::SubSampleCommand(string option) {
                                globaldata->Groups = Groups;
                        }
                        
+                       string temp = validParameter.validFile(parameters, "size", false);              if (temp == "not found"){       temp = "0";             }
+                       convert(temp, size);  
+                       
+                       if ((namefile != "") && (fastafile == "")) { m->mothurOut("You may only use a namefile with a fastafile."); m->mothurOutEndLine(); abort = true; }
+                       
+                       if ((fastafile == "") && (listfile == "") && (sabundfile == "") && (rabundfile == "") && (sharedfile == "")) {
+                               m->mothurOut("You must provide a fasta, list, sabund, rabund or shared file as an input file."); m->mothurOutEndLine(); abort = true; }
+                       
+                       if (pickedGroups && ((groupfile == "") && (sharedfile == ""))) { 
+                               m->mothurOut("You cannot pick groups without a valid group file or shared file."); m->mothurOutEndLine(); abort = true; }
+                       
+                       if ((groupfile != "") && ((fastafile == "") && (listfile == ""))) { 
+                               m->mothurOut("Group file only valid with listfile or fastafile."); m->mothurOutEndLine(); abort = true; }
+                       
                }
 
        }
@@ -138,15 +239,16 @@ SubSampleCommand::SubSampleCommand(string option) {
 
 void SubSampleCommand::help(){
        try {
-               m->mothurOut("The get.relabund command can only be executed after a successful read.otu command of a list and group or shared file.\n");
-               m->mothurOut("The get.relabund command parameters are groups, scale and label.  No parameters are required.\n");
+               m->mothurOut("The sub.sample command is designed to be used as a way to normalize your data, or create a smaller set from your original set.\n");
+               m->mothurOut("The sub.sample command parameters are fasta, name, list, group, rabund, sabund, shared, groups, size and label.  You must provide a fasta, list, sabund, rabund or shared file as an input file.\n");
                m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
                m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
-               m->mothurOut("The scale parameter allows you to select what scale you would like to use. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n");
-               m->mothurOut("The get.relabund command should be in the following format: get.relabund(groups=yourGroups, label=yourLabels).\n");
-               m->mothurOut("Example get.relabund(groups=A-B-C, scale=averagegroup).\n");
+               m->mothurOut("The size parameter allows you indicate the size of your subsample.\n");
+               m->mothurOut("The size parameter is not set: with shared file size=number of seqs in smallest sample, with all other files, 10% of number of seqs.\n");
+               m->mothurOut("The sub.sample command should be in the following format: sub.sample(list=yourListFile, group=yourGroupFile, groups=yourGroups, label=yourLabels).\n");
+               m->mothurOut("Example sub.sample(list=abrecovery.fn.list, group=abrecovery.groups, groups=B-C, size=20).\n");
                m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
-               m->mothurOut("The get.relabund command outputs a .relabund file.\n");
+               m->mothurOut("The sub.sample command outputs a .subsample file.\n");
                m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
 
        }
@@ -158,8 +260,7 @@ void SubSampleCommand::help(){
 
 //**********************************************************************************************************************
 
-SubSampleCommand::~SubSampleCommand(){
-}
+SubSampleCommand::~SubSampleCommand(){}
 
 //**********************************************************************************************************************
 
@@ -168,47 +269,15 @@ int SubSampleCommand::execute(){
        
                if (abort == true) { return 0; }
                
-               string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "subsample" +  m->getExtension(globaldata->inputFileName);
-               ofstream out;
-               m->openOutputFile(outputFileName, out);
-               out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
-               
-               string format = globaldata->getFormat();
-               
-               read = new ReadOTUFile(globaldata->inputFileName);      
-               read->read(&*globaldata); 
-               input = globaldata->ginput;
-               
-               if (format == "sharedfile") {
-                       lookup = input->getSharedRAbundVectors();
-                       outputTypes["shared"].push_back(outputFileName);
-                       getSubSampleShared(lookup, out);
-               }else if (format == "list") { 
-                       list = globaldata->glist;
-                       outputTypes["list"].push_back(outputFileName);
-                       //getSubSamplesList();
-               }else if (format == "rabund") { 
-                       rabund = globaldata->rabund;
-                       outputTypes["rabund"].push_back(outputFileName);
-                       //getSubSamplesRabund();
-               
-               }else if (format == "sabund") { 
-                       sabund = globaldata->sabund;
-                       outputTypes["sabund"].push_back(outputFileName);
-                       //getSubSamplesSabund();
-               }
-               
-               out.close();
-                                       
-               //reset groups parameter
-               delete input; globaldata->ginput = NULL;
-               delete read;
-               
-               if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;}
-               
+               if (sharedfile != "")   {   getSubSampleShared();       }
+               //if (listfile != "")           {   getSubSampleList();         }
+               //if (rabund != "")             {   getSubSampleRabund();       }
+               //if (sabundfile != "") {   getSubSampleSabund();       }
+               //if (fastafile != "")  {   getSubSampleFasta();        }
+                               
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); 
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
                m->mothurOutEndLine();
                
                return 0;
@@ -219,26 +288,35 @@ int SubSampleCommand::execute(){
        }
 }
 //**********************************************************************************************************************
-int SubSampleCommand::getSubSampleShared(vector<SharedRAbundVector*>& thislookup, ofstream& filename) {
+int SubSampleCommand::getSubSampleShared() {
        try {
+               
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "subsample" + m->getExtension(sharedfile);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+               
+               InputData* input = new InputData(sharedfile, "sharedfile");
+               vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
+               string lastLabel = lookup[0]->getLabel();
        
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> processedLabels;
                set<string> userLabels = labels;
 
-               string lastLabel = lookup[0]->getLabel();
        
                //as long as you are not at the end of the file or done wih the lines you want
                while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
-                       if (m->control_pressed) {  return 0;  }
+                       if (m->control_pressed) {  out.close(); return 0;  }
        
                        if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
 
                                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
                                
-                               //process lookup
-                               
-                                                               
+                               processShared(lookup, out);
                                                                                                                                
                                processedLabels.insert(lookup[0]->getLabel());
                                userLabels.erase(lookup[0]->getLabel());
@@ -252,7 +330,8 @@ int SubSampleCommand::getSubSampleShared(vector<SharedRAbundVector*>& thislookup
                                lookup = input->getSharedRAbundVectors(lastLabel);
                                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
                                
-                               //process lookup                                
+                               processShared(lookup, out);
+                               
                                processedLabels.insert(lookup[0]->getLabel());
                                userLabels.erase(lookup[0]->getLabel());
                                
@@ -269,7 +348,7 @@ int SubSampleCommand::getSubSampleShared(vector<SharedRAbundVector*>& thislookup
                }
                
                
-               if (m->control_pressed) {  return 0;  }
+               if (m->control_pressed) {  out.close(); return 0;  }
 
                //output error messages about any remaining user labels
                set<string>::iterator it;
@@ -291,15 +370,12 @@ int SubSampleCommand::getSubSampleShared(vector<SharedRAbundVector*>& thislookup
                        
                        m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
                        
-                       //process lookup
-                       
-                       
+                       processShared(lookup, out);
                        
                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
                }
        
-               //reset groups parameter
-               globaldata->Groups.clear();  
+               out.close();  
                                
                return 0;
  
@@ -309,8 +385,74 @@ int SubSampleCommand::getSubSampleShared(vector<SharedRAbundVector*>& thislookup
                exit(1);
        }
 }
-
-
+//**********************************************************************************************************************
+int SubSampleCommand::processShared(vector<SharedRAbundVector*>& thislookup, ofstream& out) {
+       try {
+               
+               if (pickedGroups) { eliminateZeroOTUS(thislookup); }
+               
+               if (size == 0) { //user has not set size, set size = smallest samples size
+                       size = thislookup[0]->getNumSeqs();
+                       for (int i = 1; i < thislookup.size(); i++) {
+                               int thisSize = thislookup[i]->getNumSeqs();
+                               
+                               if (thisSize < size) {  size = thisSize;        }
+                       }
+               }
+               
+               int numBins = thislookup[0]->getNumBins();
+               for (int i = 0; i < thislookup.size(); i++) {           
+                       int thisSize = thislookup[i]->getNumSeqs();
+                               
+                       if (thisSize != size) {
+                               
+                               string thisgroup = thislookup[i]->getGroup();
+       
+                               OrderVector* order = new OrderVector();
+                               for(int p=0;p<numBins;p++){
+                                       for(int j=0;j<thislookup[i]->getAbundance(p);j++){
+                                               order->push_back(p);
+                                       }
+                               }
+                               random_shuffle(order->begin(), order->end());
+                               
+                               SharedRAbundVector* temp = new SharedRAbundVector(numBins);
+                               temp->setLabel(thislookup[i]->getLabel());
+                               temp->setGroup(thislookup[i]->getGroup());
+                               
+                               delete thislookup[i];
+                               thislookup[i] = temp;
+                               
+                               
+                               for (int j = 0; j < size; j++) {
+                                       //get random number to sample from order between 0 and thisSize-1.
+                                       int myrand = (int)((float)(rand()) / (RAND_MAX / (thisSize-1) + 1));
+                                       
+                                       int bin = order->get(myrand);
+                                       
+                                       int abund = thislookup[i]->getAbundance(bin);
+                                       thislookup[i]->set(bin, (abund+1), thisgroup);
+                               }       
+                               delete order;
+                       }
+               }
+               
+               //subsampling may have created some otus with no sequences in them
+               eliminateZeroOTUS(thislookup);
+               
+               for (int i = 0; i < thislookup.size(); i++) {
+                       out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
+                       thislookup[i]->print(out);
+               }
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SubSampleCommand", "eliminateZeroOTUS");
+               exit(1);
+       }
+}
 //**********************************************************************************************************************
 int SubSampleCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
        try {
@@ -326,7 +468,7 @@ int SubSampleCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup)
                //for each bin
                for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
                        if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
-               
+                       
                        //look at each sharedRabund and make sure they are not all zero
                        bool allZero = true;
                        for (int j = 0; j < thislookup.size(); j++) {
@@ -340,13 +482,14 @@ int SubSampleCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup)
                                }
                        }
                }
-
+               
                for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
-
+               thislookup.clear();
+               
                thislookup = newLookup;
                
                return 0;
+               
        }
        catch(exception& e) {
                m->errorOut(e, "SubSampleCommand", "eliminateZeroOTUS");
@@ -357,3 +500,4 @@ int SubSampleCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup)
 //**********************************************************************************************************************
 
 
+
index 98085ee9d02cd163e8d9ed76022f5a0c2ee59fea..f068ac32eb975f6fa246fe654d84f56db0e33c3a 100644 (file)
  */
  
 #include "command.hpp"
-#include "inputdata.h"
-#include "readotu.h"
+#include "globaldata.hpp"
 #include "sharedrabundvector.h"
+#include "listvector.hpp"
+#include "rabundvector.hpp"
+#include "inputdata.h"
 
-class GlobalData;
 
 class SubSampleCommand : public Command {
 
@@ -32,21 +33,19 @@ public:
        
 private:
        GlobalData* globaldata;
-       ReadOTUFile* read;
-       InputData* input;
-       vector<SharedRAbundVector*> lookup;
-       ListVector* list;
-       RAbundVector* rabund;
-       SAbundVector* sabund;
        
-       bool abort, allLines, pickedGroups;
+       bool abort, pickedGroups, allLines;
+       string listfile, groupfile, sharedfile, rabundfile, sabundfile, fastafile, namefile;
        set<string> labels; //holds labels to be used
        string groups, label, outputDir;
        vector<string> Groups, outputNames;
        map<string, vector<string> > outputTypes;
+       int size;
        
        int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
-       int getSubSampleShared(vector<SharedRAbundVector*>&, ofstream&);
+       int getSubSampleShared();
+       int processShared(vector<SharedRAbundVector*>&, ofstream&);
+       
 };
 
 #endif