}
else if (templateFileName == "not open") { abort = true; }
- candidateFileName = validParameter.validFile(parameters, "candidate", true);
- if (candidateFileName == "not found") {
- mothurOut("candidate is a required parameter for the align.seqs command.");
- mothurOutEndLine();
- abort = true;
+ candidateFileName = validParameter.validFile(parameters, "candidate", false);
+ if (candidateFileName == "not found") { mothurOut("candidate is a required parameter for the align.seqs command."); mothurOutEndLine(); abort = true; }
+ else {
+ splitAtDash(candidateFileName, candidateFileNames);
+
+ //go through files and make sure they are good, if not, then disregard them
+ for (int i = 0; i < candidateFileNames.size(); i++) {
+ int ableToOpen;
+ ifstream in;
+ ableToOpen = openInputFile(candidateFileNames[i], in);
+ if (ableToOpen == 1) {
+ mothurOut(candidateFileNames[i] + " will be disregarded."); mothurOutEndLine();
+ //erase from file list
+ candidateFileNames.erase(candidateFileNames.begin()+i);
+ i--;
+ }
+ in.close();
+ }
+
+ //make sure there is at least one valid file left
+ if (candidateFileNames.size() == 0) { mothurOut("no valid files."); mothurOutEndLine(); abort = true; }
}
- else if (candidateFileName == "not open") { abort = true; }
+
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
try {
mothurOut("The align.seqs command reads a file containing sequences and creates an alignment file and a report file.\n");
mothurOut("The align.seqs command parameters are template, candidate, search, ksize, align, match, mismatch, gapopen and gapextend.\n");
- mothurOut("The template and candidate parameters are required.\n");
+		mothurOut("The template and candidate parameters are required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n");
mothurOut("The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer and blast. The default is kmer.\n");
mothurOut("The align parameter allows you to specify the alignment method to use. Your options are: gotoh, needleman, blast and noalign. The default is needleman.\n");
mothurOut("The ksize parameter allows you to specify the kmer size for finding most similar template to candidate. The default is 8.\n");
mothurOutEndLine();
alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
}
- mothurOut("Aligning sequences...");
- mothurOutEndLine();
-
- string alignFileName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + "align";
- string reportFileName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + "align.report";
- string accnosFileName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + "flip.accnos";
-
- int numFastaSeqs = 0;
- int start = time(NULL);
+ for (int s = 0; s < candidateFileNames.size(); s++) {
+ mothurOut("Aligning sequences from " + candidateFileNames[s] + " ..." ); mothurOutEndLine();
+
+ string alignFileName = candidateFileNames[s].substr(0,candidateFileNames[s].find_last_of(".")+1) + "align";
+ string reportFileName = candidateFileNames[s].substr(0,candidateFileNames[s].find_last_of(".")+1) + "align.report";
+ string accnosFileName = candidateFileNames[s].substr(0,candidateFileNames[s].find_last_of(".")+1) + "flip.accnos";
+
+ int numFastaSeqs = 0;
+ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+ int start = time(NULL);
+
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- if(processors == 1){
+ if(processors == 1){
+ ifstream inFASTA;
+ openInputFile(candidateFileNames[s], inFASTA);
+ numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+ inFASTA.close();
+
+ lines.push_back(new linePair(0, numFastaSeqs));
+
+ driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
+
+ //delete accnos file if its blank else report to user
+ if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); }
+ else {
+				mothurOut("Some of your sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
+				if (!flip) {
+					mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse complement as well.");
+				}else{ mothurOut(" If the reverse complement proved to be better it was reported."); }
+				mothurOutEndLine();
+ }
+ }
+ else{
+ vector<int> positions;
+ processIDS.resize(0);
+
+ ifstream inFASTA;
+ openInputFile(candidateFileNames[s], inFASTA);
+
+ string input;
+ while(!inFASTA.eof()){
+ input = getline(inFASTA);
+ if (input.length() != 0) {
+ if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
+ }
+ }
+ inFASTA.close();
+
+ numFastaSeqs = positions.size();
+
+ int numSeqsPerProcessor = numFastaSeqs / processors;
+
+ for (int i = 0; i < processors; i++) {
+ long int startPos = positions[ i * numSeqsPerProcessor ];
+ if(i == processors - 1){
+ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
+ }
+ lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+ }
+
+ createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
+
+ rename((alignFileName + toString(processIDS[0]) + ".temp").c_str(), alignFileName.c_str());
+ rename((reportFileName + toString(processIDS[0]) + ".temp").c_str(), reportFileName.c_str());
+
+ //append alignment and report files
+ for(int i=1;i<processors;i++){
+ appendAlignFiles((alignFileName + toString(processIDS[i]) + ".temp"), alignFileName);
+ remove((alignFileName + toString(processIDS[i]) + ".temp").c_str());
+
+ appendReportFiles((reportFileName + toString(processIDS[i]) + ".temp"), reportFileName);
+ remove((reportFileName + toString(processIDS[i]) + ".temp").c_str());
+ }
+
+ vector<string> nonBlankAccnosFiles;
+ //delete blank accnos files generated with multiple processes
+ for(int i=0;i<processors;i++){
+ if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
+ nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
+ }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str()); }
+ }
+
+ //append accnos files
+ if (nonBlankAccnosFiles.size() != 0) {
+ rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
+
+ for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
+ appendAlignFiles(nonBlankAccnosFiles[h], accnosFileName);
+ remove(nonBlankAccnosFiles[h].c_str());
+ }
+				mothurOut("Some of your sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
+				if (!flip) {
+					mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse complement as well.");
+				}else{ mothurOut(" If the reverse complement proved to be better it was reported."); }
+				mothurOutEndLine();
+ }
+ }
+#else
ifstream inFASTA;
- openInputFile(candidateFileName, inFASTA);
+		openInputFile(candidateFileNames[s], inFASTA);
numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
inFASTA.close();
lines.push_back(new linePair(0, numFastaSeqs));
-
+
-		driver(lines[0], alignFileName, reportFileName, accnosFileName);
+		driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
//delete accnos file if its blank else report to user
}else{ mothurOut(" If the reverse compliment proved to be better it was reported."); }
mothurOutEndLine();
}
- }
- else{
- vector<int> positions;
- processIDS.resize(0);
-
- ifstream inFASTA;
- openInputFile(candidateFileName, inFASTA);
- string input;
- while(!inFASTA.eof()){
- input = getline(inFASTA);
- if (input.length() != 0) {
- if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
- }
- }
- inFASTA.close();
-
- numFastaSeqs = positions.size();
-
- int numSeqsPerProcessor = numFastaSeqs / processors;
-
- for (int i = 0; i < processors; i++) {
- long int startPos = positions[ i * numSeqsPerProcessor ];
- if(i == processors - 1){
- numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
- }
- lines.push_back(new linePair(startPos, numSeqsPerProcessor));
- }
-
- createProcesses(alignFileName, reportFileName, accnosFileName);
-
- rename((alignFileName + toString(processIDS[0]) + ".temp").c_str(), alignFileName.c_str());
- rename((reportFileName + toString(processIDS[0]) + ".temp").c_str(), reportFileName.c_str());
+#endif
- //append alignment and report files
- for(int i=1;i<processors;i++){
- appendAlignFiles((alignFileName + toString(processIDS[i]) + ".temp"), alignFileName);
- remove((alignFileName + toString(processIDS[i]) + ".temp").c_str());
-
- appendReportFiles((reportFileName + toString(processIDS[i]) + ".temp"), reportFileName);
- remove((reportFileName + toString(processIDS[i]) + ".temp").c_str());
- }
- vector<string> nonBlankAccnosFiles;
- //delete blank accnos files generated with multiple processes
- for(int i=0;i<processors;i++){
- if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
- nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
- }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str()); }
- }
- //append accnos files
- if (nonBlankAccnosFiles.size() != 0) {
- rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
-
- for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
- appendAlignFiles(nonBlankAccnosFiles[h], accnosFileName);
- remove(nonBlankAccnosFiles[h].c_str());
- }
- mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
- if (!flip) {
- mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well.");
- }else{ mothurOut(" If the reverse compliment proved to be better it was reported."); }
- mothurOutEndLine();
- }
- }
-#else
- ifstream inFASTA;
- openInputFile(candidateFileName, inFASTA);
- numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
- inFASTA.close();
-
- lines.push_back(new linePair(0, numFastaSeqs));
-
- driver(lines[0], alignFileName, reportFileName, accnosFileName);
-
- //delete accnos file if its blank else report to user
- if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); }
- else {
- mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
- if (!flip) {
- mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well.");
- }else{ mothurOut(" If the reverse compliment proved to be better it was reported."); }
+ mothurOut("It took " + toString(time(NULL) - start) + " secs to align " + toString(numFastaSeqs) + " sequences.");
+ mothurOutEndLine();
mothurOutEndLine();
}
-#endif
-
-
-
- mothurOut("It took " + toString(time(NULL) - start) + " secs to align " + toString(numFastaSeqs) + " sequences.");
- mothurOutEndLine();
- mothurOutEndLine();
-
return 0;
}
catch(exception& e) {
//**********************************************************************************************************************
-int AlignCommand::driver(linePair* line, string alignFName, string reportFName, string accnosFName){
+int AlignCommand::driver(linePair* line, string alignFName, string reportFName, string accnosFName, string filename){
try {
ofstream alignmentFile;
openOutputFile(alignFName, alignmentFile);
NastReport report(reportFName);
ifstream inFASTA;
- openInputFile(candidateFileName, inFASTA);
+ openInputFile(filename, inFASTA);
inFASTA.seekg(line->start);
for(int i=0;i<line->numSeqs;i++){
-
+
Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA);
int origNumBases = candidateSeq->getNumBases();
string originalUnaligned = candidateSeq->getUnaligned();
//report progress
if((i+1) % 100 == 0){ mothurOut(toString(i+1)); mothurOutEndLine(); }
-
}
//report progress
if((line->numSeqs) % 100 != 0){ mothurOut(toString(line->numSeqs)); mothurOutEndLine(); }
/**************************************************************************************************/
-void AlignCommand::createProcesses(string alignFileName, string reportFileName, string accnosFName) {
+void AlignCommand::createProcesses(string alignFileName, string reportFileName, string accnosFName, string filename) {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp");
+ driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp", filename);
exit(0);
}else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
}
AlignmentDB* templateDB;
Alignment* alignment;
- int driver(linePair*, string, string, string);
- void createProcesses(string, string, string);
+ int driver(linePair*, string, string, string, string);
+ void createProcesses(string, string, string, string);
void appendAlignFiles(string, string);
void appendReportFiles(string, string);
string candidateFileName, templateFileName, distanceFileName, search, align;
float match, misMatch, gapOpen, gapExtend, threshold;
int processors, kmerSize;
+ vector<string> candidateFileNames;
bool abort, flip;
};
mothurOut("It took " + toString(time(NULL) - start) + " seconds get probabilities. "); mothurOutEndLine();
}
catch(exception& e) {
- errorOut(e, "Bayesian", "getTaxonomy");
+ errorOut(e, "Bayesian", "Bayesian");
exit(1);
}
}
~Bayesian() {};
string getTaxonomy(Sequence*);
- //map<string, int> getConfidenceScores() { return taxConfidenceScore; }
private:
- //map<string, vector<double> > taxonomyProbability; //probability of a word being in a given taxonomy.
- //taxonomyProbability[bacteria;][0] = probabtility that a sequence within bacteria; would contain kmer 0;
- //taxonomyProbability[bacteria;][1] = probabtility that a sequence within bacteria; would contain kmer 1...
-
vector< vector<float> > wordGenusProb; //vector of maps from genus to probability
//wordGenusProb[0][392] = probability that a sequence within genus that's index in the tree is 392 would contain kmer 0;
string bootstrapResults(vector<int>, int, int);
int getMostProbableTaxonomy(vector<int>);
void readProbFile(ifstream&, ifstream&);
- //map<string, int> parseTaxMap(string);
};
#include "blastdb.hpp"
/**************************************************************************************************/
-
Classify::Classify(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch) : taxFile(tfile), templateFile(tempFile) {
- try {
+ try {
+
readTaxonomy(taxFile);
int start = time(NULL);
taxonomy[name] = taxInfo;
- //itTax = taxList.find(taxInfo);
- //if (itTax == taxList.end()) { //this is new taxonomy
- //taxList[taxInfo] = 1;
- //}else { taxList[taxInfo]++; }
phyloTree->addSeqToTree(name, taxInfo);
gobble(inTax);
string taxFile, templateFile, simpleTax;
vector<string> names;
- //map<string, int> taxConfidenceScore;
void readTaxonomy(string);
vector<string> parseTax(string);
else {
//valid paramters for this command
- string AlignArray[] = {"template","fasta","search","ksize","method","processors","taxonomy","match","mismatch","gapopen","gapextend","numwanted","cutoff","probs","iters"};
+ string AlignArray[] = {"template","fasta","name","search","ksize","method","processors","taxonomy","match","mismatch","gapopen","gapextend","numwanted","cutoff","probs","iters"};
vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
OptionParser parser(option);
}
else if (templateFileName == "not open") { abort = true; }
- fastaFileName = validParameter.validFile(parameters, "fasta", true);
- if (fastaFileName == "not found") {
- mothurOut("fasta is a required parameter for the classify.seqs command.");
- mothurOutEndLine();
- abort = true;
+ fastaFileName = validParameter.validFile(parameters, "fasta", false);
+ if (fastaFileName == "not found") { mothurOut("fasta is a required parameter for the classify.seqs command."); mothurOutEndLine(); abort = true; }
+ else {
+ splitAtDash(fastaFileName, fastaFileNames);
+
+ //go through files and make sure they are good, if not, then disregard them
+ for (int i = 0; i < fastaFileNames.size(); i++) {
+ int ableToOpen;
+ ifstream in;
+ ableToOpen = openInputFile(fastaFileNames[i], in);
+ if (ableToOpen == 1) {
+ mothurOut(fastaFileNames[i] + " will be disregarded."); mothurOutEndLine();
+ //erase from file list
+ fastaFileNames.erase(fastaFileNames.begin()+i);
+ i--;
+ }
+ in.close();
+ }
+
+ //make sure there is at least one valid file left
+ if (fastaFileNames.size() == 0) { mothurOut("no valid files."); mothurOutEndLine(); abort = true; }
}
- else if (fastaFileName == "not open") { abort = true; }
+
taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true);
if (taxonomyFileName == "not found") {
abort = true;
}
else if (taxonomyFileName == "not open") { abort = true; }
+
+
+ namefile = validParameter.validFile(parameters, "name", false);
+	if (namefile == "not found") { namefile = ""; }
+ else {
+ splitAtDash(namefile, namefileNames);
+
+ //go through files and make sure they are good, if not, then disregard them
+ for (int i = 0; i < namefileNames.size(); i++) {
+ int ableToOpen;
+ ifstream in;
+ ableToOpen = openInputFile(namefileNames[i], in);
+ if (ableToOpen == 1) { mothurOut("Unable to match name file with fasta file."); mothurOutEndLine(); abort = true; }
+ in.close();
+ }
+ }
+ if (namefile != "") {
+ if (namefileNames.size() != fastaFileNames.size()) { abort = true; mothurOut("If you provide a name file, you must have one for each fasta file."); mothurOutEndLine(); }
+ }
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
try {
mothurOut("The classify.seqs command reads a fasta file containing sequences and creates a .taxonomy file and a .tax.summary file.\n");
mothurOut("The classify.seqs command parameters are template, fasta, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend, numwanted and probs.\n");
- mothurOut("The template, fasta and taxonomy parameters are required.\n");
+		mothurOut("The template, fasta and taxonomy parameters are required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n");
mothurOut("The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer and blast. The default is kmer.\n");
mothurOut("The method parameter allows you to specify classification method to use. Your options are: bayesian and knn. The default is bayesian.\n");
mothurOut("The ksize parameter allows you to specify the kmer size for finding most similar template to candidate. The default is 8.\n");
classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters);
}
- int numFastaSeqs = 0;
+
+ for (int s = 0; s < fastaFileNames.size(); s++) {
- string newTaxonomyFile = getRootName(fastaFileName) + getRootName(taxonomyFileName) + "taxonomy";
- string tempTaxonomyFile = getRootName(fastaFileName) + "taxonomy.temp";
- string taxSummary = getRootName(fastaFileName) + getRootName(taxonomyFileName) + "tax.summary";
+ //read namefile
+ if(namefile != "") {
+ nameMap.clear(); //remove old names
+
+ ifstream inNames;
+ openInputFile(namefileNames[s], inNames);
+
+ string firstCol, secondCol;
+ while(!inNames.eof()) {
+ inNames >> firstCol >> secondCol; gobble(inNames);
+ nameMap[firstCol] = getNumNames(secondCol); //ex. seq1 seq1,seq3,seq5 -> seq1 = 3.
+ }
+ inNames.close();
+ }
- int start = time(NULL);
+ mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); mothurOutEndLine();
+ string newTaxonomyFile = getRootName(fastaFileNames[s]) + getRootName(taxonomyFileName) + "taxonomy";
+ string tempTaxonomyFile = getRootName(fastaFileNames[s]) + "taxonomy.temp";
+ string taxSummary = getRootName(fastaFileNames[s]) + getRootName(taxonomyFileName) + "tax.summary";
+
+ int start = time(NULL);
+ int numFastaSeqs = 0;
+ for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
+
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- if(processors == 1){
+ if(processors == 1){
+ ifstream inFASTA;
+ openInputFile(fastaFileNames[s], inFASTA);
+ numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+ inFASTA.close();
+
+ lines.push_back(new linePair(0, numFastaSeqs));
+
+ driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
+ }
+ else{
+ vector<int> positions;
+ processIDS.resize(0);
+
+ ifstream inFASTA;
+ openInputFile(fastaFileNames[s], inFASTA);
+
+ string input;
+ while(!inFASTA.eof()){
+ input = getline(inFASTA);
+ if (input.length() != 0) {
+ if(input[0] == '>'){ int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
+ }
+ }
+ inFASTA.close();
+
+ numFastaSeqs = positions.size();
+
+ int numSeqsPerProcessor = numFastaSeqs / processors;
+
+ for (int i = 0; i < processors; i++) {
+ int startPos = positions[ i * numSeqsPerProcessor ];
+ if(i == processors - 1){
+ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
+ }
+ lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+ }
+ createProcesses(newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
+
+ rename((newTaxonomyFile + toString(processIDS[0]) + ".temp").c_str(), newTaxonomyFile.c_str());
+ rename((tempTaxonomyFile + toString(processIDS[0]) + ".temp").c_str(), tempTaxonomyFile.c_str());
+
+ for(int i=1;i<processors;i++){
+ appendTaxFiles((newTaxonomyFile + toString(processIDS[i]) + ".temp"), newTaxonomyFile);
+ appendTaxFiles((tempTaxonomyFile + toString(processIDS[i]) + ".temp"), tempTaxonomyFile);
+ remove((newTaxonomyFile + toString(processIDS[i]) + ".temp").c_str());
+ remove((tempTaxonomyFile + toString(processIDS[i]) + ".temp").c_str());
+ }
+
+ }
+#else
ifstream inFASTA;
- openInputFile(fastaFileName, inFASTA);
+ openInputFile(fastaFileNames[s], inFASTA);
numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
inFASTA.close();
lines.push_back(new linePair(0, numFastaSeqs));
-
- driver(lines[0], newTaxonomyFile, tempTaxonomyFile);
- }
- else{
- vector<int> positions;
- processIDS.resize(0);
- ifstream inFASTA;
- openInputFile(fastaFileName, inFASTA);
+ driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
+#endif
+ //make taxonomy tree from new taxonomy file
+ PhyloTree taxaBrowser;
- string input;
- while(!inFASTA.eof()){
- input = getline(inFASTA);
- if (input.length() != 0) {
- if(input[0] == '>'){ int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
- }
+ ifstream in;
+ openInputFile(tempTaxonomyFile, in);
+
+ //read in users taxonomy file and add sequences to tree
+ string name, taxon;
+ while(!in.eof()){
+ in >> name >> taxon; gobble(in);
+
+ if (namefile != "") {
+ itNames = nameMap.find(name);
+
+ if (itNames == nameMap.end()) {
+ mothurOut(name + " is not in your name file please correct."); mothurOutEndLine(); exit(1);
+ }else{
+ for (int i = 0; i < itNames->second; i++) {
+ taxaBrowser.addSeqToTree(name+toString(i), taxon); //add it as many times as there are identical seqs
+ }
+ }
+ }else { taxaBrowser.addSeqToTree(name, taxon); } //add it once
}
- inFASTA.close();
+ in.close();
+
+ taxaBrowser.assignHeirarchyIDs(0);
+
+ taxaBrowser.binUnclassified();
- numFastaSeqs = positions.size();
+ remove(tempTaxonomyFile.c_str());
- int numSeqsPerProcessor = numFastaSeqs / processors;
+ //print summary file
+ ofstream outTaxTree;
+ openOutputFile(taxSummary, outTaxTree);
+ taxaBrowser.print(outTaxTree);
+ outTaxTree.close();
- for (int i = 0; i < processors; i++) {
- int startPos = positions[ i * numSeqsPerProcessor ];
- if(i == processors - 1){
- numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
- }
- lines.push_back(new linePair(startPos, numSeqsPerProcessor));
- }
- createProcesses(newTaxonomyFile, tempTaxonomyFile);
+ //output taxonomy with the unclassified bins added
+ ifstream inTax;
+ openInputFile(newTaxonomyFile, inTax);
- rename((newTaxonomyFile + toString(processIDS[0]) + ".temp").c_str(), newTaxonomyFile.c_str());
- rename((tempTaxonomyFile + toString(processIDS[0]) + ".temp").c_str(), tempTaxonomyFile.c_str());
+ ofstream outTax;
+ string unclass = newTaxonomyFile + ".unclass.temp";
+ openOutputFile(unclass, outTax);
- for(int i=1;i<processors;i++){
- appendTaxFiles((newTaxonomyFile + toString(processIDS[i]) + ".temp"), newTaxonomyFile);
- appendTaxFiles((tempTaxonomyFile + toString(processIDS[i]) + ".temp"), tempTaxonomyFile);
- remove((newTaxonomyFile + toString(processIDS[i]) + ".temp").c_str());
- remove((tempTaxonomyFile + toString(processIDS[i]) + ".temp").c_str());
+ //get maxLevel from phylotree so you know how many 'unclassified's to add
+ int maxLevel = taxaBrowser.getMaxLevel();
+
+ //read taxfile - this reading and rewriting is done to preserve the confidence sscores.
+ while (!inTax.eof()) {
+ inTax >> name >> taxon; gobble(inTax);
+
+ string newTax = addUnclassifieds(taxon, maxLevel);
+
+ outTax << name << '\t' << newTax << endl;
}
+ inTax.close();
+ outTax.close();
+ remove(newTaxonomyFile.c_str());
+ rename(unclass.c_str(), newTaxonomyFile.c_str());
+
+ mothurOutEndLine();
+ mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); mothurOutEndLine(); mothurOutEndLine();
}
-#else
- ifstream inFASTA;
- openInputFile(fastaFileName, inFASTA);
- numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
- inFASTA.close();
- lines.push_back(new linePair(0, numFastaSeqs));
-
- driver(lines[0], newTaxonomyFile, tempTaxonomyFile);
-#endif
delete classify;
+ return 0;
+ }
+ catch(exception& e) {
+ errorOut(e, "ClassifySeqsCommand", "execute");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+string ClassifySeqsCommand::addUnclassifieds(string tax, int maxlevel) {
+ try{
+ string newTax, taxon;
+ int level = 0;
- //make taxonomy tree from new taxonomy file
- ifstream inTaxonomy;
- openInputFile(tempTaxonomyFile, inTaxonomy);
-
- string accession, taxaList;
- PhyloTree taxaBrowser;
-
- //read in users taxonomy file and add sequences to tree
- while(!inTaxonomy.eof()){
- inTaxonomy >> accession >> taxaList;
-
- taxaBrowser.addSeqToTree(accession, taxaList);
-
- gobble(inTaxonomy);
+ //keep what you have counting the levels
+ while (tax.find_first_of(';') != -1) {
+ //get taxon
+ taxon = tax.substr(0,tax.find_first_of(';'));
+ tax = tax.substr(tax.find_first_of(';')+1, tax.length());
+			newTax += taxon + ";";
+ level++;
}
- inTaxonomy.close();
- remove(tempTaxonomyFile.c_str());
-
- taxaBrowser.assignHeirarchyIDs(0);
- taxaBrowser.binUnclassified();
-
- ofstream outTaxTree;
- openOutputFile(taxSummary, outTaxTree);
- taxaBrowser.print(outTaxTree);
-
- mothurOutEndLine();
- mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences.");
- mothurOutEndLine();
- mothurOutEndLine();
+ //add "unclassified" until you reach maxLevel
+ while (level < maxlevel) {
+ newTax += "unclassified;";
+ level++;
+ }
- return 0;
+ return newTax;
}
catch(exception& e) {
- errorOut(e, "ClassifySeqsCommand", "execute");
+ errorOut(e, "ClassifySeqsCommand", "addUnclassifieds");
exit(1);
}
}
+
/**************************************************************************************************/
-void ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile) {
+void ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string filename) {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp");
+ driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", filename);
exit(0);
}else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
}
//**********************************************************************************************************************
-int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFName){
+int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFName, string filename){
try {
ofstream outTax;
openOutputFile(taxFName, outTax);
openOutputFile(tempTFName, outTaxSimple);
ifstream inFASTA;
- openInputFile(fastaFileName, inFASTA);
+ openInputFile(filename, inFASTA);
inFASTA.seekg(line->start);
};
vector<int> processIDS; //processid
vector<linePair*> lines;
+ vector<string> fastaFileNames;
+ vector<string> namefileNames;
+ map<string, int> nameMap;
+ map<string, int>::iterator itNames;
Classify* classify;
- string fastaFileName, templateFileName, distanceFileName, search, method, taxonomyFileName;
+ string fastaFileName, templateFileName, distanceFileName, namefile, search, method, taxonomyFileName;
int processors, kmerSize, numWanted, cutoff, iters;
float match, misMatch, gapOpen, gapExtend;
bool abort, probs;
- int driver(linePair*, string, string);
+ int driver(linePair*, string, string, string);
void appendTaxFiles(string, string);
- void createProcesses(string, string);
+ void createProcesses(string, string, string);
+ string addUnclassifieds(string, int);
};
#endif
Estimators.clear();
//allow user to run help
- if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
+ if(option == "help") { validCalculator = new ValidCalculators(); help(); delete validCalculator; abort = true; }
else {
//valid paramters for this command
}
//make sure the user has already run the read.otu command
- if ((globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { mothurOut("You must read a list, sabund or rabund before you can use the collect.single command."); mothurOutEndLine(); abort = true; }
+ if ((globaldata->getSharedFile() == "") && (globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { mothurOut("You must read a list, sabund, rabund or shared file before you can use the collect.single command."); mothurOutEndLine(); abort = true; }
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
temp = validParameter.validFile(parameters, "size", false); if (temp == "not found") { temp = "0"; }
convert(temp, size);
-
- if (abort == false) {
- string fileNameRoot = getRootName(globaldata->inputFileName);
- int i;
- validCalculator = new ValidCalculators();
-
- for (i=0; i<Estimators.size(); i++) {
- if (validCalculator->isValidCalculator("single", Estimators[i]) == true) {
- if (Estimators[i] == "sobs") {
- cDisplays.push_back(new CollectDisplay(new Sobs(), new OneColumnFile(fileNameRoot+"sobs")));
- }else if (Estimators[i] == "chao") {
- cDisplays.push_back(new CollectDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"chao")));
- }else if (Estimators[i] == "nseqs") {
- cDisplays.push_back(new CollectDisplay(new NSeqs(), new OneColumnFile(fileNameRoot+"nseqs")));
- }else if (Estimators[i] == "coverage") {
- cDisplays.push_back(new CollectDisplay(new Coverage(), new OneColumnFile(fileNameRoot+"coverage")));
- }else if (Estimators[i] == "ace") {
- cDisplays.push_back(new CollectDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"ace")));
- }else if (Estimators[i] == "jack") {
- cDisplays.push_back(new CollectDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+"jack")));
- }else if (Estimators[i] == "shannon") {
- cDisplays.push_back(new CollectDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"shannon")));
- }else if (Estimators[i] == "npshannon") {
- cDisplays.push_back(new CollectDisplay(new NPShannon(), new OneColumnFile(fileNameRoot+"np_shannon")));
- }else if (Estimators[i] == "simpson") {
- cDisplays.push_back(new CollectDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"simpson")));
- }else if (Estimators[i] == "bootstrap") {
- cDisplays.push_back(new CollectDisplay(new Bootstrap(), new OneColumnFile(fileNameRoot+"bootstrap")));
- }else if (Estimators[i] == "geometric") {
- cDisplays.push_back(new CollectDisplay(new Geom(), new OneColumnFile(fileNameRoot+"geometric")));
- }else if (Estimators[i] == "qstat") {
- cDisplays.push_back(new CollectDisplay(new QStat(), new OneColumnFile(fileNameRoot+"qstat")));
- }else if (Estimators[i] == "logseries") {
- cDisplays.push_back(new CollectDisplay(new LogSD(), new OneColumnFile(fileNameRoot+"logseries")));
- }else if (Estimators[i] == "bergerparker") {
- cDisplays.push_back(new CollectDisplay(new BergerParker(), new OneColumnFile(fileNameRoot+"bergerparker")));
- }else if (Estimators[i] == "bstick") {
- cDisplays.push_back(new CollectDisplay(new BStick(), new ThreeColumnFile(fileNameRoot+"bstick")));
- }else if (Estimators[i] == "goodscoverage") {
- cDisplays.push_back(new CollectDisplay(new GoodsCoverage(), new OneColumnFile(fileNameRoot+"goodscoverage")));
- }else if (Estimators[i] == "efron") {
- cDisplays.push_back(new CollectDisplay(new Efron(size), new OneColumnFile(fileNameRoot+"efron")));
- }else if (Estimators[i] == "boneh") {
- cDisplays.push_back(new CollectDisplay(new Boneh(size), new OneColumnFile(fileNameRoot+"boneh")));
- }else if (Estimators[i] == "solow") {
- cDisplays.push_back(new CollectDisplay(new Solow(size), new OneColumnFile(fileNameRoot+"solow")));
- }else if (Estimators[i] == "shen") {
- cDisplays.push_back(new CollectDisplay(new Shen(size, abund), new OneColumnFile(fileNameRoot+"shen")));
- }
- }
- }
- }
}
}
//**********************************************************************************************************************
-CollectCommand::~CollectCommand(){
- if (abort == false) {
- //delete order;
- delete input; globaldata->ginput = NULL;
- delete read;
- delete validCalculator;
- globaldata->gorder = NULL;
- }
-}
+//Destructor is intentionally empty: per-run objects (read, input, order,
+//validCalculator, cDisplays) are now released at the end of execute() so the
+//command can be run once per group when a shared file is parsed.
+CollectCommand::~CollectCommand(){}
//**********************************************************************************************************************
if (abort == true) { return 0; }
- //if the users entered no valid calculators don't execute command
- if (cDisplays.size() == 0) { return 0; }
-
- read = new ReadOTUFile(globaldata->inputFileName);
- read->read(&*globaldata);
-
- order = globaldata->gorder;
- string lastLabel = order->getLabel();
- input = globaldata->ginput;
-
- //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
- set<string> processedLabels;
- set<string> userLabels = labels;
+ if ((globaldata->getFormat() != "sharedfile")) { inputFileNames.push_back(globaldata->inputFileName); }
+ else { inputFileNames = parseSharedFile(globaldata->getSharedFile()); globaldata->setFormat("rabund"); }
- while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
-
- if(allLines == 1 || labels.count(order->getLabel()) == 1){
-
- cCurve = new Collect(order, cDisplays);
- cCurve->getCurve(freq);
- delete cCurve;
+ for (int p = 0; p < inputFileNames.size(); p++) {
- mothurOut(order->getLabel()); mothurOutEndLine();
- processedLabels.insert(order->getLabel());
- userLabels.erase(order->getLabel());
+ string fileNameRoot = getRootName(inputFileNames[p]);
+ globaldata->inputFileName = inputFileNames[p];
+
+ if (inputFileNames.size() > 1) {
+ mothurOutEndLine(); mothurOut("Processing group " + groups[p]); mothurOutEndLine(); mothurOutEndLine();
+ }
+ validCalculator = new ValidCalculators();
+ for (int i=0; i<Estimators.size(); i++) {
+ if (validCalculator->isValidCalculator("single", Estimators[i]) == true) {
+ if (Estimators[i] == "sobs") {
+ cDisplays.push_back(new CollectDisplay(new Sobs(), new OneColumnFile(fileNameRoot+"sobs")));
+ }else if (Estimators[i] == "chao") {
+ cDisplays.push_back(new CollectDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"chao")));
+ }else if (Estimators[i] == "nseqs") {
+ cDisplays.push_back(new CollectDisplay(new NSeqs(), new OneColumnFile(fileNameRoot+"nseqs")));
+ }else if (Estimators[i] == "coverage") {
+ cDisplays.push_back(new CollectDisplay(new Coverage(), new OneColumnFile(fileNameRoot+"coverage")));
+ }else if (Estimators[i] == "ace") {
+ cDisplays.push_back(new CollectDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"ace")));
+ }else if (Estimators[i] == "jack") {
+ cDisplays.push_back(new CollectDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+"jack")));
+ }else if (Estimators[i] == "shannon") {
+ cDisplays.push_back(new CollectDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"shannon")));
+ }else if (Estimators[i] == "npshannon") {
+ cDisplays.push_back(new CollectDisplay(new NPShannon(), new OneColumnFile(fileNameRoot+"np_shannon")));
+ }else if (Estimators[i] == "simpson") {
+ cDisplays.push_back(new CollectDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"simpson")));
+ }else if (Estimators[i] == "bootstrap") {
+ cDisplays.push_back(new CollectDisplay(new Bootstrap(), new OneColumnFile(fileNameRoot+"bootstrap")));
+ }else if (Estimators[i] == "geometric") {
+ cDisplays.push_back(new CollectDisplay(new Geom(), new OneColumnFile(fileNameRoot+"geometric")));
+ }else if (Estimators[i] == "qstat") {
+ cDisplays.push_back(new CollectDisplay(new QStat(), new OneColumnFile(fileNameRoot+"qstat")));
+ }else if (Estimators[i] == "logseries") {
+ cDisplays.push_back(new CollectDisplay(new LogSD(), new OneColumnFile(fileNameRoot+"logseries")));
+ }else if (Estimators[i] == "bergerparker") {
+ cDisplays.push_back(new CollectDisplay(new BergerParker(), new OneColumnFile(fileNameRoot+"bergerparker")));
+ }else if (Estimators[i] == "bstick") {
+ cDisplays.push_back(new CollectDisplay(new BStick(), new ThreeColumnFile(fileNameRoot+"bstick")));
+ }else if (Estimators[i] == "goodscoverage") {
+ cDisplays.push_back(new CollectDisplay(new GoodsCoverage(), new OneColumnFile(fileNameRoot+"goodscoverage")));
+ }else if (Estimators[i] == "efron") {
+ cDisplays.push_back(new CollectDisplay(new Efron(size), new OneColumnFile(fileNameRoot+"efron")));
+ }else if (Estimators[i] == "boneh") {
+ cDisplays.push_back(new CollectDisplay(new Boneh(size), new OneColumnFile(fileNameRoot+"boneh")));
+ }else if (Estimators[i] == "solow") {
+ cDisplays.push_back(new CollectDisplay(new Solow(size), new OneColumnFile(fileNameRoot+"solow")));
+ }else if (Estimators[i] == "shen") {
+ cDisplays.push_back(new CollectDisplay(new Shen(size, abund), new OneColumnFile(fileNameRoot+"shen")));
+ }
+ }
}
- //you have a label the user want that is smaller than this label and the last label has not already been processed
- if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
- string saveLabel = order->getLabel();
+
+
+ //if the users entered no valid calculators don't execute command
+ if (cDisplays.size() == 0) { return 0; }
+
+ read = new ReadOTUFile(inputFileNames[p]);
+ read->read(&*globaldata);
- delete order;
+ order = globaldata->gorder;
+ string lastLabel = order->getLabel();
+ input = globaldata->ginput;
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+
+ if(allLines == 1 || labels.count(order->getLabel()) == 1){
+
+ cCurve = new Collect(order, cDisplays);
+ cCurve->getCurve(freq);
+ delete cCurve;
+
+ mothurOut(order->getLabel()); mothurOutEndLine();
+ processedLabels.insert(order->getLabel());
+ userLabels.erase(order->getLabel());
+
+
+ }
+ //you have a label the user want that is smaller than this label and the last label has not already been processed
+ if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = order->getLabel();
+
+ delete order;
+ order = (input->getOrderVector(lastLabel));
+
+ cCurve = new Collect(order, cDisplays);
+ cCurve->getCurve(freq);
+ delete cCurve;
+
+ mothurOut(order->getLabel()); mothurOutEndLine();
+ processedLabels.insert(order->getLabel());
+ userLabels.erase(order->getLabel());
+
+ //restore real lastlabel to save below
+ order->setLabel(saveLabel);
+ }
+
+ lastLabel = order->getLabel();
+
+ delete order;
+ order = (input->getOrderVector());
+ }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
+ needToRun = true;
+ }else {
+ mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ if (order != NULL) { delete order; }
order = (input->getOrderVector(lastLabel));
+ mothurOut(order->getLabel()); mothurOutEndLine();
+
cCurve = new Collect(order, cDisplays);
cCurve->getCurve(freq);
delete cCurve;
-
- mothurOut(order->getLabel()); mothurOutEndLine();
- processedLabels.insert(order->getLabel());
- userLabels.erase(order->getLabel());
-
- //restore real lastlabel to save below
- order->setLabel(saveLabel);
+ delete order;
}
- lastLabel = order->getLabel();
-
- delete order;
- order = (input->getOrderVector());
+ for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
+ cDisplays.clear();
+ delete input; globaldata->ginput = NULL;
+ delete read;
+ globaldata->gorder = NULL;
+ delete validCalculator;
}
- //output error messages about any remaining user labels
- set<string>::iterator it;
- bool needToRun = false;
- for (it = userLabels.begin(); it != userLabels.end(); it++) {
- mothurOut("Your file does not include the label " + *it);
- if (processedLabels.count(lastLabel) != 1) {
- mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
- needToRun = true;
- }else {
- mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
- }
- }
-
- //run last label if you need to
- if (needToRun == true) {
- if (order != NULL) { delete order; }
- order = (input->getOrderVector(lastLabel));
-
- mothurOut(order->getLabel()); mothurOutEndLine();
-
- cCurve = new Collect(order, cDisplays);
- cCurve->getCurve(freq);
- delete cCurve;
- delete order;
- }
- for(int i=0;i<cDisplays.size();i++){ delete cDisplays[i]; }
return 0;
}
catch(exception& e) {
}
//**********************************************************************************************************************
+//Splits a shared file into one .rabund file per group (sharedFileRoot + group
+//+ ".rabund") so execute() can run the collect.single calculators once per
+//group.  Returns the list of rabund filenames; also fills the "groups" member
+//in the same order as a side effect.
+vector<string> CollectCommand::parseSharedFile(string filename) {
+	try {
+		vector<string> filenames;
+		
+		map<string, ofstream*> filehandles;
+		map<string, ofstream*>::iterator it3;
+		
+		
+		//read first line
+		read = new ReadOTUFile(filename);
+		read->read(&*globaldata);
+		
+		input = globaldata->ginput;
+		vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
+		
+		string sharedFileRoot = getRootName(filename);
+		
+		//clears file before we start to write to it below
+		for (int i=0; i<lookup.size(); i++) {
+			remove((sharedFileRoot + lookup[i]->getGroup() + ".rabund").c_str());
+			filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
+		}
+		
+		//one ofstream per group; freed in the cleanup loop below
+		ofstream* temp;
+		for (int i=0; i<lookup.size(); i++) {
+			temp = new ofstream;
+			filehandles[lookup[i]->getGroup()] = temp;
+			groups.push_back(lookup[i]->getGroup());
+		}
+		
+		//NOTE(review): assumes getSharedRAbundVectors() returns a vector whose
+		//first entry is NULL at end of file - confirm against InputData.
+		while(lookup[0] != NULL) {
+		
+			for (int i = 0; i < lookup.size(); i++) {
+				RAbundVector rav = lookup[i]->getRAbundVector();
+				//reopen in append mode each label so each group file accumulates one rabund line per label
+				openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
+				rav.print(*(filehandles[lookup[i]->getGroup()]));
+				(*(filehandles[lookup[i]->getGroup()])).close();
+			}
+		
+			for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+			lookup = input->getSharedRAbundVectors();
+		}
+		
+		//free memory
+		for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
+			delete it3->second;
+		}
+		delete read;
+		delete input;
+		globaldata->ginput = NULL;
+		
+		return filenames;
+	}
+	catch(exception& e) {
+		errorOut(e, "CollectCommand", "parseSharedFile");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+
set<string> labels; //holds labels to be used
string label, calc;
vector<string> Estimators;
+ vector<string> inputFileNames;
+ vector<string> groups;
+
+ vector<string> parseSharedFile(string);
};
outFile << setprecision(4);
if(isTrue(phylip) && startLine == 0){ outFile << alignDB.getNumSeqs() << endl; }
+
for(int i=startLine;i<endLine;i++){
- if(isTrue(phylip)) { outFile << alignDB.get(i).getName() << '\t'; }
+ if(isTrue(phylip)) {
+ string name = alignDB.get(i).getName();
+ if (name.length() < 10) { //pad with spaces to make compatible
+ while (name.length() < 10) { name += " "; }
+ }
+ outFile << name << '\t';
+ }
for(int j=0;j<i;j++){
distCalculator->calcDist(alignDB.get(i), alignDB.get(j));
double dist = distCalculator->getDist();
#include "listvector.hpp"
#include "sparsematrix.hpp"
-
/***********************************************************************/
-
-HCluster::HCluster(RAbundVector* rav, ListVector* lv, string m) : rabund(rav), list(lv), method(m){
+HCluster::HCluster(RAbundVector* rav, ListVector* lv, string m, string d, NameAssignment* n, float c) : rabund(rav), list(lv), method(m), distfile(d), nameMap(n), cutoff(c) {
try {
mapWanted = false;
exitedBreak = false;
clusterArray.push_back(temp);
}
+ if (method != "average") {
+ openInputFile(distfile, filehandle);
+ }else{ firstRead = true; }
}
catch(exception& e) {
errorOut(e, "HCluster", "HCluster");
/***********************************************************************/
int HCluster::getUpmostParent(int node){
try {
-
while (clusterArray[node].parent != -1) {
node = clusterArray[node].parent;
}
try {
cout << "link table" << endl;
- for (it = activeLinks.begin(); it!= activeLinks.end(); it++) {
- cout << it->first << " = " << it->second << endl;
+ for (itActive = activeLinks.begin(); itActive!= activeLinks.end(); itActive++) {
+ cout << itActive->first << " = " << itActive->second << endl;
}
cout << endl;
for (int i = 0; i < linkTable.size(); i++) {
cout << i << '\t';
for (it = linkTable[i].begin(); it != linkTable[i].end(); it++) {
- cout << it->first << '-' << it->second << '\t';
+ cout << it->first << '-' << it->second << '\t' ;
}
cout << endl;
}
/***********************************************************************/
int HCluster::makeActive() {
try {
-
int linkValue = 1;
-//cout << "active - here" << endl;
- it = activeLinks.find(smallRow);
- it2 = activeLinks.find(smallCol);
+
+ itActive = activeLinks.find(smallRow);
+ it2Active = activeLinks.find(smallCol);
- if ((it == activeLinks.end()) && (it2 == activeLinks.end())) { //both are not active so add them
+ if ((itActive == activeLinks.end()) && (it2Active == activeLinks.end())) { //both are not active so add them
int size = linkTable.size();
map<int, int> temp; map<int, int> temp2;
//add link to eachother
- temp[smallRow] = 1; // 1 2
- temp2[smallCol] = 1; // 1 0 1
- // 2 1 0
+ temp[smallRow] = 1; // 1 2
+ temp2[smallCol] = 1; // 1 0 1
+ // 2 1 0
linkTable.push_back(temp);
linkTable.push_back(temp2);
//add to activeLinks
activeLinks[smallRow] = size;
activeLinks[smallCol] = size+1;
-//cout << "active - here1" << endl;
- }else if ((it != activeLinks.end()) && (it2 == activeLinks.end())) { //smallRow is active, smallCol is not
+
+ }else if ((itActive != activeLinks.end()) && (it2Active == activeLinks.end())) { //smallRow is active, smallCol is not
int size = linkTable.size();
- int alreadyActiveRow = it->second;
+ int alreadyActiveRow = itActive->second;
map<int, int> temp;
//add link to eachother
- temp[smallRow] = 1; // 6 2 3 5
- linkTable.push_back(temp); // 6 0 1 2 0
- linkTable[alreadyActiveRow][smallCol] = 1; // 2 1 0 1 1
- // 3 2 1 0 0
- // 5 0 1 0 0
+ temp[smallRow] = 1; // 6 2 3 5
+ linkTable.push_back(temp); // 6 0 1 2 0
+ linkTable[alreadyActiveRow][smallCol] = 1; // 2 1 0 1 1
+ // 3 2 1 0 0
+ // 5 0 1 0 0
//add to activeLinks
activeLinks[smallCol] = size;
-//cout << "active - here2" << endl;
- }else if ((it == activeLinks.end()) && (it2 != activeLinks.end())) { //smallCol is active, smallRow is not
+
+ }else if ((itActive == activeLinks.end()) && (it2Active != activeLinks.end())) { //smallCol is active, smallRow is not
int size = linkTable.size();
- int alreadyActiveCol = it2->second;
+ int alreadyActiveCol = it2Active->second;
map<int, int> temp;
//add link to eachother
- temp[smallCol] = 1; // 6 2 3 5
- linkTable.push_back(temp); // 6 0 1 2 0
- linkTable[alreadyActiveCol][smallRow] = 1; // 2 1 0 1 1
- // 3 2 1 0 0
- // 5 0 1 0 0
+ temp[smallCol] = 1; // 6 2 3 5
+ linkTable.push_back(temp); // 6 0 1 2 0
+ linkTable[alreadyActiveCol][smallRow] = 1; // 2 1 0 1 1
+ // 3 2 1 0 0
+ // 5 0 1 0 0
//add to activeLinks
activeLinks[smallRow] = size;
-//cout << "active - here3" << endl;
+
}else { //both are active so add one
- int row = it->second;
- int col = it2->second;
-//cout << row << '\t' << col << endl;
+ int row = itActive->second;
+ int col = it2Active->second;
+
linkTable[row][smallCol]++;
linkTable[col][smallRow]++;
linkValue = linkTable[row][smallCol];
-//cout << "active - here4" << endl;
}
return linkValue;
/***********************************************************************/
void HCluster::updateArrayandLinkTable() {
try {
- //if cluster was made update clusterArray and linkTable
- int size = clusterArray.size();
-
- //add new node
- clusterNode temp(clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq, -1, clusterArray[smallCol].smallChild);
- clusterArray.push_back(temp);
-
- //update child nodes
- clusterArray[smallRow].parent = size;
- clusterArray[smallCol].parent = size;
+ //if cluster was made update clusterArray and linkTable
+ int size = clusterArray.size();
+
+ //add new node
+ clusterNode temp(clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq, -1, clusterArray[smallCol].smallChild);
+ clusterArray.push_back(temp);
+
+ //update child nodes
+ clusterArray[smallRow].parent = size;
+ clusterArray[smallCol].parent = size;
+
+ if (method == "furthest") {
//update linkTable by merging clustered rows and columns
int rowSpot = activeLinks[smallRow];
int colSpot = activeLinks[smallCol];
- //cout << "here" << endl;
+
//fix old rows
for (int i = 0; i < linkTable.size(); i++) {
//check if they are in map
linkTable[i].erase(smallRow); //delete col
}
}
- //printInfo();
-//cout << "here2" << endl;
+
//merge their values
for (it = linkTable[rowSpot].begin(); it != linkTable[rowSpot].end(); it++) {
it2 = linkTable[colSpot].find(it->first); //does the col also have this
if (it2 == linkTable[colSpot].end()) { //not there so add it
linkTable[colSpot][it->first] = it->second;
}else { //merge them
- linkTable[colSpot][it->first] = it->second+it2->second;
+ linkTable[colSpot][it->first] = it->second + it2->second;
}
}
-//cout << "here3" << endl;
+
linkTable[colSpot].erase(size);
linkTable.erase(linkTable.begin()+rowSpot); //delete row
- //printInfo();
+
//update activerows
activeLinks.erase(smallRow);
activeLinks.erase(smallCol);
activeLinks[size] = colSpot;
//adjust everybody elses spot since you deleted - time vs. space
- for (it = activeLinks.begin(); it != activeLinks.end(); it++) {
- if (it->second > rowSpot) { activeLinks[it->first]--; }
+ for (itActive = activeLinks.begin(); itActive != activeLinks.end(); itActive++) {
+ if (itActive->second > rowSpot) { activeLinks[itActive->first]--; }
}
-
-//cout << "here4" << endl;
-
+ }
}
catch(exception& e) {
errorOut(e, "HCluster", "updateArrayandLinkTable");
}
}
/***********************************************************************/
+//Attempts to merge the clusters containing row and col at this distance.
+//Returns true when a merge was made: nearest always merges; furthest merges
+//only once every pairwise link between the two clusters has been seen
+//(tracked via the link table); average always merges and then rewrites the
+//distance file with weighted-average distances via combineFile().
-void HCluster::update(int row, int col, float distance){
+bool HCluster::update(int row, int col, float distance){
	try {
-		
+		bool cluster = false;
		smallRow = row;
		smallCol = col;
		smallDist = distance;
		//find upmost parent of row and col
		smallRow = getUpmostParent(smallRow);
		smallCol = getUpmostParent(smallCol);
-		//cout << "row = " << row << " smallRow = " << smallRow << " col = " << col << " smallCol = " << smallCol << " dist = " << distance << endl;
+		
		//you don't want to cluster with yourself
		if (smallRow != smallCol) {
-			//are they active in the link table
-			int linkValue = makeActive(); //after this point this nodes info is active in linkTable
-			//printInfo();
-			//cout << "linkValue = " << linkValue << " times = " << (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq) << endl;
-			//can we cluster???
-			bool cluster = false;
-			
-			if (method == "nearest") { cluster = true; }
-			else if (method == "average") {
-				if (linkValue == (ceil(((clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq)) / (float) 2.0))) { cluster = true; }
-			}else{ //assume furthest
-				if (linkValue == (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq)) { cluster = true; }
-			}
-			if (cluster) {
+			if (method != "average") {
+				//can we cluster???
+				if (method == "nearest") { cluster = true; }
+				else{ //assume furthest
+					//are they active in the link table
+					int linkValue = makeActive(); //after this point this nodes info is active in linkTable
+					//furthest neighbor: cluster only when every member of one cluster links to every member of the other
+					if (linkValue == (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq)) { cluster = true; }
+				}
+				
+				if (cluster) {
+					updateArrayandLinkTable();
+					clusterBins();
+					clusterNames();
+				}
+			}else {
+				//average neighbor merges unconditionally, then rewrites the distance file
+				cluster = true;
				updateArrayandLinkTable();
				clusterBins();
				clusterNames();
+				combineFile();
			}
		}
+		
+		return cluster;
		//printInfo();
	}
	catch(exception& e) {
	}
}
//**********************************************************************************************************************
+//Returns the next batch of equal, smallest distances to cluster.
+//Dispatches on the linkage method: furthest/nearest read straight from the
+//distance file (getSeqsFNNN); average first preprocesses the file on the
+//initial call (processFile) and then merges file and in-memory minima (getSeqsAN).
-vector<seqDist> HCluster::getSeqs(ifstream& filehandle, NameAssignment* nameMap, float cutoff){
+vector<seqDist> HCluster::getSeqs(){
+	try {
+		vector<seqDist> sameSeqs;
+		
+		if(method != "average") {
+			sameSeqs = getSeqsFNNN();
+		}else{
+			if (firstRead) { processFile(); }
+			sameSeqs = getSeqsAN();
+		}
+		
+		return sameSeqs;
+	}
+	catch(exception& e) {
+		errorOut(e, "HCluster", "getSeqs");
+		exit(1);
+	}
+}
+//**********************************************************************************************************************
+vector<seqDist> HCluster::getSeqsFNNN(){
try {
string firstName, secondName;
float distance, prevDistance;
return sameSeqs;
}
catch(exception& e) {
- errorOut(e, "HCluster", "getSeqs");
+ errorOut(e, "HCluster", "getSeqsFNNN");
exit(1);
}
}
+//**********************************************************************************************************************
+//don't need cutoff since processFile removes all distance above cutoff and changes names to indexes
+//**********************************************************************************************************************
+//don't need cutoff since processFile removes all distance above cutoff and changes names to indexes
+//Average neighbor: the true minimum distance may live either in the (sorted)
+//distance file or in mergedMin, the sorted averaged distances left over from
+//the last merge in combineFile() - compare the heads of both and take the smaller.
+vector<seqDist> HCluster::getSeqsAN(){
+	try {
+		int firstName, secondName;
+		float prevDistance;
+		vector<seqDist> sameSeqs;
+		prevDistance = -1;
+		
+		openInputFile(distfile, filehandle, "no error");
+		
+		//is the smallest value in mergedMin or the distfile?
+		float mergedMinDist = 10000;
+		float distance = 10000;
+		if (mergedMin.size() > 0) { mergedMinDist = mergedMin[0].dist; }
+		
+		if (!filehandle.eof()) {
+			filehandle >> firstName >> secondName >> distance; gobble(filehandle);
+			//save first one
+			if (prevDistance == -1) { prevDistance = distance; }
+			if (distance != -1) { //-1 means skip me
+				seqDist temp(firstName, secondName, distance);
+				sameSeqs.push_back(temp);
+			}
+		}
+		
+		if (mergedMinDist < distance) { //get minimum distance from mergedMin
+			//remove distance we saved from file
+			sameSeqs.clear();
+			prevDistance = mergedMinDist;
+			
+			//mergedMin is sorted, so collect the run of equal minima from the front
+			for (int i = 0; i < mergedMin.size(); i++) {
+				if (mergedMin[i].dist == prevDistance) {
+					sameSeqs.push_back(mergedMin[i]);
+				}else { break; }
+			}
+		}else{ //get minimum from file
+			//get entry
+			while (!filehandle.eof()) {
+				
+				filehandle >> firstName >> secondName >> distance; gobble(filehandle);
+				
+				if (prevDistance == -1) { prevDistance = distance; }
+				
+				if (distance != -1) { //-1 means skip me
+					//are the distances the same
+					if (distance == prevDistance) { //save in vector
+						seqDist temp(firstName, secondName, distance);
+						sameSeqs.push_back(temp);
+					}else{
+						break;
+					}
+				}
+			}
+		}
+		filehandle.close();
+		
+		//randomize matching dists
+		//NOTE(review): random_shuffle is deprecated in modern C++ (removed in C++17) - fine for this codebase's vintage
+		random_shuffle(sameSeqs.begin(), sameSeqs.end());
+		
+		//can only return one value since once these are merged the other distances in sameSeqs may have changed
+		vector<seqDist> temp;
+		if (sameSeqs.size() > 0) { temp.push_back(sameSeqs[0]); }
+		
+		return temp;
+	}
+	catch(exception& e) {
+		errorOut(e, "HCluster", "getSeqsAN");
+		exit(1);
+	}
+}
+
/***********************************************************************/
+//Average neighbor merge step: rewrites the sorted distance file after the
+//clusters in smallRow/smallCol merge.  Distances touching either merged
+//cluster are pulled out of the file (and out of mergedMin, the leftovers of
+//the previous merge), weight-averaged when both clusters had a distance to
+//the same sequence, and kept sorted in mergedMin for the next pass.  All
+//other distances stream through to a temp file that replaces the original,
+//so the file stays sorted.
+void HCluster::combineFile() {
+	try {
+		int bufferSize = 64000; //64k read buffer - this should be a variable that the user can set to optimize code to their hardware
+		char* inputBuffer;
+		inputBuffer = new char[bufferSize];
+		size_t numRead;
+		
+		string tempDistFile = distfile + ".temp";
+		ofstream out;
+		openOutputFile(tempDistFile, out);
+		
+		//NOTE(review): fopen result is unchecked; a missing distfile would crash in fread - confirm callers guarantee it exists
+		FILE* in;
+		in = fopen(distfile.c_str(), "rb");
+		
+		int first, second;
+		float dist;
+		
+		vector< map<int, float> > smallRowColValues;
+		smallRowColValues.resize(2); //0 = row, 1 = col
+		int count = 0;
+		
+		//go through file pulling out distances related to rows merging
+		//if mergedMin contains distances add those back into file
+		bool done = false;
+		partialDist = "";
+		while ((numRead = fread(inputBuffer, 1, bufferSize, in)) != 0) {
+//cout << "number of char read = " << numRead << endl;
+//cout << inputBuffer << endl;
+			if (numRead < bufferSize) { done = true; }
+			
+			//parse input into individual distances
+			int spot = 0;
+			string outputString = "";
+			while(spot < numRead) {
+				//cout << "spot = " << spot << endl;
+				seqDist nextDist = getNextDist(inputBuffer, spot, bufferSize);
+				
+				//you read a partial distance
+				if (nextDist.seq1 == -1) { break; }
+				
+				first = nextDist.seq1; second = nextDist.seq2; dist = nextDist.dist;
+				//cout << "next distance = " << first << '\t' << second << '\t' << dist << endl;
+				//since file is sorted and mergedMin is sorted
+				//you can put the smallest distance from each through the code below and keep the file sorted
+				
+				//while there are still values in mergedMin that are smaller than the distance read from file
+				while (count < mergedMin.size()) {
+					
+					//is the distance in mergedMin smaller than from the file
+					if (mergedMin[count].dist < dist) {
+						//is this a distance related to the columns merging?
+						//if yes, save in memory
+						if ((mergedMin[count].seq1 == smallRow) && (mergedMin[count].seq2 == smallCol)) { //do nothing this is the smallest distance from last time
+						}else if (mergedMin[count].seq1 == smallCol) {
+							smallRowColValues[1][mergedMin[count].seq2] = mergedMin[count].dist;
+						}else if (mergedMin[count].seq2 == smallCol) {
+							smallRowColValues[1][mergedMin[count].seq1] = mergedMin[count].dist;
+						}else if (mergedMin[count].seq1 == smallRow) {
+							smallRowColValues[0][mergedMin[count].seq2] = mergedMin[count].dist;
+						}else if (mergedMin[count].seq2 == smallRow) {
+							smallRowColValues[0][mergedMin[count].seq1] = mergedMin[count].dist;
+						}else { //if no, write to temp file
+							outputString += toString(mergedMin[count].seq1) + '\t' + toString(mergedMin[count].seq2) + '\t' + toString(mergedMin[count].dist) + '\n';
+						}
+						count++;
+					}else{ break; }
+				}
+				
+				//is this a distance related to the columns merging?
+				//if yes, save in memory
+				if ((first == smallRow) && (second == smallCol)) { //do nothing this is the smallest distance from last time
+				}else if (first == smallCol) {
+					smallRowColValues[1][second] = dist;
+				}else if (second == smallCol) {
+					smallRowColValues[1][first] = dist;
+				}else if (first == smallRow) {
+					smallRowColValues[0][second] = dist;
+				}else if (second == smallRow) {
+					smallRowColValues[0][first] = dist;
+					
+				}else { //if no, write to temp file
+					outputString += toString(first) + '\t' + toString(second) + '\t' + toString(dist) + '\n';
+				}
+			}
+			
+			out << outputString;
+			if(done) { break; }
+		}
+		fclose(in);
+		delete[] inputBuffer; //fix: buffer from new[] above was never freed - leaked bufferSize bytes on every merge
+		
+		//if values in mergedMin are larger than the the largest in file then
+		while (count < mergedMin.size()) {
+			//is this a distance related to the columns merging?
+			//if yes, save in memory
+			if ((mergedMin[count].seq1 == smallRow) && (mergedMin[count].seq2 == smallCol)) { //do nothing this is the smallest distance from last time
+			}else if (mergedMin[count].seq1 == smallCol) {
+				smallRowColValues[1][mergedMin[count].seq2] = mergedMin[count].dist;
+			}else if (mergedMin[count].seq2 == smallCol) {
+				smallRowColValues[1][mergedMin[count].seq1] = mergedMin[count].dist;
+			}else if (mergedMin[count].seq1 == smallRow) {
+				smallRowColValues[0][mergedMin[count].seq2] = mergedMin[count].dist;
+			}else if (mergedMin[count].seq2 == smallRow) {
+				smallRowColValues[0][mergedMin[count].seq1] = mergedMin[count].dist;
+				
+			}else { //if no, write to temp file
+				out << mergedMin[count].seq1 << '\t' << mergedMin[count].seq2 << '\t' << mergedMin[count].dist << endl;
+			}
+			count++;
+		}
+		out.close();
+		mergedMin.clear();
+		
+		//rename tempfile to distfile
+		remove(distfile.c_str());
+		rename(tempDistFile.c_str(), distfile.c_str());
+		
+		//merge clustered rows averaging the distances
+		//NOTE(review): distances present for only one of the merged clusters are
+		//dropped here - confirm this matches the intended cutoff handling
+		map<int, float>::iterator itMerge;
+		map<int, float>::iterator it2Merge;
+		for(itMerge = smallRowColValues[0].begin(); itMerge != smallRowColValues[0].end(); itMerge++) {
+			//does smallRowColValues[1] have a distance to this seq too?
+			it2Merge = smallRowColValues[1].find(itMerge->first);
+			
+			float average;
+			if (it2Merge != smallRowColValues[1].end()) { //if yes, then average
+				//weighted average
+				int total = clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq;
+				average = ((clusterArray[smallRow].numSeq * itMerge->second) + (clusterArray[smallCol].numSeq * it2Merge->second)) / (float) total;
+				smallRowColValues[1].erase(it2Merge);
+				
+				seqDist temp(clusterArray[smallRow].parent, itMerge->first, average);
+				mergedMin.push_back(temp);
+			}
+		}
+		
+		//sort merged values
+		sort(mergedMin.begin(), mergedMin.end(), compareSequenceDistance);
+	}
+	catch(exception& e) {
+		errorOut(e, "HCluster", "combineFile");
+		exit(1);
+	}
+}
+/***********************************************************************/
+//Parses the next "first<TAB>second<TAB>distance" record from the raw read
+//buffer, advancing index past the record and any trailing newline/whitespace.
+//If the previous buffer ended mid-record, partialDist holds the carried-over
+//text and is consumed first.  If THIS buffer ends mid-record, the partial text
+//is stashed in partialDist and a sentinel seqDist with seq1 == -1 is returned
+//so the caller knows to refill the buffer.
+seqDist HCluster::getNextDist(char* buffer, int& index, int size){
+	try {
+		seqDist next;
+		int indexBefore = index;
+		string first, second, distance;
+		first = ""; second = ""; distance = "";
+		int tabCount = 0;
+//cout << "partial = " << partialDist << endl;
+		if (partialDist != "") { //read what you can, you know it is less than a whole distance.
+			for (int i = 0; i < partialDist.size(); i++) {
+				if (tabCount == 0) {
+					if (partialDist[i] == '\t') { tabCount++; }
+					else { first += partialDist[i]; }
+				}else if (tabCount == 1) {
+					if (partialDist[i] == '\t') { tabCount++; }
+					else { second += partialDist[i]; }
+				}else if (tabCount == 2) {
+					distance += partialDist[i];
+				}
+			}
+			partialDist = "";
+		}
+		
+		//try to get another distance
+		bool gotDist = false;
+		while (index < size) {
+			if ((buffer[index] == 10) || (buffer[index] == 13)) { //newline in unix or windows
+				gotDist = true;
+				
+				//gobble space
+				while (index < size) {
+					if (isspace(buffer[index])) { index++; }
+					else { break; }
+				}
+				break;
+			}else{
+				if (tabCount == 0) {
+					if (buffer[index] == '\t') { tabCount++; }
+					else { first += buffer[index]; }
+				}else if (tabCount == 1) {
+					if (buffer[index] == '\t') { tabCount++; }
+					else { second += buffer[index]; }
+				}else if (tabCount == 2) {
+					distance += buffer[index];
+				}
+				index++;
+			}
+		}
+		
+		//there was not a whole distance in the buffer, ie. buffer = "1 2 0.01 2 3 0."
+		//then you want to save the partial distance.
+		if (!gotDist) {
+			for (int i = indexBefore; i < size; i++) {
+				partialDist += buffer[i];
+			}
+			//index past size forces the caller's parse loop to exit and refill
+			index = size + 1;
+			next.seq1 = -1; next.seq2 = -1; next.dist = 0.0;
+		}else{
+			int firstname, secondname;
+			float dist;
+			
+			convert(first, firstname);
+			convert(second, secondname);
+			convert(distance, dist);
+			
+			next.seq1 = firstname; next.seq2 = secondname; next.dist = dist;
+		}
+		
+		return next;
+	}
+	catch(exception& e) {
+		errorOut(e, "HCluster", "getNextDist");
+		exit(1);
+	}
+}
+/***********************************************************************/
+//One-time preprocessing for the average neighbor method (run on first read):
+//rewrites the distance file with sequence names replaced by their nameMap
+//indexes and all distances above the cutoff removed.
+//NOTE(review): the break on (distance > cutoff) assumes the distance file is
+//sorted by ascending distance - confirm the upstream sort guarantees this.
+void HCluster::processFile() {
+	try {
+		string firstName, secondName;
+		float distance;
+		
+		ifstream in;
+		openInputFile(distfile, in);
+		
+		ofstream out;
+		string outTemp = distfile + ".temp";
+		openOutputFile(outTemp, out);
+		
+		//get entry
+		while (!in.eof()) {
+			
+			in >> firstName >> secondName >> distance; gobble(in);
+			
+			map<string,int>::iterator itA = nameMap->find(firstName);
+			map<string,int>::iterator itB = nameMap->find(secondName);
+			if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
+			if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
+		
+			//using cutoff
+			if (distance > cutoff) { break; }
+		
+			if (distance != -1) { //-1 means skip me
+				out << itA->second << '\t' << itB->second << '\t' << distance << endl;
+			}
+		}
+		
+		in.close();
+		out.close();
+		
+		//replace the original distance file with the filtered, indexed version
+		remove(distfile.c_str());
+		rename(outTemp.c_str(), distfile.c_str());
+		
+		firstRead = false;
+	}
+	catch(exception& e) {
+		errorOut(e, "HCluster", "processFile");
+		exit(1);
+	}
+}
+/***********************************************************************/
+
+
+
+
+
class RAbundVector;
class ListVector;
+/***********************************************************************/
+//Simple aggregate pairing a link count with its associated distance.
+//NOTE(review): looks related to HCluster's linkTable bookkeeping, but no
+//use site is visible in this chunk -- confirm before relying on it.
+struct linkNode {
+	int links;	//number of pairwise links
+	float dist;	//distance associated with those links
+
+	linkNode() {};
+	linkNode(int l, float a) : links(l), dist(a) {};
+	~linkNode() {};
+};
+
/***********************************************************************/
+//Hierarchical clustering engine driven from a sorted distance file rather
+//than an in-memory matrix: getSeqs() streams the next batch of pairwise
+//distances from distfile, update() merges two bins and reports whether a
+//merge actually happened.
class HCluster {
public:
-	HCluster(RAbundVector*, ListVector*, string);
+	HCluster(RAbundVector*, ListVector*, string, string, NameAssignment*, float);
	~HCluster(){};
-	void update(int, int, float);
+	bool update(int, int, float);
	void setMapWanted(bool m);
	map<string, int> getSeqtoBin() { return seq2Bin; }
-	vector<seqDist> getSeqs(ifstream&, NameAssignment*, float);
+	vector<seqDist> getSeqs();
protected:
	void clusterBins();
	void printInfo();
	void updateArrayandLinkTable();
	void updateMap();
+	//method-specific helpers for streaming/merging distances from distfile
+	vector<seqDist> getSeqsFNNN();
+	vector<seqDist> getSeqsAN();
+	void combineFile();
+	void processFile();
+	seqDist getNextDist(char*, int&, int);
	RAbundVector* rabund;
	ListVector* list;
+	NameAssignment* nameMap;
	vector<clusterNode> clusterArray;
+
+	//note: the nearest and average neighbor method do not use the link table or active links
	vector< map<int, int> > linkTable; // vector of maps - linkTable[1][6] = 2 would mean sequence in spot 1 has 2 links with sequence in 6
	map<int, int> activeLinks; //maps sequence to index in linkTable
	map<int, int>::iterator it;
+	map<int, int>::iterator itActive;
+	map<int, int>::iterator it2Active;
	map<int, int>::iterator it2;
	int numSeqs;
	int smallRow;
	int smallCol;
-	float smallDist;
+	float smallDist, cutoff;
	map<string, int> seq2Bin;
-	bool mapWanted, exitedBreak;
+	bool mapWanted, exitedBreak, firstRead;
	seqDist next;
-	string method;
+	string method, distfile;
+	ifstream filehandle;
+
+	//partial state carried between getSeqs() calls while streaming the file
+	vector<seqDist> mergedMin;
+	string partialDist;
+
};
print_start = true;
start = time(NULL);
- ifstream in;
- openInputFile(distfile, in);
- string firstName, secondName;
- float distance;
-
- cluster = new HCluster(rabund, list, method);
+ //ifstream in;
+ //openInputFile(distfile, in);
+
+ cluster = new HCluster(rabund, list, method, distfile, globaldata->nameMap, cutoff);
vector<seqDist> seqs; seqs.resize(1); // to start loop
while (seqs.size() != 0){
- seqs = cluster->getSeqs(in, globaldata->nameMap, cutoff);
+ seqs = cluster->getSeqs();
for (int i = 0; i < seqs.size(); i++) { //-1 means skip me
- if (print_start && isTrue(timing)) {
- mothurOut("Clustering (" + tag + ") dist " + toString(distance) + "/"
- + toString(roundDist(distance, precision))
- + "\t(precision: " + toString(precision) + ")");
- cout.flush();
- print_start = false;
- }
-
-
if (seqs[i].seq1 != seqs[i].seq2) {
- cluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
+ bool clustered = cluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
float rndDist = roundDist(seqs[i].dist, precision);
-
- if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
- printData("unique");
- }
- else if((rndDist != rndPreviousDist)){
- printData(toString(rndPreviousDist, length-1));
+
+ if (clustered) {
+ if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
+ printData("unique");
+ }
+ else if((rndDist != rndPreviousDist)){
+ printData(toString(rndPreviousDist, length-1));
+ }
}
previousDist = seqs[i].dist;
}
}
- in.close();
+ //in.close();
- if (print_start && isTrue(timing)) {
- //mothurOut("Clustering (" + tag + ") for distance " + toString(previousDist) + "/" + toString(rndPreviousDist)
- //+ "\t(precision: " + toString(precision) + ", Nodes: " + toString(matrix->getNNodes()) + ")");
- cout.flush();
- print_start = false;
- }
-
if(previousDist <= 0.0000){
printData("unique");
}
ofstream outputFile;
openOutputFile(outputFileName, outputFile);
- ifstream inputFile;
char c;
for(int i=0;i<numInputFiles;i++){
+		ifstream inputFile; //declaration must be inside the for loop or Windows throws an error
+
openInputFile(fileNames[i], inputFile);
while(!inputFile.eof()){ c = inputFile.get(); outputFile << c; }
sortHclusterFiles(distFile, overlapFile);
//create cluster
- hcluster = new HCluster(rabund, list, method);
+ hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
hcluster->setMapWanted(true);
vector<seqDist> seqs; seqs.resize(1); // to start loop
- ifstream inHcluster;
- openInputFile(distFile, inHcluster);
+ //ifstream inHcluster;
+ //openInputFile(distFile, inHcluster);
while (seqs.size() != 0){
- seqs = hcluster->getSeqs(inHcluster, nameMap, cutoff);
+ seqs = hcluster->getSeqs();
for (int i = 0; i < seqs.size(); i++) { //-1 means skip me
}
}
}
- inHcluster.close();
+ //inHcluster.close();
if(previousDist <= 0.0000){
oldList.setLabel("unique");
seqDist(int s1, int s2, float d) : seq1(s1), seq2(s2), dist(d) {}
~seqDist() {}
};
+//********************************************************************************************************************
+//sorts lowest to highest
+//Comparator for std::sort over seqDist records.  Takes const references
+//instead of by-value parameters so each comparison no longer copies both
+//structs (clang-tidy: performance-unnecessary-value-param); callers that
+//pass this comparator to sort are unaffected.
+inline bool compareSequenceDistance(const seqDist& left, const seqDist& right){
+	return (left.dist < right.dist);	
+}
/***********************************************************************/
// snagged from http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.2
}
/***********************************************************************/
+//Overload of openInputFile taking an extra string argument.
+//Opens fileName for reading; returns 0 on success (after gobbling leading
+//whitespace, so a blank file reads as immediately at eof) or 1 on failure.
+//NOTE(review): parameter m is never used -- presumably intended to alter
+//the error reporting (e.g. suppress the message); confirm intended behavior.
+inline int openInputFile(string fileName, ifstream& fileHandle, string m){
+
+	fileHandle.open(fileName.c_str());
+	if(!fileHandle) {
+		mothurOut("Error: Could not open " + fileName); mothurOutEndLine();
+		return 1;
+	}
+	else {
+		//check for blank file
+		gobble(fileHandle);
+		return 0;
+	}
+
+}
+/***********************************************************************/
+
inline int openInputFile(string fileName, ifstream& fileHandle){
fileHandle.open(fileName.c_str());
numSeqs = 0;
tree.push_back(TaxNode("Root"));
tree[0].heirarchyID = "0";
+ maxLevel = 0;
}
catch(exception& e) {
errorOut(e, "PhyloTree", "PhyloTree");
}
}
/**************************************************************************************************/
+//Walks from seqName's taxon node up to the root, concatenating node names
+//into a semicolon-terminated taxonomy string (root excluded, since the
+//walk stops when parent == -1).
+//NOTE(review): name2Taxonomy[seqName] default-inserts index 0 for unknown
+//names, returning "" and polluting the map -- consider find() + an error.
+string PhyloTree::getFullTaxonomy(string seqName) {
+	try {
+		string tax = "";
+
+		int currentNode = name2Taxonomy[seqName];
+
+		while (tree[currentNode].parent != -1) {
+			tax = tree[currentNode].name + ";" + tax;
+			currentNode = tree[currentNode].parent;
+		}
+
+		return tax;
+	}
+	catch(exception& e) {
+		errorOut(e, "PhyloTree", "getFullTaxonomy");
+		exit(1);
+	}
+}
+/**************************************************************************************************/
void PhyloTree::print(ofstream& out){
try {
vector<int> getGenusNodes();
void binUnclassified();
- TaxNode get(int i) { return tree[i]; }
+ TaxNode get(int i) { return tree[i]; }
TaxNode get(string seqName) { return tree[name2Taxonomy[seqName]]; }
- int getIndex(string seqName) { return name2Taxonomy[seqName]; }
- string getName(int i) { return tree[i].name; }
+ int getIndex(string seqName) { return name2Taxonomy[seqName]; }
+ string getName(int i) { return tree[i].name; }
+ string getFullTaxonomy(string); //pass a sequence name return taxonomy
+ int getMaxLevel() { return maxLevel; }
+
private:
string getNextTaxon(string&);
vector<TaxNode> tree;
Estimators.clear();
//allow user to run help
- if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
+ if(option == "help") { validCalculator = new ValidCalculators(); help(); delete validCalculator; abort = true; }
else {
//valid paramters for this command
}
//make sure the user has already run the read.otu command
- if ((globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { mothurOut("You must read a list, sabund or rabund before you can use the rarefaction.single command."); mothurOutEndLine(); abort = true; }
+ if ((globaldata->getSharedFile() == "") && (globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { mothurOut("You must read a list, sabund, rabund or shared file before you can use the rarefact.single command."); mothurOutEndLine(); abort = true; }
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
convert(temp, nIters);
-
- if (abort == false) {
-
- string fileNameRoot = getRootName(globaldata->inputFileName);
- int i;
- validCalculator = new ValidCalculators();
-
-
- for (i=0; i<Estimators.size(); i++) {
- if (validCalculator->isValidCalculator("rarefaction", Estimators[i]) == true) {
- if (Estimators[i] == "sobs") {
- rDisplays.push_back(new RareDisplay(new Sobs(), new ThreeColumnFile(fileNameRoot+"rarefaction")));
- }else if (Estimators[i] == "chao") {
- rDisplays.push_back(new RareDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"r_chao")));
- }else if (Estimators[i] == "ace") {
- if(abund < 5)
- abund = 10;
- rDisplays.push_back(new RareDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"r_ace")));
- }else if (Estimators[i] == "jack") {
- rDisplays.push_back(new RareDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+"r_jack")));
- }else if (Estimators[i] == "shannon") {
- rDisplays.push_back(new RareDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"r_shannon")));
- }else if (Estimators[i] == "npshannon") {
- rDisplays.push_back(new RareDisplay(new NPShannon(), new ThreeColumnFile(fileNameRoot+"r_npshannon")));
- }else if (Estimators[i] == "simpson") {
- rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"r_simpson")));
- }else if (Estimators[i] == "bootstrap") {
- rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(fileNameRoot+"r_bootstrap")));
- }else if (Estimators[i] == "coverage") {
- rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(fileNameRoot+"r_coverage")));
- }else if (Estimators[i] == "nseqs") {
- rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+"r_nseqs")));
- }
- }
- }
- }
-
}
}
//**********************************************************************************************************************
-RareFactCommand::~RareFactCommand(){
-	if (abort == false) {
-		globaldata->gorder = NULL;
-		delete input; globaldata->ginput = NULL;
-		delete read;
-		delete validCalculator;
-	}
-}
+//cleanup of input/read/validCalculator now happens at the end of execute(),
+//once per input file, so the destructor has nothing left to release
+RareFactCommand::~RareFactCommand(){}
//**********************************************************************************************************************
if (abort == true) { return 0; }
- //if the users entered no valid calculators don't execute command
- if (rDisplays.size() == 0) { return 0; }
-
- read = new ReadOTUFile(globaldata->inputFileName);
- read->read(&*globaldata);
-
- order = globaldata->gorder;
- string lastLabel = order->getLabel();
- input = globaldata->ginput;
+ if ((globaldata->getFormat() != "sharedfile")) { inputFileNames.push_back(globaldata->inputFileName); }
+ else { inputFileNames = parseSharedFile(globaldata->getSharedFile()); globaldata->setFormat("rabund"); }
- //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
- set<string> processedLabels;
- set<string> userLabels = labels;
-
- //as long as you are not at the end of the file or done wih the lines you want
- while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+ for (int p = 0; p < inputFileNames.size(); p++) {
- if(allLines == 1 || labels.count(order->getLabel()) == 1){
+ string fileNameRoot = getRootName(inputFileNames[p]);
+ globaldata->inputFileName = inputFileNames[p];
- rCurve = new Rarefact(order, rDisplays);
- rCurve->getCurve(freq, nIters);
- delete rCurve;
+ if (inputFileNames.size() > 1) {
+ mothurOutEndLine(); mothurOut("Processing group " + groups[p]); mothurOutEndLine(); mothurOutEndLine();
+ }
+ int i;
+ validCalculator = new ValidCalculators();
- mothurOut(order->getLabel()); mothurOutEndLine();
- processedLabels.insert(order->getLabel());
- userLabels.erase(order->getLabel());
+
+ for (i=0; i<Estimators.size(); i++) {
+ if (validCalculator->isValidCalculator("rarefaction", Estimators[i]) == true) {
+ if (Estimators[i] == "sobs") {
+ rDisplays.push_back(new RareDisplay(new Sobs(), new ThreeColumnFile(fileNameRoot+"rarefaction")));
+ }else if (Estimators[i] == "chao") {
+ rDisplays.push_back(new RareDisplay(new Chao1(), new ThreeColumnFile(fileNameRoot+"r_chao")));
+ }else if (Estimators[i] == "ace") {
+ if(abund < 5)
+ abund = 10;
+ rDisplays.push_back(new RareDisplay(new Ace(abund), new ThreeColumnFile(fileNameRoot+"r_ace")));
+ }else if (Estimators[i] == "jack") {
+ rDisplays.push_back(new RareDisplay(new Jackknife(), new ThreeColumnFile(fileNameRoot+"r_jack")));
+ }else if (Estimators[i] == "shannon") {
+ rDisplays.push_back(new RareDisplay(new Shannon(), new ThreeColumnFile(fileNameRoot+"r_shannon")));
+ }else if (Estimators[i] == "npshannon") {
+ rDisplays.push_back(new RareDisplay(new NPShannon(), new ThreeColumnFile(fileNameRoot+"r_npshannon")));
+ }else if (Estimators[i] == "simpson") {
+ rDisplays.push_back(new RareDisplay(new Simpson(), new ThreeColumnFile(fileNameRoot+"r_simpson")));
+ }else if (Estimators[i] == "bootstrap") {
+ rDisplays.push_back(new RareDisplay(new Bootstrap(), new ThreeColumnFile(fileNameRoot+"r_bootstrap")));
+ }else if (Estimators[i] == "coverage") {
+ rDisplays.push_back(new RareDisplay(new Coverage(), new ThreeColumnFile(fileNameRoot+"r_coverage")));
+ }else if (Estimators[i] == "nseqs") {
+ rDisplays.push_back(new RareDisplay(new NSeqs(), new ThreeColumnFile(fileNameRoot+"r_nseqs")));
+ }
+ }
}
- if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
- string saveLabel = order->getLabel();
+
+ //if the users entered no valid calculators don't execute command
+ if (rDisplays.size() == 0) { return 0; }
+
+ read = new ReadOTUFile(globaldata->inputFileName);
+ read->read(&*globaldata);
+
+ order = globaldata->gorder;
+ string lastLabel = order->getLabel();
+ input = globaldata->ginput;
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+		//as long as you are not at the end of the file or done with the lines you want
+ while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+
+ if(allLines == 1 || labels.count(order->getLabel()) == 1){
+
+ rCurve = new Rarefact(order, rDisplays);
+ rCurve->getCurve(freq, nIters);
+ delete rCurve;
+
+ mothurOut(order->getLabel()); mothurOutEndLine();
+ processedLabels.insert(order->getLabel());
+ userLabels.erase(order->getLabel());
+ }
+
+ if ((anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = order->getLabel();
+
+ delete order;
+ order = (input->getOrderVector(lastLabel));
+
+ rCurve = new Rarefact(order, rDisplays);
+ rCurve->getCurve(freq, nIters);
+ delete rCurve;
+
+ mothurOut(order->getLabel()); mothurOutEndLine();
+ processedLabels.insert(order->getLabel());
+ userLabels.erase(order->getLabel());
+
+ //restore real lastlabel to save below
+ order->setLabel(saveLabel);
+ }
+
+ lastLabel = order->getLabel();
delete order;
+ order = (input->getOrderVector());
+ }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
+ needToRun = true;
+ }else {
+ mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ if (order != NULL) { delete order; }
order = (input->getOrderVector(lastLabel));
rCurve = new Rarefact(order, rDisplays);
rCurve->getCurve(freq, nIters);
delete rCurve;
-
- mothurOut(order->getLabel()); mothurOutEndLine();
- processedLabels.insert(order->getLabel());
- userLabels.erase(order->getLabel());
- //restore real lastlabel to save below
- order->setLabel(saveLabel);
+ mothurOut(order->getLabel()); mothurOutEndLine();
+ delete order;
}
- lastLabel = order->getLabel();
- delete order;
- order = (input->getOrderVector());
+ for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
+ rDisplays.clear();
+ globaldata->gorder = NULL;
+ delete input; globaldata->ginput = NULL;
+ delete read;
+ delete validCalculator;
+
}
- //output error messages about any remaining user labels
- set<string>::iterator it;
- bool needToRun = false;
- for (it = userLabels.begin(); it != userLabels.end(); it++) {
- mothurOut("Your file does not include the label " + *it);
- if (processedLabels.count(lastLabel) != 1) {
- mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
- needToRun = true;
- }else {
- mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
- }
- }
+ return 0;
+ }
+ catch(exception& e) {
+ errorOut(e, "RareFactCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+//Splits a shared file into one rabund file per group (sharedFileRoot +
+//group + ".rabund"), records each group name in this->groups, and returns
+//the list of files written.  Pre-existing per-group files are removed first.
+//NOTE(review): lookup[0] is dereferenced without an empty check -- assumes
+//the shared file contains at least one group; confirm upstream guarantees.
+vector<string> RareFactCommand::parseSharedFile(string filename) {
+	try {
+		vector<string> filenames;
+
+		map<string, ofstream*> filehandles;
+		map<string, ofstream*>::iterator it3;
-		//run last label if you need to
-		if (needToRun == true) {
-			if (order != NULL) { delete order; }
-			order = (input->getOrderVector(lastLabel));
-			rCurve = new Rarefact(order, rDisplays);
-			rCurve->getCurve(freq, nIters);
-			delete rCurve;
+		//read first line
+		read = new ReadOTUFile(filename);
+		read->read(&*globaldata);
-			mothurOut(order->getLabel()); mothurOutEndLine();
-			delete order;
+		input = globaldata->ginput;
+		vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
+
+		string sharedFileRoot = getRootName(filename);
+
+		//clears file before we start to write to it below
+		for (int i=0; i<lookup.size(); i++) {
+			remove((sharedFileRoot + lookup[i]->getGroup() + ".rabund").c_str());
+			filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
		}
+		ofstream* temp;
+		for (int i=0; i<lookup.size(); i++) {
+			temp = new ofstream;
+			filehandles[lookup[i]->getGroup()] = temp;
+			groups.push_back(lookup[i]->getGroup());
+		}
-		for(int i=0;i<rDisplays.size();i++){ delete rDisplays[i]; }
-		return 0;
+		//append each label's rabund vector to its group's file
+		while(lookup[0] != NULL) {
+
+			for (int i = 0; i < lookup.size(); i++) {
+				RAbundVector rav = lookup[i]->getRAbundVector();
+				openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
+				rav.print(*(filehandles[lookup[i]->getGroup()]));
+				(*(filehandles[lookup[i]->getGroup()])).close();
+			}
+
+			for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+			lookup = input->getSharedRAbundVectors();
+		}
+
+		//free memory
+		for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
+			delete it3->second;
+		}
+		delete read;
+		delete input;
+		globaldata->ginput = NULL;
+
+		return filenames;
	}
	catch(exception& e) {
-		errorOut(e, "RareFactCommand", "execute");
+		errorOut(e, "RareFactCommand", "parseSharedFile");
		exit(1);
	}
}
-
//**********************************************************************************************************************
+
+
+
set<string> labels; //holds labels to be used
string label, calc;
vector<string> Estimators;
+ vector<string> inputFileNames;
+ vector<string> groups;
+
+ vector<string> parseSharedFile(string);
+
};
Estimators.clear();
//allow user to run help
- if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
+ if(option == "help") { validCalculator = new ValidCalculators(); help(); delete validCalculator; abort = true; }
else {
//valid paramters for this command
}
//make sure the user has already run the read.otu command
- if ((globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { mothurOut("You must read a list, sabund or rabund before you can use the summary.single command."); mothurOutEndLine(); abort = true; }
-
+ if ((globaldata->getSharedFile() == "") && (globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { mothurOut("You must read a list, sabund, rabund or shared file before you can use the summary.single command."); mothurOutEndLine(); abort = true; }
+
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
label = validParameter.validFile(parameters, "label", false);
temp = validParameter.validFile(parameters, "size", false); if (temp == "not found") { temp = "0"; }
convert(temp, size);
- if (abort == false) {
-
- validCalculator = new ValidCalculators();
- int i;
-
- for (i=0; i<Estimators.size(); i++) {
- if (validCalculator->isValidCalculator("summary", Estimators[i]) == true) {
- if(Estimators[i] == "sobs"){
- sumCalculators.push_back(new Sobs());
- }else if(Estimators[i] == "chao"){
- sumCalculators.push_back(new Chao1());
- }else if(Estimators[i] == "coverage"){
- sumCalculators.push_back(new Coverage());
- }else if(Estimators[i] == "geometric"){
- sumCalculators.push_back(new Geom());
- }else if(Estimators[i] == "logseries"){
- sumCalculators.push_back(new LogSD());
- }else if(Estimators[i] == "qstat"){
- sumCalculators.push_back(new QStat());
- }else if(Estimators[i] == "bergerparker"){
- sumCalculators.push_back(new BergerParker());
- }else if(Estimators[i] == "bstick"){
- sumCalculators.push_back(new BStick());
- }else if(Estimators[i] == "ace"){
- if(abund < 5)
- abund = 10;
- sumCalculators.push_back(new Ace(abund));
- }else if(Estimators[i] == "jack"){
- sumCalculators.push_back(new Jackknife());
- }else if(Estimators[i] == "shannon"){
- sumCalculators.push_back(new Shannon());
- }else if(Estimators[i] == "npshannon"){
- sumCalculators.push_back(new NPShannon());
- }else if(Estimators[i] == "simpson"){
- sumCalculators.push_back(new Simpson());
- }else if(Estimators[i] == "bootstrap"){
- sumCalculators.push_back(new Bootstrap());
- }else if (Estimators[i] == "nseqs") {
- sumCalculators.push_back(new NSeqs());
- }else if (Estimators[i] == "goodscoverage") {
- sumCalculators.push_back(new GoodsCoverage());
- }else if (Estimators[i] == "efron") {
- sumCalculators.push_back(new Efron(size));
- }else if (Estimators[i] == "boneh") {
- sumCalculators.push_back(new Boneh(size));
- }else if (Estimators[i] == "solow") {
- sumCalculators.push_back(new Solow(size));
- }else if (Estimators[i] == "shen") {
- sumCalculators.push_back(new Shen(size, abund));
- }
- }
- }
- }
}
-
-
}
catch(exception& e) {
errorOut(e, "SummaryCommand", "SummaryCommand");
//**********************************************************************************************************************
-SummaryCommand::~SummaryCommand(){
-	if (abort == false) {
-		delete input; globaldata->ginput = NULL;
-		delete read;
-		delete validCalculator;
-		globaldata->sabund = NULL;
-	}
-}
+//cleanup of input/read/validCalculator now happens at the end of execute(),
+//once per input file, so the destructor has nothing left to release
+SummaryCommand::~SummaryCommand(){}
//**********************************************************************************************************************
if (abort == true) { return 0; }
- //if the users entered no valid calculators don't execute command
- if (sumCalculators.size() == 0) { return 0; }
-
- outputFileName = ((getRootName(globaldata->inputFileName)) + "summary");
- openOutputFile(outputFileName, outputFileHandle);
- outputFileHandle << "label";
-
- read = new ReadOTUFile(globaldata->inputFileName);
- read->read(&*globaldata);
-
- sabund = globaldata->sabund;
- string lastLabel = sabund->getLabel();
- input = globaldata->ginput;
+ if ((globaldata->getFormat() != "sharedfile")) { inputFileNames.push_back(globaldata->inputFileName); }
+ else { inputFileNames = parseSharedFile(globaldata->getSharedFile()); globaldata->setFormat("rabund"); }
- for(int i=0;i<sumCalculators.size();i++){
- if(sumCalculators[i]->getCols() == 1){
- outputFileHandle << '\t' << sumCalculators[i]->getName();
+ for (int p = 0; p < inputFileNames.size(); p++) {
+
+ string fileNameRoot = getRootName(inputFileNames[p]) + "summary";
+ globaldata->inputFileName = inputFileNames[p];
+
+ if (inputFileNames.size() > 1) {
+ mothurOutEndLine(); mothurOut("Processing group " + groups[p]); mothurOutEndLine(); mothurOutEndLine();
}
- else{
- outputFileHandle << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";
+
+
+ validCalculator = new ValidCalculators();
+
+ for (int i=0; i<Estimators.size(); i++) {
+ if (validCalculator->isValidCalculator("summary", Estimators[i]) == true) {
+ if(Estimators[i] == "sobs"){
+ sumCalculators.push_back(new Sobs());
+ }else if(Estimators[i] == "chao"){
+ sumCalculators.push_back(new Chao1());
+ }else if(Estimators[i] == "coverage"){
+ sumCalculators.push_back(new Coverage());
+ }else if(Estimators[i] == "geometric"){
+ sumCalculators.push_back(new Geom());
+ }else if(Estimators[i] == "logseries"){
+ sumCalculators.push_back(new LogSD());
+ }else if(Estimators[i] == "qstat"){
+ sumCalculators.push_back(new QStat());
+ }else if(Estimators[i] == "bergerparker"){
+ sumCalculators.push_back(new BergerParker());
+ }else if(Estimators[i] == "bstick"){
+ sumCalculators.push_back(new BStick());
+ }else if(Estimators[i] == "ace"){
+ if(abund < 5)
+ abund = 10;
+ sumCalculators.push_back(new Ace(abund));
+ }else if(Estimators[i] == "jack"){
+ sumCalculators.push_back(new Jackknife());
+ }else if(Estimators[i] == "shannon"){
+ sumCalculators.push_back(new Shannon());
+ }else if(Estimators[i] == "npshannon"){
+ sumCalculators.push_back(new NPShannon());
+ }else if(Estimators[i] == "simpson"){
+ sumCalculators.push_back(new Simpson());
+ }else if(Estimators[i] == "bootstrap"){
+ sumCalculators.push_back(new Bootstrap());
+ }else if (Estimators[i] == "nseqs") {
+ sumCalculators.push_back(new NSeqs());
+ }else if (Estimators[i] == "goodscoverage") {
+ sumCalculators.push_back(new GoodsCoverage());
+ }else if (Estimators[i] == "efron") {
+ sumCalculators.push_back(new Efron(size));
+ }else if (Estimators[i] == "boneh") {
+ sumCalculators.push_back(new Boneh(size));
+ }else if (Estimators[i] == "solow") {
+ sumCalculators.push_back(new Solow(size));
+ }else if (Estimators[i] == "shen") {
+ sumCalculators.push_back(new Shen(size, abund));
+ }
+ }
}
- }
- outputFileHandle << endl;
-
- //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
- set<string> processedLabels;
- set<string> userLabels = labels;
- while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+ //if the users entered no valid calculators don't execute command
+ if (sumCalculators.size() == 0) { return 0; }
- if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
-
- mothurOut(sabund->getLabel()); mothurOutEndLine();
- processedLabels.insert(sabund->getLabel());
- userLabels.erase(sabund->getLabel());
-
- outputFileHandle << sabund->getLabel();
- for(int i=0;i<sumCalculators.size();i++){
- vector<double> data = sumCalculators[i]->getValues(sabund);
- outputFileHandle << '\t';
- sumCalculators[i]->print(outputFileHandle);
+ ofstream outputFileHandle;
+ openOutputFile(fileNameRoot, outputFileHandle);
+ outputFileHandle << "label";
+
+ read = new ReadOTUFile(globaldata->inputFileName);
+ read->read(&*globaldata);
+
+ sabund = globaldata->sabund;
+ string lastLabel = sabund->getLabel();
+ input = globaldata->ginput;
+
+ for(int i=0;i<sumCalculators.size();i++){
+ if(sumCalculators[i]->getCols() == 1){
+ outputFileHandle << '\t' << sumCalculators[i]->getName();
+ }
+ else{
+ outputFileHandle << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";
}
- outputFileHandle << endl;
}
+ outputFileHandle << endl;
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
- if ((anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
- string saveLabel = sabund->getLabel();
+ while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+
+ if(allLines == 1 || labels.count(sabund->getLabel()) == 1){
+
+ mothurOut(sabund->getLabel()); mothurOutEndLine();
+ processedLabels.insert(sabund->getLabel());
+ userLabels.erase(sabund->getLabel());
+
+ outputFileHandle << sabund->getLabel();
+ for(int i=0;i<sumCalculators.size();i++){
+ vector<double> data = sumCalculators[i]->getValues(sabund);
+ outputFileHandle << '\t';
+ sumCalculators[i]->print(outputFileHandle);
+ }
+ outputFileHandle << endl;
+ }
+
+ if ((anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = sabund->getLabel();
+
+ delete sabund;
+ sabund = input->getSAbundVector(lastLabel);
+
+ mothurOut(sabund->getLabel()); mothurOutEndLine();
+ processedLabels.insert(sabund->getLabel());
+ userLabels.erase(sabund->getLabel());
+
+ outputFileHandle << sabund->getLabel();
+ for(int i=0;i<sumCalculators.size();i++){
+ vector<double> data = sumCalculators[i]->getValues(sabund);
+ outputFileHandle << '\t';
+ sumCalculators[i]->print(outputFileHandle);
+ }
+ outputFileHandle << endl;
+
+ //restore real lastlabel to save below
+ sabund->setLabel(saveLabel);
+ }
+
+ lastLabel = sabund->getLabel();
delete sabund;
+ sabund = input->getSAbundVector();
+ }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
+ needToRun = true;
+ }else {
+ mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ if (sabund != NULL) { delete sabund; }
sabund = input->getSAbundVector(lastLabel);
mothurOut(sabund->getLabel()); mothurOutEndLine();
- processedLabels.insert(sabund->getLabel());
- userLabels.erase(sabund->getLabel());
-
outputFileHandle << sabund->getLabel();
for(int i=0;i<sumCalculators.size();i++){
vector<double> data = sumCalculators[i]->getValues(sabund);
sumCalculators[i]->print(outputFileHandle);
}
outputFileHandle << endl;
+ delete sabund;
+ }
+
+ outputFileHandle.close();
+
+ delete input; globaldata->ginput = NULL;
+ delete read;
+ delete validCalculator;
+ globaldata->sabund = NULL;
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ errorOut(e, "SummaryCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+//Splits a shared file into one rabund file per group (sharedFileRoot +
+//group + ".rabund"), records each group name in this->groups, and returns
+//the list of files written.  Pre-existing per-group files are removed first.
+//NOTE(review): lookup[0] is dereferenced without an empty check -- assumes
+//the shared file contains at least one group; confirm upstream guarantees.
+vector<string> SummaryCommand::parseSharedFile(string filename) {
+	try {
+		vector<string> filenames;
+
+		map<string, ofstream*> filehandles;
+		map<string, ofstream*>::iterator it3;
+
-			//restore real lastlabel to save below
-			sabund->setLabel(saveLabel);
-		}
-
-		lastLabel = sabund->getLabel();
+		//read first line
+		read = new ReadOTUFile(filename);
+		read->read(&*globaldata);
-		delete sabund;
-		sabund = input->getSAbundVector();
+		input = globaldata->ginput;
+		vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
+
+		string sharedFileRoot = getRootName(filename);
+
+		//clears file before we start to write to it below
+		for (int i=0; i<lookup.size(); i++) {
+			remove((sharedFileRoot + lookup[i]->getGroup() + ".rabund").c_str());
+			filenames.push_back((sharedFileRoot + lookup[i]->getGroup() + ".rabund"));
		}
-		//output error messages about any remaining user labels
-		set<string>::iterator it;
-		bool needToRun = false;
-		for (it = userLabels.begin(); it != userLabels.end(); it++) {
-			mothurOut("Your file does not include the label " + *it);
-			if (processedLabels.count(lastLabel) != 1) {
-				mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
-				needToRun = true;
-			}else {
-				mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
-			}
+		ofstream* temp;
+		for (int i=0; i<lookup.size(); i++) {
+			temp = new ofstream;
+			filehandles[lookup[i]->getGroup()] = temp;
+			groups.push_back(lookup[i]->getGroup());
		}
+
+		//append each label's rabund vector to its group's file
+		while(lookup[0] != NULL) {
-		//run last label if you need to
-		if (needToRun == true) {
-			if (sabund != NULL) { delete sabund; }
-			sabund = input->getSAbundVector(lastLabel);
-
-			mothurOut(sabund->getLabel()); mothurOutEndLine();
-			outputFileHandle << sabund->getLabel();
-			for(int i=0;i<sumCalculators.size();i++){
-				vector<double> data = sumCalculators[i]->getValues(sabund);
-				outputFileHandle << '\t';
-				sumCalculators[i]->print(outputFileHandle);
+			for (int i = 0; i < lookup.size(); i++) {
+				RAbundVector rav = lookup[i]->getRAbundVector();
+				openOutputFileAppend(sharedFileRoot + lookup[i]->getGroup() + ".rabund", *(filehandles[lookup[i]->getGroup()]));
+				rav.print(*(filehandles[lookup[i]->getGroup()]));
+				(*(filehandles[lookup[i]->getGroup()])).close();
			}
-			outputFileHandle << endl;
-			delete sabund;
-		}
-		outputFileHandle.close();
+			for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+			lookup = input->getSharedRAbundVectors();
+		}
-		return 0;
+		//free memory
+		for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
+			delete it3->second;
+		}
+		delete read;
+		delete input;
+		globaldata->ginput = NULL;
+
+		return filenames;
	}
	catch(exception& e) {
-		errorOut(e, "SummaryCommand", "execute");
+		errorOut(e, "SummaryCommand", "parseSharedFile");
		exit(1);
	}
}
-
//**********************************************************************************************************************
InputData* input;
ValidCalculators* validCalculator;
SAbundVector* sabund;
- string outputFileName;
- ofstream outputFileHandle;
int abund, size;
bool abort, allLines;
set<string> labels; //holds labels to be used
string label, calc;
vector<string> Estimators;
+ vector<string> inputFileNames;
+ vector<string> groups;
+
+ vector<string> parseSharedFile(string);
+
};
#endif
//output to file
for (int r=0; r<globaldata->Groups.size(); r++) {
//output name
- out << globaldata->Groups[r] << '\t';
+ string name = globaldata->Groups[r];
+ if (name.length() < 10) { //pad with spaces to make compatible
+ while (name.length() < 10) { name += " "; }
+ }
+ out << name << '\t';
//output distances
for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }
//output to file
for (int r=0; r<globaldata->Groups.size(); r++) {
//output name
- out << globaldata->Groups[r] << '\t';
+ string name = globaldata->Groups[r];
+ if (name.length() < 10) { //pad with spaces to make compatible
+ while (name.length() < 10) { name += " "; }
+ }
+ out << name << '\t';
//output distances
for (int l = 0; l < r; l++) { out << dists[r][l] << '\t'; }