major change to the tree class to use the count table class instead of tree map....

author Sarah Westcott <mothur.westcott@gmail.com>

Fri, 7 Sep 2012 18:30:06 +0000 (14:30 -0400)

committer Sarah Westcott <mothur.westcott@gmail.com>

Fri, 7 Sep 2012 18:30:06 +0000 (14:30 -0400)
author Sarah Westcott <mothur.westcott@gmail.com>
Fri, 7 Sep 2012 18:30:06 +0000 (14:30 -0400)
committer Sarah Westcott <mothur.westcott@gmail.com>
Fri, 7 Sep 2012 18:30:06 +0000 (14:30 -0400)
diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj

index 4c2e330a280e84434f269e5ee08d133a8fd8c2a4..37932a428f83748dd8df69d92e1fc88ffb5077ef 100644 (file)
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -2286,8 +2286,8 @@
                                 GCC_MODEL_TUNING = "";
                                 GCC_OPTIMIZATION_LEVEL = 0;
                                 GCC_PREPROCESSOR_DEFINITIONS = (
-                                       "VERSION=\"\\\"1.26.0\\\"\"",
-                                       "RELEASE_DATE=\"\\\"7/9/2012\\\"\"",
+                                       "VERSION=\"\\\"1.27.0\\\"\"",
+                                       "RELEASE_DATE=\"\\\"8/8/2012\\\"\"",
                                 );
                                 GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
                                 GCC_WARN_ABOUT_RETURN_TYPE = YES;
diff --git a/aligncommand.cpp b/aligncommand.cpp

index a68fbfcba365207d7847c7eeaff12d9e4a4914e2..efc8ce489e79ac5e1d17084f5e952b39590894ba 100644 (file)
--- a/aligncommand.cpp
+++ b/aligncommand.cpp
@@ -572,7 +572,6 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                                 if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
                                         alignment->resize(candidateSeq->getUnaligned().length()+1);
                                 }
-                                                               
                                 Sequence temp = templateDB->findClosestSequence(candidateSeq);
                                 Sequence* templateSeq = &temp;
                                 
diff --git a/binsequencecommand.cpp b/binsequencecommand.cpp

index 0c867e35f730cc8e72c66153b14c23b5f54ce6f3..ad71d1065fb32904b365a7cd2be46a28718a7d98 100644 (file)
--- a/binsequencecommand.cpp
+++ b/binsequencecommand.cpp
@@ -15,8 +15,9 @@ vector<string> BinSeqCommand::setParameters(){
         try {
                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -34,7 +35,7 @@ vector<string> BinSeqCommand::setParameters(){
  string BinSeqCommand::getHelpString(){ 
         try {
                 string helpString = "";
-               helpString += "The bin.seqs command parameters are list, fasta, name, label and group.  The fasta and list are required, unless you have a valid current list and fasta file.\n";
+               helpString += "The bin.seqs command parameters are list, fasta, name, count, label and group.  The fasta and list are required, unless you have a valid current list and fasta file.\n";
                 helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and are separated by dashes.\n";
                 helpString += "The bin.seqs command should be in the following format: bin.seqs(fasta=yourFastaFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n";
                 helpString += "Example bin.seqs(fasta=amazon.fasta, group=amazon.groups, name=amazon.names).\n";
@@ -147,6 +148,14 @@ BinSeqCommand::BinSeqCommand(string option) {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
                                 }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                         }
  
                         
@@ -195,11 +204,26 @@ BinSeqCommand::BinSeqCommand(string option) {
                         if (groupfile == "not open") { abort = true; }
                         else if (groupfile == "not found") { groupfile = ""; }
                         else { m->setGroupFile(groupfile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { countfile = ""; abort = true; }
+                       else if (countfile == "not found") { countfile = "";  } 
+                       else { m->setCountTableFile(countfile); }
+            
+            if ((namesfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
                         
-                       if (namesfile == ""){
-                               vector<string> files; files.push_back(fastafile); 
-                               parser.getNameFile(files);
-                       }
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+                       
+            if (countfile == "") {
+                if (namesfile == ""){
+                    vector<string> files; files.push_back(fastafile); 
+                    parser.getNameFile(files);
+                }
+            }
                         
                 }
         }
@@ -229,9 +253,8 @@ int BinSeqCommand::execute(){
                 fasta->readFastaFile(fastafile);
                 
                 //if user gave a namesfile then use it
-               if (namesfile != "") {
-                       readNamesFile();
-               }
+               if (namesfile != "") {  readNamesFile();  }
+        if (countfile != "") {  ct.readTable(countfile);  }
                 
                 input = new InputData(listfile, "list");
                 list = input->getListVector();
@@ -362,7 +385,7 @@ void BinSeqCommand::readNamesFile() {
  //return 1 if error, 0 otherwise
  int BinSeqCommand::process(ListVector* list) {
         try {
-        string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + list->getLabel() + getOutputFileNameTag("fasta");
+        string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + list->getLabel() + "." + getOutputFileNameTag("fasta");
          m->openOutputFile(outputFileName, out);
          outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName);
          
@@ -381,24 +404,47 @@ int BinSeqCommand::process(ListVector* list) {
                  
                  //do work for that name
                  string sequence = fasta->getSequence(name);
-                if (sequence != "not found") {
-                    //if you don't have groups
-                    if (groupfile == "") {
-                        name = name + "\t" + toString(i+1);
-                        out << ">" << name << endl;
-                        out << sequence << endl;
-                    }else {//if you do have groups
-                        string group = groupMap->getGroup(name);
-                        if (group == "not found") {  
-                            m->mothurOut(name + " is missing from your group file. Please correct. ");  m->mothurOutEndLine();
-                            return 1;
-                        }else{
-                            name = name + "\t" + group + "\t" + toString(i+1);
+                
+                if (countfile != "") {
+                    if (sequence != "not found") {
+                        if (ct.hasGroupInfo()) {
+                            vector<string> groups = ct.getGroups(name);
+                            string groupInfo = "";
+                            for (int k = 0; k < groups.size()-1; k++) {
+                                groupInfo += groups[k] + "-";
+                            }
+                            if (groups.size() != 0) { groupInfo += groups[groups.size()-1]; }
+                            else { groupInfo = "not found";  }
+                            name = name + "\t" + groupInfo + "\t" + toString(i+1)+ "\tNumRep=" + toString(ct.getNumSeqs(name));
+                            out << ">" << name << endl;
+                            out << sequence << endl;
+                        }else {
+                            name = name + "\t" + toString(i+1) + "\tNumRep=" + toString(ct.getNumSeqs(name));
+                            out << ">" << name << endl;
+                            out << sequence << endl;
+                        }
+                        
+                    }else { m->mothurOut(name + " is missing from your fasta. Does your list file contain all sequence names or just the uniques?"); m->mothurOutEndLine(); return 1; }
+                }else {
+                    if (sequence != "not found") {
+                        //if you don't have groups
+                        if (groupfile == "") {
+                            name = name + "\t" + toString(i+1);
                              out << ">" << name << endl;
                              out << sequence << endl;
+                        }else {//if you do have groups
+                            string group = groupMap->getGroup(name);
+                            if (group == "not found") {  
+                                m->mothurOut(name + " is missing from your group file. Please correct. ");  m->mothurOutEndLine();
+                                return 1;
+                            }else{
+                                name = name + "\t" + group + "\t" + toString(i+1);
+                                out << ">" << name << endl;
+                                out << sequence << endl;
+                            }
                          }
-                    }
-                }else { m->mothurOut(name + " is missing from your fasta or name file. Please correct. "); m->mothurOutEndLine(); return 1; }
+                    }else { m->mothurOut(name + " is missing from your fasta or name file. Please correct. "); m->mothurOutEndLine(); return 1; }
+                }
              }
          }
          
diff --git a/binsequencecommand.h b/binsequencecommand.h

index 1fb5664b0c018c4120881055014ec08a4e6d5559..5bdd401f9411e4abb21aa6a12284bfa98f4b5bda 100644 (file)
--- a/binsequencecommand.h
+++ b/binsequencecommand.h
@@ -16,6 +16,7 @@
  #include "listvector.hpp"
  #include "fastamap.h"
  #include "groupmap.h"
+#include "counttable.h"
  
  class BinSeqCommand : public Command {
         
@@ -36,14 +37,14 @@ public:
         void help() { m->mothurOut(getHelpString()); }          
         
  private:
-       
+       CountTable ct;
         ListVector* list;
         InputData* input;
         FastaMap* fasta;
         GroupMap* groupMap;
         bool abort, allLines;
         set<string> labels; //holds labels to be used
-       string filename, fastafile, listfile, namesfile, groupfile, label, outputDir;
+       string filename, fastafile, listfile, namesfile, groupfile, countfile, label, outputDir;
         ofstream out;
         ifstream in, inNames;
         vector<string> outputNames;
diff --git a/chimeraperseuscommand.cpp b/chimeraperseuscommand.cpp

index e57c48a7e168cb0b1afa719e425be7c423b5aa62..7ae5d69141979736bcb2b6fcfb25e1ba4682086d 100644 (file)
--- a/chimeraperseuscommand.cpp
+++ b/chimeraperseuscommand.cpp
@@ -295,7 +295,7 @@ ChimeraPerseusCommand::ChimeraPerseusCommand(string option)  {
                                         bool ignore = false;
                                         if (countfileNames[i] == "current") { 
                                                 countfileNames[i] = m->getCountTableFile(); 
-                                               if (nameFileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
+                                               if (countfileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
                                                 else {  
                                                         m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
                                                         //erase from file list
diff --git a/classifyotucommand.cpp b/classifyotucommand.cpp

index 00ae690214177d94020069986aedbbb0cb118b9e..660d53c4bdf05c3249b74d84b7a2ba1eb7e5e50e 100644 (file)
--- a/classifyotucommand.cpp
+++ b/classifyotucommand.cpp
@@ -17,8 +17,9 @@ vector<string> ClassifyOtuCommand::setParameters(){
                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
                 CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(preftaxonomy);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                 CommandParameter pbasis("basis", "Multiple", "otu-sequence", "otu", "", "", "",false,false); parameters.push_back(pbasis);
                 CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff);
@@ -39,11 +40,12 @@ vector<string> ClassifyOtuCommand::setParameters(){
  string ClassifyOtuCommand::getHelpString(){    
         try {
                 string helpString = "";
-               helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, cutoff, label, basis and probs.  The taxonomy and list parameters are required unless you have a valid current file.\n";
+               helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, count, cutoff, label, basis and probs.  The taxonomy and list parameters are required unless you have a valid current file.\n";
                 helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. Providing it will keep the rankIDs in the summary file static.\n";
                 helpString += "The name parameter allows you add a names file with your taxonomy file.\n";
                 helpString += "The group parameter allows you provide a group file to use in creating the summary file breakdown.\n";
-               helpString += "The basis parameter allows you indicate what you want the summary file to represent, options are otu and sequence. Default is otu.\n";
+               helpString += "The count parameter allows you add a count file associated with your list file. When using the count parameter mothur assumes your list file contains only uniques.\n";
+        helpString += "The basis parameter allows you indicate what you want the summary file to represent, options are otu and sequence. Default is otu.\n";
                 helpString += "For example consider the following basis=sequence could give Clostridiales       3       105     16      43      46, where 105 is the total number of sequences whose otu classified to Clostridiales.\n";
                 helpString += "16 is the number of sequences in the otus from groupA, 43 is the number of sequences in the otus from groupB, and 46 is the number of sequences in the otus from groupC.\n";
                 helpString += "Now for basis=otu could give Clostridiales       3       7       6       1       2, where 7 is the number of otus that classified to Clostridiales.\n";
@@ -172,6 +174,14 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
                                 }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                         }
  
                         
@@ -211,6 +221,20 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                         if (groupfile == "not open") { abort = true; }  
                         else if (groupfile == "not found") { groupfile = ""; }
                         else { m->setGroupFile(groupfile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { countfile = ""; abort = true; }
+                       else if (countfile == "not found") { countfile = "";  } 
+                       else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+                       
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+
                         
                         //check for optional parameter and set defaults
                         // ...at some point should added some additional type checking...
@@ -235,10 +259,12 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                         
                         if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
                         
-                       if (namefile == ""){
-                               vector<string> files; files.push_back(taxfile);
-                               parser.getNameFile(files);
-                       }
+            if (countfile == "") {
+                if (namefile == ""){
+                    vector<string> files; files.push_back(taxfile);
+                    parser.getNameFile(files);
+                }
+            }
                         
                 }
         }
@@ -255,7 +281,11 @@ int ClassifyOtuCommand::execute(){
                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                 
                 //if user gave a namesfile then use it
-               if (namefile != "") {   m->readNames(namefile, nameMap, true);  }
+               if (namefile != "")     {       m->readNames(namefile, nameMap, true);  }
+        if (groupfile != "")    {   groupMap = new GroupMap(groupfile);  groupMap->readMap(); }
+        else { groupMap = NULL;  }
+        if (countfile != "") {  ct = new CountTable(); ct->readTable(countfile);    }
+        else {  ct = NULL;    }
                 
                 //read taxonomy file and save in map for easy access in building bin trees
                 m->readTax(taxfile, taxMap);
@@ -270,7 +300,7 @@ int ClassifyOtuCommand::execute(){
                 set<string> processedLabels;
                 set<string> userLabels = labels;
                 
-               if (m->control_pressed) { outputTypes.clear(); delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }  return 0; }
+               if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {      m->mothurRemove(outputNames[i]);  }  return 0; }
         
                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                         
@@ -278,7 +308,7 @@ int ClassifyOtuCommand::execute(){
                         
                                         m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
                                         process(list);
-                                       if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
+                                       if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete input; delete list; return 0; }
                                                                                 
                                         processedLabels.insert(list->getLabel());
                                         userLabels.erase(list->getLabel());
@@ -293,7 +323,7 @@ int ClassifyOtuCommand::execute(){
                                         process(list);
                                 
                                         
-                                       if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
+                                       if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; }  if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
                                                                                 
                                         processedLabels.insert(list->getLabel());
                                         userLabels.erase(list->getLabel());
@@ -329,10 +359,12 @@ int ClassifyOtuCommand::execute(){
                         process(list);
                         delete list;
                         
-                       if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
+                       if (m->control_pressed) { outputTypes.clear();  if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } delete input; delete list; return 0; }
                 }
                 
                 delete input;  
+        if (groupMap != NULL) { delete groupMap; }
+        if (ct != NULL) { delete ct; }
                                 
                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
                 
@@ -400,10 +432,16 @@ vector<string> ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* th
                                 if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
                                         m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine();
                                 }else{
+                    if (countfile != "") {
+                        int numDups = ct->getNumSeqs(names[i]); 
+                        for (int j = 0; j < numDups; j++) {  phylo->addSeqToTree(names[i], it->second);  }
+                        size += numDups;
+                    }else{
                                         //add seq to tree
-                                       phylo->addSeqToTree(names[i], it->second);
-                                       size++;
-                                       allNames.push_back(names[i]);
+                        phylo->addSeqToTree(names[i], it->second);
+                        size++;  
+                    }
+                    allNames.push_back(names[i]);
                                 }
                         }
  
@@ -486,24 +524,25 @@ int ClassifyOtuCommand::process(ListVector* processList) {
                 if (outputDir == "") { outputDir += m->hasPath(listfile); }
                                 
                 ofstream out;
-               string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + getOutputFileNameTag("constaxonomy");
+               string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." +getOutputFileNameTag("constaxonomy");
                 m->openOutputFile(outputFile, out);
                 outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
                 
                 ofstream outSum;
-               string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + getOutputFileNameTag("taxsummary");
+               string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." +getOutputFileNameTag("taxsummary");
                 m->openOutputFile(outputSumFile, outSum);
                 outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
                 
                 out << "OTU\tSize\tTaxonomy" << endl;
                 
                 PhyloSummary* taxaSum;
-               if (refTaxonomy != "") {
-                       taxaSum = new PhyloSummary(refTaxonomy, groupfile);
+        if (countfile != "") {
+            if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct);  }
+            else {  taxaSum = new PhyloSummary(ct); }
                 }else {
-                       taxaSum = new PhyloSummary(groupfile);
-               }
-               
+            if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap);  }
+            else {  taxaSum = new PhyloSummary(groupMap); }
+        }
  
                 //for each bin in the list vector
          string snumBins = toString(processList->getNumBins());
@@ -534,7 +573,34 @@ int ClassifyOtuCommand::process(ListVector* processList) {
                         if (basis == "sequence") {
                                 for(int j = 0; j < names.size(); j++) {  taxaSum->addSeqToTree(names[j], noConfidenceConTax);  }
                         }else { //otu
-                               taxaSum->addSeqToTree(noConfidenceConTax, names);
+                map<string, bool> containsGroup; 
+                if (countfile != "") {
+                    if (ct->hasGroupInfo()) {
+                        vector<string> mGroups = ct->getNamesOfGroups();
+                        for (int k = 0; k < names.size(); k++) {
+                            vector<int> counts = ct->getGroupCounts(names[k]);
+                            for (int h = 0; h < counts.size(); h++) {  
+                                if (counts[h] != 0) {  containsGroup[mGroups[h]] = true; }
+                            }
+                        }
+                    }
+                }else {
+                    if (groupfile != "") {
+                        vector<string> mGroups = groupMap->getNamesOfGroups();
+                        for (int j = 0; j < mGroups.size(); j++) { containsGroup[mGroups[j]] = false; }
+                        
+                        for (int k = 0; k < names.size(); k++) {
+                            //find out the sequences group
+                            string group = groupMap->getGroup(names[k]);
+                            
+                            if (group == "not found") {  m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
+                            else {
+                                containsGroup[group] = true;
+                            }
+                        }
+                    }
+                }
+                               taxaSum->addSeqToTree(noConfidenceConTax, containsGroup);
                         }
                 }
  
diff --git a/classifyotucommand.h b/classifyotucommand.h

index b924775cf5b30436c1783714a758281cf072c32f..45e377c3cbe1ac1ad735cd9c4f47d4c4c37c30a5 100644 (file)
--- a/classifyotucommand.h
+++ b/classifyotucommand.h
@@ -13,6 +13,7 @@
  #include "command.hpp"
  #include "listvector.hpp"
  #include "inputdata.h"
+#include "counttable.h"
  
  
  class ClassifyOtuCommand : public Command {
@@ -34,10 +35,11 @@ public:
         void help() { m->mothurOut(getHelpString()); }  
  
  private:
-
+    GroupMap* groupMap;
+    CountTable* ct;
         ListVector* list;
         InputData* input;
-       string listfile, namefile, taxfile, label, outputDir, refTaxonomy, groupfile, basis;
+       string listfile, namefile, taxfile, label, outputDir, refTaxonomy, groupfile, basis, countfile;
         bool abort, allLines, probs;
         int cutoff;
         set<string> labels; //holds labels to be used
diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp

index c76b047cbd064a5b7b967ab388109bb4e46cc0cc..43a021ee7d94e9d0638a4cbf2154717f8d9d3e89 100644 (file)
--- a/classifyseqscommand.cpp
+++ b/classifyseqscommand.cpp
@@ -17,8 +17,10 @@ vector<string> ClassifySeqsCommand::setParameters(){
                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
+
                 CommandParameter psearch("search", "Multiple", "kmer-blast-suffix-distance", "kmer", "", "", "",false,false); parameters.push_back(psearch);
                 CommandParameter pksize("ksize", "Number", "", "8", "", "", "",false,false); parameters.push_back(pksize);
                 CommandParameter pmethod("method", "Multiple", "bayesian-knn", "bayesian", "", "", "",false,false); parameters.push_back(pmethod);
@@ -50,11 +52,12 @@ string ClassifySeqsCommand::getHelpString(){
         try {
                 string helpString = "";
                 helpString += "The classify.seqs command reads a fasta file containing sequences and creates a .taxonomy file and a .tax.summary file.\n";
-               helpString += "The classify.seqs command parameters are reference, fasta, name, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend, numwanted and probs.\n";
+               helpString += "The classify.seqs command parameters are reference, fasta, name, group, count, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend, numwanted and probs.\n";
                 helpString += "The reference, fasta and taxonomy parameters are required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
                 helpString += "The search parameter allows you to specify the method to find most similar template.  Your options are: suffix, kmer, blast and distance. The default is kmer.\n";
                 helpString += "The name parameter allows you add a names file with your fasta file, if you enter multiple fasta files, you must enter matching names files for them.\n";
                 helpString += "The group parameter allows you add a group file so you can have the summary totals broken up by group.\n";
+        helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n";
                 helpString += "The method parameter allows you to specify classification method to use.  Your options are: bayesian and knn. The default is bayesian.\n";
                 helpString += "The ksize parameter allows you to specify the kmer size for finding most similar template to candidate.  The default is 8.\n";
                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
@@ -127,7 +130,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(){
  ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
         try {
                 abort = false; calledHelp = false;   
-               rdb = ReferenceDB::getInstance();
+               rdb = ReferenceDB::getInstance(); hasName = false; hasCount=false;
                 
                 //allow user to run help
                 if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -185,6 +188,14 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
                                 }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                         }
  
                         fastaFileName = validParameter.validFile(parameters, "fasta", false);
@@ -333,11 +344,90 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                         }
                                 }
                         }
-
+            
+            if (namefileNames.size() != 0) { hasName = true; }
+            
                         if (namefile != "") {
                                 if (namefileNames.size() != fastaFileNames.size()) { abort = true; m->mothurOut("If you provide a name file, you must have one for each fasta file."); m->mothurOutEndLine(); }
                         }
                         
+            //check for required parameters
+                       countfile = validParameter.validFile(parameters, "count", false);
+                       if (countfile == "not found") { 
+                countfile = "";  
+                       }else { 
+                               m->splitAtDash(countfile, countfileNames);
+                               
+                               //go through files and make sure they are good, if not, then disregard them
+                               for (int i = 0; i < countfileNames.size(); i++) {
+                                       
+                                       bool ignore = false;
+                                       if (countfileNames[i] == "current") { 
+                                               countfileNames[i] = m->getCountTableFile(); 
+                                               if (countfileNames[i] != "") {  m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
+                                               else {  
+                                                       m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true; 
+                                                       //erase from file list
+                                                       countfileNames.erase(countfileNames.begin()+i);
+                                                       i--;
+                                               }
+                                       }
+                                       
+                                       if (!ignore) {
+                                               
+                                               if (inputDir != "") {
+                                                       string path = m->hasPath(countfileNames[i]);
+                                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                                       if (path == "") {       countfileNames[i] = inputDir + countfileNames[i];               }
+                                               }
+                                               
+                                               int ableToOpen;
+                                               ifstream in;
+                                               
+                                               ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
+                                               
+                                               //if you can't open it, try default location
+                                               if (ableToOpen == 1) {
+                                                       if (m->getDefaultPath() != "") { //default path is set
+                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
+                                                               m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               countfileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               if (ableToOpen == 1) {
+                                                       if (m->getOutputDir() != "") { //default path is set
+                                                               string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
+                                                               m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                                               ifstream in2;
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                                               in2.close();
+                                                               countfileNames[i] = tryPath;
+                                                       }
+                                               }
+                                               
+                                               in.close();
+                                               
+                                               if (ableToOpen == 1) { 
+                                                       m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
+                                                       //erase from file list
+                                                       countfileNames.erase(countfileNames.begin()+i);
+                                                       i--;
+                                               }else {
+                                                       m->setCountTableFile(countfileNames[i]);
+                                               }
+                                       }
+                               }
+                       }
+            
+            if (countfileNames.size() != 0) { hasCount = true; if (countfileNames.size() != fastaFileNames.size()) {m->mothurOut("If you provide a count file, you must have one for each fasta file."); m->mothurOutEndLine(); } }
+            
+                       //make sure there is at least one valid file left
+            if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+
                         groupfile = validParameter.validFile(parameters, "group", false);
                         if (groupfile == "not found") { groupfile = "";  }
                         else { 
@@ -393,6 +483,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
  
                         if (groupfile != "") {
                                 if (groupfileNames.size() != fastaFileNames.size()) { abort = true; m->mothurOut("If you provide a group file, you must have one for each fasta file."); m->mothurOutEndLine(); }
+                if (hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
                         }else {
                                 for (int i = 0; i < fastaFileNames.size(); i++) {  groupfileNames.push_back("");  }
                         }
@@ -481,10 +572,12 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                         }
                         
              if (!abort) {
-                if (namefileNames.size() == 0){
-                    if (fastaFileNames.size() != 0) {
-                        vector<string> files; files.push_back(fastaFileNames[fastaFileNames.size()-1]); 
-                        parser.getNameFile(files);
+                if (!hasCount) {
+                    if (namefileNames.size() == 0){
+                        if (fastaFileNames.size() != 0) {
+                            vector<string> files; files.push_back(fastaFileNames[fastaFileNames.size()-1]); 
+                            parser.getNameFile(files);
+                        }
                      }
                  }
              }
@@ -694,46 +787,58 @@ int ClassifySeqsCommand::execute(){
                         }
                 #endif
  
-                       string group = "";
-                       if (groupfile != "") {  group = groupfileNames[s]; }
-                       
-                       PhyloSummary taxaSum(baseTName, group);
-                       
-                       if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } delete classify; return 0; }
-               
-                       if (namefile == "") {  taxaSum.summarize(tempTaxonomyFile);  }
-                       else {
-                               ifstream in;
-                               m->openInputFile(tempTaxonomyFile, in);
-                               
-                               //read in users taxonomy file and add sequences to tree
-                               string name, taxon;
-                               
-                               while(!in.eof()){
-                                       in >> name >> taxon; m->gobble(in);
-                                       
-                                       itNames = nameMap.find(name);
-               
-                                       if (itNames == nameMap.end()) { 
-                                               m->mothurOut(name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
-                                       }else{
-                                               for (int i = 0; i < itNames->second.size(); i++) { 
-                                                       taxaSum.addSeqToTree(itNames->second[i], taxon);  //add it as many times as there are identical seqs
-                                               }
-                                               itNames->second.clear();
-                                               nameMap.erase(itNames->first);
-                                       }
-                               }
-                               in.close();
-                       }
+                string group = "";
+                GroupMap* groupMap = NULL;
+                CountTable* ct = NULL;
+                PhyloSummary* taxaSum;
+                if (hasCount) { 
+                    ct = new CountTable();
+                    ct->readTable(countfileNames[s]);
+                    taxaSum = new PhyloSummary(baseTName, ct);
+                    taxaSum->summarize(tempTaxonomyFile);
+                }else {
+                    if (groupfile != "") {  group = groupfileNames[s]; groupMap = new GroupMap(group); groupMap->readMap(); }
+                    
+                    taxaSum = new PhyloSummary(baseTName, groupMap);
+                    
+                    if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; }  if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);        } delete classify; return 0; }
+                    
+                    if (namefile == "") {  taxaSum->summarize(tempTaxonomyFile);  }
+                    else {
+                        ifstream in;
+                        m->openInputFile(tempTaxonomyFile, in);
+                        
+                        //read in users taxonomy file and add sequences to tree
+                        string name, taxon;
+                        
+                        while(!in.eof()){
+                            if (m->control_pressed) { outputTypes.clear();  if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);        } delete classify; return 0; }
+                            
+                            in >> name >> taxon; m->gobble(in);
+                            
+                            itNames = nameMap.find(name);
+                            
+                            if (itNames == nameMap.end()) { 
+                                m->mothurOut(name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
+                            }else{
+                                for (int i = 0; i < itNames->second.size(); i++) { 
+                                    taxaSum->addSeqToTree(itNames->second[i], taxon);  //add it as many times as there are identical seqs
+                                }
+                                itNames->second.clear();
+                                nameMap.erase(itNames->first);
+                            }
+                        }
+                        in.close();
+                    }
+                }
                         m->mothurRemove(tempTaxonomyFile);
                         
-                       if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } delete classify; return 0; }
+                       if (m->control_pressed) {  outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } delete classify; return 0; }
                         
                         //print summary file
                         ofstream outTaxTree;
                         m->openOutputFile(taxSummary, outTaxTree);
-                       taxaSum.print(outTaxTree);
+                       taxaSum->print(outTaxTree);
                         outTaxTree.close();
                         
                         //output taxonomy with the unclassified bins added
@@ -745,12 +850,12 @@ int ClassifySeqsCommand::execute(){
                         m->openOutputFile(unclass, outTax);
                         
                         //get maxLevel from phylotree so you know how many 'unclassified's to add
-                       int maxLevel = taxaSum.getMaxLevel();
+                       int maxLevel = taxaSum->getMaxLevel();
                                                         
                         //read taxfile - this reading and rewriting is done to preserve the confidence scores.
                         string name, taxon;
                         while (!inTax.eof()) {
-                               if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } m->mothurRemove(unclass); delete classify; return 0; }
+                               if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; }  if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } m->mothurRemove(unclass); delete classify; return 0; }
  
                                 inTax >> name >> taxon; m->gobble(inTax);
                                 
@@ -761,6 +866,8 @@ int ClassifySeqsCommand::execute(){
                         inTax.close();  
                         outTax.close();
                         
+            if (ct != NULL) { delete ct; }
+            if (groupMap != NULL) { delete groupMap; } delete taxaSum;
                         m->mothurRemove(newTaxonomyFile);
                         rename(unclass.c_str(), newTaxonomyFile.c_str());
                         
diff --git a/classifyseqscommand.h b/classifyseqscommand.h

index 4965642e7f77f0d8638bceb177bd79788c8e39ad..6d43dcb9dd2067bab91f46d42ff8afa238a4a191 100644 (file)
--- a/classifyseqscommand.h
+++ b/classifyseqscommand.h
@@ -62,6 +62,7 @@ private:
         vector<linePair*> lines;
         vector<string> fastaFileNames;
         vector<string> namefileNames;
+    vector<string> countfileNames;
         vector<string> groupfileNames;
         vector<string> outputNames;
         map<string, vector<string> > nameMap;
@@ -70,10 +71,10 @@ private:
         Classify* classify;
         ReferenceDB* rdb;
         
-       string fastaFileName, templateFileName, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile;
+       string fastaFileName, templateFileName, countfile, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile;
         int processors, kmerSize, numWanted, cutoff, iters;
         float match, misMatch, gapOpen, gapExtend;
-       bool abort, probs, save, flip;
+       bool abort, probs, save, flip, hasName, hasCount;
         
         int driver(linePair*, string, string, string, string);
         int createProcesses(string, string, string, string); 
diff --git a/classifytreecommand.cpp b/classifytreecommand.cpp

index 7861a01bab3420f1598e351b30765dbd0ae5f124..69da8e0d74e58d54d7ae59c78c7ad7f91e9f4b29 100644 (file)
--- a/classifytreecommand.cpp
+++ b/classifytreecommand.cpp
@@ -15,8 +15,9 @@ vector<string> ClassifyTreeCommand::setParameters(){
         try {
                 CommandParameter ptree("tree", "InputTypes", "", "", "", "", "none",false,true); parameters.push_back(ptree);
          CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "", "", "none",false,true); parameters.push_back(ptaxonomy);
-        CommandParameter pname("name", "InputTypes", "", "", "", "", "none",false,false); parameters.push_back(pname);
-        CommandParameter pgroup("group", "InputTypes", "", "", "", "", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
          CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -37,8 +38,9 @@ string ClassifyTreeCommand::getHelpString(){
                 helpString += "The classify.tree command reads a tree and taxonomy file and output the consensus taxonomy for each node on the tree. \n";
                 helpString += "If you provide a group file, the concensus for each group will also be provided. \n";
                 helpString += "The new tree contains labels at each internal node.  The label is the node number so you can relate the tree to the summary file.\n";
+        helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n";
                 helpString += "The summary file lists the concensus taxonomy for the descendants of each node.\n";
-               helpString += "The classify.tree command parameters are tree, group, name and taxonomy. The tree and taxonomy files are required.\n";
+               helpString += "The classify.tree command parameters are tree, group, name, count and taxonomy. The tree and taxonomy files are required.\n";
          helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy.  The default is 51, meaning 51%. Cutoff cannot be below 51.\n";
          helpString += "The classify.tree command should be used in the following format: classify.tree(tree=test.tre, group=test.group, taxonomy=test.taxonomy)\n";
                 helpString += "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n"; 
@@ -147,6 +149,14 @@ ClassifyTreeCommand::ClassifyTreeCommand(string option)  {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                 }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                         }
                         
                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
@@ -178,16 +188,30 @@ ClassifyTreeCommand::ClassifyTreeCommand(string option)  {
                         else if (groupfile == "not found") { groupfile = ""; }
                         else { m->setGroupFile(groupfile); }
              
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { countfile = ""; abort = true; }
+                       else if (countfile == "not found") { countfile = "";  } 
+                       else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+                       
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+            
              string temp = validParameter.validFile(parameters, "cutoff", false);                       if (temp == "not found") { temp = "51"; }
                         m->mothurConvert(temp, cutoff); 
                         
                         if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
              
-            if (namefile == "") {
-                               vector<string> files; files.push_back(treefile);
-                               parser.getNameFile(files);
+            if (countfile == "") {
+                if (namefile == "") {
+                    vector<string> files; files.push_back(treefile);
+                    parser.getNameFile(files);
+                }
                         }
-                       
                 }
         }
         catch(exception& e) {
@@ -213,7 +237,7 @@ int ClassifyTreeCommand::execute(){
          
          TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
          vector<Tree*> T = reader->getTrees();
-        TreeMap* tmap = T[0]->getTreeMap();
+        CountTable* tmap = T[0]->getCountTable();
          Tree* outputTree = T[0];
          delete reader;
  
@@ -367,10 +391,15 @@ string ClassifyTreeCommand::getTaxonomy(set<string> names, int& size) {
                                 if (itTax == taxMap.end()) { //this name is not in taxonomy file, skip it
                                         m->mothurOut((*it) + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine();
                                 }else{
-                                       //add seq to tree
-                                       phylo->addSeqToTree((*it), itTax->second);
-                    size++;
-                               }
+                                       if (countfile != "") {
+                        int numDups = ct->getNumSeqs((*it)); 
+                        for (int j = 0; j < numDups; j++) {  phylo->addSeqToTree((*it), itTax->second);  }
+                        size += numDups;
+                    }else{
+                        //add seq to tree
+                        phylo->addSeqToTree((*it), itTax->second);
+                        size++;  
+                    }                          }
                         }
              
                         if (m->control_pressed) { delete phylo; return conTax; }
@@ -444,12 +473,12 @@ map<string, set<string> > ClassifyTreeCommand::getDescendantList(Tree*& T, int i
                 
                 int lc = T->tree[i].getLChild();
                 int rc = T->tree[i].getRChild();
-        TreeMap* tmap = T->getTreeMap();
+       // TreeMap* tmap = T->getTreeMap();
                 
                 if (lc == -1) { //you are a leaf your only descendant is yourself
-            string group = tmap->getGroup(T->tree[i].getName());
+            vector<string> groups = T->tree[i].getGroup();
              set<string> mynames; mynames.insert(T->tree[i].getName());
-            names[group] = mynames; //mygroup -> me
+            for (int j = 0; j < groups.size(); j++) { names[groups[j]] = mynames;   } //mygroup -> me
              names["AllGroups"] = mynames;
                 }else{ //your descedants are the combination of your childrens descendants
                         names = descendants[lc];
diff --git a/classifytreecommand.h b/classifytreecommand.h

index 758a438dc068f0790aab0b8b309f32a0fb6f24a5..dd972b61b457378462392f771d759585bef53188 100644 (file)
--- a/classifytreecommand.h
+++ b/classifytreecommand.h
@@ -12,6 +12,7 @@
  #include "command.hpp"
  #include "readtree.h"
  #include "treemap.h"
+#include "counttable.h"
  
  class ClassifyTreeCommand : public Command {
  public:
@@ -31,13 +32,14 @@ public:
         void help() { m->mothurOut(getHelpString()); }  
         
  private:
-       string treefile, taxonomyfile, groupfile, namefile, outputDir;
+       string treefile, taxonomyfile, groupfile, namefile, countfile, outputDir;
         bool abort;
         vector<string> outputNames;
      int numUniquesInName, cutoff;
      map<string, string> nameMap;
      map<string, int> nameCount;
      map<string, string> taxMap;
+    CountTable* ct;
         
         int getClassifications(Tree*&);
         map<string, set<string> > getDescendantList(Tree*&, int, map<int, map<string, set<string> > >);
diff --git a/clusterfragmentscommand.cpp b/clusterfragmentscommand.cpp

index 4a338419af8900f8b7ebd693225a7e9f4d193481..378e8ab0e09350e6e8853a5f4c1a1b0fe7c77fe4 100644 (file)
--- a/clusterfragmentscommand.cpp
+++ b/clusterfragmentscommand.cpp
@@ -29,7 +29,8 @@ inline bool comparePriority(seqRNode first, seqRNode second) {
  vector<string> ClusterFragmentsCommand::setParameters(){       
         try {
                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
                 CommandParameter pdiffs("diffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pdiffs);
                 CommandParameter ppercent("percent", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppercent);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
@@ -49,8 +50,8 @@ string ClusterFragmentsCommand::getHelpString(){
         try {
                 string helpString = "";
                 helpString += "The cluster.fragments command groups sequences that are part of a larger sequence.\n";
-               helpString += "The cluster.fragments command outputs a new fasta and name file.\n";
-               helpString += "The cluster.fragments command parameters are fasta, name, diffs and percent. The fasta parameter is required, unless you have a valid current file. \n";
+               helpString += "The cluster.fragments command outputs a new fasta and name or count file.\n";
+               helpString += "The cluster.fragments command parameters are fasta, name, count, diffs and percent. The fasta parameter is required, unless you have a valid current file. \n";
                 helpString += "The names parameter allows you to give a list of seqs that are identical. This file is 2 columns, first column is name or representative sequence, second column is a list of its identical sequences separated by commas.\n";
                 helpString += "The diffs parameter allows you to set the number of differences allowed, default=0. \n";
                 helpString += "The percent parameter allows you to set percentage of differences allowed, default=0. percent=2 means if the number of difference is less than or equal to two percent of the length of the fragment, then cluster.\n";
@@ -78,6 +79,7 @@ string ClusterFragmentsCommand::getOutputFileNameTag(string type, string inputNa
          else {
              if (type == "fasta") {  outputFileName =  "fragclust.fasta"; }
              else if (type == "name") {  outputFileName =  "fragclust.names"; }
+            else if (type == "count") {  outputFileName =  "fragclust.count.table"; }
              else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
          }
          return outputFileName;
@@ -96,6 +98,7 @@ ClusterFragmentsCommand::ClusterFragmentsCommand(){
                 vector<string> tempOutNames;
                 outputTypes["fasta"] = tempOutNames;
                 outputTypes["name"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
         }
         catch(exception& e) {
                 m->errorOut(e, "ClusterFragmentsCommand", "ClusterFragmentsCommand");
@@ -129,6 +132,7 @@ ClusterFragmentsCommand::ClusterFragmentsCommand(string option) {
                         vector<string> tempOutNames;
                         outputTypes["fasta"] = tempOutNames;
                         outputTypes["name"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                         
                         //if the user changes the input directory command factory will send this info to us in the output parameter 
                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -150,6 +154,14 @@ ClusterFragmentsCommand::ClusterFragmentsCommand(string option) {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                 }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                         }
  
                         //check for required parameters
@@ -171,6 +183,13 @@ ClusterFragmentsCommand::ClusterFragmentsCommand(string option) {
                         if (namefile == "not found") { namefile =  "";  }
                         else if (namefile == "not open") { namefile = ""; abort = true; }       
                         else {  readNameFile(); m->setNameFile(namefile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; countfile = ""; }  
+                       else if (countfile == "not found") { countfile = ""; }
+                       else { ct.readTable(countfile); m->setCountTableFile(countfile); }
+                       
+            if ((countfile != "") && (namefile != "")) { m->mothurOut("When executing a cluster.fragments command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
                         
                         string temp;
                         temp = validParameter.validFile(parameters, "diffs", false);            if (temp == "not found"){       temp = "0";                             }
@@ -179,10 +198,12 @@ ClusterFragmentsCommand::ClusterFragmentsCommand(string option) {
                         temp = validParameter.validFile(parameters, "percent", false);          if (temp == "not found"){       temp = "0";                             }
                         m->mothurConvert(temp, percent);
                         
-                       if (namefile == "") {
-                               vector<string> files; files.push_back(fastafile);
-                               parser.getNameFile(files);
-                       }
+                       if (countfile == "") {
+                if (namefile == "") {
+                    vector<string> files; files.push_back(fastafile);
+                    parser.getNameFile(files);
+                }
+            }
                         
                 }
                                 
@@ -229,10 +250,13 @@ int ClusterFragmentsCommand::execute(){
                                                 string jBases = alignSeqs[j].seq.getUnaligned();
                                                                                                         
                                                 if (isFragment(iBases, jBases)) {
-                                                       //merge
-                                                       alignSeqs[i].names += ',' + alignSeqs[j].names;
-                                                       alignSeqs[i].numIdentical += alignSeqs[j].numIdentical;
-
+                            if (countfile != "") {
+                                ct.mergeCounts(alignSeqs[i].names, alignSeqs[j].names);
+                            }else {
+                                //merge
+                                alignSeqs[i].names += ',' + alignSeqs[j].names;
+                                alignSeqs[i].numIdentical += alignSeqs[j].numIdentical;
+                            }
                                                         alignSeqs[j].active = 0;
                                                         alignSeqs[j].numIdentical = 0;
                                                         count++;
@@ -254,6 +278,7 @@ int ClusterFragmentsCommand::execute(){
                 
                 string newFastaFile = fileroot + getOutputFileNameTag("fasta");
                 string newNamesFile = fileroot + getOutputFileNameTag("name");
+        if (countfile != "") { newNamesFile = fileroot + getOutputFileNameTag("count"); }
                 
                 if (m->control_pressed) { return 0; }
                 
@@ -285,6 +310,11 @@ int ClusterFragmentsCommand::execute(){
                 if (itTypes != outputTypes.end()) {
                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
                 }
+        
+        itTypes = outputTypes.find("count");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+               }
  
                 return 0;
                 
@@ -372,7 +402,10 @@ int ClusterFragmentsCommand::readFASTA(){
                                         else{
                                                 seqRNode tempNode(itSize->second, seq, names[seq.getName()], seq.getUnaligned().length());
                                                 alignSeqs.push_back(tempNode);
-                                       }       
+                                       }
+                }else if(countfile != "") {
+                    seqRNode tempNode(ct.getNumSeqs(seq.getName()), seq, seq.getName(), seq.getUnaligned().length());
+                    alignSeqs.push_back(tempNode);
                                 }else { //no names file, you are identical to yourself 
                                         seqRNode tempNode(1, seq, seq.getName(), seq.getUnaligned().length());
                                         alignSeqs.push_back(tempNode);
@@ -396,17 +429,18 @@ void ClusterFragmentsCommand::printData(string newfasta, string newname){
                 ofstream outNames;
                 
                 m->openOutputFile(newfasta, outFasta);
-               m->openOutputFile(newname, outNames);
+               if (countfile == "") {  m->openOutputFile(newname, outNames); }
                 
                 for (int i = 0; i < alignSeqs.size(); i++) {
                         if (alignSeqs[i].numIdentical != 0) {
                                 alignSeqs[i].seq.printSequence(outFasta); 
-                               outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl;
+                               if (countfile == "") {  outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl;  }
                         }
                 }
                 
                 outFasta.close();
-               outNames.close();
+               if (countfile == "") {  outNames.close(); }
+        else { ct.printTable(newname); }
         }
         catch(exception& e) {
                 m->errorOut(e, "ClusterFragmentsCommand", "printData");
@@ -438,6 +472,5 @@ void ClusterFragmentsCommand::readNameFile(){
                 exit(1);
         }
  }
-
  /**************************************************************************************************/
  
diff --git a/clusterfragmentscommand.h b/clusterfragmentscommand.h

index c322529f3bedf95600a8b30e0ac013be2870d290..e3d861a8a1477c0edf71ae7208c86560ceb56860 100644 (file)
--- a/clusterfragmentscommand.h
+++ b/clusterfragmentscommand.h
@@ -13,6 +13,7 @@
  
  #include "command.hpp"
  #include "sequence.hpp"
+#include "counttable.h"
  
  /************************************************************/
  struct seqRNode {
@@ -46,8 +47,9 @@ public:
         void help() { m->mothurOut(getHelpString()); }  
         
  private:
+    CountTable ct;
         bool abort;
-       string fastafile, namefile, outputDir;
+       string fastafile, namefile, countfile, outputDir;
         int diffs, percent;
         vector<seqRNode> alignSeqs; 
         map<string, string> names; //represents the names file first column maps to second column
diff --git a/consensus.cpp b/consensus.cpp

index 1be052f3aee81f6f0aded053de8249dd958118dd..d45a395604148c398f0f7c5a3d379476e3bdda97 100644 (file)
--- a/consensus.cpp
+++ b/consensus.cpp
@@ -21,7 +21,7 @@ Tree* Consensus::getTree(vector<Tree*>& t){
                 
                 if (m->control_pressed) { return 0; }
                 
-               consensusTree = new Tree(t[0]->getTreeMap());
+               consensusTree = new Tree(t[0]->getCountTable());
                 
                 it2 = nodePairs.find(treeSet);
                 
@@ -37,8 +37,7 @@ Tree* Consensus::getTree(vector<Tree*>& t){
                 
                 if (m->control_pressed) {  delete consensusTree; return 0; }
                 
-        map<string, string> empty;
-               consensusTree->assembleTree(empty);
+               consensusTree->assembleTree();
                 
                 if (m->control_pressed) {  delete consensusTree; return 0; }
                                 
diff --git a/consensusseqscommand.cpp b/consensusseqscommand.cpp

index d6158ba3ed3bc147027d54098aa71398182dc2f6..4c7aefb340602efffa969b2612568299cbf1865b 100644 (file)
--- a/consensusseqscommand.cpp
+++ b/consensusseqscommand.cpp
@@ -15,7 +15,8 @@
  vector<string> ConsensusSeqsCommand::setParameters(){  
         try {
                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plist);
                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                 CommandParameter pcutoff("cutoff", "Number", "", "100", "", "", "",false,false); parameters.push_back(pcutoff);
@@ -36,7 +37,7 @@ string ConsensusSeqsCommand::getHelpString(){
         try {
                 string helpString = "";
                 helpString += "The consensus.seqs command can be used in 2 ways: create a consensus sequence from a fastafile, or with a listfile create a consensus sequence for each otu. Sequences must be aligned.\n";
-               helpString += "The consensus.seqs command parameters are fasta, list, name, cutoff and label.\n";
+               helpString += "The consensus.seqs command parameters are fasta, list, name, count, cutoff and label.\n";
                 helpString += "The fasta parameter allows you to enter the fasta file containing your sequences, and is required, unless you have a valid current fasta file. \n";
                 helpString += "The list parameter allows you to enter a your list file. \n";
                 helpString += "The name parameter allows you to enter a names file associated with the fasta file. \n";
@@ -65,6 +66,7 @@ string ConsensusSeqsCommand::getOutputFileNameTag(string type, string inputName=
          else {
              if (type == "fasta") {  outputFileName =  "cons.fasta"; }
              else if (type == "name") {  outputFileName =  "cons.names"; }
+            else if (type == "count") {  outputFileName =  "cons.count.table"; }
              else if (type == "summary") {  outputFileName =  "cons.summary"; }
              else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
          }
@@ -84,6 +86,7 @@ ConsensusSeqsCommand::ConsensusSeqsCommand(){
                 vector<string> tempOutNames;
                 outputTypes["fasta"] = tempOutNames;
                 outputTypes["name"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
                 outputTypes["summary"] = tempOutNames;
         }
         catch(exception& e) {
@@ -120,6 +123,7 @@ ConsensusSeqsCommand::ConsensusSeqsCommand(string option)  {
                         vector<string> tempOutNames;
                         outputTypes["fasta"] = tempOutNames;
                         outputTypes["name"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                         outputTypes["summary"] = tempOutNames;
                         
                                                 
@@ -151,6 +155,14 @@ ConsensusSeqsCommand::ConsensusSeqsCommand(string option)  {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
                                 }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                         }
                         
                         
@@ -168,6 +180,13 @@ ConsensusSeqsCommand::ConsensusSeqsCommand(string option)  {
                         else if (namefile == "not found") { namefile = ""; }
                         else { m->setNameFile(namefile); }
                         
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; countfile = ""; }  
+                       else if (countfile == "not found") { countfile = ""; }
+                       else { m->setCountTableFile(countfile); }
+                       
+            if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+            
                         listfile = validParameter.validFile(parameters, "list", true);
                         if (listfile == "not open") { abort = true; }
                         else if (listfile == "not found") { listfile = "";  }   
@@ -186,10 +205,12 @@ ConsensusSeqsCommand::ConsensusSeqsCommand(string option)  {
                         //if the user changes the output directory command factory will send this info to us in the output parameter 
                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(fastafile);      }
                         
-                       if (namefile == ""){
-                               vector<string> files; files.push_back(fastafile); 
-                               parser.getNameFile(files);
-                       }
+            if (countfile == "") {
+                if (namefile == ""){
+                    vector<string> files; files.push_back(fastafile); 
+                    parser.getNameFile(files);
+                }
+            }
                 }
         }
         catch(exception& e) {
@@ -209,6 +230,7 @@ int ConsensusSeqsCommand::execute(){
                 if (m->control_pressed) { return 0; }
                 
                 if (namefile != "") { readNames(); }
+        if (countfile != "") { ct.readTable(countfile);  }
                 
                 if (m->control_pressed) { return 0; }
                 
@@ -227,25 +249,12 @@ int ConsensusSeqsCommand::execute(){
                         string outputFastaFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta");
                         m->openOutputFile(outputFastaFile, outFasta);
                         outputNames.push_back(outputFastaFile); outputTypes["fasta"].push_back(outputFastaFile);
-                       
-                       vector<string> seqs;
-                       int seqLength = 0;
-                       for (map<string, string>::iterator it = nameMap.begin(); it != nameMap.end(); it++) {
-                               
-                               if (m->control_pressed) { outSummary.close(); outFasta.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0; }
-                               
-                               string seq = fastaMap[it->second];
-                               seqs.push_back(seq);
-                               
-                               if (seqLength == 0) { seqLength = seq.length(); }
-                               else if (seqLength != seq.length()) { m->mothurOut("[ERROR]: sequence are not the same length, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
-
-                       }
-                       
+        
                         vector< vector<float> > percentages; percentages.resize(5);
                         for (int j = 0; j < percentages.size(); j++) { percentages[j].resize(seqLength, 0.0); }
                         
                         string consSeq = "";
+            int thisCount;
                         //get counts
                         for (int j = 0; j < seqLength; j++) {
                                 
@@ -253,41 +262,55 @@ int ConsensusSeqsCommand::execute(){
                                 
                                 vector<int> counts; counts.resize(5, 0); //A,T,G,C,Gap
                                 int numDots = 0;
-                               
-                               for (int i = 0; i < seqs.size(); i++) {
+                               thisCount = 0;
+                               for (map<string, string>::iterator it = fastaMap.begin(); it != fastaMap.end(); it++) {
                                         
-                                       if (seqs[i][j] == '.') { numDots++; }
-                                       
-                                       char base = toupper(seqs[i][j]);
-                                       if (base == 'A') { counts[0]++; }
-                                       else if (base == 'T') { counts[1]++; }
-                                       else if (base == 'G') { counts[2]++; }
-                                       else if (base == 'C') { counts[3]++; }
-                                       else { counts[4]++; }
+                    string thisSeq = it->second;
+                    int size = 0;
+                    
+                    if (countfile != "") { size = ct.getNumSeqs(it->first); }
+                    else {
+                        map<string, int>::iterator itCount = nameFileMap.find(it->first);
+                        if (itCount != nameFileMap.end()) {
+                            size = itCount->second;
+                        }else { m->mothurOut("[ERROR]: file mismatch, aborting.\n"); m->control_pressed = true; break; }
+                    }
+                    
+                    for (int k = 0; k < size; k++) {
+                        if (thisSeq[j] == '.') { numDots++; }
+                        
+                        char base = toupper(thisSeq[j]);
+                        if (base == 'A') { counts[0]++; }
+                        else if (base == 'T') { counts[1]++; }
+                        else if (base == 'G') { counts[2]++; }
+                        else if (base == 'C') { counts[3]++; }
+                        else { counts[4]++; }
+                        thisCount++;
+                    }
                                 }
                                 
                                 char conBase = '.';
-                               if (numDots != seqs.size()) { conBase = getBase(counts, seqs.size()); }
+                               if (numDots != thisCount) { conBase = getBase(counts, thisCount); }
                                 
                                 consSeq += conBase;
                                 
-                               percentages[0][j] = counts[0] / (float) seqs.size();
-                               percentages[1][j] = counts[1] / (float) seqs.size();
-                               percentages[2][j] = counts[2] / (float) seqs.size();
-                               percentages[3][j] = counts[3] / (float) seqs.size();
-                               percentages[4][j] = counts[4] / (float) seqs.size();
-                               
+                               percentages[0][j] = counts[0] / (float) thisCount;
+                               percentages[1][j] = counts[1] / (float) thisCount;
+                               percentages[2][j] = counts[2] / (float) thisCount;
+                               percentages[3][j] = counts[3] / (float) thisCount;
+                               percentages[4][j] = counts[4] / (float) thisCount;
                         }
                         
                         for (int j = 0; j < seqLength; j++) { 
-                               outSummary << (j+1) << '\t' << percentages[0][j] << '\t'<< percentages[1][j] << '\t'<< percentages[2][j] << '\t' << percentages[3][j] << '\t' << percentages[4][j] << '\t' << seqs.size() << '\t' << consSeq[j] << endl;
+                               outSummary << (j+1) << '\t' << percentages[0][j] << '\t'<< percentages[1][j] << '\t'<< percentages[2][j] << '\t' << percentages[3][j] << '\t' << percentages[4][j] << '\t' << thisCount << '\t' << consSeq[j] << endl;
                         }
                         
                                 
                         outFasta << ">conseq" << endl << consSeq << endl;
                         
                         outSummary.close(); outFasta.close();
-                       
+            
+                       if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
                 
                 }else {
                         
@@ -414,12 +437,10 @@ int ConsensusSeqsCommand::processList(ListVector*& list){
                         if (m->control_pressed) { outSummary.close(); outName.close(); outFasta.close(); return 0; }
                         
                         string bin = list->get(i);
-                       
-                       string newName = "";
-                       string consSeq = getConsSeq(bin, outSummary, newName, i);
+                       string consSeq = getConsSeq(bin, outSummary, i);
                         
                         outFasta << ">seq" << (i+1) << endl << consSeq << endl;
-                       outName << "seq" << (i+1) << '\t' << "seq" << (i+1) << "," << newName << endl;
+                       outName << "seq" << (i+1) << '\t' << "seq" << (i+1) << "," << bin << endl;
                 }
                 
                 outSummary.close(); outName.close(); outFasta.close();
@@ -434,96 +455,127 @@ int ConsensusSeqsCommand::processList(ListVector*& list){
  }
  
  //***************************************************************************************************************
-//made this smart enough to owrk with unique or non unique list file
-string ConsensusSeqsCommand::getConsSeq(string bin, ofstream& outSummary, string& name, int binNumber){
+string ConsensusSeqsCommand::getConsSeq(string bin, ofstream& outSummary, int binNumber){
         try{
                 
                 string consSeq = "";
                 bool error = false;
-               
-               //the whole bin is the second column if no names file, otherwise build it
-               name = bin;
-               if (namefile != "") { name = ""; }
-               
+        int totalSize=0;
+                               
                 vector<string> binNames;
                 m->splitAtComma(bin, binNames);
-               
-               //get sequence strings for each name in the bin
-               vector<string> seqs;
-               
-               set<string> addedAlready;
-               int seqLength = 0;
-               for (int i = 0; i < binNames.size(); i++) {
-                       
-                       map<string, string>::iterator it;
-                       
-                       it = nameMap.find(binNames[i]);
-                       if (it == nameMap.end()) { 
-                               if (namefile == "") { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta file, please correct."); m->mothurOutEndLine(); error = true; }
-                               else { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta or name file, please correct."); m->mothurOutEndLine(); error = true; }
-                               break;
-                       }else {
-                               
-                               //add sequence string to seqs vector to process below
-                               string seq = fastaMap[it->second];
-                               seqs.push_back(seq);
-                               
-                               if (seqLength == 0) { seqLength = seq.length(); }
-                               else if (seqLength != seq.length()) { m->mothurOut("[ERROR]: sequence are not the same length, please correct."); m->mothurOutEndLine(); error = true; break; }
-                               
-                               if (namefile != "") { 
-                                       //did we add this line from name file already?
-                                       if (addedAlready.count(it->second) == 0) {
-                                               name += "," + nameFileMap[it->second];
-                                               addedAlready.insert(it->second);
-                                       }
-                               }
-                               
-                       }
-               }
-               
-               if (error) { m->control_pressed = true; return consSeq; }
-               
-               if (namefile != "") { name = name.substr(1); }
-               
-               vector< vector<float> > percentages; percentages.resize(5);
+        
+        vector< vector<float> > percentages; percentages.resize(5);
                 for (int j = 0; j < percentages.size(); j++) { percentages[j].resize(seqLength, 0.0); }
-               
-               //get counts
-               for (int j = 0; j < seqLength; j++) {
-                       
-                       if (m->control_pressed) { return consSeq; }
-                       
-                       vector<int> counts; counts.resize(5, 0); //A,T,G,C,Gap
-                       int numDots = 0;
-                       
-                       for (int i = 0; i < seqs.size(); i++) {
-                               
-                               if (seqs[i][j] == '.') { numDots++; }
-                               
-                               char base = toupper(seqs[i][j]);
-                               if (base == 'A') { counts[0]++; }
-                               else if (base == 'T') { counts[1]++; }
-                               else if (base == 'G') { counts[2]++; }
-                               else if (base == 'C') { counts[3]++; }
-                               else { counts[4]++; }
-                       }
-                       
-                       char conBase = '.';
-                       if (numDots != seqs.size()) { conBase = getBase(counts, seqs.size()); }
-                       
-                       consSeq += conBase;
-                       
-                       percentages[0][j] = counts[0] / (float) seqs.size();
-                       percentages[1][j] = counts[1] / (float) seqs.size();
-                       percentages[2][j] = counts[2] / (float) seqs.size();
-                       percentages[3][j] = counts[3] / (float) seqs.size();
-                       percentages[4][j] = counts[4] / (float) seqs.size();
-                       
+
+        if (countfile != "") {
+            //get counts
+            for (int j = 0; j < seqLength; j++) {
+                
+                if (m->control_pressed) { return consSeq; }
+                
+                vector<int> counts; counts.resize(5, 0); //A,T,G,C,Gap
+                int numDots = 0;
+                totalSize = 0;
+                 for (int i = 0; i < binNames.size(); i++) {
+                     if (m->control_pressed) { return consSeq; }
+                     
+                     string thisSeq = "";
+                     map<string, string>::iterator itFasta = fastaMap.find(binNames[i]);
+                     if (itFasta != fastaMap.end()) {
+                         thisSeq = itFasta->second;
+                     }else { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta file, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
+                     
+                     int size = ct.getNumSeqs(binNames[i]);
+                     if (size != 0) {
+                         for (int k = 0; k < size; k++) {
+                             if (thisSeq[j] == '.') { numDots++; }
+                             
+                             char base = toupper(thisSeq[j]);
+                             if (base == 'A') { counts[0]++; }
+                             else if (base == 'T') { counts[1]++; }
+                             else if (base == 'G') { counts[2]++; }
+                             else if (base == 'C') { counts[3]++; }
+                             else { counts[4]++; }
+                             totalSize++;
+                         }
+                     }else { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your count file, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
+                 }
+                char conBase = '.';
+                if (numDots != totalSize) { conBase = getBase(counts, totalSize); }
+                
+                consSeq += conBase;
+                
+                percentages[0][j] = counts[0] / (float) totalSize;
+                percentages[1][j] = counts[1] / (float) totalSize;
+                percentages[2][j] = counts[2] / (float) totalSize;
+                percentages[3][j] = counts[3] / (float) totalSize;
+                percentages[4][j] = counts[4] / (float) totalSize;
+            }
+
+        }else {
+               
+            //get sequence strings for each name in the bin
+            vector<string> seqs;
+            for (int i = 0; i < binNames.size(); i++) {
+                
+                map<string, string>::iterator it;
+                it = nameMap.find(binNames[i]);
+                if (it == nameMap.end()) { 
+                    if (namefile == "") { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta file, please correct."); m->mothurOutEndLine(); error = true; }
+                    else { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta or name file, please correct."); m->mothurOutEndLine(); error = true; }
+                    break;
+                }else {
+                    //add sequence string to seqs vector to process below
+                    map<string, string>::iterator itFasta = fastaMap.find(it->second);
+                    
+                    if (itFasta != fastaMap.end()) {
+                        string seq = itFasta->second;
+                        seqs.push_back(seq);
+                    }else { m->mothurOut("[ERROR]: file mismatch, aborting. \n"); }
+                }
+            }
+            
+            if (error) { m->control_pressed = true; return consSeq; }
+            totalSize = seqs.size();
+            //get counts
+            for (int j = 0; j < seqLength; j++) {
+                
+                if (m->control_pressed) { return consSeq; }
+                
+                vector<int> counts; counts.resize(5, 0); //A,T,G,C,Gap
+                int numDots = 0;
+                
+                for (int i = 0; i < seqs.size(); i++) {
+                    
+                    if (seqs[i][j] == '.') { numDots++; }
+                    
+                    char base = toupper(seqs[i][j]);
+                    if (base == 'A') { counts[0]++; }
+                    else if (base == 'T') { counts[1]++; }
+                    else if (base == 'G') { counts[2]++; }
+                    else if (base == 'C') { counts[3]++; }
+                    else { counts[4]++; }
+                }
+                
+                char conBase = '.';
+                if (numDots != seqs.size()) { conBase = getBase(counts, seqs.size()); }
+                
+                consSeq += conBase;
+                
+                percentages[0][j] = counts[0] / (float) seqs.size();
+                percentages[1][j] = counts[1] / (float) seqs.size();
+                percentages[2][j] = counts[2] / (float) seqs.size();
+                percentages[3][j] = counts[3] / (float) seqs.size();
+                percentages[4][j] = counts[4] / (float) seqs.size();
+                
+            }
                 }
-               
+        
+        
+        
                 for (int j = 0; j < seqLength; j++) { 
-                       outSummary << (binNumber + 1) << '\t' << (j+1) << '\t' << percentages[0][j] << '\t'<< percentages[1][j] << '\t'<< percentages[2][j] << '\t' << percentages[3][j] << '\t' << percentages[4][j] << '\t' << seqs.size() << '\t' << consSeq[j] << endl;
+                       outSummary << (binNumber + 1) << '\t' << (j+1) << '\t' << percentages[0][j] << '\t'<< percentages[1][j] << '\t'<< percentages[2][j] << '\t' << percentages[3][j] << '\t' << percentages[4][j] << '\t' << totalSize << '\t' << consSeq[j] << endl;
                 }
                 
                 return consSeq;
@@ -646,7 +698,8 @@ int ConsensusSeqsCommand::readFasta(){
                 
                 ifstream in;
                 m->openInputFile(fastafile, in);
-               
+               seqLength = 0;
+        
                 while (!in.eof()) {
                         
                         if (m->control_pressed) { break; }
@@ -657,7 +710,10 @@ int ConsensusSeqsCommand::readFasta(){
                         if (name != "") {
                                 fastaMap[name] = seq.getAligned();
                                 nameMap[name] = name; //set nameMap incase no names file
-                               nameFileMap[name] = name;
+                               nameFileMap[name] = 1;
+                
+                if (seqLength == 0) { seqLength = seq.getAligned().length(); }
+                               else if (seqLength != seq.getAligned().length()) { m->mothurOut("[ERROR]: sequence are not the same length, please correct."); m->mothurOutEndLine(); m->control_pressed = true; break; }
                         }
                 }
                 
@@ -688,7 +744,7 @@ int ConsensusSeqsCommand::readNames(){
               
               it = nameMap.find(thisname);
                          if (it != nameMap.end()) { //then this sequence was in the fastafile
-                                nameFileMap[thisname] = repnames;      //for later when outputting the new namesFile if the list file is unique
+                                nameFileMap[thisname] = m->getNumNames(repnames);      //for later when outputting the new namesFile if the list file is unique
                   
                                  vector<string> splitRepNames;
                                  m->splitAtComma(repnames, splitRepNames);
diff --git a/consensusseqscommand.h b/consensusseqscommand.h

index 1459b43d25bd54b525459721b5de55a5a1d6d9f6..e0c97150bfb0bc3e5baafcea911c4ba1c6c60dcb 100644 (file)
--- a/consensusseqscommand.h
+++ b/consensusseqscommand.h
@@ -13,6 +13,7 @@
  
  #include "command.hpp"
  #include "listvector.hpp"
+#include "counttable.h"
  
  class ConsensusSeqsCommand : public Command {
  public:
@@ -34,19 +35,20 @@ public:
         
  private:
         
+    CountTable ct;
         bool abort, allLines;
-       string fastafile, listfile, namefile, label, outputDir;
+       string fastafile, listfile, namefile, countfile, label, outputDir;
         set<string> labels;
         vector<string> outputNames;
         map<string, string> fastaMap;
         map<string, string> nameMap;
-       map<string, string> nameFileMap;
-       int cutoff;
+       map<string, int> nameFileMap;
+       int cutoff, seqLength;
         
         int readFasta();
         int readNames();
         int processList(ListVector*&);
-       string getConsSeq(string, ofstream&, string&, int);
+       string getConsSeq(string, ofstream&, int);
         char getBase(vector<int>, int);
  };
  
diff --git a/countseqscommand.cpp b/countseqscommand.cpp

index 7b9aebfbd63d496600a518bca7404f7a4cf4ada0..75d21c2250166d202f71ed21786dca16db40a38f 100644 (file)
--- a/countseqscommand.cpp
+++ b/countseqscommand.cpp
@@ -10,6 +10,7 @@
  #include "countseqscommand.h"
  #include "groupmap.h"
  #include "sharedutilities.h"
+#include "counttable.h"
  
  //**********************************************************************************************************************
  vector<string> CountSeqsCommand::setParameters(){      
@@ -175,7 +176,7 @@ int CountSeqsCommand::execute(){
          int total = 0;
          if (!large) { total = processSmall(outputFileName); }
          else { total = processLarge(outputFileName);  }
-                               
+        
                 if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
                 
          //set rabund file as new current rabundfile
diff --git a/counttable.cpp b/counttable.cpp

index 376bd73487b4031077f7dd26b1c58b648d9ff04d..5307beee8379d8e758b946b3847dfa1be2d5a7b2 100644 (file)
--- a/counttable.cpp
+++ b/counttable.cpp
@@ -8,6 +8,57 @@
  
  #include "counttable.h"
  
+/************************************************************/
+int CountTable::createTable(set<string>& n, map<string, string>& g, set<string>& gs) {
+    try {
+        int numGroups = 0;
+        groups.clear();
+        totalGroups.clear();
+        indexGroupMap.clear();
+        indexNameMap.clear();
+        counts.clear();
+        for (set<string>::iterator it = gs.begin(); it != gs.end(); it++) { groups.push_back(*it);  hasGroups = true; }
+        numGroups = groups.size();
+        totalGroups.resize(numGroups, 0);
+        
+               //sort groups to keep consistent with how we store the groups in groupmap
+        sort(groups.begin(), groups.end());
+        for (int i = 0; i < groups.size(); i++) {  indexGroupMap[groups[i]] = i; }
+        m->setAllGroups(groups);
+        
+        uniques = 0;
+        total = 0;
+        for (set<string>::iterator it = n.begin(); it != n.end(); it++) {
+            
+            if (m->control_pressed) { break; }
+            
+            string seqName = *it;
+            
+            vector<int> groupCounts; groupCounts.resize(numGroups, 0);
+            map<string, string>::iterator itGroup = g.find(seqName);
+            
+            if (itGroup != g.end()) {   
+                groupCounts[indexGroupMap[itGroup->second]] = 1; 
+                totalGroups[indexGroupMap[itGroup->second]]++;
+            }else { m->mothurOut("[ERROR]: Your group file does not contain " + seqName + ". Please correct."); m->mothurOutEndLine(); }
+            
+            map<string, int>::iterator it2 = indexNameMap.find(seqName);
+            if (it2 == indexNameMap.end()) {
+                if (hasGroups) {  counts.push_back(groupCounts);  }
+                indexNameMap[seqName] = uniques;
+                totals.push_back(1);
+                total++;
+                uniques++;
+            }
+        }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "createTable");
+               exit(1);
+       }
+}
  /************************************************************/
  bool CountTable::testGroups(string file) {
      try {
@@ -26,6 +77,118 @@ bool CountTable::testGroups(string file) {
         }
  }
  /************************************************************/
+int CountTable::createTable(string namefile, string groupfile, bool createGroup) {
+    try {
+        
+        if (namefile == "") { m->mothurOut("[ERROR]: namefile cannot be blank when creating a count table.\n"); m->control_pressed = true; }
+                                           
+        GroupMap* groupMap;
+        int numGroups = 0;
+        groups.clear();
+        totalGroups.clear();
+        indexGroupMap.clear();
+        indexNameMap.clear();
+        counts.clear();
+        map<int, string> originalGroupIndexes;
+        
+        if (groupfile != "") { 
+            hasGroups = true;
+            groupMap = new GroupMap(groupfile); groupMap->readMap();
+            numGroups = groupMap->getNumGroups();
+            groups = groupMap->getNamesOfGroups();
+            totalGroups.resize(numGroups, 0);
+        }else if(createGroup) {
+            hasGroups = true;
+            numGroups = 1;
+            groups.push_back("Group1");
+            totalGroups.resize(numGroups, 0);
+        }
+               //sort groups to keep consistent with how we store the groups in groupmap
+        sort(groups.begin(), groups.end());
+        for (int i = 0; i < groups.size(); i++) {  indexGroupMap[groups[i]] = i; }
+        m->setAllGroups(groups);
+        
+        bool error = false;
+        string name;
+        uniques = 0;
+        total = 0;
+        
+        
+        //open input file
+        ifstream in;
+        m->openInputFile(namefile, in);
+        
+        int total = 0;
+        while (!in.eof()) {
+            if (m->control_pressed) { break; }
+            
+            string firstCol, secondCol;
+            in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
+            
+            vector<string> names;
+            m->splitAtChar(secondCol, names, ',');
+            
+            map<string, int> groupCounts;
+            int thisTotal = 0;
+            if (groupfile != "") {
+                //set to 0
+                for (int i = 0; i < groups.size(); i++) { groupCounts[groups[i]] = 0; }
+                
+                //get counts for each of the users groups
+                for (int i = 0; i < names.size(); i++) {
+                    string group = groupMap->getGroup(names[i]);
+                    
+                    if (group == "not found") { m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); error=true; }
+                    else {
+                        map<string, int>::iterator it = groupCounts.find(group);
+                        
+                        //if not found, then this sequence is not from a group we care about
+                        if (it != groupCounts.end()) {
+                            it->second++;
+                            thisTotal++;
+                        }
+                    }
+                }
+            }else if (createGroup) {
+                groupCounts["Group1"]=0;
+                for (int i = 0; i < names.size(); i++) {
+                    string group = "Group1";
+                    groupCounts["Group1"]++; thisTotal++;
+                }
+            }else { thisTotal = names.size();  }
+            
+            //if group info, then read it
+            vector<int> thisGroupsCount; thisGroupsCount.resize(numGroups, 0);
+            for (int i = 0; i < numGroups; i++) {  
+                thisGroupsCount[i] = groupCounts[groups[i]]; 
+                totalGroups[i] += thisGroupsCount[i]; 
+            }
+            
+            map<string, int>::iterator it = indexNameMap.find(firstCol);
+            if (it == indexNameMap.end()) {
+                if (hasGroups) {  counts.push_back(thisGroupsCount);  }
+                indexNameMap[firstCol] = uniques;
+                totals.push_back(thisTotal);
+                total += thisTotal;
+                uniques++;
+            }else {
+                error = true;
+                m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + firstCol + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); 
+            }
+        }
+        in.close();
+               
+        if (error) { m->control_pressed = true; }
+               if (groupfile != "") { delete groupMap; }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "createTable");
+               exit(1);
+       }
+}
+/************************************************************/
  int CountTable::readTable(string file) {
      try {
          filename = file;
@@ -89,6 +252,68 @@ int CountTable::readTable(string file) {
         }
  }
  /************************************************************/
+int CountTable::printTable(string file) {
+    try {
+        ofstream out;
+        m->openOutputFile(file, out); 
+               out << "Representative_Sequence\ttotal\t";
+        for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; }
+        out << endl;
+        
+        for (map<string, int>::iterator itNames = indexNameMap.begin(); itNames != indexNameMap.end(); itNames++) {
+            out << itNames->first << '\t' << totals[itNames->second] << '\t';
+            if (hasGroups) {
+                
+                for (int i = 0; i < groups.size(); i++) {
+                    out << counts[itNames->second][i] << '\t';
+                }
+            }
+            out << endl;
+        }
+        out.close();
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "printTable");
+               exit(1);
+       }
+}
+/************************************************************/
+int CountTable::printHeaders(ofstream& out) {
+    try {
+               out << "Representative_Sequence\ttotal\t";
+        for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; }
+        out << endl;
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "printHeaders");
+               exit(1);
+       }
+}
+/************************************************************/
+int CountTable::printSeq(ofstream& out, string seqName) {
+    try {
+               map<string, int>::iterator it = indexNameMap.find(seqName);
+        if (it == indexNameMap.end()) {
+            m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+        }else { 
+            out << it->first << '\t' << totals[it->second] << '\t';
+            if (hasGroups) {
+                for (int i = 0; i < groups.size(); i++) {
+                    out << counts[it->second][i] << '\t';
+                }
+            }
+            out << endl;
+        }
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "printSeq");
+               exit(1);
+       }
+}
+/************************************************************/
  //group counts for a seq
  vector<int> CountTable::getGroupCounts(string seqName) {
      try {
@@ -154,6 +379,123 @@ int CountTable::getGroupCount(string seqName, string groupName) {
                 exit(1);
         }
  }
+/************************************************************/
+//set the number of sequences for the seq for the group
+int CountTable::setAbund(string seqName, string groupName, int num) {
+    try {
+        if (hasGroups) {
+            map<string, int>::iterator it = indexGroupMap.find(groupName);
+            if (it == indexGroupMap.end()) {
+                m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            }else { 
+                map<string, int>::iterator it2 = indexNameMap.find(seqName);
+                if (it2 == indexNameMap.end()) {
+                    m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+                }else { 
+                    int oldCount = counts[it2->second][it->second];
+                    counts[it2->second][it->second] = num;
+                    totalGroups[it->second] += (num - oldCount);
+                    total += (num - oldCount);
+                    totals[it2->second] += (num - oldCount);
+                }
+            }
+        }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->control_pressed = true; }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "set");
+               exit(1);
+       }
+}
+/************************************************************/
+//add group
+int CountTable::addGroup(string groupName) {
+    try {        
+        bool sanity = m->inUsersGroups(groupName, groups);
+        if (sanity) { m->mothurOut("[ERROR]: " + groupName + " is already in the count table, cannot add again.\n"); m->control_pressed = true;  return 0; }
+        
+        groups.push_back(groupName);
+        if (!hasGroups) { counts.resize(uniques);  }
+        
+        for (int i = 0; i < counts.size(); i++) { counts[i].push_back(0); }
+        totalGroups.push_back(0);
+        indexGroupMap[groupName] = groups.size()-1;
+        map<string, int> originalGroupMap = indexGroupMap;
+        
+        //important to play well with others, :)
+        sort(groups.begin(), groups.end());
+        
+        //fix indexGroupMap && totalGroups
+        vector<int> newTotals; newTotals.resize(groups.size(), 0);
+        for (int i = 0; i < groups.size(); i++) {  
+            indexGroupMap[groups[i]] = i;  
+            //find original spot of group[i]
+            int index = originalGroupMap[groups[i]];
+            newTotals[i] = totalGroups[index];
+        }
+        totalGroups = newTotals;
+        
+        //fix counts vectors
+        for (int i = 0; i < counts.size(); i++) {
+            vector<int> newCounts; newCounts.resize(groups.size(), 0);
+            for (int j = 0; j < groups.size(); j++) {  
+                //find original spot of group[i]
+                int index = originalGroupMap[groups[j]];
+                newCounts[j] = counts[i][index];
+            }
+            counts[i] = newCounts;
+        }
+        hasGroups = true;
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "addGroup");
+               exit(1);
+       }
+}
+/************************************************************/
+//vector of groups for the seq
+vector<string> CountTable::getGroups(string seqName) {
+    try {
+        vector<string> thisGroups;
+        if (hasGroups) {
+            vector<int> thisCounts = getGroupCounts(seqName);
+            for (int i = 0; i < thisCounts.size(); i++) {  
+                if (thisCounts[i] != 0) {  thisGroups.push_back(groups[i]); }
+            } 
+        }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->control_pressed = true; }
+        
+        return thisGroups;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "getGroups");
+               exit(1);
+       }
+}
+/************************************************************/
+//total number of seqs represented by seq
+int CountTable::renameSeq(string oldSeqName, string newSeqName) {
+    try {
+        
+        map<string, int>::iterator it = indexNameMap.find(oldSeqName);
+        if (it == indexNameMap.end()) {
+            m->mothurOut("[ERROR]: " + oldSeqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+        }else {  
+            int index = it->second;
+            indexNameMap.erase(it);
+            indexNameMap[newSeqName] = index;
+        }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "renameSeq");
+               exit(1);
+       }
+}
+
  /************************************************************/
  //total number of seqs represented by seq
  int CountTable::getNumSeqs(string seqName) {
@@ -213,6 +555,30 @@ int CountTable::push_back(string seqName) {
         }
  }
  /************************************************************/
+//remove sequence
+int CountTable::remove(string seqName) {
+    try {
+        map<string, int>::iterator it = indexNameMap.find(seqName);
+        if (it == indexNameMap.end()) {
+            uniques--;
+            if (hasGroups){ //remove this sequences counts from group totals
+                for (int i = 0; i < totalGroups.size(); i++) {  totalGroups[i] -= counts[it->second][i];  counts[it->second][i] = 0; }
+            }
+            int thisTotal = totals[it->second]; totals[it->second] = 0;
+            total -= thisTotal;
+            indexNameMap.erase(it);
+        }else {
+            m->mothurOut("[ERROR]: Your count table contains does not include " + seqName + ", cannot remove."); m->mothurOutEndLine(); m->control_pressed = true;
+        }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "push_back");
+               exit(1);
+       }
+}
+/************************************************************/
  //add seqeunce without group info
  int CountTable::push_back(string seqName, int thisTotal) {
      try {
@@ -243,6 +609,7 @@ int CountTable::push_back(string seqName, vector<int> groupCounts) {
              if ((hasGroups) && (groupCounts.size() != getNumGroups())) {  m->mothurOut("[ERROR]: Your count table has a " + toString(getNumGroups()) + " groups and " + seqName + " has " + toString(groupCounts.size()) + ", please correct."); m->mothurOutEndLine(); m->control_pressed = true;  }
              int thisTotal = 0;
              for (int i = 0; i < getNumGroups(); i++) {   totalGroups[i] += groupCounts[i];  thisTotal += groupCounts[i]; }
+            if (hasGroups) {  counts.push_back(groupCounts);  }
              indexNameMap[seqName] = uniques;
              totals.push_back(thisTotal);
              total+= thisTotal;
@@ -293,7 +660,30 @@ vector<string> CountTable::getNamesOfSeqs() {
         }
  }
  /************************************************************/
-//returns names of seqs
+//returns the names of all unique sequences in file
+vector<string> CountTable::getNamesOfSeqs(string group) {
+    try {
+        vector<string> names;
+        if (hasGroups) {
+            map<string, int>::iterator it = indexGroupMap.find(group);
+            if (it == indexGroupMap.end()) {
+                m->mothurOut("[ERROR]: " + group + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            }else { 
+                for (map<string, int>::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) {
+                    if (counts[it2->second][it->second] != 0) {  names.push_back(it2->first); }
+                }
+            }
+        }else{  m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n");  m->control_pressed = true; }
+        
+        return names;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "getNamesOfSeqs");
+               exit(1);
+       }
+}
+/************************************************************/
+//merges counts of seq1 and seq2, saving in seq1
  int CountTable::mergeCounts(string seq1, string seq2) {
      try {
          map<string, int>::iterator it = indexNameMap.find(seq1);
@@ -305,17 +695,12 @@ int CountTable::mergeCounts(string seq1, string seq2) {
                  m->mothurOut("[ERROR]: " + seq2 + " is not in your count table. Please correct.\n"); m->control_pressed = true;
              }else { 
                  //merge data
-                for (int i = 0; i < groups.size(); i++) {
-                    counts[it->second][i] += counts[it2->second][i];
-                    counts[it2->second][i] = 0;
-                }
+                for (int i = 0; i < groups.size(); i++) { counts[it->second][i] += counts[it2->second][i]; }
                  totals[it->second] += totals[it2->second];
-                totals[it2->second] = 0;
                  uniques--;
                  indexNameMap.erase(it2); 
              }
          }
-        
          return 0;
      }
         catch(exception& e) {
@@ -323,6 +708,25 @@ int CountTable::mergeCounts(string seq1, string seq2) {
                 exit(1);
         }
  }
+/************************************************************/
+int CountTable::copy(CountTable* ct) {
+    try {
+        vector<string> thisGroups = ct->getNamesOfGroups();
+        for (int i = 0; i < thisGroups.size(); i++) { addGroup(thisGroups[i]); }
+        vector<string> names = ct->getNamesOfSeqs();
+                                                               
+        for (int i = 0; i < names.size(); i++) {
+            vector<int> thisCounts = ct->getGroupCounts(names[i]);
+            push_back(names[i], thisCounts);
+        }
+                                                               
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "copy");
+               exit(1);
+       }
+}
  
  /************************************************************/
  
diff --git a/counttable.h b/counttable.h

index 04e26d775bb84042ea409e2d18694eeaa216c4df..68ba8d211cae1a12aa1d39a76f720a0998a87ad9 100644 (file)
--- a/counttable.h
+++ b/counttable.h
@@ -38,36 +38,49 @@
  
  #include "mothurout.h"
  #include "listvector.hpp"
+#include "groupmap.h"
  
  class CountTable {
      
      public:
      
-        CountTable() { m = MothurOut::getInstance(); hasGroups = false; total = 0; }
+        CountTable() { m = MothurOut::getInstance(); hasGroups = false; total = 0; uniques = 0; }
          ~CountTable() {}
      
-        int readTable(string);
+        int createTable(set<string>&, map<string, string>&, set<string>&); //seqNames, seqName->group, groupNames 
+        int createTable(string, string, bool); //namefile, groupfile, createGroup
+        int readTable(string);    
+        int printTable(string);
+        int printHeaders(ofstream&);
+        int printSeq(ofstream&, string);
          bool testGroups(string file); //used to check if file has group data without reading it.
+        int copy(CountTable*);
      
          bool hasGroupInfo() { return hasGroups; }
          int getNumGroups() { return groups.size(); }
          vector<string> getNamesOfGroups() {  return groups;   }  //returns group names, if no group info vector is blank.
+        int addGroup(string);
          
+        int renameSeq(string, string); //used to change name of sequence for use with trees
+        int setAbund(string, string, int); //set abundance number of seqs for that group for that seq
          int push_back(string); //add a sequence 
          int push_back(string, int); //add a sequence 
          int push_back(string, vector<int>); //add a sequence with group info
+        int remove(string); //remove seq
          int get(string); //returns unique sequence index for reading distance matrices like NameAssignment
          int size() { return indexNameMap.size(); }
      
+        vector<string> getGroups(string); //returns vector of groups represented by this sequences
          vector<int> getGroupCounts(string);  //returns group counts for a seq passed in, if no group info is in file vector is blank. Order is the same as the groups returned by getGroups function.
          int getGroupCount(string, string); //returns number of seqs for that group for that seq
          int getGroupCount(string); // returns total seqs for that group
-        int getNumSeqs(string); //returns total seqs for that seq
+        int getNumSeqs(string); //returns total seqs for that seq, 0 if not found 
          int getNumSeqs() { return total; } //return total number of seqs
          int getNumUniqueSeqs() { return uniques; } //return number of unique/representative seqs
          int getGroupIndex(string); //returns index in getGroupCounts vector of specific group
      
          vector<string> getNamesOfSeqs();
+        vector<string> getNamesOfSeqs(string);
          int mergeCounts(string, string); //combines counts for 2 seqs, saving under the first name passed in.
          ListVector getListVector();
      
diff --git a/deconvolutecommand.cpp b/deconvolutecommand.cpp

index bab5a634cd101507957dabc22e34e429c5ec5b1b..b6f71c826066114849acfc04b5130e09e876462f 100644 (file)
--- a/deconvolutecommand.cpp
+++ b/deconvolutecommand.cpp
@@ -14,7 +14,8 @@
  vector<string> DeconvoluteCommand::setParameters(){    
         try {
                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                 
@@ -31,7 +32,7 @@ vector<string> DeconvoluteCommand::setParameters(){
  string DeconvoluteCommand::getHelpString(){    
         try {
                 string helpString = "";
-               helpString += "The unique.seqs command reads a fastafile and creates a namesfile.\n";
+               helpString += "The unique.seqs command reads a fastafile and creates a name or count file.\n";
                 helpString += "It creates a file where the first column is the groupname and the second column is a list of sequence names who have the same sequence. \n";
                 helpString += "If the sequence is unique the second column will just contain its name. \n";
                 helpString += "The unique.seqs command parameters are fasta and name.  fasta is required, unless there is a valid current fasta file.\n";
@@ -56,6 +57,7 @@ string DeconvoluteCommand::getOutputFileNameTag(string type, string inputName=""
          else {
              if (type == "fasta") {  outputFileName =  "unique" + m->getExtension(inputName); }
              else if (type == "name") {  outputFileName =  "names"; }
+            else if (type == "count") {  outputFileName =  "count.table"; }
              else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
          }
          return outputFileName;
@@ -73,6 +75,7 @@ DeconvoluteCommand::DeconvoluteCommand(){
                 vector<string> tempOutNames;
                 outputTypes["fasta"] = tempOutNames;
                 outputTypes["name"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
         }
         catch(exception& e) {
                 m->errorOut(e, "DeconvoluteCommand", "DeconvoluteCommand");
@@ -106,6 +109,7 @@ DeconvoluteCommand::DeconvoluteCommand(string option)  {
                         vector<string> tempOutNames;
                         outputTypes["fasta"] = tempOutNames;
                         outputTypes["name"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                 
                         //if the user changes the input directory command factory will send this info to us in the output parameter 
                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -127,6 +131,14 @@ DeconvoluteCommand::DeconvoluteCommand(string option)  {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                 }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                         }
  
                         
@@ -149,11 +161,21 @@ DeconvoluteCommand::DeconvoluteCommand(string option)  {
                         if (oldNameMapFName == "not open") { oldNameMapFName = ""; abort = true; }
                         else if (oldNameMapFName == "not found"){       oldNameMapFName = "";   }
                         else { m->setNameFile(oldNameMapFName); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; countfile = ""; }  
+                       else if (countfile == "not found") { countfile = ""; }
+                       else { m->setCountTableFile(countfile); }
                         
-                       if (oldNameMapFName == "") {
-                               vector<string> files; files.push_back(inFastaName);
-                               parser.getNameFile(files);
-                       }
+            if ((countfile != "") && (oldNameMapFName != "")) { m->mothurOut("When executing a unique.seqs command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+                       
+
+                       if (countfile == "") {
+                if (oldNameMapFName == "") {
+                    vector<string> files; files.push_back(inFastaName);
+                    parser.getNameFile(files);
+                }
+            }
                         
                 }
  
@@ -171,6 +193,7 @@ int DeconvoluteCommand::execute() {
  
                 //prepare filenames and open files
                 string outNameFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + getOutputFileNameTag("name");
+        string outCountFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + getOutputFileNameTag("count");
                 string outFastaFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + getOutputFileNameTag("fasta", inFastaName);
                 
                 map<string, string> nameMap;
@@ -179,6 +202,11 @@ int DeconvoluteCommand::execute() {
              m->readNames(oldNameMapFName, nameMap); 
              if (oldNameMapFName == outNameFile){ outNameFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + "unique." + getOutputFileNameTag("name");   }
          }
+        CountTable ct;
+        if (countfile != "")  {  
+            ct.readTable(countfile);
+            if (countfile == outCountFile){ outCountFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + "unique." + getOutputFileNameTag("count");   }
+        }
                 
                 if (m->control_pressed) { return 0; }
                 
@@ -222,7 +250,10 @@ int DeconvoluteCommand::execute() {
                                                         sequenceStrings[seq.getAligned()] = itNames->second;
                                                         nameFileOrder.push_back(seq.getAligned());
                                                 }
-                                       }else { sequenceStrings[seq.getAligned()] = seq.getName();      nameFileOrder.push_back(seq.getAligned()); }
+                                       }else if (countfile != "") { 
+                        ct.getNumSeqs(seq.getName()); //checks to make sure seq is in table
+                        sequenceStrings[seq.getAligned()] = seq.getName();     nameFileOrder.push_back(seq.getAligned());
+                    }else {    sequenceStrings[seq.getAligned()] = seq.getName();      nameFileOrder.push_back(seq.getAligned()); }
                                 }else { //this is a dup
                                         if (oldNameMapFName != "") {
                                                 itNames = nameMap.find(seq.getName());
@@ -232,7 +263,12 @@ int DeconvoluteCommand::execute() {
                                                 }else {
                                                         sequenceStrings[seq.getAligned()] += "," + itNames->second;
                                                 }
-                                       }else { sequenceStrings[seq.getAligned()] += "," + seq.getName();       }
+                    }else if (countfile != "") { 
+                        int num = ct.getNumSeqs(seq.getName()); //checks to make sure seq is in table
+                        if (num != 0) { //its in the table
+                            ct.mergeCounts(itStrings->second, seq.getName()); //merges counts and saves in uniques name
+                        }
+                    }else {    sequenceStrings[seq.getAligned()] += "," + seq.getName();       }
                                 }
                                 
                                 count++;
@@ -252,34 +288,35 @@ int DeconvoluteCommand::execute() {
                 
                 //print new names file
                 ofstream outNames;
-               m->openOutputFile(outNameFile, outNames);
+               if (countfile == "") { m->openOutputFile(outNameFile, outNames); outputNames.push_back(outNameFile); outputTypes["name"].push_back(outNameFile);  }
+        else { m->openOutputFile(outCountFile, outNames); ct.printHeaders(outNames); outputTypes["count"].push_back(outCountFile); outputNames.push_back(outCountFile); }
                 
                 for (int i = 0; i < nameFileOrder.size(); i++) {
-               //for (itStrings = sequenceStrings.begin(); itStrings != sequenceStrings.end(); itStrings++) {
-                       if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); outNames.close(); m->mothurRemove(outNameFile); return 0; }
+                       if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); outNames.close(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
                         
                         itStrings = sequenceStrings.find(nameFileOrder[i]);
                         
                         if (itStrings != sequenceStrings.end()) {
-                               //get rep name
-                               int pos = (itStrings->second).find_first_of(',');
-                       
-                               if (pos == string::npos) { // only reps itself
-                                       outNames << itStrings->second << '\t' << itStrings->second << endl;
-                               }else {
-                                       outNames << (itStrings->second).substr(0, pos) << '\t' << itStrings->second << endl;
-                               }
+                if (countfile == "") {
+                    //get rep name
+                    int pos = (itStrings->second).find_first_of(',');
+                    
+                    if (pos == string::npos) { // only reps itself
+                        outNames << itStrings->second << '\t' << itStrings->second << endl;
+                    }else {
+                        outNames << (itStrings->second).substr(0, pos) << '\t' << itStrings->second << endl;
+                    }
+                }else {  ct.printSeq(outNames, itStrings->second);  }
                         }else{ m->mothurOut("[ERROR]: mismatch in namefile print."); m->mothurOutEndLine(); m->control_pressed = true; }
                 }
                 outNames.close();
                 
-               if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); m->mothurRemove(outNameFile); return 0; }
+               if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); }  return 0; }
                 
                 m->mothurOutEndLine();
                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               m->mothurOut(outFastaFile); m->mothurOutEndLine();      
-               m->mothurOut(outNameFile); m->mothurOutEndLine();
-               outputNames.push_back(outFastaFile);  outputNames.push_back(outNameFile); outputTypes["fasta"].push_back(outFastaFile);  outputTypes["name"].push_back(outNameFile); 
+               outputNames.push_back(outFastaFile);   outputTypes["fasta"].push_back(outFastaFile);  
+        for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
                 m->mothurOutEndLine();
  
                 //set fasta file as new current fastafile
@@ -293,6 +330,11 @@ int DeconvoluteCommand::execute() {
                 if (itTypes != outputTypes.end()) {
                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
                 }
+        
+        itTypes = outputTypes.find("count");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+               }
                 
                 return 0;
         }
diff --git a/deconvolutecommand.h b/deconvolutecommand.h

index 7d4cb50023eb6312b652a8368d2d862882b3ffd4..673ffc9e4d2e689cd32c6b16867089fe43f216d1 100644 (file)
--- a/deconvolutecommand.h
+++ b/deconvolutecommand.h
@@ -11,6 +11,7 @@
  
  #include "command.hpp"
  #include "fastamap.h"
+#include "counttable.h"
  
  /* The unique.seqs command reads a fasta file, finds the duplicate sequences and outputs a names file
         containing 2 columns.  The first being the groupname and the second the list of identical sequence names. */ 
@@ -37,7 +38,7 @@ public:
         
         
  private:
-       string inFastaName, oldNameMapFName, outputDir;
+       string inFastaName, oldNameMapFName, outputDir, countfile;
         vector<string> outputNames;
  
         bool abort;
diff --git a/deuniquetreecommand.cpp b/deuniquetreecommand.cpp

index 662282b1d05d57e25ee2c259f178d00c5ecf96ca..d334f8f4ef107d12a57eaa4aff4d71b84808edb9 100644 (file)
--- a/deuniquetreecommand.cpp
+++ b/deuniquetreecommand.cpp
@@ -161,7 +161,8 @@ int DeuniqueTreeCommand::execute() {
                 
                 TreeReader* reader = new TreeReader(treefile, "", namefile);
          vector<Tree*> T = reader->getTrees();
-        map<string, string> nameMap = reader->getNameMap();
+        map<string, string> nameMap;
+        m->readNames(namefile, nameMap);
          delete reader;         
                 
                 //print new Tree
@@ -172,7 +173,7 @@ int DeuniqueTreeCommand::execute() {
                 T[0]->print(out, nameMap);
                 out.close();
                 
-        delete (T[0]->getTreeMap());
+        delete (T[0]->getCountTable());
                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
                                 
                 //set phylip file as new current phylipfile
diff --git a/indicatorcommand.cpp b/indicatorcommand.cpp

index f98620b2ee0bd4cc70bc10d1db908d8de40de995..dc9f121a0e83758d4ecf4977294e8221b3da0712 100644 (file)
--- a/indicatorcommand.cpp
+++ b/indicatorcommand.cpp
@@ -287,17 +287,22 @@ int IndicatorCommand::execute(){
                         string groupfile = ""; 
                         m->setTreeFile(treefile);
                         Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
-                       treeMap = new TreeMap();
+                       ct = new CountTable();
                         bool mismatch = false;
-                               
-                       for (int i = 0; i < m->Treenames.size(); i++) { 
-                               //sanity check - is this a group that is not in the sharedfile?
+            
+            set<string> nameMap;
+            map<string, string> groupMap;
+            set<string> gps;
+            for (int i = 0; i < m->Treenames.size(); i++) { 
+                nameMap.insert(m->Treenames[i]); 
+                //sanity check - is this a group that is not in the sharedfile?
                                 if (designfile == "") {
+                    if (i == 0) { gps.insert("Group1"); }
                                         if (!(m->inUsersGroups(m->Treenames[i], m->getAllGroups()))) {
                                                 m->mothurOut("[ERROR]: " + m->Treenames[i] + " is not a group in your shared or relabund file."); m->mothurOutEndLine();
                                                 mismatch = true;
                                         }
-                                       treeMap->addSeq(m->Treenames[i], "Group1"); 
+                                       groupMap[m->Treenames[i]] = "Group1"; 
                                 }else{
                                         vector<string> myGroups; myGroups.push_back(m->Treenames[i]);
                                         vector<string> myNames = designMap->getNamesSeqs(myGroups);
@@ -308,9 +313,10 @@ int IndicatorCommand::execute(){
                                                         mismatch = true;
                                                 }
                                         }
-                                       treeMap->addSeq(m->Treenames[i], "Group1");
+                                       groupMap[m->Treenames[i]] = "Group1";
                                 }
-                       }
+            }
+            ct->createTable(nameMap, groupMap, gps);
                         
                         if ((designfile != "") && (m->Treenames.size() != Groups.size())) { cout << Groups.size() << '\t' << m->Treenames.size() << endl; m->mothurOut("[ERROR]: You design file does not match your tree, aborting."); m->mothurOutEndLine(); mismatch = true; }
                                         
@@ -318,14 +324,14 @@ int IndicatorCommand::execute(){
                                 if (designfile != "") { delete designMap; }
                                 if (sharedfile != "") {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } }
                                 else { for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } }
-                               delete treeMap;
+                               delete ct;
                                 return 0;
                         }
                  
                         read = new ReadNewickTree(treefile);
-                       int readOk = read->read(treeMap); 
+                       int readOk = read->read(ct); 
                         
-                       if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete treeMap; delete read; return 0; }
+                       if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete ct; delete read; return 0; }
                         
                         vector<Tree*> T = read->getTrees();
                         
@@ -335,19 +341,18 @@ int IndicatorCommand::execute(){
                                 if (designfile != "") { delete designMap; }
                                 if (sharedfile != "") {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } }
                                 else { for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } }
-                               for (int i = 0; i < T.size(); i++) {  delete T[i];  }  delete treeMap; return 0; 
+                               for (int i = 0; i < T.size(); i++) {  delete T[i];  }  delete ct; return 0; 
                         }
              
-                       map<string, string> nameMap;    
-                       T[0]->assembleTree(nameMap);
+                       T[0]->assembleTree();
                                         
                         /***************************************************/
                         //    create ouptut tree - respecting pickedGroups //
                         /***************************************************/
-                       Tree* outputTree = new Tree(m->getNumGroups(), treeMap); 
+                       Tree* outputTree = new Tree(m->getNumGroups(), ct); 
                         
                         outputTree->getSubTree(T[0], m->getGroups());
-                       outputTree->assembleTree(nameMap);
+                       outputTree->assembleTree();
                                 
                         //no longer need original tree, we have output tree to use and label
                         for (int i = 0; i < T.size(); i++) {  delete T[i];  } 
@@ -356,14 +361,14 @@ int IndicatorCommand::execute(){
                                 if (designfile != "") { delete designMap; }
                                 if (sharedfile != "") {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } }
                                 else { for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } }
-                               delete outputTree; delete treeMap;  return 0; 
+                               delete outputTree; delete ct;  return 0; 
                         }
                         
                         /***************************************************/
                         //              get indicator species values                       //
                         /***************************************************/
                         GetIndicatorSpecies(outputTree);
-                       delete outputTree; delete treeMap;
+                       delete outputTree; delete ct;
                         
                 }else { //run with design file only
                         //get indicator species
diff --git a/indicatorcommand.h b/indicatorcommand.h

index 2c36c35f6f3025bb3f07760055512b2e0e257914..3c24dfb843e770567a1486436536b97fabfe12b5 100644 (file)
--- a/indicatorcommand.h
+++ b/indicatorcommand.h
@@ -12,7 +12,7 @@
  
  #include "command.hpp"
  #include "readtree.h"
-#include "treemap.h"
+#include "counttable.h"
  #include "sharedrabundvector.h"
  #include "sharedrabundfloatvector.h"
  #include "inputdata.h"
@@ -36,7 +36,7 @@ public:
         
  private:
         ReadTree* read;
-       TreeMap* treeMap;
+       CountTable* ct;
         GroupMap* designMap;
         string treefile, sharedfile, relabundfile, groups, label, inputFileName, outputDir, designfile;
         bool abort;
diff --git a/parsimony.cpp b/parsimony.cpp

index 3b0f31759c0563031874aa16cb0d3d515a65653b..6a0485c133b2868670e49e5873186e5d10df474d 100644 (file)
--- a/parsimony.cpp
+++ b/parsimony.cpp
@@ -15,7 +15,7 @@ EstOutput Parsimony::getValues(Tree* t, int p, string o) {
         try {
                 processors = p;
                 outputDir = o;
-        TreeMap* tmap = t->getTreeMap();
+        CountTable* ct = t->getCountTable();
                 
                 //if the users enters no groups then give them the score of all groups
                 vector<string> mGroups = m->getGroups();
@@ -38,7 +38,7 @@ EstOutput Parsimony::getValues(Tree* t, int p, string o) {
                         vector<string> groups;
                         if (numGroups == 0) {
                                 //get score for all users groups
-                               vector<string> tGroups = tmap->getNamesOfGroups();
+                               vector<string> tGroups = ct->getNamesOfGroups();
                                 for (int i = 0; i < tGroups.size(); i++) {
                                         if (tGroups[i] != "xxx") {
                                                 groups.push_back(tGroups[i]);
@@ -57,7 +57,7 @@ EstOutput Parsimony::getValues(Tree* t, int p, string o) {
                 
         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                 if(processors == 1){
-                       data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
+                       data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
                 }else{
                         lines.clear();
                         int numPairs = namesOfGroupCombos.size();
@@ -74,10 +74,10 @@ EstOutput Parsimony::getValues(Tree* t, int p, string o) {
                                 lines.push_back(linePair(startPos, numPairsPerProcessor));
                         }
                         
-                       data = createProcesses(t, namesOfGroupCombos, tmap);
+                       data = createProcesses(t, namesOfGroupCombos, ct);
                 }
         #else
-               data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
+               data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
         #endif
                 
                 return data;
@@ -90,7 +90,7 @@ EstOutput Parsimony::getValues(Tree* t, int p, string o) {
  }
  /**************************************************************************************************/
  
-EstOutput Parsimony::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, TreeMap* tmap) {
+EstOutput Parsimony::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, CountTable* ct) {
         try {
  #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                 int process = 1;
@@ -107,7 +107,7 @@ EstOutput Parsimony::createProcesses(Tree* t, vector< vector<string> > namesOfGr
                                 process++;
                         }else if (pid == 0){
                                 EstOutput myresults;
-                               myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, tmap);
+                               myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, ct);
                                 
                                 if (m->control_pressed) { exit(0); }
                                 
@@ -127,7 +127,7 @@ EstOutput Parsimony::createProcesses(Tree* t, vector< vector<string> > namesOfGr
                         }
                 }
                 
-               results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, tmap);
+               results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);
                 
                 //force parent to wait until all the processes are done
                 for (int i=0;i<processIDS.size();i++) { 
@@ -170,12 +170,12 @@ EstOutput Parsimony::createProcesses(Tree* t, vector< vector<string> > namesOfGr
         }
  }
  /**************************************************************************************************/
-EstOutput Parsimony::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, TreeMap* tmap) { 
+EstOutput Parsimony::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, CountTable* ct) { 
         try {
                 
                 EstOutput results; results.resize(num);
                 
-               Tree* copyTree = new Tree(tmap);
+               Tree* copyTree = new Tree(ct);
                 int count = 0;
                 
                 for (int h = start; h < (start+num); h++) {
diff --git a/parsimony.h b/parsimony.h

index 7316d508dd52729c8ddec87c34a08c76586cd4d1..bf0e0d4f90198ef353c2c1899414f699454c8a7e 100644 (file)
--- a/parsimony.h
+++ b/parsimony.h
@@ -12,7 +12,7 @@
   */
  
  #include "treecalculator.h"
-#include "treemap.h"
+#include "counttable.h"
  
  /***********************************************************************/
  
@@ -35,8 +35,8 @@ class Parsimony : public TreeCalculator  {
                 int processors;
                 string outputDir;
         
-               EstOutput driver(Tree*, vector< vector<string> >, int, int, TreeMap*); 
-               EstOutput createProcesses(Tree*, vector< vector<string> >, TreeMap*);
+               EstOutput driver(Tree*, vector< vector<string> >, int, int, CountTable*); 
+               EstOutput createProcesses(Tree*, vector< vector<string> >, CountTable*);
  };
  
  /***********************************************************************/
diff --git a/parsimonycommand.cpp b/parsimonycommand.cpp

index f124b6002ee2c247db14716b4be81cb9ec10d8f2..eabbb5934445f92345eccd30f9bfb2139e652543 100644 (file)
--- a/parsimonycommand.cpp
+++ b/parsimonycommand.cpp
@@ -14,8 +14,9 @@
  vector<string> ParsimonyCommand::setParameters(){      
         try {
                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                 CommandParameter prandom("random", "String", "", "", "", "", "",false,false); parameters.push_back(prandom);
                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
@@ -36,7 +37,7 @@ vector<string> ParsimonyCommand::setParameters(){
  string ParsimonyCommand::getHelpString(){      
         try {
                 string helpString = "";
-               helpString += "The parsimony command parameters are tree, group, name, random, groups, processors and iters.  tree parameter is required unless you have valid current tree file or are using random.\n";
+               helpString += "The parsimony command parameters are tree, group, name, count, random, groups, processors and iters.  tree parameter is required unless you have valid current tree file or are using random.\n";
                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n";
                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
                 helpString += "The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n";
@@ -145,6 +146,14 @@ ParsimonyCommand::ParsimonyCommand(string option)  {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                 }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                         }
                         
                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
@@ -172,6 +181,20 @@ ParsimonyCommand::ParsimonyCommand(string option)  {
                                 if (namefile == "not open") { namefile = ""; abort = true; }
                                 else if (namefile == "not found") { namefile = ""; }
                                 else { m->setNameFile(namefile); }
+                
+                countfile = validParameter.validFile(parameters, "count", true);
+                if (countfile == "not open") { countfile = ""; abort = true; }
+                else if (countfile == "not found") { countfile = "";  }        
+                else { m->setCountTableFile(countfile); }
+                
+                if ((namefile != "") && (countfile != "")) {
+                    m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+                }
+                
+                if ((groupfile != "") && (countfile != "")) {
+                    m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+                }
+
                         }
                         
                         //if the user changes the output directory command factory will send this info to us in the output parameter 
@@ -193,10 +216,12 @@ ParsimonyCommand::ParsimonyCommand(string option)  {
                         m->setProcessors(temp);
                         m->mothurConvert(temp, processors);
                         
-                       if (namefile == "") {
-                               vector<string> files; files.push_back(treefile);
-                               parser.getNameFile(files);
-                       }
+                       if (countfile=="") {
+                if (namefile == "") {
+                    vector<string> files; files.push_back(treefile);
+                    parser.getNameFile(files);
+                } 
+            }
                         
                 }
  
@@ -219,9 +244,11 @@ int ParsimonyCommand::execute() {
                         
                         m->setTreeFile(treefile);
                         
-            TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+            TreeReader* reader;
+            if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
+            else { reader = new TreeReader(treefile, countfile); }
              T = reader->getTrees();
-            tmap = T[0]->getTreeMap();
+            ct = T[0]->getCountTable();
              delete reader;
         
                         if(outputDir == "") { outputDir += m->hasPath(treefile); }
@@ -245,7 +272,7 @@ int ParsimonyCommand::execute() {
                 //set users groups to analyze
                 SharedUtil util;
                 vector<string> mGroups = m->getGroups();
-               vector<string> tGroups = tmap->getNamesOfGroups();
+               vector<string> tGroups = ct->getNamesOfGroups();
                 util.setGroups(mGroups, tGroups, allGroups, numGroups, "parsimony");    //sets the groups the user wants to analyze
                 util.getCombos(groupComb, mGroups, numComp);
                 m->setGroups(mGroups);
@@ -260,7 +287,7 @@ int ParsimonyCommand::execute() {
                 
                 if (m->control_pressed) { 
                         delete reading; delete output;
-                       delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
+                       delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
                         if (randomtree == "") {  outSum.close();  }
                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
                         m->clearGroups();
@@ -285,7 +312,7 @@ int ParsimonyCommand::execute() {
                                 
                                 if (m->control_pressed) { 
                                         delete reading; delete output;
-                                       delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
+                                       delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
                                         if (randomtree == "") {  outSum.close();  }
                                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
                                         m->clearGroups();
@@ -314,7 +341,7 @@ int ParsimonyCommand::execute() {
                         for (int j = 0; j < iters; j++) {
                                                                 
                                 //create new tree with same num nodes and leaves as users
-                               randT = new Tree(tmap);
+                               randT = new Tree(ct);
  
                                 //create random relationships between nodes
                                 randT->assembleRandomTree();
@@ -326,7 +353,7 @@ int ParsimonyCommand::execute() {
                                         delete reading;  delete output; delete randT;
                                         if (randomtree == "") {  outSum.close();  }
                                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
-                                       delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
+                                       delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
                                         m->clearGroups();
                                         return 0;
                                 }
@@ -355,13 +382,13 @@ int ParsimonyCommand::execute() {
                         for (int j = 0; j < iters; j++) {
                                                                 
                                 //create new tree with same num nodes and leaves as users
-                               randT = new Tree(tmap);
+                               randT = new Tree(ct);
                                 //create random relationships between nodes
  
                                 randT->assembleRandomTree();
                                 
                                 if (m->control_pressed) { 
-                                       delete reading; delete output; delete randT; delete tmap; 
+                                       delete reading; delete output; delete randT; delete ct; 
                                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
                                 }
  
@@ -370,7 +397,7 @@ int ParsimonyCommand::execute() {
                                 randomData = pars.getValues(randT, processors, outputDir);
                                 
                                 if (m->control_pressed) { 
-                                       delete reading; delete output; delete randT; delete tmap; 
+                                       delete reading; delete output; delete randT; delete ct; 
                                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
                                 }
                         
@@ -424,7 +451,7 @@ int ParsimonyCommand::execute() {
                 
                 if (m->control_pressed) { 
                                 delete reading; delete output;
-                               delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
+                               delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
                                 if (randomtree == "") {  outSum.close();  }
                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
                                 return 0;
@@ -437,7 +464,7 @@ int ParsimonyCommand::execute() {
                 printParsimonyFile();
                 if (randomtree == "") { printUSummaryFile(); }
                                 
-        delete output; delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
+        delete output; delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
                 
                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;}
                 
@@ -529,7 +556,7 @@ void ParsimonyCommand::getUserInput() {
         try {
         
                 //create treemap
-               tmap = new TreeMap();
+               ct = new CountTable();
  
                 m->mothurOut("Please enter the number of groups you would like to analyze: ");
                 cin >> numGroups;
@@ -539,30 +566,31 @@ void ParsimonyCommand::getUserInput() {
                 count = 1;
                 numEachGroup.resize(numGroups, 0);  
                 
-               
+        set<string> nameMap;
+        map<string, string> groupMap;
+        set<string> gps;
+                
                 for (int i = 1; i <= numGroups; i++) {
                         m->mothurOut("Please enter the number of sequences in group " + toString(i) +  ": ");
                         cin >> num;
                         m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
-                               
-                       //set tmaps seqsPerGroup
-                       tmap->seqsPerGroup[toString(i)] = num;
-                       tmap->addGroup(toString(i));
                         
+            gps.insert(toString(i));
+            
                         //set tmaps namesOfSeqs
                         for (int j = 0; j < num; j++) {
-                               tmap->namesOfSeqs.push_back(toString(count));
-                               tmap->treemap[toString(count)].groupname = toString(i);
+                               groupMap[toString(count)] = i;
+                               nameMap.insert(toString(count));
                                 count++;
                         }
                 }
-               
+               ct->createTable(nameMap, groupMap, gps);
+        
                 //clears buffer so next command doesn't have error
                 string s;       
                 getline(cin, s);
                 
-               m->Treenames = tmap->namesOfSeqs; 
-               
+               m->Treenames = ct->getNamesOfSeqs(); 
         }
         catch(exception& e) {
                 m->errorOut(e, "ParsimonyCommand", "getUserInput");
diff --git a/parsimonycommand.h b/parsimonycommand.h

index 79613f560746e443697e4e92bfed755605a6fe4a..38a7505125e52b39ad37f36df592dfc9cac3b54c 100644 (file)
--- a/parsimonycommand.h
+++ b/parsimonycommand.h
@@ -11,7 +11,7 @@
  
  #include "command.hpp"
  #include "parsimony.h"
-#include "treemap.h"
+#include "counttable.h"
  #include "progress.hpp"
  #include "sharedutilities.h"
  #include "fileoutput.h"
@@ -41,10 +41,10 @@ private:
         vector<Tree*> T;           //user trees
         Tree* randT;  //random tree
         Tree* copyUserTree; 
-       TreeMap* tmap; 
-       TreeMap* savetmap;
+       CountTable* ct; 
+       CountTable* savect;
         vector<string> groupComb; // AB. AC, BC...
-       string sumFile, randomtree, allGroups, outputDir, treefile, groupfile, namefile;
+       string sumFile, randomtree, allGroups, outputDir, treefile, groupfile, namefile, countfile;
         int iters, numGroups, numComp, counter, processors, numUniquesInName;
         vector<int> numEachGroup; //vector containing the number of sequences in each group the users wants for random distrib.
         vector< vector<float> > userTreeScores; //scores for users trees for each comb.
diff --git a/phylodiversitycommand.cpp b/phylodiversitycommand.cpp

index ddd2b316d507b8477d84d529ae9fc4d88c69e518..b0c11f68ff26dad59634375c80c44ecf0dd0382b 100644 (file)
--- a/phylodiversitycommand.cpp
+++ b/phylodiversitycommand.cpp
@@ -15,8 +15,9 @@ vector<string> PhyloDiversityCommand::setParameters(){
         try {
  
                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pgroup);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
                 CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
@@ -41,7 +42,7 @@ vector<string> PhyloDiversityCommand::setParameters(){
  string PhyloDiversityCommand::getHelpString(){ 
         try {
                 string helpString = "";
-               helpString += "The phylo.diversity command parameters are tree, group, name, groups, iters, freq, processors, scale, rarefy, collect and summary.  tree and group are required, unless you have valid current files.\n";
+               helpString += "The phylo.diversity command parameters are tree, group, name, count, groups, iters, freq, processors, scale, rarefy, collect and summary.  tree and group are required, unless you have valid current files.\n";
                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n";
                 helpString += "The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n";
                 helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
@@ -156,6 +157,14 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                 }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                         }
                         
                         //check for required parameters
@@ -179,6 +188,19 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
                         else if (namefile == "not found") { namefile = ""; }
                         else { m->setNameFile(namefile); }
                         
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { countfile = ""; abort = true; }
+                       else if (countfile == "not found") { countfile = "";  } 
+                       else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+                       
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+
                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
                         
                         string temp;
@@ -214,10 +236,12 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
                         
                         if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
                         
-                       if (namefile == "") {
-                               vector<string> files; files.push_back(treefile);
-                               parser.getNameFile(files);
-                       }
+                       if (countfile=="") {
+                if (namefile == "") {
+                    vector<string> files; files.push_back(treefile);
+                    parser.getNameFile(files);
+                } 
+            }
                 }
                 
         }
@@ -236,14 +260,16 @@ int PhyloDiversityCommand::execute(){
          int start = time(NULL);
          
                 m->setTreeFile(treefile);
-        TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+        TreeReader* reader;
+        if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
+        else { reader = new TreeReader(treefile, countfile); }
          vector<Tree*> trees = reader->getTrees();
-        tmap = trees[0]->getTreeMap();
+        ct = trees[0]->getCountTable();
          delete reader;
  
                 SharedUtil util;
                 vector<string> mGroups = m->getGroups();
-               vector<string> tGroups = tmap->getNamesOfGroups();
+               vector<string> tGroups = ct->getNamesOfGroups();
                 util.setGroups(mGroups, tGroups, "phylo.diversity");    //sets the groups the user wants to analyze
                 
                 //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
@@ -255,7 +281,7 @@ int PhyloDiversityCommand::execute(){
                 //for each of the users trees
                 for(int i = 0; i < trees.size(); i++) {
                 
-                       if (m->control_pressed) { delete tmap; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } return 0; }
+                       if (m->control_pressed) { delete ct; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } return 0; }
                         
                         ofstream outSum, outRare, outCollect;
                         string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile))  + toString(i+1) + "." + getOutputFileNameTag("summary");
@@ -286,15 +312,16 @@ int PhyloDiversityCommand::execute(){
                         
                         //find largest group total 
                         int largestGroup = 0;
-                       for (int j = 0; j < mGroups.size(); j++) {  
-                               if (tmap->seqsPerGroup[mGroups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[mGroups[j]]; }
+                       for (int j = 0; j < mGroups.size(); j++) { 
+                int numSeqsThisGroup = ct->getGroupCount(mGroups[j]);
+                               if (numSeqsThisGroup > largestGroup) { largestGroup = numSeqsThisGroup; }
                                 
                                 //initialize diversity
-                               diversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0);            //numSampled
+                               diversity[mGroups[j]].resize(numSeqsThisGroup+1, 0.0);          //numSampled
                                                                                                                                                                                                                         //groupA                0.0                     0.0
                                                                                                                                                                                                                         
                                 //initialize sumDiversity
-                               sumDiversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0);
+                               sumDiversity[mGroups[j]].resize(numSeqsThisGroup+1, 0.0);
                         }       
  
                         //convert freq percentage to number
@@ -649,7 +676,7 @@ map<string, int> PhyloDiversityCommand::getRootForGroups(Tree* t){
                 map<string, bool> done;
         
                 //initialize root for all groups to -1
-               for (int k = 0; k < (t->getTreeMap())->getNamesOfGroups().size(); k++) { done[(t->getTreeMap())->getNamesOfGroups()[k]] = false; }
+               for (int k = 0; k < (t->getCountTable())->getNamesOfGroups().size(); k++) { done[(t->getCountTable())->getNamesOfGroups()[k]] = false; }
          
          for (int i = 0; i < t->getNumLeaves(); i++) {
              
diff --git a/phylodiversitycommand.h b/phylodiversitycommand.h

index 95276921149b5237a923928bc9f23a5e8e7aed1b..ee76f05697cfce07213857432c9d786b150fc033 100644 (file)
--- a/phylodiversitycommand.h
+++ b/phylodiversitycommand.h
@@ -11,7 +11,7 @@
   */
  
  #include "command.hpp"
-#include "treemap.h"
+#include "counttable.h"
  #include "sharedutilities.h"
  #include "tree.h"
  
@@ -33,11 +33,11 @@ class PhyloDiversityCommand : public Command {
                 int execute();
                 void help() { m->mothurOut(getHelpString()); }
  private:
-               TreeMap* tmap;
+               CountTable* ct;
                 float freq;
                 int iters, processors, numUniquesInName;  
                 bool abort, rarefy, summary, collect, scale;
-               string groups, outputDir, treefile, groupfile, namefile;
+               string groups, outputDir, treefile, groupfile, namefile, countfile;
                 vector<string> Groups, outputNames; //holds groups to be used, and outputFile names
                 
          map<string, int> getRootForGroups(Tree* t);
diff --git a/phylosummary.cpp b/phylosummary.cpp

index 5f7bbc3c73a2161417a1841f567719a4bd1f8c4a..ab6bb831dfae73bb56c75eb8b144963701dc02be 100644 (file)
--- a/phylosummary.cpp
+++ b/phylosummary.cpp
@@ -8,21 +8,68 @@
   */
  
  #include "phylosummary.h"
-
  /**************************************************************************************************/
  
-PhyloSummary::PhyloSummary(string refTfile, string groupFile){
+PhyloSummary::PhyloSummary(string refTfile, CountTable* c){
         try {
                 m = MothurOut::getInstance();
                 maxLevel = 0;
                 ignore = false;
+        numSeqs = 0;
+               
+               ct = c;
+        groupmap = NULL;
+        
+               //check for necessary files
+               string taxFileNameTest = m->getFullPathName((refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum"));
+               ifstream FileTest(taxFileNameTest.c_str());
                 
-               if (groupFile != "") {
-                       groupmap = new GroupMap(groupFile);
-                       groupmap->readMap();
+               if (!FileTest) { 
+                       m->mothurOut("Error: can't find " + taxFileNameTest + "."); m->mothurOutEndLine(); exit(1);
                 }else{
-                       groupmap = NULL;
+                       readTreeStruct(FileTest);
                 }
+               
+               tree[0].rank = "0";
+               assignRank(0);
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloSummary", "PhyloSummary");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+PhyloSummary::PhyloSummary(CountTable* c){
+       try {
+               m = MothurOut::getInstance();
+               maxLevel = 0;
+               ignore = true;
+        numSeqs = 0;
+               
+               ct = c;
+        groupmap = NULL;
+               
+               tree.push_back(rawTaxNode("Root"));
+               tree[0].rank = "0";
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloSummary", "PhyloSummary");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+PhyloSummary::PhyloSummary(string refTfile, GroupMap* g){
+       try {
+               m = MothurOut::getInstance();
+               maxLevel = 0;
+               ignore = false;
+        numSeqs = 0;
+               
+               groupmap = g;
+        ct = NULL;
                                 
                 //check for necessary files
                 string taxFileNameTest = m->getFullPathName((refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum"));
@@ -46,23 +93,18 @@ PhyloSummary::PhyloSummary(string refTfile, string groupFile){
  
  /**************************************************************************************************/
  
-PhyloSummary::PhyloSummary(string groupFile){
+PhyloSummary::PhyloSummary(GroupMap* g){
         try {
                 m = MothurOut::getInstance();
                 maxLevel = 0;
                 ignore = true;
+        numSeqs = 0;
                 
-               if (groupFile != "") {
-                       groupmap = new GroupMap(groupFile);
-                       groupmap->readMap();
-               }else{
-                       groupmap = NULL;
-               }
+               groupmap = g;
+        ct = NULL;
                 
                 tree.push_back(rawTaxNode("Root"));
                 tree[0].rank = "0";
-               
-               
         }
         catch(exception& e) {
                 m->errorOut(e, "PhyloSummary", "PhyloSummary");
@@ -78,7 +120,6 @@ int PhyloSummary::summarize(string userTfile){
          
          for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end();) {
              addSeqToTree(itTemp->first, itTemp->second);
-                       numSeqs++;
              temp.erase(itTemp++);
          }
          
@@ -137,7 +178,9 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
                         childPointer = tree[currentNode].children.find(taxon);
                         
                         if(childPointer != tree[currentNode].children.end()){   //if the node already exists, update count and move on
-                               if (groupmap != NULL) {
+                               int thisCount = 1;
+                
+                if (groupmap != NULL) {
                                         //find out the sequences group
                                         string group = groupmap->getGroup(seqName);
                                         
@@ -150,9 +193,27 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
                                         if (itGroup != tree[childPointer->second].groupCount.end()) {
                                                 tree[childPointer->second].groupCount[group]++;
                                         }
-                               }
+                               }else if (ct != NULL) {
+                    if (ct->hasGroupInfo()) {
+                        vector<int> groupCounts = ct->getGroupCounts(seqName);
+                        vector<string> groups = ct->getNamesOfGroups();
+                        for (int i = 0; i < groups.size(); i++) {
+                            
+                            if (groupCounts[i] != 0) {
+                                //do you have a count for this group?
+                                map<string, int>::iterator itGroup = tree[childPointer->second].groupCount.find(groups[i]);
+                                
+                                //if yes, increment it - there should not be a case where we can't find it since we load group in read
+                                if (itGroup != tree[childPointer->second].groupCount.end()) {
+                                    tree[childPointer->second].groupCount[groups[i]] += groupCounts[i];
+                                }
+                            }
+                        }
+                    }
+                    thisCount = ct->getNumSeqs(seqName);
+                }
                                 
-                               tree[childPointer->second].total++;
+                               tree[childPointer->second].total += thisCount;
  
                                 currentNode = childPointer->second;
                         }else{  
@@ -163,8 +224,8 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
                                 
                                         tree[index].parent = currentNode;
                                         tree[index].level = (level+1);
-                                       tree[index].total = 1;
                                         tree[currentNode].children[taxon] = index;
+                    int thisCount = 1;
                                         
                                         //initialize groupcounts
                                         if (groupmap != NULL) {
@@ -184,9 +245,33 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
                                                 //if yes, increment it - there should not be a case where we can't find it since we load group in read
                                                 if (itGroup != tree[index].groupCount.end()) {
                                                         tree[index].groupCount[group]++;
-                                               }                                               
-                                       }
+                                               }
+                                       }else if (ct != NULL) {
+                        if (ct->hasGroupInfo()) {
+                            vector<string> mGroups = ct->getNamesOfGroups();
+                            for (int j = 0; j < mGroups.size(); j++) {
+                                tree[index].groupCount[mGroups[j]] = 0;
+                            }
+                            vector<int> groupCounts = ct->getGroupCounts(seqName);
+                            vector<string> groups = ct->getNamesOfGroups();
+                        
+                            for (int i = 0; i < groups.size(); i++) {
+                                if (groupCounts[i] != 0) {
+                                   
+                                    //do you have a count for this group?
+                                    map<string, int>::iterator itGroup = tree[index].groupCount.find(groups[i]);
+                                     
+                                    //if yes, increment it - there should not be a case where we can't find it since we load group in read
+                                    if (itGroup != tree[index].groupCount.end()) {
+                                        tree[index].groupCount[groups[i]]+=groupCounts[i];
+                                    }
+                                }
+                            }
+                        }
+                        thisCount = ct->getNumSeqs(seqName);
+                    }
                                         
+                    tree[index].total = thisCount;
                                         currentNode = index;
                                         
                                 }else{ //otherwise, error
@@ -210,7 +295,7 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
  }
  /**************************************************************************************************/
  
-int PhyloSummary::addSeqToTree(string seqTaxonomy, vector<string> names){
+int PhyloSummary::addSeqToTree(string seqTaxonomy, map<string, bool> containsGroup){
         try {
                 numSeqs++;
                 
@@ -235,32 +320,12 @@ int PhyloSummary::addSeqToTree(string seqTaxonomy, vector<string> names){
                         childPointer = tree[currentNode].children.find(taxon);
                         
                         if(childPointer != tree[currentNode].children.end()){   //if the node already exists, update count and move on
-                               if (groupmap != NULL) {
-                                       
-                                       map<string, bool> containsGroup; 
-                                       vector<string> mGroups = groupmap->getNamesOfGroups();
-                                       for (int j = 0; j < mGroups.size(); j++) {
-                                               containsGroup[mGroups[j]] = false;
-                                       }
-                                       
-                                       for (int k = 0; k < names.size(); k++) {
-                                               //find out the sequences group
-                                               string group = groupmap->getGroup(names[k]);
-                                       
-                                               if (group == "not found") {  m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
-                                               else {
-                                                       containsGroup[group] = true;
-                                               }
-                                       }
+                for (map<string, bool>::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) {
+                    if (itGroup->second == true) {
+                        tree[childPointer->second].groupCount[itGroup->first]++;
+                    }
+                }
                                         
-                                       for (map<string, bool>::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) {
-                                               if (itGroup->second == true) {
-                                                       tree[childPointer->second].groupCount[itGroup->first]++;
-                                               }
-                                       }
-                                       
-                               }
-                               
                                 tree[childPointer->second].total++;
                                 
                                 currentNode = childPointer->second;
@@ -274,33 +339,12 @@ int PhyloSummary::addSeqToTree(string seqTaxonomy, vector<string> names){
                                         tree[index].level = (level+1);
                                         tree[index].total = 1;
                                         tree[currentNode].children[taxon] = index;
-                                       
-                                       //initialize groupcounts
-                                       if (groupmap != NULL) {
-                                               map<string, bool> containsGroup; 
-                                               vector<string> mGroups = groupmap->getNamesOfGroups();
-                                               for (int j = 0; j < mGroups.size(); j++) {
-                                                       tree[index].groupCount[mGroups[j]] = 0;
-                                                       containsGroup[mGroups[j]] = false;
-                                               }
-                                               
                                                 
-                                               for (int k = 0; k < names.size(); k++) {
-                                                       //find out the sequences group
-                                                       string group = groupmap->getGroup(names[k]);
-                                                       
-                                                       if (group == "not found") {  m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
-                                                       else {
-                                                               containsGroup[group] = true;
-                                                       }
-                                               }
-                                               
-                                               for (map<string, bool>::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) {
-                                                       if (itGroup->second == true) {
-                                                               tree[index].groupCount[itGroup->first]++;
-                                                       }
-                                               }
-                                       }
+                    for (map<string, bool>::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) {
+                        if (itGroup->second == true) {
+                            tree[index].groupCount[itGroup->first]++;
+                        }
+                    }
                                         
                                         currentNode = index;
                                         
@@ -349,17 +393,24 @@ void PhyloSummary::print(ofstream& out){
         try {
                 
                 if (ignore) { assignRank(0); }
-       
+        vector<string> mGroups;
                 //print labels
                 out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t";
                 if (groupmap != NULL) {
                         //so the labels match the counts below, since the map sorts them automatically...
                         //sort(groupmap->namesOfGroups.begin(), groupmap->namesOfGroups.end());
-                       vector<string> mGroups = groupmap->getNamesOfGroups();
+            mGroups = groupmap->getNamesOfGroups();
                         for (int i = 0; i < mGroups.size(); i++) {
                                 out << mGroups[i] << '\t';
                         }
-               }
+               }else if (ct != NULL) {
+            if (ct->hasGroupInfo()) {
+                mGroups = ct->getNamesOfGroups();
+                for (int i = 0; i < mGroups.size(); i++) {
+                    out << mGroups[i] << '\t';
+                }
+            }
+        }
                 
                 out << endl;
                 
@@ -373,9 +424,10 @@ void PhyloSummary::print(ofstream& out){
                                 tree[0].total += tree[it->second].total;
                                 
                                 if (groupmap != NULL) {
-                                       vector<string> mGroups = groupmap->getNamesOfGroups();
                                         for (int i = 0; i < mGroups.size(); i++) { tree[0].groupCount[mGroups[i]] += tree[it->second].groupCount[mGroups[i]]; } 
-                               }
+                               }else if ( ct != NULL) {
+                    if (ct->hasGroupInfo()) { for (int i = 0; i < mGroups.size(); i++) { tree[0].groupCount[mGroups[i]] += tree[it->second].groupCount[mGroups[i]]; } }
+                }
                         }
                 }
                 
@@ -384,12 +436,10 @@ void PhyloSummary::print(ofstream& out){
                 
                 
                 if (groupmap != NULL) {
-                       //for (itGroup = tree[0].groupCount.begin(); itGroup != tree[0].groupCount.end(); itGroup++) {
-                       //      out << itGroup->second << '\t';
-                       //}
-                       vector<string> mGroups = groupmap->getNamesOfGroups();
-                       for (int i = 0; i < mGroups.size(); i++) {  out << tree[0].groupCount[mGroups[i]] << '\t'; } 
-               }
+                       for (int i = 0; i < mGroups.size(); i++) {  out << tree[0].groupCount[mGroups[i]] << '\t'; }  
+        }else if ( ct != NULL) {
+            if (ct->hasGroupInfo()) { for (int i = 0; i < mGroups.size(); i++) {  out << tree[0].groupCount[mGroups[i]] << '\t'; } }
+        }
                 out << endl;
                 
                 //print rest
@@ -427,7 +477,12 @@ void PhyloSummary::print(int i, ofstream& out){
                                         //}
                                         vector<string> mGroups = groupmap->getNamesOfGroups();
                                         for (int i = 0; i < mGroups.size(); i++) {  out << tree[it->second].groupCount[mGroups[i]] << '\t'; } 
-                               }
+                               }else if (ct != NULL) {
+                    if (ct->hasGroupInfo()) {
+                        vector<string> mGroups = ct->getNamesOfGroups();
+                        for (int i = 0; i < mGroups.size(); i++) {  out << tree[it->second].groupCount[mGroups[i]] << '\t'; } 
+                    }
+                }
                                 out << endl;
                                 
                         }
@@ -473,7 +528,13 @@ void PhyloSummary::readTreeStruct(ifstream& in){
                                 for (int j = 0; j < (groupmap->getNamesOfGroups()).size(); j++) {
                                         tree[i].groupCount[(groupmap->getNamesOfGroups())[j]] = 0;
                                 }
-                       }
+                       }else if (ct != NULL) {
+                if (ct->hasGroupInfo()) {
+                    for (int j = 0; j < (ct->getNamesOfGroups()).size(); j++) {
+                        tree[i].groupCount[(ct->getNamesOfGroups())[j]] = 0;
+                    }
+                }
+            }
                         
                         tree[i].total = 0;
                         
diff --git a/phylosummary.h b/phylosummary.h

index cdec0d0a4ac7ca83cb0845988adb90152bcae07b..65a467483ddab1ae58757629644162dc83bab254 100644 (file)
--- a/phylosummary.h
+++ b/phylosummary.h
@@ -13,6 +13,7 @@
  #include "mothur.h"
  #include "mothurout.h"
  #include "groupmap.h"
+#include "counttable.h"
  
  /**************************************************************************************************/
  
@@ -32,13 +33,15 @@ struct rawTaxNode {
  class PhyloSummary {
  
  public:
-       PhyloSummary(string);
-       PhyloSummary(string, string);
-       ~PhyloSummary() { if (groupmap != NULL)  {  delete groupmap;  }  }
+       PhyloSummary(GroupMap*);
+       PhyloSummary(string, GroupMap*);
+    PhyloSummary(CountTable*);
+       PhyloSummary(string, CountTable*);
+       ~PhyloSummary() {}
         
         int summarize(string);  //pass it a taxonomy file and a group file and it makes the tree
         int addSeqToTree(string, string);
-       int addSeqToTree(string, vector<string>);
+       int addSeqToTree(string, map<string, bool>);
         void print(ofstream&);
         int getMaxLevel() { return maxLevel; }
         
@@ -49,6 +52,7 @@ private:
         void assignRank(int);
         void readTreeStruct(ifstream&);
         GroupMap* groupmap;
+    CountTable* ct;
         bool ignore;
         
         int numNodes;
diff --git a/readtree.cpp b/readtree.cpp

index 6fa4c3da814347cc60c0807bd16cb373fff8c962..71c4bd5b96fcd1cc435f2954616899fdde3eedb8 100644 (file)
--- a/readtree.cpp
+++ b/readtree.cpp
@@ -20,12 +20,12 @@ ReadTree::ReadTree() {
         }
  }
  /***********************************************************************/
-int ReadTree::AssembleTrees(map<string, string> nameMap) {
+int ReadTree::AssembleTrees() {
          try {
                  //assemble users trees
                  for (int i = 0; i < Trees.size(); i++) {
                          if (m->control_pressed) { return 0;  }
-                        Trees[i]->assembleTree(nameMap);
+                        Trees[i]->assembleTree();
                  }
                  return 0;
          }
@@ -107,7 +107,7 @@ float ReadTree::readBranchLength(istream& f) {
  /***********************************************************************/
  //This class reads a file in Newick form and stores it in a tree.
  
-int ReadNewickTree::read(TreeMap* tmap) {
+int ReadNewickTree::read(CountTable* ct) {
         try {
                 holder = "";
                 int c, error;
@@ -129,12 +129,12 @@ int ReadNewickTree::read(TreeMap* tmap) {
                                 }
  
                                 //make new tree
-                               T = new Tree(tmap); 
+                               T = new Tree(ct); 
  
                                 numNodes = T->getNumNodes();
                                 numLeaves = T->getNumLeaves();
                                 
-                               error = readTreeString(tmap); 
+                               error = readTreeString(ct); 
                                 
                                 //save trees for later commands
                                 Trees.push_back(T); 
@@ -143,9 +143,9 @@ int ReadNewickTree::read(TreeMap* tmap) {
                 //if you are a nexus file
                 }else if ((c = filehandle.peek()) == '#') {
                         //get right number of seqs from nexus file.
-                       Tree* temp = new Tree(tmap);  delete temp;
+                       Tree* temp = new Tree(ct);  delete temp;
                         
-                       nexusTranslation(tmap);  //reads file through the translation and updates treemap
+                       nexusTranslation(ct);  //reads file through the translation and updates treemap
                         while((c = filehandle.peek()) != EOF) { 
                                 // get past comments
                                 while ((c = filehandle.peek()) != EOF) {        
@@ -166,12 +166,12 @@ int ReadNewickTree::read(TreeMap* tmap) {
                                 filehandle.putback(c);  //put back first ( of tree.
                                 
                                 //make new tree
-                               T = new Tree(tmap); 
+                               T = new Tree(ct); 
                                 numNodes = T->getNumNodes();
                                 numLeaves = T->getNumLeaves();
                                 
                                 //read tree info
-                               error = readTreeString(tmap); 
+                               error = readTreeString(ct); 
                                  
                                 //save trees for later commands
                                 Trees.push_back(T); 
@@ -191,7 +191,7 @@ int ReadNewickTree::read(TreeMap* tmap) {
  }
  /**************************************************************************************************/
  //This function read the file through the translation of the sequences names and updates treemap.
-string ReadNewickTree::nexusTranslation(TreeMap* tmap) {
+string ReadNewickTree::nexusTranslation(CountTable* ct) {
         try {
                 
                 holder = "";
@@ -209,42 +209,14 @@ string ReadNewickTree::nexusTranslation(TreeMap* tmap) {
                         filehandle >> holder; 
                         if(holder == "tree" && comment != 1){return holder;}
                 }
-               
-               //update treemap
-               tmap->namesOfSeqs.clear();
-               
-               /*char c;
-               string number, name;
-               while ((c = filehandle.peek()) != EOF) {        
-                       
-                       filehandle >> number; 
-                       
-                       if ((number == "tree") || (number == ";") ) {  name = number; break;  }
-                       
-                       filehandle >> name; 
-                       
-                       char lastChar;
-                       if (name.length() != 0) { lastChar = name[name.length()-1]; }
-                       
-                       if ((name == "tree") || (name == ";") ) {  break;  }
-                       
-                       if (lastChar == ',') {  name.erase(name.end()-1); } //erase the comma
-                       */      
-               
+    
                 string number, name;
                 for(int i=0;i<numSeqs;i++){
                         
                         filehandle >> number;
                         filehandle >> name;
                         name.erase(name.end()-1);  //erase the comma
-                       
-                       //insert new one with new name
-                       string group = tmap->getGroup(name);
-                       tmap->treemap[toString(number)].groupname = group;
-                       tmap->treemap[toString(number)].vectorIndex = tmap->getIndex(name);
-                       //erase old one.  so treemap[sarah].groupnumber is now treemap[1].groupnumber. if number is 1 and name is sarah.
-                       tmap->treemap.erase(name);
-                       tmap->namesOfSeqs.push_back(number);
+                       ct->renameSeq(name, toString(number));
                 }
                 
                 return name;
@@ -256,7 +228,7 @@ string ReadNewickTree::nexusTranslation(TreeMap* tmap) {
  }
  
  /**************************************************************************************************/
-int ReadNewickTree::readTreeString(TreeMap* tmap) {
+int ReadNewickTree::readTreeString(CountTable* ct) {
         try {
                 
                 int n = 0;
@@ -269,7 +241,7 @@ int ReadNewickTree::readTreeString(TreeMap* tmap) {
                 if(ch == '('){
                         n = numLeaves;  //number of leaves / sequences, we want node 1 to start where the leaves left off
  
-                       lc = readNewickInt(filehandle, n, T, tmap);
+                       lc = readNewickInt(filehandle, n, T, ct);
                         if (lc == -1) { m->mothurOut("error with lc"); m->mothurOutEndLine(); return -1; } //reports an error in reading
         
                         if(filehandle.peek()==','){                                                     
@@ -281,7 +253,7 @@ int ReadNewickTree::readTreeString(TreeMap* tmap) {
                         }       
                 
                         if(rooted != 1){                                                                
-                               rc = readNewickInt(filehandle, n, T, tmap);
+                               rc = readNewickInt(filehandle, n, T, ct);
                                 if (rc == -1) { m->mothurOut("error with rc"); m->mothurOutEndLine(); return -1; } //reports an error in reading
                                 if(filehandle.peek() == ')'){                                   
                                         readSpecialChar(filehandle,')',"right parenthesis");
@@ -326,7 +298,7 @@ int ReadNewickTree::readTreeString(TreeMap* tmap) {
  }
  /**************************************************************************************************/
  
-int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T, TreeMap* tmap) {
+int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T, CountTable* ct) {
         try {
                 
                 if (m->control_pressed) { return -1; } 
@@ -339,7 +311,7 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T, TreeMap* tmap) {
                         //read all children
                         vector<int> childrenNodes;
                         while(f.peek() != ')'){
-                               int child = readNewickInt(f, n, T, tmap);
+                               int child = readNewickInt(f, n, T, ct);
                                 if (child == -1) { return -1; } //reports an error in reading
                 //cout << "child = " << child << endl;          
                                 childrenNodes.push_back(child);
@@ -387,12 +359,7 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T, TreeMap* tmap) {
                         }else{
                                 T->tree[n].setBranchLength(0.0); 
                         }                                               
-                       
-                       //T->tree[n].setChildren(lc,rc);
-                       //T->tree[lc].setParent(n);
-                       //T->tree[rc].setParent(n);
-                       //T->printTree();  cout << endl;
-                       
+                                               
                         return n++;
                 
                 }else{
@@ -410,33 +377,27 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T, TreeMap* tmap) {
                         f.putback(d);
                 
                         //set group info
-                       string group = tmap->getGroup(name);
+                       vector<string> group = ct->getGroups(name);
                         
                         //find index in tree of name
                         int n1 = T->getIndex(name);
                         
                         //adds sequence names that are not in group file to the "xxx" group
-                       if(group == "not found") {
+                       if(group.size() == 0) {
                                 m->mothurOut("Name: " + name + " is not in your groupfile, and will be disregarded. \n");  //readOk = -1; return n1;
                                 
-                               tmap->namesOfSeqs.push_back(name);
-                               tmap->treemap[name].groupname = "xxx";
-                               
-                               map<string, int>::iterator it;
-                               it = tmap->seqsPerGroup.find("xxx");
-                               if (it == tmap->seqsPerGroup.end()) { //its a new group
-                                       tmap->addGroup("xxx");
-                                       tmap->seqsPerGroup["xxx"] = 1;
-                               }else {
-                                       tmap->seqsPerGroup["xxx"]++;
-                               }
-                               
-                               group = "xxx";
-                       }
-                       
-                       vector<string> tempGroup; tempGroup.push_back(group);
-                       
-                       T->tree[n1].setGroup(tempGroup);
+                vector<string> currentGroups = ct->getNamesOfGroups();
+                if (!m->inUsersGroups("xxx", currentGroups)) {  ct->addGroup("xxx");  }
+                currentGroups = ct->getNamesOfGroups();
+                vector<int> thisCounts; thisCounts.resize(currentGroups.size(), 0);
+                for (int h = 0; h < currentGroups.size(); h++) {  
+                    if (currentGroups[h] == "xxx") {  thisCounts[h] = 1;  break; }
+                }
+                ct->push_back(name, thisCounts);
+                
+                               group.push_back("xxx");
+                       }                       
+                       T->tree[n1].setGroup(group);
                         T->tree[n1].setChildren(-1,-1);
                 
                         if(blen == 1){  
diff --git a/readtree.h b/readtree.h

index 6b074de839070a86bd3e010bbf222a7ec4011dce..8a692432c7cda431c6f5d73f409a0fa8e9152671 100644 (file)
--- a/readtree.h
+++ b/readtree.h
@@ -11,6 +11,7 @@
  
  #include "mothur.h"
  #include "tree.h"
+#include "counttable.h"
  
  #define MAX_LINE               513
  #define SKIPLINE(f,c)  {while((c=f.get())!=EOF && ((c) != '\n')){}}
@@ -24,17 +25,17 @@ class ReadTree {
                 ReadTree(); 
                 virtual ~ReadTree() {};
                 
-               virtual int read(TreeMap*) = 0;
+               virtual int read(CountTable*) = 0;
                 int readSpecialChar(istream&, char, string);
                 int readNodeChar(istream& f);
                 float readBranchLength(istream& f);
         
                 vector<Tree*> getTrees() { return Trees; }
-               int AssembleTrees(map<string, string>);
+               int AssembleTrees();
                 
         protected:
                 vector<Tree*> Trees;
-               TreeMap* treeMap;
+               CountTable* ct;
                 int numNodes, numLeaves;
                 MothurOut* m;
                 
@@ -48,13 +49,13 @@ class ReadNewickTree : public ReadTree {
  public:
         ReadNewickTree(string file) : treeFile(file) { m->openInputFile(file, filehandle); readOk = 0; } 
         ~ReadNewickTree() {};
-       int read(TreeMap*);
+       int read(CountTable*);
         
  private:
         Tree* T;
-       int readNewickInt(istream&, int&, Tree*, TreeMap*);
-       int readTreeString(TreeMap*);
-       string nexusTranslation(TreeMap*);
+       int readNewickInt(istream&, int&, Tree*, CountTable*);
+       int readTreeString(CountTable*);
+       string nexusTranslation(CountTable*);
         ifstream filehandle;
         string treeFile;
         string holder;
diff --git a/secondarystructurecommand.cpp b/secondarystructurecommand.cpp

index ee50ab14e6183af4b2fe9979c97336ac1d9b75a5..869df029fdd35efa6c3e9a9a22d46c6c4fea6667 100644 (file)
--- a/secondarystructurecommand.cpp
+++ b/secondarystructurecommand.cpp
@@ -15,7 +15,8 @@ vector<string> AlignCheckCommand::setParameters(){
         try {
                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
                 CommandParameter pmap("map", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pmap);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+        CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                 
                 vector<string> myArray;
@@ -31,7 +32,7 @@ vector<string> AlignCheckCommand::setParameters(){
  string AlignCheckCommand::getHelpString(){     
         try {
                 string helpString = "";
-               helpString += "The align.check command reads a fasta file and map file.\n";
+               helpString += "The align.check command reads a fasta file and map file as well as an optional name file.\n";
                 helpString += "It outputs a file containing the secondary structure matches in the .align.check file.\n";
                 helpString += "The align.check command parameters are fasta and map, both are required.\n";
                 helpString += "The align.check command should be in the following format: align.check(fasta=yourFasta, map=yourMap).\n";
diff --git a/sensspeccommand.cpp b/sensspeccommand.cpp

index cfa1f5b6dc0649925ff4762abdef070e93fd7688..b62bb00548f3e1ee66f0c1cf2b4bee117c5416cf 100644 (file)
--- a/sensspeccommand.cpp
+++ b/sensspeccommand.cpp
@@ -14,7 +14,6 @@ vector<string> SensSpecCommand::setParameters(){
         try {
                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
                 CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
-               //CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
                 CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pcolumn);
                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                 CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "",false,false); parameters.push_back(pcutoff);
@@ -136,16 +135,7 @@ SensSpecCommand::SensSpecCommand(string option)  {
                                         path = m->hasPath(it->second);
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
-                               }
-                               
-                               //it = parameters.find("name");
-                               //user has given a template file
-                               //if(it != parameters.end()){ 
-                                       //path = m->hasPath(it->second);
-                                       //if the user has not given a path then, add inputdir. else leave path alone.
-                                       //if (path == "") {     parameters["name"] = inputDir + it->second;             }
-                               //}
-                               
+                               }                               
                         }
                         //check for required parameters
                         listFile = validParameter.validFile(parameters, "list", true);
@@ -196,12 +186,6 @@ SensSpecCommand::SensSpecCommand(string option)  {
                         else if(!m->isTrue(temp))       {       hard = 0;       }
                         else if(m->isTrue(temp))        {       hard = 1;       }
                         
-//                     temp = validParameter.validFile(parameters, "name", true);
-//                     if (temp == "not found")        {       nameFile = "";          }
-//                     else if(temp == "not open")     {       abort = true;           }
-//                     else                                            {       nameFile = temp;        }
-//                     cout << "name:\t" << nameFile << endl;
-
                         temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found") { temp = "-1.00"; }
                         m->mothurConvert(temp, cutoff);  
  //                     cout << cutoff << endl;
diff --git a/shhhercommand.cpp b/shhhercommand.cpp

index fe0685bea19678213fa0ddb8ccfade9ad08c4b12..19ffc899d60ac778ac4eedbeb2034a62b5e04bd7 100644 (file)
--- a/shhhercommand.cpp
+++ b/shhhercommand.cpp
@@ -1931,7 +1931,7 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
  
  int ShhherCommand::execute(){
         try {
-               if (abort == true) { return 0; }
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                 
                 getSingleLookUp();      if (m->control_pressed) { return 0; }
                 getJointLookUp();       if (m->control_pressed) { return 0; }
diff --git a/subsample.cpp b/subsample.cpp

index 261297df67cfc1c5a8933a72f8bba307defbef88..c55accd618991279bb3a1a0ff6d29e0d0250a81e 100644 (file)
--- a/subsample.cpp
+++ b/subsample.cpp
@@ -8,62 +8,54 @@
  
  #include "subsample.h"
  //**********************************************************************************************************************
-Tree* SubSample::getSample(Tree* T, TreeMap* tmap, TreeMap* newTmap, int size, map<string, string> originalNameMap) {
+Tree* SubSample::getSample(Tree* T, CountTable* ct, CountTable* newCt, int size) {
      try {
          Tree* newTree = NULL;
          
-        map<string, vector<string> > newGroups;
-        vector<string> subsampledSeqs = getSample(tmap, size, newGroups);
+        //remove seqs not in sample from counttable
+        vector<string> Groups = ct->getNamesOfGroups();
+        newCt->copy(ct); 
+        newCt->addGroup("doNotIncludeMe");
          
-        //remove seqs not in sample from treemap
-        for (map<string, vector<string> >::iterator it = newGroups.begin(); it != newGroups.end(); it++) {
-            for (int i = 0; i < (it->second).size(); i++) {
-                newTmap->addSeq((it->second)[i], it->first);
-            }
-        }
-        
-        newTree = new Tree(newTmap);
-        newTree->getCopy(T, originalNameMap);
-        
-        return newTree;
-    }
-    catch(exception& e) {
-        m->errorOut(e, "SubSample", "getSample-Tree");
-        exit(1);
-    }
-}
-/**********************************************************************************************************************
-Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map<string, string> whole, int size) {
-    try {
-        Tree* newTree = NULL;
-        
-        vector<string> subsampledSeqs = getSample(tmap, size);
-        map<string, string> sampledNameMap = deconvolute(whole, subsampledSeqs); 
+        map<string, int> doNotIncludeTotals; 
+        vector<string> namesSeqs = ct->getNamesOfSeqs();
+        for (int i = 0; i < namesSeqs.size(); i++) {  doNotIncludeTotals[namesSeqs[i]] = 0; }
+    
+        for (int i = 0; i < Groups.size(); i++) {
+            if (m->inUsersGroups(Groups[i], m->getGroups())) {
+                if (m->control_pressed) { break; }
          
-        //remove seqs not in sample from treemap
-        for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
-            //is that name in the subsample?
-            int count = 0;
-            for (int j = 0; j < subsampledSeqs.size(); j++) {
-                if (tmap->namesOfSeqs[i] == subsampledSeqs[j]) { break; } //found it
-                count++;
+                int thisSize = ct->getGroupCount(Groups[i]);
+                
+                if (thisSize >= size) {        
+                    
+                    vector<string> names = ct->getNamesOfSeqs(Groups[i]);
+                    vector<int> random;
+                    for (int j = 0; j < names.size(); j++) {
+                        int num = ct->getGroupCount(names[j], Groups[i]);
+                        for (int k = 0; k < num; k++) { random.push_back(j); }
+                    }
+                    random_shuffle(random.begin(), random.end());
+                    
+                    vector<int> sampleRandoms; sampleRandoms.resize(names.size(), 0);
+                    for (int j = 0; j < size; j++) { sampleRandoms[random[j]]++; }
+                    for (int j = 0; j < sampleRandoms.size(); j++) {
+                        newCt->setAbund(names[j], Groups[i], sampleRandoms[j]);
+                    }
+                    sampleRandoms.clear(); sampleRandoms.resize(names.size(), 0);
+                    for (int j = size; j < thisSize; j++) { sampleRandoms[random[j]]++; }
+                    for (int j = 0; j < sampleRandoms.size(); j++) {  doNotIncludeTotals[names[j]] += sampleRandoms[j]; }
+                }else {  m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; }
              }
  
-            if (m->control_pressed) { return newTree; }
-            
-            //if you didnt find it, remove it 
-            if (count == subsampledSeqs.size()) { 
-                tmap->removeSeq(tmap->namesOfSeqs[i]);
-                i--; //need this because removeSeq removes name from namesOfSeqs
-            }
          }
          
-        //create new tree
-        int numUniques = sampledNameMap.size();
-        if (sampledNameMap.size() == 0) { numUniques = subsampledSeqs.size(); }
+        for (map<string, int>::iterator it = doNotIncludeTotals.begin(); it != doNotIncludeTotals.end(); it++) {  
+            newCt->setAbund(it->first, "doNotIncludeMe", it->second);
+        } 
          
-        newTree = new Tree(numUniques, tmap); //numNodes, treemap
-        newTree->getSubTree(T, subsampledSeqs, sampledNameMap);
+        newTree = new Tree(newCt);
+        newTree->getCopy(T, true);
          
          return newTree;
      }
@@ -71,7 +63,7 @@ Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map<string, string> whole, in
          m->errorOut(e, "SubSample", "getSample-Tree");
          exit(1);
      }
-}*/
+}
  //**********************************************************************************************************************
  //assumes whole maps dupName -> uniqueName
  map<string, string> SubSample::deconvolute(map<string, string> whole, vector<string>& wanted) {
@@ -112,100 +104,6 @@ map<string, string> SubSample::deconvolute(map<string, string> whole, vector<str
         }
  }
  //**********************************************************************************************************************
-vector<string> SubSample::getSample(TreeMap* tMap, int size, map<string, vector<string> >& sample) {
-    try {
-        vector<string> temp2;
-        sample["doNotIncludeMe"] = temp2;
-        
-        vector<string> namesInSample;
-        
-        vector<string> Groups = tMap->getNamesOfGroups();    
-        for (int i = 0; i < Groups.size(); i++) {
-            
-            if (m->inUsersGroups(Groups[i], m->getGroups())) {
-                if (m->control_pressed) { break; }
-                
-                vector<string> thisGroup; thisGroup.push_back(Groups[i]);
-                vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
-                int thisSize = thisGroupsSeqs.size();
-                vector<string> temp;
-                sample[Groups[i]] = temp;
-                
-                if (thisSize >= size) {        
-                    
-                    random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end());
-                    
-                    for (int j = 0; j < size; j++) { sample[Groups[i]].push_back(thisGroupsSeqs[j]); namesInSample.push_back(thisGroupsSeqs[j]); }
-                    for (int j = size; j < thisSize; j++) { sample["doNotIncludeMe"].push_back(thisGroupsSeqs[j]); }
-                    
-                }else {  m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; }
-            }
-        } 
-        
-        return namesInSample;
-    }
-       catch(exception& e) {
-               m->errorOut(e, "SubSample", "getSample-TreeMap");
-               exit(1);
-       }
-}      
-
-//**********************************************************************************************************************
-vector<string> SubSample::getSample(TreeMap* tMap, int size) {
-    try {
-        vector<string> sample;
-        
-        vector<string> Groups = tMap->getNamesOfGroups();    
-        for (int i = 0; i < Groups.size(); i++) {
-            
-            if (m->inUsersGroups(Groups[i], m->getGroups())) {
-                if (m->control_pressed) { break; }
-                
-                vector<string> thisGroup; thisGroup.push_back(Groups[i]);
-                vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
-                int thisSize = thisGroupsSeqs.size();
-                
-                if (thisSize >= size) {        
-                    
-                    random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end());
-                    
-                    for (int j = 0; j < size; j++) { sample.push_back(thisGroupsSeqs[j]); }
-                }else {  m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; }
-            }
-        } 
-        
-        return sample;
-    }
-       catch(exception& e) {
-               m->errorOut(e, "SubSample", "getSample-TreeMap");
-               exit(1);
-       }
-}      
-//**********************************************************************************************************************
-vector<string> SubSample::getSample(TreeMap* tMap, vector<string> Groups) {
-    try {
-        vector<string> sample;
-        
-        //vector<string> Groups = tMap->getNamesOfGroups();    
-        for (int i = 0; i < Groups.size(); i++) {
-            
-            if (m->control_pressed) { break; }
-            
-            vector<string> thisGroup; thisGroup.push_back(Groups[i]);
-            vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
-            int thisSize = thisGroupsSeqs.size();
-                
-            for (int j = 0; j < thisSize; j++) { sample.push_back(thisGroupsSeqs[j]); }
-        } 
-        
-        return sample;
-    }
-       catch(exception& e) {
-               m->errorOut(e, "SubSample", "getSample-TreeMap");
-               exit(1);
-       }
-}      
-//**********************************************************************************************************************
  vector<string> SubSample::getSample(vector<SharedRAbundVector*>& thislookup, int size) {
         try {
                 
diff --git a/subsample.h b/subsample.h

index b00f1a7141b49fb99a9683a58b96153c5986a6b7..55abed9adae227e72713a9b1ed083d5731dc3be2 100644 (file)
--- a/subsample.h
+++ b/subsample.h
@@ -13,6 +13,8 @@
  #include "sharedrabundvector.h"
  #include "treemap.h"
  #include "tree.h"
+#include "counttable.h"
+
  
  //subsampling overwrites the sharedRabunds.  If you need to reuse the original use the getSamplePreserve function.
  
@@ -24,20 +26,14 @@ class SubSample {
          ~SubSample() {}
      
          vector<string> getSample(vector<SharedRAbundVector*>&, int); //returns the bin labels for the subsample, mothurOuts binlabels are preserved so you can run this multiple times. Overwrites original vector passed in, if you need to preserve it deep copy first.
-        
-        //Tree* getSample(Tree*, TreeMap*, map<string, string>, int); //creates new subsampled tree, destroys treemap so copy if needed.
-        Tree* getSample(Tree*, TreeMap*, TreeMap*, int, map<string, string>); //creates new subsampled tree. Uses first treemap to fill new treemap with sabsampled seqs. Sets groups of seqs not in subsample to "doNotIncludeMe".
+        Tree* getSample(Tree*, CountTable*, CountTable*, int); //creates new subsampled tree. Uses first counttable to fill new counttable with sabsampled seqs. Sets groups of seqs not in subsample to "doNotIncludeMe".
          int getSample(SAbundVector*&, int); //destroys sabundvector passed in, so copy it if you need it
      
      private:
      
          MothurOut* m;
          int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
-    
-        vector<string> getSample(TreeMap*, vector<string>);
-        vector<string> getSample(TreeMap*, int); //names of seqs to include in sample tree 
-        vector<string> getSample(TreeMap* tMap, int size, map<string, vector<string> >& sample); //sample maps group -> seqs in group. seqs not in sample are in doNotIncludeMe group
-        map<string, string> deconvolute(map<string, string> wholeSet, vector<string>& subsampleWanted); //returns new nameMap containing only subsampled names, and removes redundants from subsampled wanted because it makes the new nameMap.
+         map<string, string> deconvolute(map<string, string> wholeSet, vector<string>& subsampleWanted); //returns new nameMap containing only subsampled names, and removes redundants from subsampled wanted because it makes the new nameMap.
  
  
  };
diff --git a/summarytaxcommand.cpp b/summarytaxcommand.cpp

index 3e16e74005c5f0b7cfc62d32df7c172be805e511..e932eee49c2477de737caf62d143a5140aea4340 100644 (file)
--- a/summarytaxcommand.cpp
+++ b/summarytaxcommand.cpp
@@ -14,8 +14,9 @@
  vector<string> SummaryTaxCommand::setParameters(){     
         try {
                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
                 CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(preftaxonomy);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -34,9 +35,10 @@ string SummaryTaxCommand::getHelpString(){
         try {
                 string helpString = "";
                 helpString += "The summary.tax command reads a taxonomy file and an optional name file, and summarizes the taxonomy information.\n";
-               helpString += "The summary.tax command parameters are taxonomy, group and name. taxonomy is required, unless you have a valid current taxonomy file.\n";
+               helpString += "The summary.tax command parameters are taxonomy, count, group and name. taxonomy is required, unless you have a valid current taxonomy file.\n";
                 helpString += "The name parameter allows you to enter a name file associated with your taxonomy file. \n";
                 helpString += "The group parameter allows you add a group file so you can have the summary totals broken up by group.\n";
+        helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n";
                 helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. It is not required, but providing it will keep the rankIDs in the summary file static.\n";
                 helpString += "The summary.tax command should be in the following format: \n";
                 helpString += "summary.tax(taxonomy=yourTaxonomyFile) \n";
@@ -142,6 +144,14 @@ SummaryTaxCommand::SummaryTaxCommand(string option)  {
                                         if (path == "") {       parameters["reftaxonomy"] = inputDir + it->second;              }
                                 }
                                 
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
+
                         }
                         
                         //initialize outputTypes
@@ -166,7 +176,20 @@ SummaryTaxCommand::SummaryTaxCommand(string option)  {
                         if (groupfile == "not open") { groupfile = ""; abort = true; }
                         else if (groupfile == "not found") { groupfile = ""; }
                         else { m->setGroupFile(groupfile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { countfile = ""; abort = true; }
+                       else if (countfile == "not found") { countfile = "";  } 
+                       else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
                         
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+            
                         refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
                         if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
                         else if (refTaxonomy == "not open") { refTaxonomy = ""; abort = true; }
@@ -177,11 +200,12 @@ SummaryTaxCommand::SummaryTaxCommand(string option)  {
                                 outputDir += m->hasPath(taxfile); //if user entered a file with a path then preserve it 
                         }
                         
-                       if (namefile == "") {
-                               vector<string> files; files.push_back(taxfile);
-                               parser.getNameFile(files);
+            if (countfile == "") {
+                if (namefile == "") {
+                    vector<string> files; files.push_back(taxfile);
+                    parser.getNameFile(files);
+                }
                         }
-                       
                 }
         }
         catch(exception& e) {
@@ -197,23 +221,35 @@ int SummaryTaxCommand::execute(){
                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                 int start = time(NULL);
                 
-               PhyloSummary* taxaSum;
-               if (refTaxonomy != "") {
-                       taxaSum = new PhyloSummary(refTaxonomy, groupfile);
-               }else {
-                       taxaSum = new PhyloSummary(groupfile);
-               }
+        GroupMap* groupMap = NULL;
+        CountTable* ct = NULL;
+        if (groupfile != "") {
+            groupMap = new GroupMap(groupfile);
+            groupMap->readMap();
+        }else if (countfile != "") {
+            ct = new CountTable();
+            ct->readTable(countfile);
+        }
                 
-               if (m->control_pressed) { delete taxaSum; return 0; }
+        PhyloSummary* taxaSum;
+        if (countfile != "") {
+            if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct); }
+            else { taxaSum = new PhyloSummary(ct); }
+        }else {
+            if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap); }
+            else { taxaSum = new PhyloSummary(groupMap); }
+               }
+        
+               if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
                 
                 int numSeqs = 0;
-               if (namefile == "") { numSeqs = taxaSum->summarize(taxfile);  }
-               else {
+               if ((namefile == "") || (countfile != "")) { numSeqs = taxaSum->summarize(taxfile);  }
+               else if (namefile != "") {
                         map<string, vector<string> > nameMap;
                         map<string, vector<string> >::iterator itNames;
                         m->readNames(namefile, nameMap);
                         
-                       if (m->control_pressed) { delete taxaSum; return 0; }
+                       if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
                         
                         ifstream in;
                         m->openInputFile(taxfile, in);
@@ -222,6 +258,9 @@ int SummaryTaxCommand::execute(){
                         string name, taxon;
                         
                         while(!in.eof()){
+                
+                if (m->control_pressed) { break; }
+                
                                 in >> name >> taxon; m->gobble(in);
                                 
                                 itNames = nameMap.find(name);
@@ -240,7 +279,7 @@ int SummaryTaxCommand::execute(){
                         in.close();
                 }
                 
-               if (m->control_pressed) {  delete taxaSum; return 0; }
+               if (m->control_pressed) {  if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
                 
                 //print summary file
                 ofstream outTaxTree;
@@ -250,6 +289,7 @@ int SummaryTaxCommand::execute(){
                 outTaxTree.close();
                 
                 delete taxaSum;
+        if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; }
                 
                 if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
                 
diff --git a/summarytaxcommand.h b/summarytaxcommand.h

index 5f0630f66a03ba9f08a2dd4d2098808a513f0a3c..e8033e23c76837809aa7828389b9cbae13b9c639 100644 (file)
--- a/summarytaxcommand.h
+++ b/summarytaxcommand.h
@@ -11,6 +11,7 @@
   */
  
  #include "command.hpp"
+#include "counttable.h"
  
  /**************************************************************************************************/
  
@@ -33,7 +34,7 @@ class SummaryTaxCommand : public Command {
                 
         private:
                 bool abort;
-               string taxfile, outputDir, namefile, groupfile, refTaxonomy;
+               string taxfile, outputDir, namefile, groupfile, refTaxonomy, countfile;
                 vector<string> outputNames;
                 map<string, int> nameMap;
  };
diff --git a/tree.cpp b/tree.cpp

index 44ecadd534b60d60b602d9e052259ac69709f9d3..0bd98e01f1c305d93851e1a5e465c92a22fc9a1f 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -10,7 +10,7 @@
  #include "tree.h"
  
  /*****************************************************************/
-Tree::Tree(int num, TreeMap* t) : tmap(t) {
+Tree::Tree(int num, CountTable* t) : ct(t) {
         try {
                 m = MothurOut::getInstance();
                 
@@ -36,21 +36,20 @@ Tree::Tree(string g) { //do not use tree generated by this its just to extract t
         }
  }
  /*****************************************************************/
-Tree::Tree(TreeMap* t) : tmap(t) {
+Tree::Tree(CountTable* t) : ct(t) {
         try {
                 m = MothurOut::getInstance();
                 
                 if (m->runParse == true) {  parseTreeFile();  m->runParse = false;  }
-//for(int i = 0; i <   globaldata->Treenames.size(); i++) { cout << i << '\t' << globaldata->Treenames[i] << endl;  }  
+
                 numLeaves = m->Treenames.size();
                 numNodes = 2*numLeaves - 1;
                 
                 tree.resize(numNodes);
                         
                 //initialize groupNodeInfo
-               for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
-                       groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0);
-               }
+        vector<string> namesOfGroups = ct->getNamesOfGroups();
+               for (int i = 0; i < namesOfGroups.size(); i++) {  groupNodeInfo[namesOfGroups[i]].resize(0);  }
                 
                 //initialize tree with correct number of nodes, name and group info.
                 for (int i = 0; i < numNodes; i++) {
@@ -59,19 +58,35 @@ Tree::Tree(TreeMap* t) : tmap(t) {
                                 tree[i].setName(m->Treenames[i]);
                                 
                                 //save group info
-                               string group = tmap->getGroup(m->Treenames[i]);
-                               
-                               vector<string> tempGroups; tempGroups.push_back(group);
-                               tree[i].setGroup(tempGroups);
-                               groupNodeInfo[group].push_back(i); 
-                               
-                               //set pcount and pGroup for groupname to 1.
-                               tree[i].pcount[group] = 1;
-                               tree[i].pGroups[group] = 1;
-                               
-                               //Treemap knows name, group and index to speed up search
-                               tmap->setIndex(m->Treenames[i], i);
-       
+                int maxPars = 1;
+                               vector<string> group;
+                vector<int> counts = ct->getGroupCounts(m->Treenames[i]);
+                               for (int j = 0; j < namesOfGroups.size(); j++) {  
+                    if (counts[j] != 0) { //you have seqs from this group
+                        groupNodeInfo[namesOfGroups[j]].push_back(i);
+                        group.push_back(namesOfGroups[j]);
+                        tree[i].pGroups[namesOfGroups[j]] = counts[j];
+                        tree[i].pcount[namesOfGroups[j]] = counts[j];
+                        //keep highest group
+                                               if(counts[j] > maxPars){ maxPars = counts[j]; }
+                    }  
+                }
+                               tree[i].setGroup(group);
+                               setIndex(m->Treenames[i], i);
+                
+                if (maxPars > 1) { //then we have some more dominant groups
+                                       //erase all the groups that are less than maxPars because you found a more dominant group.
+                                       for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
+                                               if(it->second < maxPars){
+                                                       tree[i].pGroups.erase(it++);
+                                               }else { it++; }
+                                       }
+                                       //set one remaining groups to 1
+                                       for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
+                                               tree[i].pGroups[it->first] = 1;
+                                       }
+                               }//end if
+                
                         //intialize non leaf nodes
                         }else if (i > (numLeaves-1)) {
                                 tree[i].setName("");
@@ -87,7 +102,7 @@ Tree::Tree(TreeMap* t) : tmap(t) {
         }
  }
  /*****************************************************************/
-Tree::Tree(TreeMap* t, vector< vector<double> >& sims) : tmap(t) {
+Tree::Tree(CountTable* t, vector< vector<double> >& sims) : ct(t) {
         try {
                 m = MothurOut::getInstance();
                 
@@ -98,9 +113,8 @@ Tree::Tree(TreeMap* t, vector< vector<double> >& sims) : tmap(t) {
                 tree.resize(numNodes);
          
                 //initialize groupNodeInfo
-               for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
-                       groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0);
-               }
+        vector<string> namesOfGroups = ct->getNamesOfGroups();
+               for (int i = 0; i < namesOfGroups.size(); i++) {  groupNodeInfo[namesOfGroups[i]].resize(0);  }
                 
                 //initialize tree with correct number of nodes, name and group info.
                 for (int i = 0; i < numNodes; i++) {
@@ -109,18 +123,34 @@ Tree::Tree(TreeMap* t, vector< vector<double> >& sims) : tmap(t) {
                                 tree[i].setName(m->Treenames[i]);
                                 
                                 //save group info
-                               string group = tmap->getGroup(m->Treenames[i]);
-                               
-                               vector<string> tempGroups; tempGroups.push_back(group);
-                               tree[i].setGroup(tempGroups);
-                               groupNodeInfo[group].push_back(i); 
-                               
-                               //set pcount and pGroup for groupname to 1.
-                               tree[i].pcount[group] = 1;
-                               tree[i].pGroups[group] = 1;
-                               
-                               //Treemap knows name, group and index to speed up search
-                               tmap->setIndex(m->Treenames[i], i);
+                int maxPars = 1;
+                               vector<string> group;
+                vector<int> counts = ct->getGroupCounts(m->Treenames[i]);
+                               for (int j = 0; j < namesOfGroups.size(); j++) {  
+                    if (counts[j] != 0) { //you have seqs from this group
+                        groupNodeInfo[namesOfGroups[j]].push_back(i);
+                        group.push_back(namesOfGroups[j]);
+                        tree[i].pGroups[namesOfGroups[j]] = counts[j];
+                        tree[i].pcount[namesOfGroups[j]] = counts[j];
+                        //keep highest group
+                                               if(counts[j] > maxPars){ maxPars = counts[j]; }
+                    }  
+                }
+                               tree[i].setGroup(group);
+                               setIndex(m->Treenames[i], i);
+                
+                if (maxPars > 1) { //then we have some more dominant groups
+                                       //erase all the groups that are less than maxPars because you found a more dominant group.
+                                       for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
+                                               if(it->second < maxPars){
+                                                       tree[i].pGroups.erase(it++);
+                                               }else { it++; }
+                                       }
+                                       //set one remaining groups to 1
+                                       for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
+                                               tree[i].pGroups[it->first] = 1;
+                                       }
+                               }//end if
                  
                  //intialize non leaf nodes
                         }else if (i > (numLeaves-1)) {
@@ -129,11 +159,12 @@ Tree::Tree(TreeMap* t, vector< vector<double> >& sims) : tmap(t) {
                                 tree[i].setGroup(tempGroups);
                         }
                 }
+
          
          //build tree from matrix
          //initialize indexes
-        map<int, int> indexes;  //maps row in simMatrix to vector index in the tree
-        for (int g = 0; g < numLeaves; g++) {  indexes[g] = g; }
+        map<int, int> thisIndexes;  //maps row in simMatrix to vector index in the tree
+        for (int g = 0; g < numLeaves; g++) {  thisIndexes[g] = g;     }
                 
                 //do merges and create tree structure by setting parents and children
                 //there are numGroups - 1 merges to do
@@ -152,26 +183,26 @@ Tree::Tree(TreeMap* t, vector< vector<double> >& sims) : tmap(t) {
              
                         //set non-leaf node info and update leaves to know their parents
                         //non-leaf
-                       tree[numLeaves + i].setChildren(indexes[row], indexes[column]);
+                       tree[numLeaves + i].setChildren(thisIndexes[row], thisIndexes[column]);
                         
                         //parents
-                       tree[indexes[row]].setParent(numLeaves + i);
-                       tree[indexes[column]].setParent(numLeaves + i);
+                       tree[thisIndexes[row]].setParent(numLeaves + i);
+                       tree[thisIndexes[column]].setParent(numLeaves + i);
                         
                         //blength = distance / 2;
                         float blength = ((1.0 - largest) / 2);
                         
                         //branchlengths
-                       tree[indexes[row]].setBranchLength(blength - tree[indexes[row]].getLengthToLeaves());
-                       tree[indexes[column]].setBranchLength(blength - tree[indexes[column]].getLengthToLeaves());
+                       tree[thisIndexes[row]].setBranchLength(blength - tree[thisIndexes[row]].getLengthToLeaves());
+                       tree[thisIndexes[column]].setBranchLength(blength - tree[thisIndexes[column]].getLengthToLeaves());
                         
                         //set your length to leaves to your childs length plus branchlength
-                       tree[numLeaves + i].setLengthToLeaves(tree[indexes[row]].getLengthToLeaves() + tree[indexes[row]].getBranchLength());
+                       tree[numLeaves + i].setLengthToLeaves(tree[thisIndexes[row]].getLengthToLeaves() + tree[thisIndexes[row]].getBranchLength());
                         
                         
                         //update index 
-                       indexes[row] = numLeaves+i;
-                       indexes[column] = numLeaves+i;
+                       thisIndexes[row] = numLeaves+i;
+                       thisIndexes[column] = numLeaves+i;
                         
                         //remove highest value that caused the merge.
                         sims[row][column] = -1000.0;
@@ -200,7 +231,7 @@ Tree::Tree(TreeMap* t, vector< vector<double> >& sims) : tmap(t) {
  }
  /*****************************************************************/
  Tree::~Tree() {}
-/*****************************************************************/
+/*****************************************************************
  void Tree::addNamesToCounts(map<string, string> nameMap) {
         try {
                 //ex. seq1      seq2,seq3,se4
@@ -297,15 +328,15 @@ void Tree::addNamesToCounts(map<string, string> nameMap) {
                 m->errorOut(e, "Tree", "addNamesToCounts");
                 exit(1);
         }
-}
+}*/
  /*****************************************************************/
  int Tree::getIndex(string searchName) {
         try {
-               //Treemap knows name, group and index to speed up search
-               // getIndex function will return the vector index or -1 if seq is not found.
-               int index = tmap->getIndex(searchName);
-               return index;
-               
+        map<string, int>::iterator itIndex = indexes.find(searchName);
+        if (itIndex != indexes.end()) {
+            return itIndex->second;
+        }
+               return -1;
         }
         catch(exception& e) {
                 m->errorOut(e, "Tree", "getIndex");
@@ -316,8 +347,10 @@ int Tree::getIndex(string searchName) {
  
  void Tree::setIndex(string searchName, int index) {
         try {
-               //set index in treemap
-               tmap->setIndex(searchName, index);
+               map<string, int>::iterator itIndex = indexes.find(searchName);
+        if (itIndex == indexes.end()) {
+            indexes[searchName] = index;
+        }
         }
         catch(exception& e) {
                 m->errorOut(e, "Tree", "setIndex");
@@ -325,14 +358,8 @@ void Tree::setIndex(string searchName, int index) {
         }
  }
  /*****************************************************************/
-int Tree::assembleTree(map<string, string> nameMap) {
-       try {
-               //save for later
-        names = nameMap;
-
-               //if user has given a names file we want to include that info in the pgroups and pcount info.
-               if(nameMap.size() != 0) {  addNamesToCounts(nameMap);  }
-               
+int Tree::assembleTree() {
+       try {           
                 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
                 for (int i = numLeaves; i < numNodes; i++) {
                         if (m->control_pressed) { return 1; }
@@ -348,66 +375,66 @@ int Tree::assembleTree(map<string, string> nameMap) {
                 exit(1);
         }
  }
-/*****************************************************************
-int Tree::assembleTree(string n) {
-       try {
-               
-               //build the pGroups in non leaf nodes to be used in the parsimony calcs.
-               for (int i = numLeaves; i < numNodes; i++) {
-                       if (m->control_pressed) { return 1; }
-
-                       tree[i].pGroups = (mergeGroups(i));
-                       tree[i].pcount = (mergeGcounts(i));
-               }
-               //float B = clock();
-               //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "Tree", "assembleTree");
-               exit(1);
-       }
-}
  /*****************************************************************/
  //assumes leaf node names are in groups and no names file - used by indicator command
  void Tree::getSubTree(Tree* Ctree, vector<string> Groups) {
         try {
          
          //copy Tree since we are going to destroy it
-        Tree* copy = new Tree(tmap);
+        Tree* copy = new Tree(ct);
          copy->getCopy(Ctree);
-        map<string, string> empty;
-        copy->assembleTree(empty);
+        copy->assembleTree();
          
                 //we want to select some of the leaf nodes to create the output tree
                 //go through the input Tree starting at parents of leaves
+        //initialize groupNodeInfo
+        vector<string> namesOfGroups = ct->getNamesOfGroups();
+               for (int i = 0; i < namesOfGroups.size(); i++) {  groupNodeInfo[namesOfGroups[i]].resize(0);  }
+               
+               //initialize tree with correct number of nodes, name and group info.
                 for (int i = 0; i < numNodes; i++) {
-                       
                         //initialize leaf nodes
                         if (i <= (numLeaves-1)) {
                                 tree[i].setName(Groups[i]);
                                 
                                 //save group info
-                               string group = tmap->getGroup(Groups[i]);
-                               vector<string> tempGroups; tempGroups.push_back(group);
-                               tree[i].setGroup(tempGroups);
-                               groupNodeInfo[group].push_back(i); 
-                               
-                               //set pcount and pGroup for groupname to 1.
-                               tree[i].pcount[group] = 1;
-                               tree[i].pGroups[group] = 1;
-                               
-                               //Treemap knows name, group and index to speed up search
-                               tmap->setIndex(Groups[i], i);
-                               
-                               //intialize non leaf nodes
+                int maxPars = 1;
+                               vector<string> group;
+                vector<int> counts = ct->getGroupCounts(Groups[i]);
+                               for (int j = 0; j < namesOfGroups.size(); j++) {  
+                    if (counts[j] != 0) { //you have seqs from this group
+                        groupNodeInfo[namesOfGroups[j]].push_back(i);
+                        group.push_back(namesOfGroups[j]);
+                        tree[i].pGroups[namesOfGroups[j]] = counts[j];
+                        tree[i].pcount[namesOfGroups[j]] = counts[j];
+                        //keep highest group
+                                               if(counts[j] > maxPars){ maxPars = counts[j]; }
+                    }  
+                }
+                               tree[i].setGroup(group);
+                               setIndex(Groups[i], i);
+                
+                if (maxPars > 1) { //then we have some more dominant groups
+                                       //erase all the groups that are less than maxPars because you found a more dominant group.
+                                       for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
+                                               if(it->second < maxPars){
+                                                       tree[i].pGroups.erase(it++);
+                                               }else { it++; }
+                                       }
+                                       //set one remaining groups to 1
+                                       for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
+                                               tree[i].pGroups[it->first] = 1;
+                                       }
+                               }//end if
+                
+                //intialize non leaf nodes
                         }else if (i > (numLeaves-1)) {
                                 tree[i].setName("");
                                 vector<string> tempGroups;
                                 tree[i].setGroup(tempGroups);
                         }
                 }
-               
+
                 set<int> removedLeaves;
                 for (int i = 0; i < copy->getNumLeaves(); i++) {
                         
@@ -534,7 +561,7 @@ void Tree::getSubTree(Tree* Ctree, vector<string> Groups) {
                 exit(1);
         }
  }
-/*****************************************************************/
+/*****************************************************************
  //assumes nameMap contains unique names as key or is empty. 
  //assumes numLeaves defined in tree constructor equals size of seqsToInclude and seqsToInclude only contains unique seqs.
  int Tree::getSubTree(Tree* copy, vector<string> seqsToInclude, map<string, string> nameMap) {
@@ -578,7 +605,7 @@ int Tree::populateNewTree(vector<Node>& oldtree, int node, int& index) {
                         
                         return (index++);
                 }else { //you are a leaf
-                       int indexInNewTree = tmap->getIndex(oldtree[node].getName());
+                       int indexInNewTree = getIndex(oldtree[node].getName());
                         return indexInNewTree;
                 }
         }
@@ -588,7 +615,7 @@ int Tree::populateNewTree(vector<Node>& oldtree, int node, int& index) {
         }
  }
  /*****************************************************************/
-void Tree::getCopy(Tree* copy, map<string, string> nameMap) {
+void Tree::getCopy(Tree* copy, bool subsample) {
         try {
          
                 //for each node in the tree copy its info
@@ -602,8 +629,6 @@ void Tree::getCopy(Tree* copy, map<string, string> nameMap) {
                         //copy children
                         tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
          }
-               
-        if (nameMap.size() != 0) {  addNamesToCounts(nameMap);  }
          
          //build the pGroups in non leaf nodes to be used in the parsimony calcs.
                 for (int i = numLeaves; i < numNodes; i++) {
@@ -640,8 +665,8 @@ void Tree::getCopy(Tree* copy) {
                         tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
                 
                         //copy index in node and tmap
+            setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
                         tree[i].setIndex(copy->tree[i].getIndex());
-                       setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
                         
                         //copy pGroups
                         tree[i].pGroups = copy->tree[i].pGroups;
@@ -805,8 +830,8 @@ void Tree::randomLabels(vector<string> g) {
         try {
         
                 //initialize groupNodeInfo
-               for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
-                       groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0);
+               for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) {
+                       groupNodeInfo[(ct->getNamesOfGroups())[i]].resize(0);
                 }
                 
                 for(int i = 0; i < numLeaves; i++){
@@ -868,23 +893,20 @@ void Tree::randomBlengths()  {
  /*************************************************************************************************/
  void Tree::assembleRandomUnifracTree(vector<string> g) {
         randomLabels(g);
-    map<string, string> empty;
-       assembleTree(empty);
+       assembleTree();
  }
  /*************************************************************************************************/
  void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
         vector<string> temp; temp.push_back(groupA); temp.push_back(groupB);
         randomLabels(temp);
-    map<string, string> empty;
-       assembleTree(empty);
+       assembleTree();
  }
  
  /*************************************************************************************************/
  //for now it's just random topology but may become random labels as well later that why this is such a simple function now...
  void Tree::assembleRandomTree() {
         randomTopology();
-    map<string, string> empty;
-       assembleTree(empty);
+       assembleTree();
  }
  /**************************************************************************************************/
  
@@ -1103,16 +1125,16 @@ void Tree::printBranch(int node, ostream& out, string mode) {
                                 }
                         }
                 }else { //you are a leaf
-                       string leafGroup = tmap->getGroup(tree[node].getName());
+                       vector<string> leafGroup = ct->getGroups(tree[node].getName());
                         
                         if (mode == "branch") {
-                               out << leafGroup; 
+                               out << leafGroup[0]; 
                                 //if there is a branch length then print it
                                 if (tree[node].getBranchLength() != -1) {
                                         out << ":" << tree[node].getBranchLength();
                                 }
                         }else if (mode == "boot") {
-                               out << leafGroup; 
+                               out << leafGroup[0]; 
                                 //if there is a label then print it
                                 if (tree[node].getLabel() != -1) {
                                         out << tree[node].getLabel();
@@ -1166,16 +1188,16 @@ void Tree::printBranch(int node, ostream& out, string mode, vector<Node>& theseN
                                 }
                         }
                 }else { //you are a leaf
-                       string leafGroup = tmap->getGroup(theseNodes[node].getName());
+                       vector<string> leafGroup = ct->getGroups(theseNodes[node].getName());
                         
                         if (mode == "branch") {
-                               out << leafGroup; 
+                               out << leafGroup[0]; 
                                 //if there is a branch length then print it
                                 if (theseNodes[node].getBranchLength() != -1) {
                                         out << ":" << theseNodes[node].getBranchLength();
                                 }
                         }else if (mode == "boot") {
-                               out << leafGroup; 
+                               out << leafGroup[0]; 
                                 //if there is a label then print it
                                 if (theseNodes[node].getLabel() != -1) {
                                         out << theseNodes[node].getLabel();
diff --git a/tree.h b/tree.h

index 03da5f6841d9f30cef2f946d6c9ecbc1d5c58cc0..88e49c0d0372c16371e7c9f8254a0717ab93ae12 100644 (file)
--- a/tree.h
+++ b/tree.h
@@ -11,22 +11,22 @@
   */
  
  #include "treenode.h"
-#include "treemap.h"
+#include "counttable.h"
  /* This class represents the treefile. */
  
  class Tree {
  public: 
         Tree(string);  //do not use tree generated by this constructor its just to extract the treenames, its a chicken before the egg thing that needs to be revisited.
-       Tree(int, TreeMap*); 
-       Tree(TreeMap*);         //to generate a tree from a file
-    Tree(TreeMap*, vector< vector<double> >&); //create tree from sim matrix
+       Tree(int, CountTable*); 
+       Tree(CountTable*);              //to generate a tree from a file
+    Tree(CountTable*, vector< vector<double> >&); //create tree from sim matrix
         ~Tree();
         
-    TreeMap* getTreeMap() { return tmap; }
+    CountTable* getCountTable() { return ct; }
         void getCopy(Tree*);  //makes tree a copy of the one passed in.
-    void getCopy(Tree* copy, map<string, string>); //makes a copy of the tree structure passed in, (just parents, children and br). Used with the Tree(TreeMap*) constructor. Assumes the tmap already has set seqs groups you want.  Used by subsample to reassign seqs you don't want included to group "doNotIncludeMe".
+    void getCopy(Tree* copy, bool); //makes a copy of the tree structure passed in, (just parents, children and br). Used with the Tree(TreeMap*) constructor. Assumes the tmap already has set seqs groups you want.  Used by subsample to reassign seqs you don't want included to group "doNotIncludeMe".
         void getSubTree(Tree*, vector<string>);  //makes tree a that contains only the names passed in.
-    int getSubTree(Tree* originalToCopy, vector<string> seqToInclude, map<string, string> nameMap);  //used with (int, TreeMap) constructor. SeqsToInclude contains subsample wanted - assumes these are unique seqs and size of vector=numLeaves passed into constructor. nameMap is unique -> redundantList can be empty if no namesfile was provided. 
+    //int getSubTree(Tree* originalToCopy, vector<string> seqToInclude, map<string, string> nameMap);  //used with (int, TreeMap) constructor. SeqsToInclude contains subsample wanted - assumes these are unique seqs and size of vector=numLeaves passed into constructor. nameMap is unique -> redundantList can be empty if no namesfile was provided. 
      
         void assembleRandomTree();
         void assembleRandomUnifracTree(vector<string>);
@@ -45,21 +45,22 @@ public:
         int findRoot();  //return index of root node
         
         //this function takes the leaf info and populates the non leaf nodes
-       int assembleTree(map<string, string>);  
+       int assembleTree();     
         
         vector<Node> tree;              //the first n nodes are the leaves, where n is the number of sequences.
         map< string, vector<int> > groupNodeInfo;       //maps group to indexes of leaf nodes with that group, different groups may contain same node because of names file.
                         
  private:
-       TreeMap* tmap;
+       CountTable* ct;
         int numNodes, numLeaves;
         ofstream out;
         string filename;
         
-    map<string, string> names;
+    //map<string, string> names;
         map<string, int>::iterator it, it2;
         map<string, int> mergeGroups(int);  //returns a map with a groupname and the number of times that group was seen in the children
         map<string,int> mergeGcounts(int);
+    map<string, int> indexes; //maps seqName -> index in tree vector
         
         void addNamesToCounts(map<string, string>);
         void randomTopology();
diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp

index 6633e5160559297c23a542c373ed1b1cc9a3ccdc..fb4887c73da88c0c631895e82f05090e97645761 100644 (file)
--- a/treegroupscommand.cpp
+++ b/treegroupscommand.cpp
@@ -287,7 +287,7 @@ TreeGroupCommand::~TreeGroupCommand(){
         if (abort == false) {
                 if (format == "sharedfile") {  delete input; }
                 else { delete list; }
-               delete tmap;  
+               delete ct;  
         }
         
  }
@@ -400,8 +400,16 @@ int TreeGroupCommand::execute(){
                         m->runParse = false;
                         
                         //create treemap class from groupmap for tree class to use
-                       tmap = new TreeMap();
-                       tmap->makeSim(m->getAllGroups());
+                       ct = new CountTable();
+            set<string> nameMap;
+            map<string, string> groupMap;
+            set<string> gps;
+            for (int i = 0; i < m->getAllGroups().size(); i++) { 
+                nameMap.insert(m->getAllGroups()[i]); 
+                gps.insert(m->getAllGroups()[i]); 
+                groupMap[m->getAllGroups()[i]] = m->getAllGroups()[i];
+            }
+            ct->createTable(nameMap, groupMap, gps);
                         
                         //clear globaldatas old tree names if any
                         m->Treenames.clear();
@@ -429,22 +437,26 @@ int TreeGroupCommand::execute(){
                                 nameMap = new NameAssignment(namefile);
                                 nameMap->readMap();
                         }
-                       else{
-                               nameMap = NULL;
-                       }
+                       else{ nameMap = NULL; }
         
                         readMatrix->read(nameMap);
                         list = readMatrix->getListVector();
                         SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix();
  
                         //make treemap
-                       tmap = new TreeMap();
-                       
-                       if (m->control_pressed) { return 0; }
-                       
-                       tmap->makeSim(list);
+                       ct = new CountTable();
+            set<string> nameMap;
+            map<string, string> groupMap;
+            set<string> gps;
+            for (int i = 0; i < list->getNumBins(); i++) {
+                string bin = list->get(i);
+                nameMap.insert(bin); 
+                gps.insert(bin); 
+                groupMap[bin] = bin;
+            }
+            ct->createTable(nameMap, groupMap, gps);
                         
-                       vector<string> namesGroups = tmap->getNamesOfGroups();
+                       vector<string> namesGroups = ct->getNamesOfGroups();
                         m->setGroups(namesGroups);
                 
                         //clear globaldatas old tree names if any
@@ -505,13 +517,12 @@ int TreeGroupCommand::execute(){
  Tree* TreeGroupCommand::createTree(vector< vector<double> >& simMatrix){
         try {
                 //create tree
-               t = new Tree(tmap, simMatrix);
+               t = new Tree(ct, simMatrix);
          
          if (m->control_pressed) { delete t; t = NULL; return t; }
                 
          //assemble tree
-        map<string, string> empty;
-               t->assembleTree(empty);
+               t->assembleTree();
  
                 return t;
         }
diff --git a/treegroupscommand.h b/treegroupscommand.h

index 6790afb73984377c1aba8f6488a3cf1c2e544280..7342b36584ce76b3b22a96d0e3ba885cb2ab172a 100644 (file)
--- a/treegroupscommand.h
+++ b/treegroupscommand.h
@@ -15,7 +15,7 @@
  #include "groupmap.h"
  #include "validcalculator.h"
  #include "tree.h"
-#include "treemap.h"
+#include "counttable.h"
  #include "readmatrix.hpp"
  #include "readcolumn.h"
  #include "readphylip.h"
@@ -104,7 +104,7 @@ private:
         
         NameAssignment* nameMap;
         ListVector* list;
-       TreeMap* tmap;
+       CountTable* ct;
         Tree* t;
      InputData* input;
         vector<Calculator*> treeCalculators;
diff --git a/treemap.cpp b/treemap.cpp

index 7b9fd32a43cbfa8b2ef213889c3a1d4add7661a8..06d05d4fbad15cbc111609d42e5d825b3bcc0f88 100644 (file)
--- a/treemap.cpp
+++ b/treemap.cpp
@@ -247,7 +247,7 @@ string TreeMap::getGroup(string sequenceName) {
         }
                 
  }
-/************************************************************/
+/************************************************************
  void TreeMap::setIndex(string seq, int index) {
         it = treemap.find(seq);
         if (it != treemap.end()) { //sequence name was in group file
@@ -257,7 +257,7 @@ void TreeMap::setIndex(string seq, int index) {
                 treemap[seq].groupname = "not found";
         }
  }
-/************************************************************/
+/************************************************************
  int TreeMap::getIndex(string seq) {
         
         it = treemap.find(seq);
diff --git a/treemap.h b/treemap.h

index 57822e02d4b8c3a57f7d0ddf26947c470bf9400b..7ffd1e7003154d672d544aa8a993f371a7f9a28a 100644 (file)
--- a/treemap.h
+++ b/treemap.h
@@ -29,8 +29,8 @@ public:
      int readMap(string);
         int getNumGroups();
         int getNumSeqs();
-       void setIndex(string, int);  //sequencename, index
-       int getIndex(string);           //returns vector index of sequence
+       //void setIndex(string, int);  //sequencename, index
+       //int getIndex(string);         //returns vector index of sequence
         bool isValidGroup(string);  //return true if string is a valid group
         void removeSeq(string);  //removes a sequence, this is to accomadate trees that do not contain all the seqs in your groupfile
         string getGroup(string);
diff --git a/treereader.cpp b/treereader.cpp

index b385d214fbf4043d5b32b58ec23673b3e9136438..0e25f12a39b2a5341ad2ba3bf7255bb8b6aa7edc 100644 (file)
--- a/treereader.cpp
+++ b/treereader.cpp
@@ -8,12 +8,23 @@
  
  #include "treereader.h"
  #include "readtree.h"
+#include "groupmap.h"
  
  /***********************************************************************/
-
-TreeReader::TreeReader(string tf) : treefile(tf)  { 
+TreeReader::TreeReader(string tf, string cf) : treefile(tf), countfile(cf)  { 
      try {
          m = MothurOut::getInstance();
+        ct = new CountTable();
+        ct->readTable(cf);
+        
+        //if no groupinfo in count file we need to add it
+        if (!ct->hasGroupInfo()) {
+            ct->addGroup("Group1");
+            vector<string> namesOfSeqs = ct->getNamesOfSeqs();
+            for (int i = 0; i < namesOfSeqs.size(); i++) { 
+                ct->setAbund(namesOfSeqs[i], "Group1", ct->getNumSeqs(namesOfSeqs[i]));
+            }
+        }
          namefile = "";
          groupfile = "";
          readTrees();
@@ -24,22 +35,32 @@ TreeReader::TreeReader(string tf) : treefile(tf)  {
         }
  }
  /***********************************************************************/
-
-TreeReader::TreeReader(string tf, string gf) : treefile(tf),  groupfile(gf)  { 
-    try {
-        m = MothurOut::getInstance();
-        namefile = "";
-        readTrees();
-    }
-       catch(exception& e) {
-               m->errorOut(e, "TreeReader", "TreeReader");
-               exit(1);
-       }
-}
-/***********************************************************************/
  TreeReader::TreeReader(string tf, string gf, string nf) : treefile(tf),  groupfile(gf), namefile(nf)  { 
      try {
          m = MothurOut::getInstance();
+        countfile = "";
+        ct = new CountTable();
+        if (namefile != "") { ct->createTable(namefile, groupfile, true); }
+        else {
+            Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
+            set<string> nameMap;
+            map<string, string> groupMap;
+            set<string> gps;
+            for (int i = 0; i < m->Treenames.size(); i++) { nameMap.insert(m->Treenames[i]);  }
+            if (groupfile == "") { gps.insert("Group1"); for (int i = 0; i < m->Treenames.size(); i++) { groupMap[m->Treenames[i]] = "Group1"; } }
+            else {
+                GroupMap g(groupfile); 
+                g.readMap();
+                vector<string> seqs = g.getNamesSeqs();
+                for (int i = 0; i < seqs.size(); i++) {  
+                    string group = g.getGroup(seqs[i]);
+                    groupMap[seqs[i]] = group;
+                    gps.insert(group);
+                }
+            }
+            ct->createTable(nameMap, groupMap, gps);
+        }
+
          readTrees();
      }
         catch(exception& e) {
@@ -51,22 +72,15 @@ TreeReader::TreeReader(string tf, string gf, string nf) : treefile(tf),  groupfi
  bool TreeReader::readTrees()  { 
      try {
          
-        tmap = new TreeMap();
-        if (groupfile != "") {      tmap->readMap(groupfile);        }
-               else{ //fake out by putting everyone in one group
-                       Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
-                       for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
-               }
-               
-        int numUniquesInName = 0;
-               if (namefile != "") { numUniquesInName = readNamesFile(); }
+        int numUniquesInName = ct->getNumUniqueSeqs();
+               //if (namefile != "") { numUniquesInName = readNamesFile(); }
                 
                 ReadTree* read = new ReadNewickTree(treefile);
-               int readOk = read->read(tmap); 
+               int readOk = read->read(ct); 
                 
                 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine();  delete read; m->control_pressed=true; return 0; }
                 
-               read->AssembleTrees(names);
+               read->AssembleTrees();
                 trees = read->getTrees();
                 delete read;
          
@@ -74,18 +88,19 @@ bool TreeReader::readTrees()  {
                 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
                 int numNamesInTree;
                 if (namefile != "")  {  
-                       if (numUniquesInName == m->Treenames.size()) {  numNamesInTree = nameMap.size();  }
+                       if (numUniquesInName == m->Treenames.size()) {  numNamesInTree = ct->getNumSeqs();  }
                         else {   numNamesInTree = m->Treenames.size();  }
                 }else {  numNamesInTree = m->Treenames.size();  }
                 
                 
                 //output any names that are in group file but not in tree
-               if (numNamesInTree < tmap->getNumSeqs()) {
-                       for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
+               if (numNamesInTree < ct->getNumSeqs()) {
+            vector<string> namesSeqsCt = ct->getNamesOfSeqs();
+                       for (int i = 0; i < namesSeqsCt.size(); i++) {
                                 //is that name in the tree?
                                 int count = 0;
                                 for (int j = 0; j < m->Treenames.size(); j++) {
-                                       if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
+                                       if (namesSeqsCt[i] == m->Treenames[j]) { break; } //found it
                                         count++;
                                 }
                                 
@@ -93,14 +108,8 @@ bool TreeReader::readTrees()  {
                                 
                                 //then you did not find it so report it 
                                 if (count == m->Treenames.size()) { 
-                                       //if it is in your namefile then don't remove
-                                       map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
-                                       
-                                       if (it == nameMap.end()) {
-                                               m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
-                                               tmap->removeSeq(tmap->namesOfSeqs[i]);
-                                               i--; //need this because removeSeq removes name from namesOfSeqs
-                                       }
+                    m->mothurOut(namesSeqsCt[i] + " is in your name or group file and not in your tree. It will be disregarded."); m->mothurOutEndLine();
+                    ct->remove(namesSeqsCt[i]);
                                 }
                         }
                 }
@@ -112,47 +121,6 @@ bool TreeReader::readTrees()  {
                 exit(1);
         }
  }
-/*****************************************************************/
-int TreeReader::readNamesFile() {
-       try {
-               nameMap.clear();
-        names.clear();
-               int numUniquesInName = 0;
-               
-               ifstream in;
-               m->openInputFile(namefile, in);
-               
-               string first, second;
-               map<string, string>::iterator itNames;
-               
-               while(!in.eof()) {
-                       in >> first >> second; m->gobble(in);
-                       
-                       numUniquesInName++;
-                       
-                       itNames = nameMap.find(first);
-                       if (itNames == nameMap.end()) {  
-                               names[first] = second; 
-                               
-                               //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
-                               vector<string> dupNames;
-                               m->splitAtComma(second, dupNames);
-                               
-                               for (int i = 0; i < dupNames.size(); i++) {     
-                                       nameMap[dupNames[i]] = first; 
-                                       if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } 
-                               }
-                       }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); nameMap.clear(); names.clear(); namefile = ""; return 1; }                    
-               }
-               in.close();
-               
-               return numUniquesInName;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TreeReader", "readNamesFile");
-               exit(1);
-       }
-}
  /***********************************************************************/
  
  
diff --git a/treereader.h b/treereader.h

index fb9c791959f96cd04ce95fae0385f02c9740f546..ac24eb0161336e3be68ad17028a2b6a28eab8489 100644 (file)
--- a/treereader.h
+++ b/treereader.h
@@ -11,29 +11,26 @@
  
  #include "mothurout.h"
  #include "tree.h"
+#include "counttable.h"
  
  class TreeReader {
      
  public:
      
-    TreeReader(string tf);
-       TreeReader(string tf, string gf);
+    TreeReader(string tf, string cf);
      TreeReader(string tf, string gf, string nf);
         ~TreeReader() {}        
      
      vector<Tree*> getTrees()            { return trees;     }
-    map<string, string> getNames()      { return nameMap;   } //dups -> unique
-    map<string, string> getNameMap()    { return names;     } //unique -> dups list
-    
      
  private:
      MothurOut* m;
         vector<Tree*> trees;
-    TreeMap* tmap;
-    map<string, string> nameMap; //dupName -> uniqueName
-    map<string, string> names;
+    CountTable* ct;
+    //map<string, string> nameMap; //dupName -> uniqueName
+   // map<string, string> names;
      
-    string treefile, groupfile, namefile;
+    string treefile, groupfile, namefile, countfile;
      
      bool readTrees();
      int readNamesFile();
diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp

index 88ed32bd79bfcdc513ae4c848c775a95e762cd57..a90e7adc5c8cb870714c716b02bec5eb6bfddd73 100644 (file)
--- a/trimseqscommand.cpp
+++ b/trimseqscommand.cpp
@@ -1255,7 +1255,9 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                 while(!inOligos.eof()){
  
                         inOligos >> type; 
-                                       
+            
+                       if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
+            
                         if(type[0] == '#'){
                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
                                 m->gobble(inOligos);
@@ -1266,6 +1268,8 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
                                 
                                 inOligos >> oligo;
+                
+                if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
                                 
                                 for(int i=0;i<oligo.length();i++){
                                         oligo[i] = toupper(oligo[i]);
@@ -1287,6 +1291,8 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                         map<string, int>::iterator itPrime = primers.find(oligo);
                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
                                         
+                    if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
+                    
                                         primers[oligo]=indexPrimer; indexPrimer++;              
                                         primerNameVector.push_back(group);
                                 }
@@ -1311,15 +1317,22 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                         
                      //then this is illumina data with 4 columns
                      if (temp != "") {  
-                        string reverseBarcode = reverseOligo(group); //reverse barcode
-                        group = temp;
                          
+                        for(int i=0;i<group.length();i++){
+                            group[i] = toupper(group[i]);
+                            if(group[i] == 'U')        {       group[i] = 'T'; }
+                        }
+                        
+                        if (m->debug) { m->mothurOut("[DEBUG]: reading reverse " + group + ", and group = " + temp + ".\n"); }
+                        
+                        string reverseBarcode = reverseOligo(group); //reverse barcode
                          //check for repeat barcodes
                          map<string, int>::iterator itBar = rbarcodes.find(reverseBarcode);
-                        if (itBar != rbarcodes.end()) { m->mothurOut("barcode " + reverseBarcode + " is in your oligos file already."); m->mothurOutEndLine();  }
-                                               
+                        if (itBar != rbarcodes.end()) { m->mothurOut("reverse barcode " + group + " is in your oligos file already."); m->mothurOutEndLine();  }
+                        
+                        group = temp;
                          rbarcodes[reverseBarcode]=indexBarcode; 
-                    }
+                    }else { if (m->debug) { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); } }
                          
                                         //check for repeat barcodes
                                         map<string, int>::iterator itBar = barcodes.find(oligo);
@@ -1438,7 +1451,7 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                 break;
                         }
                 }
-               
+
                 if (allBlank) {
                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
                         allFiles = false;
diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp

index 0749cb79ff41e03c55ca2a9defeb1b70e44645d8..edc4bbc027edf761568919163ca8f696096c7900 100644 (file)
--- a/unifracunweightedcommand.cpp
+++ b/unifracunweightedcommand.cpp
@@ -16,8 +16,9 @@
  vector<string> UnifracUnweightedCommand::setParameters(){      
         try {
                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
@@ -42,7 +43,7 @@ vector<string> UnifracUnweightedCommand::setParameters(){
  string UnifracUnweightedCommand::getHelpString(){      
         try {
                 string helpString = "";
-               helpString += "The unifrac.unweighted command parameters are tree, group, name, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
+               helpString += "The unifrac.unweighted command parameters are tree, group, name, count, groups, iters, distance, processors, root and random.  tree parameter is required unless you have valid current tree file.\n";
                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n";
                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
@@ -165,6 +166,14 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                 }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                         }
                         
              //check for required parameters
@@ -186,6 +195,19 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
                         if (namefile == "not open") { namefile = ""; abort = true; }
                         else if (namefile == "not found") { namefile = ""; }
                         else { m->setNameFile(namefile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { countfile = ""; abort = true; }
+                       else if (countfile == "not found") { countfile = "";  } 
+                       else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+                       
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
                         
                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
                         
@@ -233,7 +255,13 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
                         consensus = m->isTrue(temp);
              
                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
-                       if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } 
+            if (countfile == "") { if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } }
+            else {  
+                CountTable testCt; 
+                if ((!testCt.testGroups(countfile)) && (subsample)) {
+                    m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;  
+                }
+            }
              if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
              if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
  
@@ -246,10 +274,12 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
                                 m->setGroups(Groups);
                         }
                         
-                       if (namefile == "") {
-                               vector<string> files; files.push_back(treefile);
-                               parser.getNameFile(files);
-                       }
+                       if (countfile=="") {
+                if (namefile == "") {
+                    vector<string> files; files.push_back(treefile);
+                    parser.getNameFile(files);
+                } 
+            }
                 }
                 
         }
@@ -267,12 +297,12 @@ int UnifracUnweightedCommand::execute() {
                 
                 m->setTreeFile(treefile);
                 
-               TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+               TreeReader* reader;
+        if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
+        else { reader = new TreeReader(treefile, countfile); }
          T = reader->getTrees();
-        tmap = T[0]->getTreeMap();
-        map<string, string> nameMap = reader->getNames();
-        map<string, string> unique2Dup = reader->getNameMap();
-        delete reader; 
+        ct = T[0]->getCountTable();
+        delete reader;
          
                 sumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("uwsummary");
                 outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
@@ -280,7 +310,7 @@ int UnifracUnweightedCommand::execute() {
                 
                 SharedUtil util;
                 Groups = m->getGroups();
-               vector<string> namesGroups = tmap->getNamesOfGroups();
+               vector<string> namesGroups = ct->getNamesOfGroups();
                 util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted");        //sets the groups the user wants to analyze
                 
                 Unweighted unweighted(includeRoot);
@@ -292,10 +322,9 @@ int UnifracUnweightedCommand::execute() {
              //user has not set size, set size = smallest samples size
              if (subsampleSize == -1) { 
                  vector<string> temp; temp.push_back(Groups[0]);
-                subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
+                subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
                  for (int i = 1; i < Groups.size(); i++) {
-                    temp.clear(); temp.push_back(Groups[i]);
-                    int thisSize = (tmap->getNamesSeqs(temp)).size();
+                    int thisSize = ct->getGroupCount(Groups[i]);
                      if (thisSize < subsampleSize) {    subsampleSize = thisSize;       }
                  }
                  m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
@@ -303,9 +332,7 @@ int UnifracUnweightedCommand::execute() {
                  vector<string> newGroups = Groups;
                  Groups.clear();
                  for (int i = 0; i < newGroups.size(); i++) {
-                    vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
-                    vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
-                    int thisSize = thisGroupsSeqs.size();
+                    int thisSize = ct->getGroupCount(newGroups[i]);
                      
                      if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]);        }
                      else {   m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
@@ -330,7 +357,7 @@ int UnifracUnweightedCommand::execute() {
          
                 //get pscores for users trees
                 for (int i = 0; i < T.size(); i++) {
-                       if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
+                       if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } return 0; }
                         
              counter = 0;
                         
@@ -351,7 +378,7 @@ int UnifracUnweightedCommand::execute() {
  
                         userData = unweighted.getValues(T[i], processors, outputDir);  //userData[0] = unweightedscore
                 
-                       if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close();  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  }return 0; }
+                       if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close();  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  }return 0; }
                         
                         //output scores for each combination
                         for(int k = 0; k < numComp; k++) {
@@ -366,7 +393,7 @@ int UnifracUnweightedCommand::execute() {
              
              if (random) {  runRandomCalcs(T[i], userData);  }
                         
-                       if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0;  }
+                       if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } return 0;  }
              
              int startSubsample = time(NULL);
              
@@ -376,32 +403,28 @@ int UnifracUnweightedCommand::execute() {
                  if (m->control_pressed) { break; }
                  
                  //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
-                TreeMap* newTmap = new TreeMap();
-                //newTmap->getCopy(*tmap);
-                
-                //SubSample sample;
-                //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
-                
+                CountTable* newCt = new CountTable();
+                 
                  //uses method of setting groups to doNotIncludeMe
                  SubSample sample;
-                Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
+                Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
                  
                  //call new weighted function
                  vector<double> iterData; iterData.resize(numComp,0);
                  Unweighted thisUnweighted(includeRoot);
                  iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
-                
+        
                  //save data to make ave dist, std dist
                  calcDistsTotals.push_back(iterData);
                  
-                delete newTmap;
+                delete newCt;
                  delete subSampleTree;
                  
                  if((thisIter+1) % 100 == 0){   m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
              }
-            m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine();
+            if (subsample) { m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine(); }
              
-            if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0;  }
+            if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output;  } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } return 0;  }
  
              if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
              if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
@@ -420,7 +443,7 @@ int UnifracUnweightedCommand::execute() {
                 
  
                 outSum.close();
-               delete tmap; 
+               delete ct; 
                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
                 
                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  }     return 0; }
@@ -472,7 +495,7 @@ int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& di
          //find standard deviation
          vector<double> stdDev; stdDev.resize(numComp, 0);
          
-        for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+        for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
              for (int j = 0; j < dists[thisIter].size(); j++) {
                  stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
              }
@@ -578,8 +601,16 @@ int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists,
          m->runParse = false;
          
          //create treemap class from groupmap for tree class to use
-        TreeMap newTmap;
-        newTmap.makeSim(m->getGroups());
+        CountTable newCt;
+        set<string> nameMap;
+        map<string, string> groupMap;
+        set<string> gps;
+        for (int i = 0; i < m->getGroups().size(); i++) { 
+            nameMap.insert(m->getGroups()[i]); 
+            gps.insert(m->getGroups()[i]); 
+            groupMap[m->getGroups()[i]] = m->getGroups()[i];
+        }
+        newCt.createTable(nameMap, groupMap, gps);
          
          //clear  old tree names if any
          m->Treenames.clear();
@@ -587,7 +618,7 @@ int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists,
          //fills globaldatas tree names
          m->Treenames = m->getGroups();
          
-        vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
+        vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
          
          if (m->control_pressed) { return 0; }
          
@@ -613,7 +644,7 @@ int UnifracUnweightedCommand::getConsensusTrees(vector< vector<double> >& dists,
  }
  /**************************************************************************************************/
  
-vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
+vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
         try {
          
          vector<Tree*> trees;
@@ -647,9 +678,8 @@ vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dis
                         }
              
              //create tree
-            Tree* tempTree = new Tree(&mytmap, sims);
-            map<string, string> empty;
-            tempTree->assembleTree(empty);
+            Tree* tempTree = new Tree(&myct, sims);
+            tempTree->assembleTree();
              
              trees.push_back(tempTree);
              
diff --git a/unifracunweightedcommand.h b/unifracunweightedcommand.h

index 15c3b9684b01fa58403276b80864684f99e8d189..107083fa41a3dc80f922a74ce6c95c18201511a2 100644 (file)
--- a/unifracunweightedcommand.h
+++ b/unifracunweightedcommand.h
@@ -12,7 +12,7 @@
  
  #include "command.hpp"
  #include "unweighted.h"
-#include "treemap.h"
+#include "counttable.h"
  #include "sharedutilities.h"
  #include "fileoutput.h"
  #include "readtree.h"
@@ -39,7 +39,7 @@ class UnifracUnweightedCommand : public Command {
         private:
                 FileOutput* output;
                 vector<Tree*> T;           //user trees
-               TreeMap* tmap;
+               CountTable* ct;
                 string sumFile, allGroups;
                 vector<string> groupComb; // AB. AC, BC...
                 int iters, numGroups, numComp, counter, processors, subsampleSize, subsampleIters;
@@ -50,7 +50,7 @@ class UnifracUnweightedCommand : public Command {
                 vector< map<float, float> > rCumul;  //map <unweighted score, cumulative percentage of number of random trees with that score or higher.> -vector entry for each combination.
                 
                 bool abort, phylip, random, includeRoot, consensus, subsample;
-               string groups, itersString, outputDir, outputForm, treefile, groupfile, namefile;
+               string groups, itersString, outputDir, outputForm, treefile, groupfile, namefile, countfile;
                 vector<string> Groups, outputNames; //holds groups to be used
  
                 ofstream outSum, out;
@@ -60,7 +60,7 @@ class UnifracUnweightedCommand : public Command {
                 void printUWSummaryFile(int);
                 void printUnweightedFile();
                 void createPhylipFile(int);
-        vector<Tree*> buildTrees(vector< vector<double> >&, int, TreeMap&);
+        vector<Tree*> buildTrees(vector< vector<double> >&, int, CountTable&);
          int getConsensusTrees(vector< vector<double> >&, int);
          int getAverageSTDMatrices(vector< vector<double> >&, int);
                 
diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp

index d1e883382890081495dac72238cd05b3c5ad644e..cbec7490bfe028ea02de0b4a1e024b1ce2d89e1f 100644 (file)
--- a/unifracweightedcommand.cpp
+++ b/unifracweightedcommand.cpp
@@ -16,8 +16,9 @@
  vector<string> UnifracWeightedCommand::setParameters(){        
         try {
                 CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                 CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
@@ -42,7 +43,7 @@ vector<string> UnifracWeightedCommand::setParameters(){
  string UnifracWeightedCommand::getHelpString(){        
         try {
                 string helpString = "";
-               helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root, subsample, consensus and random.  tree parameter is required unless you have valid current tree file.\n";
+               helpString += "The unifrac.weighted command parameters are tree, group, name, count, groups, iters, distance, processors, root, subsample, consensus and random.  tree parameter is required unless you have valid current tree file.\n";
                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
                 helpString += "The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
                 helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
@@ -164,6 +165,14 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                 }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                         }
                         
                         //check for required parameters
@@ -186,6 +195,19 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                         else if (namefile == "not found") { namefile = ""; }
                         else { m->setNameFile(namefile); }
                         
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { countfile = ""; abort = true; }
+                       else if (countfile == "not found") { countfile = "";  } 
+                       else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+                       
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+
                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(treefile);       }
                         
                                                                                                                                         
@@ -233,14 +255,22 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                         consensus = m->isTrue(temp);
              
                         if (subsample && random) {  m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true;  } 
-                       if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } 
+                       if (countfile == "") { if (subsample && (groupfile == "")) {  m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true;  } }
+            else {  
+                CountTable testCt; 
+                if ((!testCt.testGroups(countfile)) && (subsample)) {
+                    m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;  
+                }
+            }
              if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
              if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
              
-                       if (namefile == "") {
-                               vector<string> files; files.push_back(treefile);
-                               parser.getNameFile(files);
-                       }
+                       if (countfile=="") {
+                if (namefile == "") {
+                    vector<string> files; files.push_back(treefile);
+                    parser.getNameFile(files);
+                } 
+            }
                 }
                 
                 
@@ -258,14 +288,14 @@ int UnifracWeightedCommand::execute() {
                 
                 m->setTreeFile(treefile);
                 
-        TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+        TreeReader* reader;
+        if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
+        else { reader = new TreeReader(treefile, countfile); }
          T = reader->getTrees();
-        tmap = T[0]->getTreeMap();
-        map<string, string> nameMap = reader->getNames();
-        map<string, string> unique2Dup = reader->getNameMap();
+        ct = T[0]->getCountTable();
          delete reader;
-    
-        if (m->control_pressed) {  delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
+        
+        if (m->control_pressed) {  delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
                                 
                 sumFile = outputDir + m->getSimpleName(treefile) + getOutputFileNameTag("wsummary");
                 m->openOutputFile(sumFile, outSum);
@@ -274,11 +304,11 @@ int UnifracWeightedCommand::execute() {
          SharedUtil util;
                 string s; //to make work with setgroups
                 Groups = m->getGroups();
-               vector<string> nameGroups = tmap->getNamesOfGroups();
+               vector<string> nameGroups = ct->getNamesOfGroups();
                 util.setGroups(Groups, nameGroups, s, numGroups, "weighted");   //sets the groups the user wants to analyze
                 m->setGroups(Groups);
                 
-        if (m->control_pressed) {  delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
+        if (m->control_pressed) {  delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
          
                 Weighted weighted(includeRoot);
                         
@@ -289,10 +319,9 @@ int UnifracWeightedCommand::execute() {
              //user has not set size, set size = smallest samples size
              if (subsampleSize == -1) { 
                  vector<string> temp; temp.push_back(Groups[0]);
-                subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
+                subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
                  for (int i = 1; i < Groups.size(); i++) {
-                    temp.clear(); temp.push_back(Groups[i]);
-                    int thisSize = (tmap->getNamesSeqs(temp)).size();
+                    int thisSize = ct->getGroupCount(Groups[i]);
                      if (thisSize < subsampleSize) {    subsampleSize = thisSize;       }
                  }
                  m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
@@ -300,12 +329,10 @@ int UnifracWeightedCommand::execute() {
                  vector<string> newGroups = Groups;
                  Groups.clear();
                  for (int i = 0; i < newGroups.size(); i++) {
-                    vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
-                    vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
-                    int thisSize = thisGroupsSeqs.size();
+                    int thisSize = ct->getGroupCount(newGroups[i]);
                      
                      if (thisSize >= subsampleSize) {    Groups.push_back(newGroups[i]);        }
-                    else {  m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
+                    else {   m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
                  } 
                  m->setGroups(Groups);
              }
@@ -321,7 +348,7 @@ int UnifracWeightedCommand::execute() {
          //get weighted scores for users trees
          for (int i = 0; i < T.size(); i++) {
              
-            if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } return 0; }
+            if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0; }
              
              counter = 0;
              rScores.resize(numComp);  //data[0] = weightedscore AB, data[1] = weightedscore AC...
@@ -337,7 +364,7 @@ int UnifracWeightedCommand::execute() {
              } 
              
              userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
-            if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
+            if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } return 0; }
              
              //save users score
              for (int s=0; s<numComp; s++) {
@@ -361,16 +388,12 @@ int UnifracWeightedCommand::execute() {
                  if (m->control_pressed) { break; }
                  
                  //copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
-                TreeMap* newTmap = new TreeMap();
-                //newTmap->getCopy(*tmap);
-                
-                //SubSample sample;
-               //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
+                CountTable* newCt = new CountTable();
                  
                  //uses method of setting groups to doNotIncludeMe
                  SubSample sample;
-                Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
-
+                Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
+               
                  //call new weighted function
                  vector<double> iterData; iterData.resize(numComp,0);
                  Weighted thisWeighted(includeRoot);
@@ -379,20 +402,20 @@ int UnifracWeightedCommand::execute() {
                  //save data to make ave dist, std dist
                  calcDistsTotals.push_back(iterData);
                  
-                delete newTmap;
+                delete newCt;
                  delete subSampleTree;
                  
                  if((thisIter+1) % 100 == 0){   m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine();              }
              }
              
-            if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
+            if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]);  } return 0; }
              
              if (subsample) {  getAverageSTDMatrices(calcDistsTotals, i); }
              if (consensus) {  getConsensusTrees(calcDistsTotals, i);  }
          }
          
                 
-               if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);  } return 0;  }
+               if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);  } return 0;  }
                 
          if (phylip) {  createPhylipFile();             }
      
@@ -400,7 +423,7 @@ int UnifracWeightedCommand::execute() {
                 
                 //clear out users groups
                 m->clearGroups();
-               delete tmap; 
+               delete ct; 
                 for (int i = 0; i < T.size(); i++) { delete T[i]; }
                 
                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
@@ -557,9 +580,17 @@ int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, i
          //used in tree constructor 
          m->runParse = false;
          
-        //create treemap class from groupmap for tree class to use
-        TreeMap newTmap;
-        newTmap.makeSim(m->getGroups());
+        ///create treemap class from groupmap for tree class to use
+        CountTable newCt;
+        set<string> nameMap;
+        map<string, string> groupMap;
+        set<string> gps;
+        for (int i = 0; i < m->getGroups().size(); i++) { 
+            nameMap.insert(m->getGroups()[i]); 
+            gps.insert(m->getGroups()[i]); 
+            groupMap[m->getGroups()[i]] = m->getGroups()[i];
+        }
+        newCt.createTable(nameMap, groupMap, gps);
          
          //clear  old tree names if any
          m->Treenames.clear();
@@ -567,7 +598,7 @@ int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, i
          //fills globaldatas tree names
          m->Treenames = m->getGroups();
          
-        vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
+        vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
          
          if (m->control_pressed) { return 0; }
          
@@ -593,7 +624,7 @@ int UnifracWeightedCommand::getConsensusTrees(vector< vector<double> >& dists, i
  }
  /**************************************************************************************************/
  
-vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
+vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
         try {
          
          vector<Tree*> trees;
@@ -627,9 +658,8 @@ vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists
                         }
  
              //create tree
-            Tree* tempTree = new Tree(&mytmap, sims);
-            map<string, string> empty;
-            tempTree->assembleTree(empty);
+            Tree* tempTree = new Tree(&myct, sims);
+            tempTree->assembleTree();
              
              trees.push_back(tempTree);
              
@@ -682,7 +712,7 @@ int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersS
          
          //get scores for random trees
          for (int j = 0; j < iters; j++) {
-            
+            cout << j << endl; 
  #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
              if(processors == 1){
                  driver(thisTree,  namesOfGroupCombos, 0, namesOfGroupCombos.size(),  rScores);
@@ -693,7 +723,7 @@ int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector<double> usersS
              driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
  #endif
              
-            if (m->control_pressed) { delete tmap;  for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } return 0; }
+            if (m->control_pressed) { delete ct;  for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) {    m->mothurRemove(outputNames[i]);  } return 0; }
              
              //report progress
              //                                 m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();          
@@ -796,7 +826,7 @@ int UnifracWeightedCommand::createProcesses(Tree* t, vector< vector<string> > na
  /**************************************************************************************************/
  int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) { 
   try {
-               Tree* randT = new Tree(tmap);
+               Tree* randT = new Tree(ct);
       
          Weighted weighted(includeRoot);
       
diff --git a/unifracweightedcommand.h b/unifracweightedcommand.h

index 06354ce5cbeed8dc7c1629a482c65d9381b40e04..fead41b0e8cb5dae95eed551ff76427b8c8d85f3 100644 (file)
--- a/unifracweightedcommand.h
+++ b/unifracweightedcommand.h
@@ -12,7 +12,7 @@
  
  #include "command.hpp"
  #include "weighted.h"
-#include "treemap.h"
+#include "counttable.h"
  #include "progress.hpp"
  #include "sharedutilities.h"
  #include "fileoutput.h"
@@ -43,7 +43,7 @@ class UnifracWeightedCommand : public Command {
                         linePair(int i, int j) : start(i), num(j) {}
                 };
                 vector<linePair> lines;
-        TreeMap* tmap;
+        CountTable* ct;
                 FileOutput* output;
                 vector<Tree*> T;           //user trees
                 vector<double> utreeScores;  //user tree unweighted scores
@@ -58,7 +58,7 @@ class UnifracWeightedCommand : public Command {
                 map<float, float>  validScores;  //map contains scores from random
                 
                 bool abort, phylip, random, includeRoot, subsample, consensus;
-               string groups, itersString, outputForm, treefile, groupfile, namefile;
+               string groups, itersString, outputForm, treefile, groupfile, namefile, countfile;
                 vector<string> Groups, outputNames; //holds groups to be used
                 int processors, subsampleSize, subsampleIters;
                 ofstream outSum;
@@ -73,7 +73,7 @@ class UnifracWeightedCommand : public Command {
                 int createProcesses(Tree*,  vector< vector<string> >,  vector< vector<double> >&);
                 int driver(Tree*, vector< vector<string> >, int, int,  vector< vector<double> >&);
          int runRandomCalcs(Tree*, vector<double>);
-        vector<Tree*> buildTrees(vector< vector<double> >&, int, TreeMap&);
+        vector<Tree*> buildTrees(vector< vector<double> >&, int, CountTable&);
          int getConsensusTrees(vector< vector<double> >&, int);
          int getAverageSTDMatrices(vector< vector<double> >&, int);
                 
diff --git a/unweighted.cpp b/unweighted.cpp

index 864a9f8bab16f5d1ea52480c37c141ddc934e199..e95834fb485fb6131fcacdb458e3f6c96b862693 100644 (file)
--- a/unweighted.cpp
+++ b/unweighted.cpp
@@ -16,7 +16,7 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) {
                 processors = p;
                 outputDir = o;
          
-        TreeMap* tmap = t->getTreeMap();
+        CountTable* ct = t->getCountTable();
          
                 //if the users enters no groups then give them the score of all groups
                 int numGroups = m->getNumGroups();
@@ -36,9 +36,9 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) {
                         vector<string> groups;
                         if (numGroups == 0) {
                                 //get score for all users groups
-                               for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
-                                       if ((tmap->getNamesOfGroups())[i] != "xxx") {
-                                               groups.push_back((tmap->getNamesOfGroups())[i]);
+                               for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) {
+                                       if ((ct->getNamesOfGroups())[i] != "xxx") {
+                                               groups.push_back((ct->getNamesOfGroups())[i]);
                                         }
                                 }
                                 namesOfGroupCombos.push_back(groups);
@@ -52,7 +52,7 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) {
  
                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                         if(processors == 1){
-                               data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
+                               data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
                         }else{
                                 int numPairs = namesOfGroupCombos.size();
                                 
@@ -67,11 +67,11 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) {
                 
                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
                                 }
-                               data = createProcesses(t, namesOfGroupCombos, tmap);
+                               data = createProcesses(t, namesOfGroupCombos, ct);
                                 lines.clear();
                         }
                 #else
-                       data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
+                       data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
                 #endif
                 
                 return data;
@@ -83,7 +83,7 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) {
  }
  /**************************************************************************************************/
  
-EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, TreeMap* tmap) {
+EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, CountTable* ct) {
         try {
  #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                 int process = 1;
@@ -100,7 +100,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfG
                                 process++;
                         }else if (pid == 0){
                                 EstOutput myresults;
-                               myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, tmap);
+                               myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, ct);
                                 
                                 if (m->control_pressed) { exit(0); }
                                 
@@ -122,7 +122,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfG
                         }
                 }
                 
-               results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, tmap);
+               results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);
                 
                 //force parent to wait until all the processes are done
                 for (int i=0;i<(processors-1);i++) { 
@@ -167,7 +167,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfG
         }
  }
  /**************************************************************************************************/
-EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, TreeMap* tmap) { 
+EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, CountTable* ct) { 
   try {
         
          
@@ -261,7 +261,7 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st
                 processors = p;
                 outputDir = o;
                 
-        TreeMap* tmap = t->getTreeMap();
+        CountTable* ct = t->getCountTable();
       
                 //if the users enters no groups then give them the score of all groups
                 int numGroups = m->getNumGroups();
@@ -281,9 +281,9 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st
                         vector<string> groups;
                         if (numGroups == 0) {
                                 //get score for all users groups
-                               for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
-                                       if ((tmap->getNamesOfGroups())[i] != "xxx") {
-                                               groups.push_back((tmap->getNamesOfGroups())[i]);
+                               for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) {
+                                       if ((ct->getNamesOfGroups())[i] != "xxx") {
+                                               groups.push_back((ct->getNamesOfGroups())[i]);
                                         }
                                 }
                                 namesOfGroupCombos.push_back(groups);
@@ -297,7 +297,7 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st
  
                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                         if(processors == 1){
-                               data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, tmap);
+                               data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, ct);
                         }else{
                                 int numPairs = namesOfGroupCombos.size();
                                 
@@ -311,12 +311,12 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st
                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
                                 }
                                         
-                               data = createProcesses(t, namesOfGroupCombos, true, tmap);
+                               data = createProcesses(t, namesOfGroupCombos, true, ct);
                                 
                                 lines.clear();
                         }
                 #else
-                       data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, tmap);
+                       data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, ct);
                 #endif
         
                 return data;
@@ -328,7 +328,7 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st
  }
  /**************************************************************************************************/
  
-EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups, TreeMap* tmap) {
+EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups, CountTable* ct) {
         try {
  #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                 int process = 1;
@@ -345,7 +345,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfG
                                 process++;
                         }else if (pid == 0){
                                 EstOutput myresults;
-                               myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups, tmap);
+                               myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups, ct);
                                 
                                 if (m->control_pressed) { exit(0); }
                                 
@@ -365,7 +365,7 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfG
                         }
                 }
                 
-               results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups, tmap);
+               results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups, ct);
                 
                 //force parent to wait until all the processes are done
                 for (int i=0;i<(processors-1);i++) { 
@@ -409,14 +409,14 @@ EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfG
         }
  }
  /**************************************************************************************************/
-EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, bool usingGroups, TreeMap* tmap) { 
+EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, bool usingGroups, CountTable* ct) { 
   try {
                 
                 EstOutput results; results.resize(num);
                 
                 int count = 0;
                 
-               Tree* copyTree = new Tree(tmap);
+               Tree* copyTree = new Tree(ct);
                 
                 for (int h = start; h < (start+num); h++) {
                 
diff --git a/unweighted.h b/unweighted.h

index c6c13bb3a7abd14c824dfe6b495871790809833d..b136b007b517e56645be544ee6556f83a45de649 100644 (file)
--- a/unweighted.h
+++ b/unweighted.h
@@ -12,7 +12,7 @@
   */
  
  #include "treecalculator.h"
-#include "treemap.h"
+#include "counttable.h"
  
  /***********************************************************************/
  
@@ -38,10 +38,10 @@ class Unweighted : public TreeCalculator  {
                 map< vector<string>, set<int> > rootForGrouping;  //maps a grouping combo to the roots for that combo
                 bool includeRoot;
                 
-               EstOutput driver(Tree*, vector< vector<string> >, int, int, TreeMap*); 
-               EstOutput createProcesses(Tree*, vector< vector<string> >, TreeMap*);
-               EstOutput driver(Tree*, vector< vector<string> >, int, int, bool, TreeMap*); 
-               EstOutput createProcesses(Tree*, vector< vector<string> >, bool, TreeMap*);
+               EstOutput driver(Tree*, vector< vector<string> >, int, int, CountTable*); 
+               EstOutput createProcesses(Tree*, vector< vector<string> >, CountTable*);
+               EstOutput driver(Tree*, vector< vector<string> >, int, int, bool, CountTable*); 
+               EstOutput createProcesses(Tree*, vector< vector<string> >, bool, CountTable*);
                 int getRoot(Tree*, int, vector<string>);
  };
  
diff --git a/validparameter.cpp b/validparameter.cpp

index 3e1f3498ca0be8f0f010bd038273f3ea70f89d3c..7d1af2551c7a6717c1d3ea807af0e56de0fc218e 100644 (file)
--- a/validparameter.cpp
+++ b/validparameter.cpp
@@ -307,6 +307,14 @@ string ValidParameters::validFile(map<string, string>& container, string paramet
                                         
                                         if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ". I suspect you entered a column formatted file as a phylip file, aborting."); m->mothurOutEndLine(); return "not found"; }
                                 }
+                
+                //check for blank file
+                if (ableToOpen != 1) {
+                    if (m->isBlank(container[parameter])) {
+                        m->mothurOut("[ERROR]: " + container[parameter] + " is blank, aborting."); m->mothurOutEndLine(); return "not found"; 
+                    }
+                }
+                    
                         }
                 }else { return "not found"; }
                 
diff --git a/weighted.cpp b/weighted.cpp

index 85eed5207ff20d586bd999f670d59a4c8e840c67..cf1291dfa91af84e4d821541b2a8f43a169d64f7 100644 (file)
--- a/weighted.cpp
+++ b/weighted.cpp
@@ -19,7 +19,7 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) {
                 processors = p;
                 outputDir = o;
          
-        TreeMap* tmap = t->getTreeMap();
+        CountTable* ct = t->getCountTable();
                 
                 numGroups = m->getNumGroups();
                 
@@ -38,7 +38,7 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) {
                 
                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                         if(processors == 1){
-                               data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
+                               data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
                         }else{
                                 int numPairs = namesOfGroupCombos.size();
                                 
@@ -52,12 +52,12 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) {
                                         lines.push_back(linePair(startPos, numPairsPerProcessor));
                                 }
  
-                               data = createProcesses(t, namesOfGroupCombos, tmap);
+                               data = createProcesses(t, namesOfGroupCombos, ct);
                                 
                                 lines.clear();
                         }
                 #else
-                       data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
+                       data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
                 #endif
                 
                 return data;
@@ -69,7 +69,7 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) {
  }
  /**************************************************************************************************/
  
-EstOutput Weighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, TreeMap* tmap) {
+EstOutput Weighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, CountTable* ct) {
         try {
  #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                 int process = 1;
@@ -87,7 +87,7 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector<string> > namesOfGro
                         }else if (pid == 0){
         
                                 EstOutput Myresults;
-                               Myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, tmap);
+                               Myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, ct);
                         
                                 //m->mothurOut("Merging results."); m->mothurOutEndLine();
                                 
@@ -110,7 +110,7 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector<string> > namesOfGro
                         }
                 }
         
-               results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, tmap);
+               results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);
         
                 //force parent to wait until all the processes are done
                 for (int i=0;i<(processors-1);i++) { 
@@ -155,7 +155,7 @@ EstOutput Weighted::createProcesses(Tree* t, vector< vector<string> > namesOfGro
         }
  }
  /**************************************************************************************************/
-EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, TreeMap* tmap) { 
+EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, CountTable* ct) { 
   try {
                 EstOutput results;
                 vector<double> D;
@@ -179,7 +179,7 @@ EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos,
                                 int numSeqsInGroupI = it->second;
                                 
                                 double sum = getLengthToRoot(t, t->groupNodeInfo[groupA][j], groupA, groupB);
-                               double weightedSum = ((numSeqsInGroupI * sum) / (double)tmap->seqsPerGroup[groupA]);
+                               double weightedSum = ((numSeqsInGroupI * sum) / (double)ct->getGroupCount(groupA));
                         
                                 D[count] += weightedSum;
                         }
@@ -190,7 +190,7 @@ EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos,
                                 int numSeqsInGroupL = it->second;
                                 
                                 double sum = getLengthToRoot(t, t->groupNodeInfo[groupB][j], groupA, groupB);
-                               double weightedSum = ((numSeqsInGroupL * sum) / (double)tmap->seqsPerGroup[groupB]);
+                               double weightedSum = ((numSeqsInGroupL * sum) / (double)ct->getGroupCount(groupB));
                         
                                 D[count] += weightedSum;
                         }
@@ -216,7 +216,7 @@ EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos,
                                 it = t->tree[i].pcount.find(groupA);
                                 //if it does u = # of its descendants with a certain group / total number in tree with a certain group
                                 if (it != t->tree[i].pcount.end()) {
-                                       u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA];
+                                       u = (double) t->tree[i].pcount[groupA] / (double) ct->getGroupCount(groupA);
                                 }else { u = 0.00; }
                                 
                                 
@@ -225,7 +225,7 @@ EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos,
                                 
                                 //if it does subtract their percentage from u
                                 if (it != t->tree[i].pcount.end()) {
-                                       u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB];
+                                       u -= (double) t->tree[i].pcount[groupB] / (double) ct->getGroupCount(groupB);
                                 }
                                 
                                 if (includeRoot) {
@@ -270,7 +270,7 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) {
                 
                 data.clear(); //clear out old values
       
-        TreeMap* tmap = t->getTreeMap();
+        CountTable* ct = t->getCountTable();
                 
                 if (m->control_pressed) { return data; }
                 
@@ -287,7 +287,7 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) {
                         int numSeqsInGroupI = it->second;
                         
                         double sum = getLengthToRoot(t, t->groupNodeInfo[groups[0]][j], groups[0], groups[1]);
-                       double weightedSum = ((numSeqsInGroupI * sum) / (double)tmap->seqsPerGroup[groups[0]]);
+                       double weightedSum = ((numSeqsInGroupI * sum) / (double)ct->getGroupCount(groups[0]));
                 
                         D += weightedSum;
                 }
@@ -298,7 +298,7 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) {
                         int numSeqsInGroupL = it->second;
                         
                         double sum = getLengthToRoot(t, t->groupNodeInfo[groups[1]][j], groups[0], groups[1]);
-                       double weightedSum = ((numSeqsInGroupL * sum) / (double)tmap->seqsPerGroup[groups[1]]);
+                       double weightedSum = ((numSeqsInGroupL * sum) / (double)ct->getGroupCount(groups[1]));
                 
                         D += weightedSum;
                 }
@@ -314,7 +314,7 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) {
                         it = t->tree[i].pcount.find(groupA);
                         //if it does u = # of its descendants with a certain group / total number in tree with a certain group
                         if (it != t->tree[i].pcount.end()) {
-                               u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA];
+                               u = (double) t->tree[i].pcount[groupA] / (double) ct->getGroupCount(groupA);
                         }else { u = 0.00; }
                         
                         
@@ -322,7 +322,7 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) {
                         it = t->tree[i].pcount.find(groupB);
                         //if it does subtract their percentage from u
                         if (it != t->tree[i].pcount.end()) {
-                               u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB];
+                               u -= (double) t->tree[i].pcount[groupB] / (double) ct->getGroupCount(groupB);
                         }
                         
                         if (includeRoot) {
diff --git a/weighted.h b/weighted.h

index 180409ce2dbad5da67b570a7ca4a3666c61ae3b3..d4082fe9c82a7aeeb41aeaad62f016d0ec588d4d 100644 (file)
--- a/weighted.h
+++ b/weighted.h
@@ -12,7 +12,7 @@
   */
  
  #include "treecalculator.h"
-#include "treemap.h"
+#include "counttable.h"
  
  /***********************************************************************/
  
@@ -41,8 +41,8 @@ class Weighted : public TreeCalculator  {
                 map< vector<string>, set<int> > rootForGrouping;  //maps a grouping combo to the root for that combo
                 bool includeRoot;
                 
-               EstOutput driver(Tree*, vector< vector<string> >, int, int, TreeMap*); 
-               EstOutput createProcesses(Tree*, vector< vector<string> >, TreeMap*);
+               EstOutput driver(Tree*, vector< vector<string> >, int, int, CountTable*); 
+               EstOutput createProcesses(Tree*, vector< vector<string> >, CountTable*);
                 double getLengthToRoot(Tree*, int, string, string);
  };
author	Sarah Westcott <mothur.westcott@gmail.com>
	Fri, 7 Sep 2012 18:30:06 +0000 (14:30 -0400)
committer	Sarah Westcott <mothur.westcott@gmail.com>
	Fri, 7 Sep 2012 18:30:06 +0000 (14:30 -0400)
Mothur.xcodeproj/project.pbxproj		patch \| blob \| history
aligncommand.cpp		patch \| blob \| history
binsequencecommand.cpp		patch \| blob \| history
binsequencecommand.h		patch \| blob \| history
chimeraperseuscommand.cpp		patch \| blob \| history
classifyotucommand.cpp		patch \| blob \| history
classifyotucommand.h		patch \| blob \| history
classifyseqscommand.cpp		patch \| blob \| history
classifyseqscommand.h		patch \| blob \| history
classifytreecommand.cpp		patch \| blob \| history
classifytreecommand.h		patch \| blob \| history
clusterfragmentscommand.cpp		patch \| blob \| history
clusterfragmentscommand.h		patch \| blob \| history
consensus.cpp		patch \| blob \| history
consensusseqscommand.cpp		patch \| blob \| history
consensusseqscommand.h		patch \| blob \| history
countseqscommand.cpp		patch \| blob \| history
counttable.cpp		patch \| blob \| history
counttable.h		patch \| blob \| history
deconvolutecommand.cpp		patch \| blob \| history
deconvolutecommand.h		patch \| blob \| history
deuniquetreecommand.cpp		patch \| blob \| history
indicatorcommand.cpp		patch \| blob \| history
indicatorcommand.h		patch \| blob \| history
parsimony.cpp		patch \| blob \| history
parsimony.h		patch \| blob \| history
parsimonycommand.cpp		patch \| blob \| history
parsimonycommand.h		patch \| blob \| history
phylodiversitycommand.cpp		patch \| blob \| history
phylodiversitycommand.h		patch \| blob \| history
phylosummary.cpp		patch \| blob \| history
phylosummary.h		patch \| blob \| history
readtree.cpp		patch \| blob \| history
readtree.h		patch \| blob \| history
secondarystructurecommand.cpp		patch \| blob \| history
sensspeccommand.cpp		patch \| blob \| history
shhhercommand.cpp		patch \| blob \| history
subsample.cpp		patch \| blob \| history
subsample.h		patch \| blob \| history
summarytaxcommand.cpp		patch \| blob \| history
summarytaxcommand.h		patch \| blob \| history
tree.cpp		patch \| blob \| history
tree.h		patch \| blob \| history
treegroupscommand.cpp		patch \| blob \| history
treegroupscommand.h		patch \| blob \| history
treemap.cpp		patch \| blob \| history
treemap.h		patch \| blob \| history
treereader.cpp		patch \| blob \| history
treereader.h		patch \| blob \| history
trimseqscommand.cpp		patch \| blob \| history
unifracunweightedcommand.cpp		patch \| blob \| history
unifracunweightedcommand.h		patch \| blob \| history
unifracweightedcommand.cpp		patch \| blob \| history
unifracweightedcommand.h		patch \| blob \| history
unweighted.cpp		patch \| blob \| history
unweighted.h		patch \| blob \| history
validparameter.cpp		patch \| blob \| history
weighted.cpp		patch \| blob \| history
weighted.h		patch \| blob \| history