]> git.donarmstrong.com Git - mothur.git/commitdiff
added count file to get.oturep, pre.cluster, screen.seqs, tree.shared. made remove...
authorSarah Westcott <mothur.westcott@gmail.com>
Tue, 25 Sep 2012 13:18:08 +0000 (09:18 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Tue, 25 Sep 2012 13:18:08 +0000 (09:18 -0400)
28 files changed:
classifyseqscommand.cpp
clusterfragmentscommand.cpp
consensusseqscommand.cpp
countseqscommand.cpp
counttable.cpp
counttable.h
deconvolutecommand.cpp
getgroupscommand.cpp
getlineagecommand.cpp
getoturepcommand.cpp
getoturepcommand.h
getseqscommand.cpp
makebiomcommand.cpp
mothurout.cpp
mothurout.h
preclustercommand.cpp
preclustercommand.h
removegroupscommand.cpp
removelineagecommand.cpp
removeseqscommand.cpp
screenseqscommand.cpp
screenseqscommand.h
splitgroupscommand.cpp
splitgroupscommand.h
treegroupscommand.cpp
treegroupscommand.h
treemap.cpp
trimseqscommand.cpp

index bab774012f20283e12b43606349b5b519721f0b7..2e55673934a03b4c0ba5984f930499ec86e1e165 100644 (file)
@@ -619,7 +619,7 @@ int ClassifySeqsCommand::execute(){
                
                        m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
                        
-                       string baseTName = taxonomyFileName;
+                       string baseTName = m->getSimpleName(taxonomyFileName);
                        if (taxonomyFileName == "saved") {baseTName = rdb->getSavedTaxonomy();  }
                        
             //set rippedTaxName to 
index 378e8ab0e09350e6e8853a5f4c1a1b0fe7c77fe4..f785c506db502c32c52209badf1af3258b97f1c9 100644 (file)
@@ -79,7 +79,7 @@ string ClusterFragmentsCommand::getOutputFileNameTag(string type, string inputNa
         else {
             if (type == "fasta") {  outputFileName =  "fragclust.fasta"; }
             else if (type == "name") {  outputFileName =  "fragclust.names"; }
-            else if (type == "count") {  outputFileName =  "fragclust.count.table"; }
+            else if (type == "count") {  outputFileName =  "fragclust.count_table"; }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
         return outputFileName;
index 4c7aefb340602efffa969b2612568299cbf1865b..94d66827068338c9b254f8e3aa30d9ec65301db9 100644 (file)
@@ -66,7 +66,7 @@ string ConsensusSeqsCommand::getOutputFileNameTag(string type, string inputName=
         else {
             if (type == "fasta") {  outputFileName =  "cons.fasta"; }
             else if (type == "name") {  outputFileName =  "cons.names"; }
-            else if (type == "count") {  outputFileName =  "cons.count.table"; }
+            else if (type == "count") {  outputFileName =  "cons.count_table"; }
             else if (type == "summary") {  outputFileName =  "cons.summary"; }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
index 75d21c2250166d202f71ed21786dca16db40a38f..fa6fd4f09a2c0c1703a4f12b65943d6df1ac06a2 100644 (file)
@@ -35,7 +35,7 @@ vector<string> CountSeqsCommand::setParameters(){
 string CountSeqsCommand::getHelpString(){      
        try {
                string helpString = "";
-               helpString += "The count.seqs aka. make.table command reads a name file and outputs a .count.table file.  You may also provide a group file to get the counts broken down by group.\n";
+               helpString += "The count.seqs aka. make.table command reads a name file and outputs a .count_table file.  You may also provide a group file to get the counts broken down by group.\n";
                helpString += "The groups parameter allows you to indicate which groups you want to include in the counts, by default all groups in your groupfile are used.\n";
         helpString += "The large parameter indicates the name and group files are too large to fit in RAM.\n";
                helpString += "When you use the groups parameter and a sequence does not represent any sequences from the groups you specify it is not included in the .count.summary file.\n";
@@ -59,7 +59,7 @@ string CountSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
         it = outputTypes.find(type);
         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
         else {
-            if (type == "counttable") {  outputFileName =  "count.table"; }
+            if (type == "counttable") {  outputFileName =  "count_table"; }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
         return outputFileName;
index cd623ecbcf5fbea1cdd25fa8c9c2b872746da162..a79047d0af943e1c2bbef6d169aeefa884c1e164 100644 (file)
@@ -52,6 +52,12 @@ int CountTable::createTable(set<string>& n, map<string, string>& g, set<string>&
             }
         }
         
+        if (hasGroups) {
+            for (int i = 0; i < totalGroups.size(); i++) {
+                if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
+            }
+        }
+
         return 0;
     }
        catch(exception& e) {
@@ -179,7 +185,14 @@ int CountTable::createTable(string namefile, string groupfile, bool createGroup)
         in.close();
                
         if (error) { m->control_pressed = true; }
-               if (groupfile != "") { delete groupMap; }
+        else { //check for zero groups
+            if (hasGroups) {
+                for (int i = 0; i < totalGroups.size(); i++) {
+                    if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
+                }
+            }
+        }
+        if (groupfile != "") { delete groupMap; }
         
         return 0;
     }
@@ -243,6 +256,13 @@ int CountTable::readTable(string file) {
         in.close();
         
         if (error) { m->control_pressed = true; }
+        else { //check for zero groups
+            if (hasGroups) {
+                for (int i = 0; i < totalGroups.size(); i++) {
+                    if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
+                }
+            }
+        }
         
         return 0;
     }
@@ -457,6 +477,51 @@ int CountTable::addGroup(string groupName) {
        }
 }
 /************************************************************/
+//remove group
+int CountTable::removeGroup(string groupName) {
+    try {        
+        if (hasGroups) {
+            map<string, int>::iterator it = indexGroupMap.find(groupName);
+            if (it == indexGroupMap.end()) {
+                m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+            }else { 
+                int indexOfGroupToRemove = it->second;
+                map<string, int> currentGroupIndex = indexGroupMap;
+                vector<string> newGroups;
+                for (int i = 0; i < groups.size(); i++) {
+                    if (groups[i] != groupName) { 
+                        newGroups.push_back(groups[i]);
+                        indexGroupMap[groups[i]] = i;
+                    }
+                }
+                indexGroupMap.erase(groupName);
+                groups = newGroups;
+                totalGroups.erase(totalGroups.begin()+indexOfGroupToRemove);
+                 
+                for (int i = 0; i < counts.size(); i++) {
+                    int num = counts[i][indexOfGroupToRemove];
+                    counts[i].erase(counts[i].begin()+indexOfGroupToRemove);
+                    totals[i] -= num;
+                    total -= num;
+                    if (totals[i] == 0) { //your sequences are only from the group we want to remove, then remove you.
+                        counts.erase(counts.begin()+i);
+                        totals.erase(totals.begin()+i);
+                        uniques--;
+                        i--;
+                    }
+                }
+                if (groups.size() == 0) { hasGroups = false; }
+            }
+        }else { m->mothurOut("[ERROR]: your count table does not contain group information, can not remove group " + groupName + ".\n"); m->control_pressed = true; }
+    
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "removeGroup");
+               exit(1);
+       }
+}
+/************************************************************/
 //vector of groups for the seq
 vector<string> CountTable::getGroups(string seqName) {
     try {
@@ -560,7 +625,7 @@ int CountTable::push_back(string seqName) {
 int CountTable::remove(string seqName) {
     try {
         map<string, int>::iterator it = indexNameMap.find(seqName);
-        if (it == indexNameMap.end()) {
+        if (it != indexNameMap.end()) {
             uniques--;
             if (hasGroups){ //remove this sequences counts from group totals
                 for (int i = 0; i < totalGroups.size(); i++) {  totalGroups[i] -= counts[it->second][i];  counts[it->second][i] = 0; }
index b66c4c6ef5e90007025935c4f0820cedb11c624d..34c941bece659b832afd9f2ff8866a16d37c14f1 100644 (file)
@@ -47,9 +47,11 @@ class CountTable {
         CountTable() { m = MothurOut::getInstance(); hasGroups = false; total = 0; uniques = 0; }
         ~CountTable() {}
     
+        //reads and creates smart enough to eliminate groups with zero counts 
         int createTable(set<string>&, map<string, string>&, set<string>&); //seqNames, seqName->group, groupNames 
         int createTable(string, string, bool); //namefile, groupfile, createGroup
-        int readTable(string);    
+        int readTable(string); 
+    
         int printTable(string);
         int printHeaders(ofstream&);
         int printSeq(ofstream&, string);
@@ -60,6 +62,7 @@ class CountTable {
         int getNumGroups() { return groups.size(); }
         vector<string> getNamesOfGroups() {  return groups;   }  //returns group names, if no group info vector is blank.
         int addGroup(string);
+        int removeGroup(string);
         
         int renameSeq(string, string); //used to change name of sequence for use with trees
         int setAbund(string, string, int); //set abundance number of seqs for that group for that seq
index b6f71c826066114849acfc04b5130e09e876462f..90a40ce8e86dbe6a6e3cba8cb9d070de374805cc 100644 (file)
@@ -57,7 +57,7 @@ string DeconvoluteCommand::getOutputFileNameTag(string type, string inputName=""
         else {
             if (type == "fasta") {  outputFileName =  "unique" + m->getExtension(inputName); }
             else if (type == "name") {  outputFileName =  "names"; }
-            else if (type == "count") {  outputFileName =  "count.table"; }
+            else if (type == "count") {  outputFileName =  "count_table"; }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
         return outputFileName;
index 910a8720bc3ec88079d2663ebf819408436ca02a..69f4403ca0039dab9d3c4f5aa626a566fa0805d9 100644 (file)
@@ -72,7 +72,7 @@ string GetGroupsCommand::getOutputFileNameTag(string type, string inputName=""){
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
-            else if (type == "count")       {   outputFileName =  "pick.count.table";   }
+            else if (type == "count")       {   outputFileName =  "pick.count_table";   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "shared")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "design")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
index 1cd139b5213733e759a7a32a1fbdf8221d09dfb6..645655d03f72bbebac45cfa108c499cc1daf8e3b 100644 (file)
@@ -10,6 +10,7 @@
 #include "getlineagecommand.h"
 #include "sequence.hpp"
 #include "listvector.hpp"
+#include "counttable.h"
 
 //**********************************************************************************************************************
 vector<string> GetLineageCommand::setParameters(){     
@@ -71,7 +72,7 @@ string GetLineageCommand::getOutputFileNameTag(string type, string inputName="")
             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
-            else if (type == "count")       {   outputFileName =  "pick.count.table";                    }  
+            else if (type == "count")       {   outputFileName =  "pick.count_table";                    }  
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "alignreport") {   outputFileName =  "pick.align.report";   }
@@ -438,6 +439,14 @@ int GetLineageCommand::readCount(){
         }
         in.close();
                out.close();
+        
+        //check for groups that have been eliminated
+        CountTable ct;
+        if (ct.testGroups(outputFileName)) {
+            ct.readTable(outputFileName);
+            ct.printTable(outputFileName);
+        }
+
                
                if (wroteSomething == false) {  m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
                outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
@@ -691,15 +700,17 @@ int GetLineageCommand::readTax(){
                        in >> name;                             //read from first column
                        in >> tax;                      //read from second column
                        
+            string noQuotesTax = m->removeQuotes(tax);
+            
                        for (int j = 0; j < listOfTaxons.size(); j++) {
                                                        
-                               string newtax = tax;
+                               string newtax = noQuotesTax;
                        
                                //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
                                if (!taxonsHasConfidence[j]) {
-                                       int hasConfidences = tax.find_first_of('(');
+                                       int hasConfidences = noQuotesTax.find_first_of('(');
                                        if (hasConfidences != string::npos) { 
-                                               newtax = tax;
+                                               newtax = noQuotesTax;
                                                m->removeConfidences(newtax);
                                        }
                                
@@ -712,7 +723,7 @@ int GetLineageCommand::readTax(){
                                                break;
                                        }
                                }else{//if listOfTaxons[i] has them and you don't them remove taxons
-                                       int hasConfidences = tax.find_first_of('(');
+                                       int hasConfidences = noQuotesTax.find_first_of('(');
                                        if (hasConfidences == string::npos) { 
                                        
                                                int pos = newtax.find(noConfidenceTaxons[j]);
@@ -726,10 +737,10 @@ int GetLineageCommand::readTax(){
                                        }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
                                                //first remove confidences from both and see if the taxonomy exists
                                        
-                                               string noNewTax = tax;
-                                               int hasConfidences = tax.find_first_of('(');
+                                               string noNewTax = noQuotesTax;
+                                               int hasConfidences = noQuotesTax.find_first_of('(');
                                                if (hasConfidences != string::npos) { 
-                                                       noNewTax = tax;
+                                                       noNewTax = noQuotesTax;
                                                        m->removeConfidences(noNewTax);
                                                }
                                        
index 4967f245fb11c2ff37195a83baf39d29667fc582..9f4dd54491bab042ae243998de20559b03285dfb 100644 (file)
@@ -41,9 +41,10 @@ vector<string> GetOTURepCommand::setParameters(){
        try {
                CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pfasta);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
                CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "ColumnName",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
                CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName",false,false); parameters.push_back(pcolumn);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
@@ -68,9 +69,9 @@ vector<string> GetOTURepCommand::setParameters(){
 string GetOTURepCommand::getHelpString(){      
        try {
                string helpString = "";
-               helpString += "The get.oturep command parameters are phylip, column, list, fasta, name, group, large, weighted, cutoff, precision, groups, sorted and label.  The list parameter is required, as well as phylip or column and name, unless you have valid current files.\n";
+               helpString += "The get.oturep command parameters are phylip, column, list, fasta, name, group, count, large, weighted, cutoff, precision, groups, sorted and label.  The list parameter is required, as well as phylip or column and name, unless you have valid current files.\n";
                helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
-               helpString += "The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n";
+               helpString += "The phylip or column parameter is required, but only one may be used.  If you use a column file the name or count filename is required. \n";
                helpString += "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n";
                helpString += "The get.oturep command should be in the following format: get.oturep(phylip=yourDistanceMatrix, fasta=yourFastaFile, list=yourListFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n";
                helpString += "Example get.oturep(phylip=amazon.dist, fasta=amazon.fasta, list=amazon.fn.list, group=amazon.groups).\n";
@@ -106,6 +107,7 @@ string GetOTURepCommand::getOutputFileNameTag(string type, string inputName=""){
         else {
             if (type == "fasta")            {   outputFileName =  "rep.fasta";   }
             else if (type == "name")        {   outputFileName =  "rep.names";   }
+            else if (type == "count")        {   outputFileName =  "rep.count_table";   }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
         return outputFileName;
@@ -123,6 +125,7 @@ GetOTURepCommand::GetOTURepCommand(){
                vector<string> tempOutNames;
                outputTypes["fasta"] = tempOutNames;
                outputTypes["name"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "GetOTURepCommand", "GetOTURepCommand");
@@ -157,6 +160,7 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                        vector<string> tempOutNames;
                        outputTypes["fasta"] = tempOutNames;
                        outputTypes["name"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -210,6 +214,14 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["group"] = inputDir + it->second;            }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        
@@ -245,6 +257,24 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                        if (namefile == "not open") { abort = true; }   
                        else if (namefile == "not found") { namefile = ""; }
                        else { m->setNameFile(namefile); }
+            
+            hasGroups = false;
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not found") { countfile =  "";   }
+                       else if (countfile == "not open") { abort = true; countfile =  ""; }    
+                       else {   
+                m->setCountTableFile(countfile); 
+                ct.readTable(countfile);
+                if (ct.hasGroupInfo()) { hasGroups = true; }
+            }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+            
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
                        
                        if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these?
                                //give priority to column, then phylip
@@ -261,14 +291,18 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                        }else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a get.oturep command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
                
                        if (columnfile != "") {  
-                               if (namefile == "") {  
+                               if ((namefile == "") && (countfile == "")) { 
                                        namefile = m->getNameFile(); 
                                        if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
                                        else { 
-                                               m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); 
-                                               abort = true; 
+                                               countfile = m->getCountTableFile();
+                        if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                        else { 
+                            m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format."); m->mothurOutEndLine(); 
+                            abort = true; 
+                        }      
                                        }       
-                               } 
+                               }
                        }
 
                        //check for optional parameter and set defaults
@@ -292,15 +326,15 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                                sorted = "";
                        }
                        
-                       if ((sorted == "group") && (groupfile == "")) {
-                               m->mothurOut("You must provide a groupfile to sort by group. I will not sort."); m->mothurOutEndLine();
+                       if ((sorted == "group") && ((groupfile == "")&& !hasGroups)) {
+                               m->mothurOut("You must provide a groupfile or have a count file with group info to sort by group. I will not sort."); m->mothurOutEndLine();
                                sorted = "";
                        }
                        
                        groups = validParameter.validFile(parameters, "groups", false);                 
                        if (groups == "not found") { groups = ""; }
                        else { 
-                               if (groupfile == "") {
+                               if ((groupfile == "") && (!hasGroups)) {
                                        m->mothurOut("You must provide a groupfile to use groups."); m->mothurOutEndLine();
                                        abort = true;
                                }else { 
@@ -340,106 +374,9 @@ int GetOTURepCommand::execute(){
                int error;
                list = NULL;
                
-               if (!large) {
-                       //read distance files
-                       if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }        
-                       else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
-                       else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
-                       
-                       readMatrix->setCutoff(cutoff);
-       
-                       if(namefile != ""){     
-                               nameMap = new NameAssignment(namefile);
-                               nameMap->readMap();
-                       }else{  nameMap = NULL;         }
-                       
-                       readMatrix->read(nameMap);
-                       
-                       if (m->control_pressed) { delete readMatrix; return 0; }
-
-                       list = readMatrix->getListVector();
-
-                       SparseDistanceMatrix* matrix = readMatrix->getDMatrix();
-                       
-                       // Create a data structure to quickly access the distance information.
-                       // It consists of a vector of distance maps, where each map contains
-                       // all distances of a certain sequence. Vector and maps are accessed
-                       // via the index of a sequence in the distance matrix
-                       seqVec = vector<SeqMap>(list->size()); 
-            for (int i = 0; i < matrix->seqVec.size(); i++) {
-                for (int j = 0; j < matrix->seqVec[i].size(); j++) {
-                    if (m->control_pressed) { delete readMatrix; return 0; }
-                    //already added everyone else in row
-                    if (i < matrix->seqVec[i][j].index) {  seqVec[i][matrix->seqVec[i][j].index] = matrix->seqVec[i][j].dist;  }
-                }
-                       }
-                       //add dummy map for unweighted calc
-                       SeqMap dummy;
-                       seqVec.push_back(dummy);
-                       
-                       delete matrix;
-                       delete readMatrix;
-                       delete nameMap;
-                       
-                       if (m->control_pressed) { return 0; }
-               }else {
-                       //process file and set up indexes
-                       if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }    
-                       else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
-                       else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
-                       
-                       formatMatrix->setCutoff(cutoff);
-       
-                       if(namefile != ""){     
-                               nameMap = new NameAssignment(namefile);
-                               nameMap->readMap();
-                       }else{  nameMap = NULL;         }
-                       
-                       formatMatrix->read(nameMap);
-                       
-                       if (m->control_pressed) { delete formatMatrix;  return 0; }
-
-                       list = formatMatrix->getListVector();
-                       
-                       distFile = formatMatrix->getFormattedFileName();
-                       
-                       //positions in file where the distances for each sequence begin
-                       //rowPositions[1] = position in file where distance related to sequence 1 start.
-                       rowPositions = formatMatrix->getRowPositions();
-                       rowPositions.push_back(-1); //dummy row for unweighted calc
-                       
-                       delete formatMatrix;
-                       delete nameMap;
-                       
-                       //openfile for getMap to use
-                       m->openInputFile(distFile, inRow);
-                       
-                       if (m->control_pressed) { inRow.close(); m->mothurRemove(distFile); return 0; }
-               }
-               
-               
-               //list bin 0 = first name read in distance matrix, list bin 1 = second name read in distance matrix
-               if (list != NULL) {
-                       vector<string> names;
-                       string binnames;
-                       //map names to rows in sparsematrix
-                       for (int i = 0; i < list->size(); i++) {
-                               names.clear();
-                               binnames = list->get(i);
-                               
-                               m->splitAtComma(binnames, names);
-                               
-                               for (int j = 0; j < names.size(); j++) {
-                                       nameToIndex[names[j]] = i;
-                               }
-                       }
-               } else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
-               
+               readDist();             
                                
-               if (m->control_pressed) { 
-                       if (large) {  inRow.close(); m->mothurRemove(distFile);  }
-                       return 0; 
-               }
+               if (m->control_pressed) { if (large) {  inRow.close(); m->mothurRemove(distFile);  } return 0; }
                
                if (groupfile != "") {
                        //read in group map info.
@@ -448,13 +385,18 @@ int GetOTURepCommand::execute(){
                        if (error == 1) { delete groupMap; m->mothurOut("Error reading your groupfile. Proceeding without groupfile."); m->mothurOutEndLine(); groupfile = "";  }
                        
                        if (Groups.size() != 0) {
-                               SharedUtil* util = new SharedUtil();
+                               SharedUtil util;
                                vector<string> gNamesOfGroups = groupMap->getNamesOfGroups();
-                               util->setGroups(Groups, gNamesOfGroups, "getoturep");
+                               util.setGroups(Groups, gNamesOfGroups, "getoturep");
                                groupMap->setNamesOfGroups(gNamesOfGroups);
-                               delete util;
                        }
-               }
+               }else if (hasGroups) {
+            if (Groups.size() != 0) {
+                               SharedUtil util;
+                               vector<string> gNamesOfGroups = ct.getNamesOfGroups();
+                               util.setGroups(Groups, gNamesOfGroups, "getoturep");
+                       }
+        }
                
                //done with listvector from matrix
                if (list != NULL) { delete list; }
@@ -595,6 +537,11 @@ int GetOTURepCommand::execute(){
                if (itTypes != outputTypes.end()) {
                        if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
                }
+        
+        itTypes = outputTypes.find("count");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+               }
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -608,7 +555,116 @@ int GetOTURepCommand::execute(){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+int GetOTURepCommand::readDist() {
+       try {
+        
+        if (!large) {
+                       //read distance files
+                       if (format == "column") { readMatrix = new ReadColumnMatrix(distFile); }        
+                       else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(distFile); }
+                       else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
+                       
+                       readMatrix->setCutoff(cutoff);
+            
+                       NameAssignment* nameMap = NULL;
+            if(namefile != ""){        
+                nameMap = new NameAssignment(namefile);
+                nameMap->readMap();
+                readMatrix->read(nameMap);
+            }else if (countfile != "") {
+                readMatrix->read(&ct);
+            }
+                       
+                       if (m->control_pressed) { delete readMatrix; return 0; }
+            
+                       list = readMatrix->getListVector();
+                       SparseDistanceMatrix* matrix = readMatrix->getDMatrix();
+                       
+                       // Create a data structure to quickly access the distance information.
+                       // It consists of a vector of distance maps, where each map contains
+                       // all distances of a certain sequence. Vector and maps are accessed
+                       // via the index of a sequence in the distance matrix
+                       seqVec = vector<SeqMap>(list->size()); 
+            for (int i = 0; i < matrix->seqVec.size(); i++) {
+                for (int j = 0; j < matrix->seqVec[i].size(); j++) {
+                    if (m->control_pressed) { delete readMatrix; return 0; }
+                    //already added everyone else in row
+                    if (i < matrix->seqVec[i][j].index) {  seqVec[i][matrix->seqVec[i][j].index] = matrix->seqVec[i][j].dist;  }
+                }
+                       }
+                       //add dummy map for unweighted calc
+                       SeqMap dummy;
+                       seqVec.push_back(dummy);
+                       
+                       delete matrix;
+                       delete readMatrix;
+                       delete nameMap;
+                       
+                       if (m->control_pressed) { return 0; }
+               }else {
+                       //process file and set up indexes
+                       if (format == "column") { formatMatrix = new FormatColumnMatrix(distFile); }    
+                       else if (format == "phylip") { formatMatrix = new FormatPhylipMatrix(distFile); }
+                       else { m->mothurOut("File format error."); m->mothurOutEndLine(); return 0;  }
+                       
+                       formatMatrix->setCutoff(cutoff);
+            
+                       NameAssignment* nameMap = NULL;
+            if(namefile != ""){        
+                nameMap = new NameAssignment(namefile);
+                nameMap->readMap();
+                readMatrix->read(nameMap);
+            }else if (countfile != "") {
+                readMatrix->read(&ct);
+            }
+                       
+                       if (m->control_pressed) { delete formatMatrix;  return 0; }
+            
+                       list = formatMatrix->getListVector();
+                       distFile = formatMatrix->getFormattedFileName();
+                       
+                       //positions in file where the distances for each sequence begin
+                       //rowPositions[1] = position in file where distance related to sequence 1 start.
+                       rowPositions = formatMatrix->getRowPositions();
+                       rowPositions.push_back(-1); //dummy row for unweighted calc
+                       
+                       delete formatMatrix;
+                       delete nameMap;
+                       
+                       //openfile for getMap to use
+                       m->openInputFile(distFile, inRow);
+                       
+                       if (m->control_pressed) { inRow.close(); m->mothurRemove(distFile); return 0; }
+               }
+               
+               
+               //list bin 0 = first name read in distance matrix, list bin 1 = second name read in distance matrix
+               if (list != NULL) {
+                       vector<string> names;
+                       string binnames;
+                       //map names to rows in sparsematrix
+                       for (int i = 0; i < list->size(); i++) {
+                               names.clear();
+                               binnames = list->get(i);
+                               
+                               m->splitAtComma(binnames, names);
+                               
+                               for (int j = 0; j < names.size(); j++) {
+                                       nameToIndex[names[j]] = i;
+                               }
+                       }
+               } else { m->mothurOut("error, no listvector."); m->mothurOutEndLine(); }
 
+        if (m->control_pressed) { if (large) {  inRow.close(); m->mothurRemove(distFile);  }return 0; }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "GetOTURepCommand", "execute");
+               exit(1);
+       }
+}
 //**********************************************************************************************************************
 void GetOTURepCommand::readNamesFile() {
        try {
@@ -677,32 +733,38 @@ void GetOTURepCommand::readNamesFile(bool w) {
        }
 }
 //**********************************************************************************************************************
-string GetOTURepCommand::findRep(vector<string> names) {
+string GetOTURepCommand::findRep(vector<string> names, string group) {
        try{
                // if only 1 sequence in bin or processing the "unique" label, then 
                // the first sequence of the OTU is the representative one
                if ((names.size() == 1)) {
                        return names[0];
                }else{
-                       vector<int> seqIndex(names.size());
-                       vector<float> max_dist(names.size());
-                       vector<float> total_dist(names.size());
+                       vector<int> seqIndex; //(names.size());
                        map<string, string>::iterator itNameFile;
                        map<string, int>::iterator itNameIndex;
 
                        //fill seqIndex and initialize sums
                        for (size_t i = 0; i < names.size(); i++) {
                                if (weighted) {
-                                       seqIndex[i] = nameToIndex[names[i]];
+                                       seqIndex.push_back(nameToIndex[names[i]]);
+                    if (countfile != "") {  //if countfile is not blank then we can assume the list file contains only uniques, otherwise we assume list file contains everyone.
+                        int numRep = 0;
+                        if (group != "") {  numRep = ct.getGroupCount(names[i], group);  }
+                        else { numRep = ct.getGroupCount(names[i]);  }
+                        for (int j = 1; j < numRep; j++) { //don't add yourself again
+                            seqIndex.push_back(nameToIndex[names[i]]);
+                        }
+                    }
                                }else { 
                                        if (namefile == "") {
                                                itNameIndex = nameToIndex.find(names[i]);
                                                
                                                if (itNameIndex == nameToIndex.end()) { // you are not in the distance file and no namesfile, then assume you are not unique
-                                                       if (large) {  seqIndex[i] = (rowPositions.size()-1); }
-                                                       else {  seqIndex[i] = (seqVec.size()-1); }
+                                                       if (large) {  seqIndex.push_back((rowPositions.size()-1)); }
+                                                       else {  seqIndex.push_back((seqVec.size()-1)); }
                                                }else {
-                                                       seqIndex[i] = itNameIndex->second;
+                                                       seqIndex.push_back(itNameIndex->second);
                                                }
                                                
                                        }else {
@@ -715,17 +777,18 @@ string GetOTURepCommand::findRep(vector<string> names) {
                                                        string name2 = itNameFile->second;
                                                        
                                                        if (name1 == name2) { //then you are unique so add your real dists
-                                                               seqIndex[i] = nameToIndex[names[i]];
+                                                               seqIndex.push_back(nameToIndex[names[i]]);
                                                        }else { //add dummy
-                                                               if (large) {  seqIndex[i] = (rowPositions.size()-1); }
-                                                               else {  seqIndex[i] = (seqVec.size()-1); }
+                                                               if (large) {  seqIndex.push_back((rowPositions.size()-1)); }
+                                                               else {  seqIndex.push_back((seqVec.size()-1)); }
                                                        }
                                                }
                                        }
                                }
-                               max_dist[i] = 0.0;
-                               total_dist[i] = 0.0;
                        }
+            
+            vector<float> max_dist(seqIndex.size(), 0.0);
+                       vector<float> total_dist(seqIndex.size(), 0.0);
                        
                        // loop through all entries in seqIndex
                        SeqMap::iterator it;
@@ -795,19 +858,33 @@ int GetOTURepCommand::process(ListVector* processList) {
                map<string, ofstream*> filehandles;
                
                if (Groups.size() == 0) { //you don't want to use groups
-                       outputNamesFile  = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + getOutputFileNameTag("name");
-                       m->openOutputFile(outputNamesFile, newNamesOutput);
-                       outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
+                       outputNamesFile  = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".";
+            if (countfile == "") { 
+                outputNamesFile += getOutputFileNameTag("name");
+                outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
+            }else {
+                outputNamesFile += getOutputFileNameTag("count");
+                outputNames.push_back(outputNamesFile); outputTypes["count"].push_back(outputNamesFile); 
+            }
                        outputNameFiles[outputNamesFile] = processList->getLabel();
+            m->openOutputFile(outputNamesFile, newNamesOutput);
+            newNamesOutput << "noGroup" << endl;
                }else{ //you want to use groups
                        ofstream* temp;
                        for (int i=0; i<Groups.size(); i++) {
                                temp = new ofstream;
                                filehandles[Groups[i]] = temp;
-                               outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + "." + getOutputFileNameTag("name");
+                               outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".";
+                if (countfile == "") { 
+                    outputNamesFile += getOutputFileNameTag("name");
+                    outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
+                }else {
+                    outputNamesFile += getOutputFileNameTag("count");
+                    outputNames.push_back(outputNamesFile); outputTypes["count"].push_back(outputNamesFile); 
+                }
                                
                                m->openOutputFile(outputNamesFile, *(temp));
-                               outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile);
+                *(temp) << Groups[i] << endl;
                                outputNameFiles[outputNamesFile] = processList->getLabel() + "." + Groups[i];
                        }
                }
@@ -832,7 +909,7 @@ int GetOTURepCommand::process(ListVector* processList) {
                        m->splitAtComma(temp, namesInBin);
                        
                        if (Groups.size() == 0) {
-                               nameRep = findRep(namesInBin);
+                               nameRep = findRep(namesInBin, "");
                                newNamesOutput << i << '\t' << nameRep << '\t' << processList->get(i) << endl;
                        }else{
                                map<string, vector<string> > NamesInGroup;
@@ -841,20 +918,25 @@ int GetOTURepCommand::process(ListVector* processList) {
                                }
                                
                                for (int j=0; j<namesInBin.size(); j++) {
-                                       string thisgroup = groupMap->getGroup(namesInBin[j]);
-                                       
-                                       if (thisgroup == "not found") { m->mothurOut(namesInBin[j] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
-                                       
-                                       if (m->inUsersGroups(thisgroup, Groups)) { //add this name to correct group
-                                               NamesInGroup[thisgroup].push_back(namesInBin[j]);
-                                       }
+                    if (groupfile != "") {
+                        string thisgroup = groupMap->getGroup(namesInBin[j]);
+                        if (thisgroup == "not found") { m->mothurOut(namesInBin[j] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
+                        
+                        //add this name to correct group
+                        if (m->inUsersGroups(thisgroup, Groups)) { NamesInGroup[thisgroup].push_back(namesInBin[j]);  }
+                    }else {
+                        vector<string> thisSeqsGroups = ct.getGroups(namesInBin[j]);
+                        for (int k = 0; k < thisSeqsGroups.size(); k++) {
+                            if (m->inUsersGroups(thisSeqsGroups[k], Groups)) { NamesInGroup[thisSeqsGroups[k]].push_back(namesInBin[j]);  }
+                        }
+                    }
                                }
                                
                                //get rep for each group in otu
                                for (int j=0; j<Groups.size(); j++) {
                                        if (NamesInGroup[Groups[j]].size() != 0) { //are there members from this group in this otu?
                                                //get rep for each group
-                                               nameRep = findRep(NamesInGroup[Groups[j]]);
+                                               nameRep = findRep(NamesInGroup[Groups[j]], Groups[j]);
                                                
                                                //output group rep and other members of this group
                                                (*(filehandles[Groups[j]])) << i << '\t' << nameRep << '\t';
@@ -900,28 +982,51 @@ int GetOTURepCommand::processFastaNames(string filename, string label) {
                ofstream out2;
                string tempNameFile = filename + ".temp";
                m->openOutputFile(tempNameFile, out2);
-               
+            
                ifstream in;
                m->openInputFile(filename, in);
                
                int i = 0;
+        string tempGroup = "";
+        in >> tempGroup; m->gobble(in);
+        
+        CountTable thisCt;
+        if (countfile != "") {
+            thisCt.readTable(countfile);
+            if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; }
+        }
+    
+        int thistotal = 0;
                while (!in.eof()) {
                        string rep, binnames;
                        in >> i >> rep >> binnames; m->gobble(in);
-                       out2 << rep << '\t' << binnames << endl;
                        
                        vector<string> names;
                        m->splitAtComma(binnames, names);
                        int binsize = names.size();
-                       
+            
+            if (countfile == "") { out2 << rep << '\t' << binnames << endl; }
+            else {
+                if (tempGroup == "noGroup") {
+                    for (int j = 0; j < names.size(); j++) {
+                        if (names[j] != rep) { thisCt.mergeCounts(rep, names[j]); }
+                    }
+                    binsize = thisCt.getNumSeqs(rep);
+                }else {
+                    int total = 0; 
+                    for (int j = 0; j < names.size(); j++) {  total += thisCt.getGroupCount(names[j], tempGroup);  }
+                    out2 << rep << '\t' << total << '\t' << total << endl;
+                    binsize = total;
+                }
+            }
+                       thistotal += binsize;
                        //if you have a groupfile
                        string group = "";
+            map<string, string> groups;
+            map<string, string>::iterator groupIt;
                        if (groupfile != "") {
-                               map<string, string> groups;
-                               map<string, string>::iterator groupIt;
-                               
                                //find the groups that are in this bin
-                               for (size_t i = 0; i < names.size(); i++) {
+                               for (int i = 0; i < names.size(); i++) {
                                        string groupName = groupMap->getGroup(names[i]);
                                        if (groupName == "not found") {  
                                                m->mothurOut(names[i] + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
@@ -937,7 +1042,21 @@ int GetOTURepCommand::processFastaNames(string filename, string label) {
                                }
                                //rip off last dash
                                group = group.substr(0, group.length()-1);
-                       }else{ group = ""; }
+                       }else if (hasGroups) {
+                map<string, string> groups;
+                for (int i = 0; i < names.size(); i++) {
+                    vector<string> thisSeqsGroups = ct.getGroups(names[i]);
+                    for (int j = 0; j < thisSeqsGroups.size(); j++) { groups[thisSeqsGroups[j]] = thisSeqsGroups[j]; }
+                }
+                //turn the groups into a string
+                               for (groupIt = groups.begin(); groupIt != groups.end(); groupIt++) {
+                                       group += groupIt->first + "-";
+                               }
+                               //rip off last dash
+                               group = group.substr(0, group.length()-1);
+                //cout << group << endl;
+            }
+            else{ group = ""; }
 
                        
                        //print out name and sequence for that bin
@@ -947,7 +1066,7 @@ int GetOTURepCommand::processFastaNames(string filename, string label) {
                                if (sorted == "") { //print them out
                                        rep = rep + "\t" + toString(i+1);
                                        rep = rep + "|" + toString(binsize);
-                                       if (groupfile != "") {
+                                       if (group != "") {
                                                rep = rep + "|" + group;
                                        }
                                        out << ">" << rep << endl;
@@ -973,7 +1092,7 @@ int GetOTURepCommand::processFastaNames(string filename, string label) {
                                string sequence = fasta->getSequence(reps[i].name);
                                string outputName = reps[i].name + "\t" + toString(reps[i].bin);
                                outputName = outputName + "|" + toString(reps[i].size);
-                               if (groupfile != "") {
+                               if (reps[i].group != "") {
                                        outputName = outputName + "|" + reps[i].group;
                                }
                                out << ">" << outputName << endl;
@@ -984,9 +1103,11 @@ int GetOTURepCommand::processFastaNames(string filename, string label) {
                in.close();
                out.close();
                out2.close();
-               
+               
                m->mothurRemove(filename);
                rename(tempNameFile.c_str(), filename.c_str());
+        
+        if ((countfile != "") && (tempGroup == "noGroup")) { thisCt.printTable(filename); } 
                
                return 0;
 
@@ -1012,10 +1133,35 @@ int GetOTURepCommand::processNames(string filename, string label) {
                
                int i = 0;
                string rep, binnames;
+        
+        string tempGroup = "";
+        in >> tempGroup; m->gobble(in);
+        
+        CountTable thisCt;
+        if (countfile != "") {
+            thisCt.readTable(countfile);
+            if (tempGroup != "noGroup") { out2 << "Representative_Sequence\ttotal\t" << tempGroup << endl; }
+        }
+        
                while (!in.eof()) {
                        if (m->control_pressed) { break; }
                        in >> i >> rep >> binnames; m->gobble(in);
-                       out2 << rep << '\t' << binnames << endl;
+            
+                       if (countfile == "") { out2 << rep << '\t' << binnames << endl; }
+            else {
+                vector<string> names;
+                m->splitAtComma(binnames, names);
+                if (tempGroup == "noGroup") {
+                    for (int j = 0; j < names.size(); j++) {
+                        if (names[j] != rep) { thisCt.mergeCounts(rep, names[j]); }
+                    }
+                }else {
+                    int total = 0; 
+                    for (int j = 0; j < names.size(); j++) {  total += thisCt.getGroupCount(names[j], tempGroup);  }
+                    out2 << rep << '\t' << total << '\t' << total << endl;
+                }
+            }
+
                }
                in.close();
                out2.close();
@@ -1023,6 +1169,8 @@ int GetOTURepCommand::processNames(string filename, string label) {
                m->mothurRemove(filename);
                rename(tempNameFile.c_str(), filename.c_str());
                
+        if ((countfile != "") && (tempGroup == "noGroup")) { thisCt.printTable(filename); } 
+        
                return 0;
        }
        catch(exception& e) {
index d19a396410f2e2c9850f9b20a93d65c03000bac2..390632975afc85aac5b714c7194c81e74d58e16e 100644 (file)
@@ -18,6 +18,7 @@
 #include "groupmap.h"
 #include "readmatrix.hpp"
 #include "formatmatrix.h"
+#include "counttable.h"
 
 typedef map<int, float> SeqMap;
 
@@ -60,10 +61,11 @@ private:
        ReadMatrix* readMatrix;
        FormatMatrix* formatMatrix;
        NameAssignment* nameMap;
-       string filename, fastafile, listfile, namefile, groupfile, label, sorted, phylipfile, columnfile, distFile, format, outputDir, groups;
+    CountTable ct;
+       string filename, fastafile, listfile, namefile, groupfile, label, sorted, phylipfile, countfile, columnfile, distFile, format, outputDir, groups;
        ofstream out;
        ifstream in, inNames, inRow;
-       bool abort, allLines, groupError, large, weighted;
+       bool abort, allLines, groupError, large, weighted, hasGroups;
        set<string> labels; //holds labels to be used
        map<string, int> nameToIndex;  //maps sequence name to index in sparsematrix
        map<string, string> nameFileMap;
@@ -79,9 +81,10 @@ private:
        void readNamesFile(bool);
        int process(ListVector*);
        SeqMap getMap(int);
-       string findRep(vector<string>);         // returns the name of the "representative" sequence of given bin or subset of a bin, for groups
+       string findRep(vector<string>, string);         // returns the name of the "representative" sequence of given bin or subset of a bin, for groups
        int processNames(string, string);
        int processFastaNames(string, string);
+    int readDist();
 };
 
 #endif
index e0faef4cc07bd3fc6626fe186ec67fe05c512023..6b16111fdcda83099c0e36396543d40ee345c865 100644 (file)
@@ -10,6 +10,7 @@
 #include "getseqscommand.h"
 #include "sequence.hpp"
 #include "listvector.hpp"
+#include "counttable.h"
 
 //**********************************************************************************************************************
 vector<string> GetSeqsCommand::setParameters(){        
@@ -90,7 +91,7 @@ string GetSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
-            else if (type == "count")       {   outputFileName =  "pick.count.table";   }
+            else if (type == "count")       {   outputFileName =  "pick.count_table";   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "qfile")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
@@ -568,6 +569,13 @@ int GetSeqsCommand::readCount(){
         }
         in.close();
                out.close();
+        
+        //check for groups that have been eliminated
+        CountTable ct;
+        if (ct.testGroups(outputFileName)) {
+            ct.readTable(outputFileName);
+            ct.printTable(outputFileName);
+        }
                
                if (wroteSomething == false) {  m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
                outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
@@ -617,19 +625,16 @@ int GetSeqsCommand::readList(){
                        
                                //parse out names that are in accnos file
                                string binnames = list.get(i);
+                vector<string> bnames;
+                m->splitAtComma(binnames, bnames);
                                
                                string newNames = "";
-                               while (binnames.find_first_of(',') != -1) { 
-                                       string name = binnames.substr(0,binnames.find_first_of(','));
-                                       binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
-                                       
+                for (int i = 0; i < bnames.size(); i++) {
+                                       string name = bnames[i];
                                        //if that name is in the .accnos file, add it
                                        if (names.count(name) != 0) {  newNames += name + ",";  selectedCount++; if (m->debug) { sanity["list"].insert(name); } }
                                }
                        
-                               //get last name
-                               if (names.count(binnames) != 0) {  newNames += binnames + ",";  selectedCount++;  if (m->debug) { sanity["list"].insert(binnames); } }
-
                                //if there are names in this bin add to new list
                                if (newNames != "") { 
                                        newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
index 9e8d3e39cb58e31e9f3dca2eeff35257eadd537a..68e70ee3e48e58a89f75ef7547d7409620947cb2 100644 (file)
@@ -549,15 +549,16 @@ vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup)
                 if (m->control_pressed) { return metadata; }
                 
                 //if there is a bin label use it otherwise make one
-                string binLabel = binTag;
-                string sbinNumber = otuLabels[i];
-                if (sbinNumber.length() < snumBins.length()) { 
-                    int diff = snumBins.length() - sbinNumber.length();
-                    for (int h = 0; h < diff; h++) { binLabel += "0"; }
-                }
-                binLabel += sbinNumber;
-                
-                labelTaxMap[binLabel] = taxs[i];
+                if (m->isContainingOnlyDigits(otuLabels[i])) {
+                    string binLabel = binTag;
+                    string sbinNumber = otuLabels[i];
+                    if (sbinNumber.length() < snumBins.length()) { 
+                        int diff = snumBins.length() - sbinNumber.length();
+                        for (int h = 0; h < diff; h++) { binLabel += "0"; }
+                    }
+                    binLabel += sbinNumber;
+                    labelTaxMap[binLabel] = taxs[i];
+                }else {  labelTaxMap[otuLabels[i]] = taxs[i]; }
             }
             
             
index d9df5a078b425b773611d25b2616e3f66a58bddf..c5eb0dca828a47023973f1684986ba05d45bb0fb 100644 (file)
@@ -2890,6 +2890,28 @@ int MothurOut::removeConfidences(string& tax) {
        }
 }
 /**************************************************************************************************/
+string MothurOut::removeQuotes(string tax) {
+       try {
+               
+               string taxon;
+               string newTax = "";
+               
+               for (int i = 0; i < tax.length(); i++) {
+                       
+                       if (control_pressed) { return newTax; }
+            
+            if ((tax[i] != '\'') && (tax[i] != '\"')) { newTax += tax[i]; }
+                       
+        }
+               
+               return newTax;
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "removeQuotes");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
 
 
 
index 33384034ae55ee7f11c72b48378fa59bfe29e16e..0c2e448bc783da0e0aec643b9234ae79fbee98c9 100644 (file)
@@ -140,6 +140,7 @@ class MothurOut {
                void splitAtChar(string&, vector<string>&, char);
         void splitAtChar(string&, string&, char);
                int removeConfidences(string&);
+        string removeQuotes(string);
         string makeList(vector<string>&);
         bool isSubset(vector<string>, vector<string>); //bigSet, subset
                
index 951b2008b062c26c5fabb54dfd8ef2d2a858da67..dadc9186f243952445db24766cf3a08c9dc4d5d0 100644 (file)
@@ -14,8 +14,9 @@
 vector<string> PreClusterCommand::setParameters(){     
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
                CommandParameter pdiffs("diffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pdiffs);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
@@ -36,9 +37,10 @@ string PreClusterCommand::getHelpString(){
                string helpString = "";
                helpString += "The pre.cluster command groups sequences that are within a given number of base mismatches.\n";
                helpString += "The pre.cluster command outputs a new fasta and name file.\n";
-               helpString += "The pre.cluster command parameters are fasta, names and diffs. The fasta parameter is required. \n";
-               helpString += "The names parameter allows you to give a list of seqs that are identical. This file is 2 columns, first column is name or representative sequence, second column is a list of its identical sequences separated by commas.\n";
+               helpString += "The pre.cluster command parameters are fasta, name, group, count, processors and diffs. The fasta parameter is required. \n";
+               helpString += "The name parameter allows you to give a list of seqs that are identical. This file is 2 columns, first column is name or representative sequence, second column is a list of its identical sequences separated by commas.\n";
                helpString += "The group parameter allows you to provide a group file so you can cluster by group. \n";
+        helpString += "The count parameter allows you to provide a count file so you can cluster by group. \n";
                helpString += "The diffs parameter allows you to specify maximum number of mismatched bases allowed between sequences in a grouping. The default is 1.\n";
                helpString += "The pre.cluster command should be in the following format: \n";
                helpString += "pre.cluster(fasta=yourFastaFile, names=yourNamesFile, diffs=yourMaxDiffs) \n";
@@ -63,6 +65,7 @@ string PreClusterCommand::getOutputFileNameTag(string type, string inputName="")
         else {
             if (type == "fasta") {  outputFileName =  "precluster" + m->getExtension(inputName); }
             else if (type == "name") {  outputFileName =  "precluster.names"; }
+            else if (type == "count") {  outputFileName =  "precluster.count_table"; }
             else if (type == "map") {  outputFileName =  "precluster.map"; }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
@@ -81,6 +84,7 @@ PreClusterCommand::PreClusterCommand(){
                vector<string> tempOutNames;
                outputTypes["fasta"] = tempOutNames;
                outputTypes["name"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
                outputTypes["map"] = tempOutNames;
        }
        catch(exception& e) {
@@ -117,6 +121,7 @@ PreClusterCommand::PreClusterCommand(string option) {
                        outputTypes["fasta"] = tempOutNames;
                        outputTypes["name"] = tempOutNames;
                        outputTypes["map"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -146,6 +151,14 @@ PreClusterCommand::PreClusterCommand(string option) {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["group"] = inputDir + it->second;            }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        //check for required parameters
@@ -175,6 +188,25 @@ PreClusterCommand::PreClusterCommand(string option) {
                        if (groupfile == "not found") { groupfile =  "";  bygroup = false; }
                        else if (groupfile == "not open") { abort = true; groupfile =  ""; }    
                        else {   m->setGroupFile(groupfile); bygroup = true;  }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not found") { countfile =  "";   }
+                       else if (countfile == "not open") { abort = true; countfile =  ""; }    
+                       else {   
+                m->setCountTableFile(countfile); 
+                ct.readTable(countfile);
+                if (ct.hasGroupInfo()) { bygroup = true; }
+                else { bygroup = false;  }
+            }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+            
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+
                        
                        string temp     = validParameter.validFile(parameters, "diffs", false);         if(temp == "not found"){        temp = "1"; }
                        m->mothurConvert(temp, diffs); 
@@ -183,10 +215,12 @@ PreClusterCommand::PreClusterCommand(string option) {
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);
                        
-                       if (namefile == "") {
-                               vector<string> files; files.push_back(fastafile);
-                               parser.getNameFile(files);
-                       }
+            if (countfile == "") {
+                if (namefile == "") {
+                    vector<string> files; files.push_back(fastafile);
+                    parser.getNameFile(files);
+                }
+            }
                }
                                
        }
@@ -207,10 +241,11 @@ int PreClusterCommand::execute(){
                string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
                string newFastaFile = fileroot + getOutputFileNameTag("fasta", fastafile);
                string newNamesFile = fileroot + getOutputFileNameTag("name");
+        string newCountFile = fileroot + getOutputFileNameTag("count");
                string newMapFile = fileroot + getOutputFileNameTag("map"); //add group name if by group
                outputNames.push_back(newFastaFile); outputTypes["fasta"].push_back(newFastaFile);
-               outputNames.push_back(newNamesFile); outputTypes["name"].push_back(newNamesFile);
-               
+               if (countfile == "") { outputNames.push_back(newNamesFile); outputTypes["name"].push_back(newNamesFile); }
+               else { outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile); }
                
                if (bygroup) {
                        //clear out old files
@@ -219,39 +254,45 @@ int PreClusterCommand::execute(){
                        newMapFile = fileroot + "precluster.";
                        
                        //parse fasta and name file by group
-                       SequenceParser* parser;
-                       if (namefile != "") { parser = new SequenceParser(groupfile, fastafile, namefile);      }
-                       else                            { parser = new SequenceParser(groupfile, fastafile);                    }
-                       
-                       vector<string> groups = parser->getNamesOfGroups();
-                       
-                       if(processors == 1)     {       driverGroups(parser, newFastaFile, newNamesFile, newMapFile, 0, groups.size(), groups); }
-                       else                            {       createProcessesGroups(parser, newFastaFile, newNamesFile, newMapFile, groups);                  }
-                       
-                       delete parser;
-                       
-                       if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        }        return 0; }
-                       
-                       //run unique.seqs for deconvolute results
-                       string inputString = "fasta=" + newFastaFile;
-                       if (namefile != "") { inputString += ", name=" + newNamesFile; }
-                       m->mothurOutEndLine(); 
-                       m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
-                       m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
-                       m->mothurCalling = true;
+            vector<string> groups;
+                       if (countfile != "") {
+                cparser = new SequenceCountParser(countfile, fastafile);
+                groups = cparser->getNamesOfGroups();
+            }else {
+                if (namefile != "") { parser = new SequenceParser(groupfile, fastafile, namefile);     }
+                else                           { parser = new SequenceParser(groupfile, fastafile);                    }
+                groups = parser->getNamesOfGroups();
+                       }
             
-                       Command* uniqueCommand = new DeconvoluteCommand(inputString);
-                       uniqueCommand->execute();
-                       
-                       map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
-                       
-                       delete uniqueCommand;
-                       m->mothurCalling = false;
-                       m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
-                       
-                       m->renameFile(filenames["fasta"][0], newFastaFile);
-            m->renameFile(filenames["name"][0], newNamesFile);
-                       
+                       if(processors == 1)     {       driverGroups(newFastaFile, newNamesFile, newMapFile, 0, groups.size(), groups); }
+                       else                            {       createProcessesGroups(newFastaFile, newNamesFile, newMapFile, groups);                  }
+                       
+                       if (countfile != "") { 
+                mergeGroupCounts(newCountFile, newNamesFile, newFastaFile);
+                delete cparser; 
+            }else {  
+                delete parser; 
+                //run unique.seqs for deconvolute results
+                string inputString = "fasta=" + newFastaFile;
+                if (namefile != "") { inputString += ", name=" + newNamesFile; }
+                m->mothurOutEndLine(); 
+                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
+                m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
+                m->mothurCalling = true;
+                
+                Command* uniqueCommand = new DeconvoluteCommand(inputString);
+                uniqueCommand->execute();
+                
+                map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
+                
+                delete uniqueCommand;
+                m->mothurCalling = false;
+                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
+                
+                m->renameFile(filenames["fasta"][0], newFastaFile);
+                m->renameFile(filenames["name"][0], newNamesFile); 
+                       }
+            if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);        }        return 0; }
                        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run pre.cluster."); m->mothurOutEndLine(); 
                                
                }else {
@@ -272,8 +313,9 @@ int PreClusterCommand::execute(){
                        
                        m->mothurOut("Total number of sequences before precluster was " + toString(alignSeqs.size()) + "."); m->mothurOutEndLine();
                        m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); 
-                       printData(newFastaFile, newNamesFile);
-                       
+                       if (countfile != "") { newNamesFile = newCountFile; }
+            printData(newFastaFile, newNamesFile, "");
+                               
                        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); 
                }
                                
@@ -295,6 +337,11 @@ int PreClusterCommand::execute(){
                if (itTypes != outputTypes.end()) {
                        if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
                }
+        
+        itTypes = outputTypes.find("count");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+               }
                
                return 0;
                
@@ -305,7 +352,7 @@ int PreClusterCommand::execute(){
        }
 }
 /**************************************************************************************************/
-int PreClusterCommand::createProcessesGroups(SequenceParser* parser, string newFName, string newNName, string newMFile, vector<string> groups) {
+int PreClusterCommand::createProcessesGroups(string newFName, string newNName, string newMFile, vector<string> groups) {
        try {
                
                vector<int> processIDS;
@@ -336,7 +383,7 @@ int PreClusterCommand::createProcessesGroups(SequenceParser* parser, string newF
                                process++;
                        }else if (pid == 0){
                 outputNames.clear();
-                               num = driverGroups(parser, newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMFile, lines[process].start, lines[process].end, groups);
+                               num = driverGroups(newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMFile, lines[process].start, lines[process].end, groups);
                 
                 string tempFile = toString(getpid()) + ".outputNames.temp";
                 ofstream outTemp;
@@ -355,7 +402,7 @@ int PreClusterCommand::createProcessesGroups(SequenceParser* parser, string newF
                }
                
                //do my part
-               num = driverGroups(parser, newFName, newNName, newMFile, lines[0].start, lines[0].end, groups);
+               num = driverGroups(newFName, newNName, newMFile, lines[0].start, lines[0].end, groups);
                
                //force parent to wait until all the processes are done
                for (int i=0;i<processIDS.size();i++) { 
@@ -395,7 +442,7 @@ int PreClusterCommand::createProcessesGroups(SequenceParser* parser, string newF
                        // Allocate memory for thread data.
                        string extension = toString(i) + ".temp";
                        
-                       preClusterData* tempPreCluster = new preClusterData(fastafile, namefile, groupfile, (newFName+extension), (newNName+extension), newMFile, groups, m, lines[i].start, lines[i].end, diffs, i);
+                       preClusterData* tempPreCluster = new preClusterData(fastafile, namefile, groupfile, countfile, (newFName+extension), (newNName+extension), newMFile, groups, m, lines[i].start, lines[i].end, diffs, i);
                        pDataArray.push_back(tempPreCluster);
                        processIDS.push_back(i);
                        
@@ -406,7 +453,7 @@ int PreClusterCommand::createProcessesGroups(SequenceParser* parser, string newF
                
                                
                //using the main process as a worker saves time and memory
-               num = driverGroups(parser, newFName, newNName, newMFile, lines[0].start, lines[0].end, groups);
+               num = driverGroups(newFName, newNName, newMFile, lines[0].start, lines[0].end, groups);
                
                //Wait until all threads have terminated.
                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
@@ -443,7 +490,7 @@ int PreClusterCommand::createProcessesGroups(SequenceParser* parser, string newF
        }
 }
 /**************************************************************************************************/
-int PreClusterCommand::driverGroups(SequenceParser* parser, string newFFile, string newNFile, string newMFile, int start, int end, vector<string> groups){
+int PreClusterCommand::driverGroups(string newFFile, string newNFile, string newMFile, int start, int end, vector<string> groups){
        try {
                
                int numSeqs = 0;
@@ -458,24 +505,29 @@ int PreClusterCommand::driverGroups(SequenceParser* parser, string newFFile, str
                        m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[i] + ":"); m->mothurOutEndLine();
                        
                        map<string, string> thisNameMap;
-                       if (namefile != "") { thisNameMap = parser->getNameMap(groups[i]); }
-                       vector<Sequence> thisSeqs = parser->getSeqs(groups[i]);
-                       
+            vector<Sequence> thisSeqs;
+                       if (groupfile != "") { 
+                thisSeqs = parser->getSeqs(groups[i]);
+            }else if (countfile != "") {
+                thisSeqs = cparser->getSeqs(groups[i]);
+            }
+                       if (namefile != "") {  thisNameMap = parser->getNameMap(groups[i]); }
+            
                        //fill alignSeqs with this groups info.
-                       numSeqs = loadSeqs(thisNameMap, thisSeqs);
+                       numSeqs = loadSeqs(thisNameMap, thisSeqs, groups[i]);
                        
                        if (m->control_pressed) {   return 0; }
                        
                        if (diffs > length) { m->mothurOut("Error: diffs is greater than your sequence length."); m->mothurOutEndLine(); m->control_pressed = true; return 0;  }
                        
-                       int count = process(newMFile+groups[i]+".map");
+                       int count= process(newMFile+groups[i]+".map");
                        outputNames.push_back(newMFile+groups[i]+".map"); outputTypes["map"].push_back(newMFile+groups[i]+".map");
                        
                        if (m->control_pressed) {  return 0; }
                        
                        m->mothurOut("Total number of sequences before pre.cluster was " + toString(alignSeqs.size()) + "."); m->mothurOutEndLine();
                        m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); 
-                       printData(newFFile, newNFile);
+                       printData(newFFile, newNFile, groups[i]);
                        
                        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); 
                        
@@ -559,26 +611,13 @@ int PreClusterCommand::readFASTA(){
                //ifstream inNames;
                ifstream inFasta;
                
-               //m->openInputFile(namefile, inNames);
                m->openInputFile(fastafile, inFasta);
-               
-               //string firstCol, secondCol, nameString;
                set<int> lengths;
                
                while (!inFasta.eof()) {
                        
                        if (m->control_pressed) { inFasta.close(); return 0; }
-                       
-                       //inNames >> firstCol >> secondCol;
-                       //nameString = secondCol;
-                       
-                       //m->gobble(inNames);
-                       //int size = 1;
-                       //while (secondCol.find_first_of(',') != -1) { 
-                       //      size++;
-                       //      secondCol = secondCol.substr(secondCol.find_first_of(',')+1, secondCol.length());
-                       //}
-                       
+                                               
                        Sequence seq(inFasta);  m->gobble(inFasta);
                        
                        if (seq.getName() != "") {  //can get "" if commented line is at end of fasta file
@@ -592,14 +631,15 @@ int PreClusterCommand::readFASTA(){
                                                lengths.insert(seq.getAligned().length());
                                        }       
                                }else { //no names file, you are identical to yourself 
-                                       seqPNode tempNode(1, seq, seq.getName());
+                    int numRep = 1;
+                    if (countfile != "") { numRep = ct.getNumSeqs(seq.getName()); }
+                                       seqPNode tempNode(numRep, seq, seq.getName());
                                        alignSeqs.push_back(tempNode);
                                        lengths.insert(seq.getAligned().length());
                                }
                        }
                }
                inFasta.close();
-               //inNames.close();
         
         if (lengths.size() > 1) { m->control_pressed = true; m->mothurOut("[ERROR]: your sequences are not all the same length. pre.cluster requires sequences to be aligned."); m->mothurOutEndLine(); }
         else if (lengths.size() == 1) { length = *(lengths.begin()); }
@@ -613,13 +653,15 @@ int PreClusterCommand::readFASTA(){
        }
 }
 /**************************************************************************************************/
-int PreClusterCommand::loadSeqs(map<string, string>& thisName, vector<Sequence>& thisSeqs){
+int PreClusterCommand::loadSeqs(map<string, string>& thisName, vector<Sequence>& thisSeqs, string group){
        try {
                set<int> lengths;
                alignSeqs.clear();
                map<string, string>::iterator it;
                bool error = false;
-                       
+        map<string, int> thisCount;
+        if (countfile != "") { thisCount = cparser->getCountTable(group);  }
+               
                for (int i = 0; i < thisSeqs.size(); i++) {
                        
                        if (m->control_pressed) { return 0; }
@@ -641,12 +683,20 @@ int PreClusterCommand::loadSeqs(map<string, string>& thisName, vector<Sequence>&
                     lengths.insert(thisSeqs[i].getAligned().length());
                                }       
                        }else { //no names file, you are identical to yourself 
-                               seqPNode tempNode(1, thisSeqs[i], thisSeqs[i].getName());
+                int numRep = 1;
+                if (countfile != "") { 
+                    map<string, int>::iterator it2 = thisCount.find(thisSeqs[i].getName());
+                    
+                    //should never be true since parser checks for this
+                    if (it2 == thisCount.end()) { m->mothurOut(thisSeqs[i].getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); error = true; }
+                    else { numRep = it2->second;  }
+                }
+                               seqPNode tempNode(numRep, thisSeqs[i], thisSeqs[i].getName());
                                alignSeqs.push_back(tempNode);
                                lengths.insert(thisSeqs[i].getAligned().length());
                        }
                }
-               
+    
         if (lengths.size() > 1) { error = true; m->mothurOut("[ERROR]: your sequences are not all the same length. pre.cluster requires sequences to be aligned."); m->mothurOutEndLine(); }
         else if (lengths.size() == 1) { length = *(lengths.begin()); }
         
@@ -683,10 +733,84 @@ int PreClusterCommand::calcMisMatches(string seq1, string seq2){
                exit(1);
        }
 }
+/**************************************************************************************************/
+
+int PreClusterCommand::mergeGroupCounts(string newcount, string newname, string newfasta){
+       try {
+               ifstream inNames;
+        m->openInputFile(newname, inNames);
+        
+        string group, first, second;
+        set<string> uniqueNames;
+        while (!inNames.eof()) {
+            if (m->control_pressed) { break; }
+            inNames >> group; m->gobble(inNames);
+            inNames >> first; m->gobble(inNames);
+            inNames >> second; m->gobble(inNames);
+            
+            vector<string> names;
+            m->splitAtComma(second, names);
+            
+            uniqueNames.insert(first);
+            
+            int total = ct.getGroupCount(first, group);
+            for (int i = 1; i < names.size(); i++) {
+                total += ct.getGroupCount(names[i], group);
+                ct.setAbund(names[i], group, 0);
+            }
+            ct.setAbund(first, group, total);
+        }
+        inNames.close();
+        
+        vector<string> namesOfSeqs = ct.getNamesOfSeqs();
+        for (int i = 0; i < namesOfSeqs.size(); i++) {
+            if (ct.getNumSeqs(namesOfSeqs[i]) == 0) {
+                ct.remove(namesOfSeqs[i]);
+            }
+        }
+        
+        ct.printTable(newcount); 
+        m->mothurRemove(newname);
+        
+        if (bygroup) { //if by group, must remove the duplicate seqs that are named the same
+            ifstream in;
+            m->openInputFile(newfasta, in);
+            
+            ofstream out;
+            m->openOutputFile(newfasta+"temp", out);
+            
+            int count = 0;
+            set<string> already;
+            while(!in.eof()) {
+                if (m->control_pressed) { break; }
+                
+                Sequence seq(in); m->gobble(in);
+                
+                if (seq.getName() != "") {
+                    count++;
+                    if (already.count(seq.getName()) == 0) {
+                        seq.printSequence(out);
+                        already.insert(seq.getName());
+                    }
+                }
+            }
+            in.close();
+            out.close();
+            m->mothurRemove(newfasta);
+            m->renameFile(newfasta+"temp", newfasta);
+        }
+                       return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PreClusterCommand", "mergeGroupCounts");
+               exit(1);
+       }
+}
 
 /**************************************************************************************************/
 
-void PreClusterCommand::printData(string newfasta, string newname){
+void PreClusterCommand::printData(string newfasta, string newname, string group){
        try {
                ofstream outFasta;
                ofstream outNames;
@@ -699,10 +823,14 @@ void PreClusterCommand::printData(string newfasta, string newname){
                        m->openOutputFile(newname, outNames);
                }
                
+        if ((countfile != "") && (group == ""))  { outNames << "Representative_Sequence\ttotal\n";  }
                for (int i = 0; i < alignSeqs.size(); i++) {
                        if (alignSeqs[i].numIdentical != 0) {
                                alignSeqs[i].seq.printSequence(outFasta); 
-                               outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl;
+                               if (countfile != "") {  
+                    if (group != "") {  outNames << group << '\t' << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl; }
+                    else {  outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].numIdentical << endl;  }
+                }else {  outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl;  }
                        }
                }
                
index 084bdc61ee464c665e2f37e200b38c09b85e8ef6..9844b8ffc1a951c47d606ec23e7caf45b4bf4ce1 100644 (file)
@@ -15,6 +15,7 @@
 #include "command.hpp"
 #include "sequence.hpp"
 #include "sequenceparser.h"
+#include "sequencecountparser.h"
 
 /************************************************************/
 struct seqPNode {
@@ -28,7 +29,13 @@ struct seqPNode {
        ~seqPNode() {}
 };
 /************************************************************/
-inline bool comparePriority(seqPNode first, seqPNode second) {  return (first.numIdentical > second.numIdentical); }
+inline bool comparePriority(seqPNode first, seqPNode second) {  
+    if (first.numIdentical > second.numIdentical) { return true;  }
+    else if (first.numIdentical == second.numIdentical) { 
+        if (first.seq.getName() > second.seq.getName()) { return true; }
+    }
+    return false; 
+}
 //************************************************************/
 
 class PreClusterCommand : public Command {
@@ -58,9 +65,13 @@ private:
                linePair(int i, int j) : start(i), end(j) {}
        };
        
+    SequenceParser* parser;
+    SequenceCountParser* cparser;
+    CountTable ct;
+    
        int diffs, length, processors;
        bool abort, bygroup;
-       string fastafile, namefile, outputDir, groupfile;
+       string fastafile, namefile, outputDir, groupfile, countfile;
        vector<seqPNode> alignSeqs; //maps the number of identical seqs to a sequence
        map<string, string> names; //represents the names file first column maps to second column
        map<string, int> sizes;  //this map a seq name to the number of identical seqs in the names file
@@ -73,11 +84,12 @@ private:
        void readNameFile();
        //int readNamesFASTA();
        int calcMisMatches(string, string);
-       void printData(string, string); //fasta filename, names file name
+       void printData(string, string, string); //fasta filename, names file name
        int process(string);
-       int loadSeqs(map<string, string>&, vector<Sequence>&);
-       int driverGroups(SequenceParser*, string, string, string, int, int, vector<string> groups);
-       int createProcessesGroups(SequenceParser*, string, string, string, vector<string>);
+       int loadSeqs(map<string, string>&, vector<Sequence>&, string);
+       int driverGroups(string, string, string, int, int, vector<string> groups);
+       int createProcessesGroups(string, string, string, vector<string>);
+    int mergeGroupCounts(string, string, string);
 };
 
 /**************************************************************************************************/
@@ -87,7 +99,7 @@ private:
 struct preClusterData {
        string fastafile; 
        string namefile; 
-       string groupfile;
+       string groupfile, countfile;
        string newFName, newNName, newMName;
        MothurOut* m;
        int start;
@@ -97,7 +109,7 @@ struct preClusterData {
        vector<string> mapFileNames;
        
        preClusterData(){}
-       preClusterData(string f, string n, string g, string nff,  string nnf, string nmf, vector<string> gr, MothurOut* mout, int st, int en, int d, int tid) {
+       preClusterData(string f, string n, string g, string c, string nff,  string nnf, string nmf, vector<string> gr, MothurOut* mout, int st, int en, int d, int tid) {
                fastafile = f;
                namefile = n;
                groupfile = g;
@@ -110,6 +122,7 @@ struct preClusterData {
                diffs = d;
                threadID = tid;
                groups = gr;
+        countfile = c;
        }
 };
 
@@ -124,10 +137,15 @@ static DWORD WINAPI MyPreclusterThreadFunction(LPVOID lpParam){
                
                //parse fasta and name file by group
                SequenceParser* parser;
-               if (pDataArray->namefile != "") { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile, pDataArray->namefile);      }
-               else                                                    { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile);                                            }
-               
-               int numSeqs = 0;
+        SequenceCountParser* cparser;
+        if (pDataArray->countfile != "") {
+            cparser = new SequenceCountParser(pDataArray->countfile, pDataArray->fastafile);
+        }else {
+            if (pDataArray->namefile != "") { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile, pDataArray->namefile); }
+            else                               { parser = new SequenceParser(pDataArray->groupfile, pDataArray->fastafile);                    }
+        }
+        
+               int numSeqs = 0;
                vector<seqPNode> alignSeqs;
                //clear out old files
                ofstream outF; pDataArray->m->openOutputFile(pDataArray->newFName, outF); outF.close();
@@ -143,8 +161,13 @@ static DWORD WINAPI MyPreclusterThreadFunction(LPVOID lpParam){
                        pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Processing group " + pDataArray->groups[k] + ":"); pDataArray->m->mothurOutEndLine();
                        
                        map<string, string> thisNameMap;
-                       if (pDataArray->namefile != "") { thisNameMap = parser->getNameMap(pDataArray->groups[k]); }
-                       vector<Sequence> thisSeqs = parser->getSeqs(pDataArray->groups[k]);
+            vector<Sequence> thisSeqs;
+                       if (pDataArray->groupfile != "") { 
+                thisSeqs = parser->getSeqs(pDataArray->groups[k]);
+            }else if (pDataArray->countfile != "") {
+                thisSeqs = cparser->getSeqs(pDataArray->groups[k]);
+            }
+                       if (pDataArray->namefile != "") {  thisNameMap = parser->getNameMap(pDataArray->groups[k]); }
                        
                        //fill alignSeqs with this groups info.
                        ////////////////////////////////////////////////////
@@ -154,6 +177,9 @@ static DWORD WINAPI MyPreclusterThreadFunction(LPVOID lpParam){
                        alignSeqs.clear();
                        map<string, string>::iterator it;
                        bool error = false;
+            map<string, int> thisCount;
+            if (pDataArray->countfile != "") { thisCount = cparser->getCountTable(pDataArray->groups[k]);  }
+
                        
                        for (int i = 0; i < thisSeqs.size(); i++) {
                                
@@ -176,8 +202,16 @@ static DWORD WINAPI MyPreclusterThreadFunction(LPVOID lpParam){
                                                if (thisSeqs[i].getAligned().length() > length) {  length = thisSeqs[i].getAligned().length();  }
                                        }       
                                }else { //no names file, you are identical to yourself 
-                                       seqPNode tempNode(1, thisSeqs[i], thisSeqs[i].getName());
-                                       alignSeqs.push_back(tempNode);
+                                       int numRep = 1;
+                    if (pDataArray->countfile != "") { 
+                        map<string, int>::iterator it2 = thisCount.find(thisSeqs[i].getName());
+                        
+                        //should never be true since parser checks for this
+                        if (it2 == thisCount.end()) { pDataArray->m->mothurOut(thisSeqs[i].getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); error = true; }
+                        else { numRep = it2->second;  }
+                    }
+                    seqPNode tempNode(numRep, thisSeqs[i], thisSeqs[i].getName());
+                    alignSeqs.push_back(tempNode);
                                        if (thisSeqs[i].getAligned().length() > length) {  length = thisSeqs[i].getAligned().length();  }
                                }
                        }
@@ -274,7 +308,9 @@ static DWORD WINAPI MyPreclusterThreadFunction(LPVOID lpParam){
                        for (int i = 0; i < alignSeqs.size(); i++) {
                                if (alignSeqs[i].numIdentical != 0) {
                                        alignSeqs[i].seq.printSequence(outFasta); 
-                                       outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl;
+                                       if (pDataArray->countfile != "") {  outNames << pDataArray->groups[k] << '\t' << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl; 
+                    }else {  outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl;  }
+
                                }
                        }
                        
index 80aeb3fbad0495b3788e518812d6740f31ac71f8..a29906c5f41769c93b3675b88eb1267fa2eb7127 100644 (file)
@@ -71,7 +71,7 @@ string RemoveGroupsCommand::getOutputFileNameTag(string type, string inputName="
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
-            else if (type == "count")       {   outputFileName =  "pick.count.table";   }
+            else if (type == "count")       {   outputFileName =  "pick.count_table";   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "shared")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "design")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
index a3fd1b83ee5025adb340922df226895230a464ed..2b930b5e2450cc15b7871923fb02dbcc1c5824ee 100644 (file)
@@ -10,6 +10,7 @@
 #include "removelineagecommand.h"
 #include "sequence.hpp"
 #include "listvector.hpp"
+#include "counttable.h"
 
 //**********************************************************************************************************************
 vector<string> RemoveLineageCommand::setParameters(){  
@@ -73,7 +74,7 @@ string RemoveLineageCommand::getOutputFileNameTag(string type, string inputName=
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
-            else if (type == "count")       {   outputFileName =  "pick.count.table";   }
+            else if (type == "count")       {   outputFileName =  "pick.count_table";   }
             else if (type == "alignreport")      {   outputFileName =  "pick.align.report";   }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
@@ -585,6 +586,13 @@ int RemoveLineageCommand::readCount(){
         }
         in.close();
                out.close();
+        
+        //check for groups that have been eliminated
+        CountTable ct;
+        if (ct.testGroups(outputFileName)) {
+            ct.readTable(outputFileName);
+            ct.printTable(outputFileName);
+        }
                
                if (wroteSomething == false) {  m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
                outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
@@ -678,15 +686,17 @@ int RemoveLineageCommand::readTax(){
                        
                        bool remove = false;
                        
+            string noQuotesTax = m->removeQuotes(tax);
+            
                        for (int j = 0; j < listOfTaxons.size(); j++) {
-                               string newtax = tax;
+                               string newtax = noQuotesTax;
                                
                                //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
                                if (!taxonsHasConfidence[j]) {
                                        
-                                       int hasConfidences = tax.find_first_of('(');
+                                       int hasConfidences = noQuotesTax.find_first_of('(');
                                        if (hasConfidences != string::npos) { 
-                                               newtax = tax;
+                                               newtax = noQuotesTax;
                                                m->removeConfidences(newtax);
                                        }
                                        
@@ -701,7 +711,7 @@ int RemoveLineageCommand::readTax(){
                                        }
                                        
                                }else{//if taxons has them and you don't them remove taxons
-                                       int hasConfidences = tax.find_first_of('(');
+                                       int hasConfidences = noQuotesTax.find_first_of('(');
                                        if (hasConfidences == string::npos) { 
                                                
                                                int pos = newtax.find(noConfidenceTaxons[j]);
@@ -716,10 +726,10 @@ int RemoveLineageCommand::readTax(){
                                        }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
                                                //first remove confidences from both and see if the taxonomy exists
                                                
-                                               string noNewTax = tax;
-                                               int hasConfidences = tax.find_first_of('(');
+                                               string noNewTax = noQuotesTax;
+                                               int hasConfidences = noQuotesTax.find_first_of('(');
                                                if (hasConfidences != string::npos) { 
-                                                       noNewTax = tax;
+                                                       noNewTax = noQuotesTax;
                                                        m->removeConfidences(noNewTax);
                                                }
                                                
index 1fb94462ac48fa13ac871c6b9d3cf4f3ae2ceff6..73873f3a4744eb0e9ce59a9b2640bf013f524852 100644 (file)
@@ -10,6 +10,7 @@
 #include "removeseqscommand.h"
 #include "sequence.hpp"
 #include "listvector.hpp"
+#include "counttable.h"
 
 //**********************************************************************************************************************
 vector<string> RemoveSeqsCommand::setParameters(){     
@@ -71,7 +72,7 @@ string RemoveSeqsCommand::getOutputFileNameTag(string type, string inputName="")
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "qfile")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "alignreport") {   outputFileName =  "pick.align.report";                   }
-            else if (type == "count")       {   outputFileName =  "pick.count.table";   }
+            else if (type == "count")       {   outputFileName =  "pick.count_table";   }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
         return outputFileName;
@@ -538,6 +539,14 @@ int RemoveSeqsCommand::readCount(){
         }
         in.close();
                out.close();
+        
+        //check for groups that have been eliminated
+        CountTable ct;
+        if (ct.testGroups(outputFileName)) {
+            ct.readTable(outputFileName);
+            ct.printTable(outputFileName);
+        }
+
                
                if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
                outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
index 6a9a61323b50a4ea688ba9faa7ea855a233bbab9..312f475cabde7ba0d9c23f8f2231dba32f2f6963 100644 (file)
@@ -8,14 +8,15 @@
  */
 
 #include "screenseqscommand.h"
-
+#include "counttable.h"
 
 //**********************************************************************************************************************
 vector<string> ScreenSeqsCommand::setParameters(){     
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
                CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
                CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(palignreport);
                CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptax);
@@ -44,8 +45,8 @@ vector<string> ScreenSeqsCommand::setParameters(){
 string ScreenSeqsCommand::getHelpString(){     
        try {
                string helpString = "";
-               helpString += "The screen.seqs command reads a fastafile and creates .....\n";
-               helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, qfile, alignreport, taxonomy, optimize, criteria and processors.\n";
+               helpString += "The screen.seqs command reads a fastafile and screens sequences.\n";
+               helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, count, qfile, alignreport, taxonomy, optimize, criteria and processors.\n";
                helpString += "The fasta parameter is required.\n";
                helpString += "The alignreport and taxonomy parameters allow you to remove bad seqs from taxonomy and alignreport files.\n";
                helpString += "The start parameter is used to set a position the \"good\" sequences must start by. The default is -1.\n";
@@ -83,6 +84,7 @@ string ScreenSeqsCommand::getOutputFileNameTag(string type, string inputName="")
             if (type == "fasta")            {   outputFileName =  "good" + m->getExtension(inputName);   }
             else if (type == "taxonomy")    {   outputFileName =  "good" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "good" + m->getExtension(inputName);   }
+            else if (type == "count")        {   outputFileName =  "good" + m->getExtension(inputName);   }
             else if (type == "group")       {   outputFileName =  "good" + m->getExtension(inputName);   }
             else if (type == "accnos")      {   outputFileName =  "bad.accnos";   }
             else if (type == "qfile")       {   outputFileName =  "good" + m->getExtension(inputName);   }
@@ -110,6 +112,7 @@ ScreenSeqsCommand::ScreenSeqsCommand(){
                outputTypes["accnos"] = tempOutNames;
                outputTypes["qfile"] = tempOutNames;
                outputTypes["taxonomy"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand");
@@ -149,6 +152,7 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
                        outputTypes["accnos"] = tempOutNames;
                        outputTypes["qfile"] = tempOutNames;
                        outputTypes["taxonomy"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -202,6 +206,14 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        //check for required parameters
@@ -229,6 +241,19 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
                        else if (namefile == "not found") { namefile = ""; }    
                        else { m->setNameFile(namefile); }
                        
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { countfile = ""; abort = true; }
+                       else if (countfile == "not found") { countfile = "";  } 
+                       else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+                       
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+            
                        alignreport = validParameter.validFile(parameters, "alignreport", true);
                        if (alignreport == "not open") { abort = true; }
                        else if (alignreport == "not found") { alignreport = ""; }
@@ -288,10 +313,12 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
                        temp = validParameter.validFile(parameters, "criteria", false); if (temp == "not found"){       temp = "90";                            }
                        m->mothurConvert(temp, criteria); 
                        
-                       if (namefile == "") {
-                               vector<string> files; files.push_back(fastafile);
-                               parser.getNameFile(files);
-                       }
+                       if (countfile == "") { 
+                if (namefile == "") {
+                    vector<string> files; files.push_back(fastafile);
+                    parser.getNameFile(files);
+                }
+            }
                }
 
        }
@@ -312,6 +339,11 @@ int ScreenSeqsCommand::execute(){
                if (optimize.size() != 0) {  //get summary is paralellized so we need to divideFile, no need to do this step twice so I moved it here
                        //use the namefile to optimize correctly
                        if (namefile != "") { nameMap = m->readNames(namefile); }
+            else if (countfile != "") {
+                CountTable ct;
+                ct.readTable(countfile);
+                nameMap = ct.getNameMap();
+            }
                        getSummary(positions); 
                } 
                else { 
@@ -472,7 +504,9 @@ int ScreenSeqsCommand::execute(){
                        screenNameGroupFile(badSeqNames);
                        if (m->control_pressed) {  m->mothurRemove(goodSeqFile);  return 0; }   
                }else if(groupfile != "")                               {       screenGroupFile(badSeqNames);           }       // this screens just the group
-               
+               else if (countfile != "") {     screenCountFile(badSeqNames);           }
+            
+                
                if (m->control_pressed) { m->mothurRemove(goodSeqFile);  return 0; }
 
                if(alignreport != "")                                   {       screenAlignReport(badSeqNames);         }
@@ -519,6 +553,11 @@ int ScreenSeqsCommand::execute(){
                if (itTypes != outputTypes.end()) {
                        if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
                }
+        
+        itTypes = outputTypes.find("count");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+               }
 
                m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
                m->mothurOutEndLine();
@@ -962,7 +1001,69 @@ int ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
                exit(1);
        }
 }
+//***************************************************************************************************************
+int ScreenSeqsCommand::screenCountFile(set<string> badSeqNames){
+       try {
+               ifstream in;
+               m->openInputFile(countfile, in);
+               set<string>::iterator it;
+               
+               string goodCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+        outputNames.push_back(goodCountFile);  outputTypes["count"].push_back(goodCountFile);
+               ofstream goodCountOut;  m->openOutputFile(goodCountFile, goodCountOut);
+               
+        string headers = m->getline(in); m->gobble(in);
+        goodCountOut << headers << endl;
+        
+        string name, rest; int thisTotal;
+        while (!in.eof()) {
 
+                       if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; }
+            
+                       in >> name; m->gobble(in); 
+            in >> thisTotal; m->gobble(in);
+            rest = m->getline(in); m->gobble(in);
+            
+                       it = badSeqNames.find(name);
+                       
+                       if(it != badSeqNames.end()){
+                               badSeqNames.erase(it);
+                       }
+                       else{
+                               goodCountOut << name << '\t' << thisTotal << '\t' << rest << endl;
+                       }
+               }
+               
+               if (m->control_pressed) { goodCountOut.close();  in.close(); m->mothurRemove(goodCountFile);  return 0; }
+        
+               //we were unable to remove some of the bad sequences
+               if (badSeqNames.size() != 0) {
+                       for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
+                               m->mothurOut("Your groupfile does not include the sequence " + *it + " please correct."); 
+                               m->mothurOutEndLine();
+                       }
+               }
+               
+               in.close();
+               goodCountOut.close();
+        
+        //check for groups that have been eliminated
+        CountTable ct;
+        if (ct.testGroups(goodCountFile)) {
+            ct.readTable(goodCountFile);
+            ct.printTable(goodCountFile);
+        }
+               
+               if (m->control_pressed) { m->mothurRemove(goodCountFile);   }
+               
+               return 0;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ScreenSeqsCommand", "screenCountFile");
+               exit(1);
+       }
+}
 //***************************************************************************************************************
 
 int ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
index 771113da1bda0ee34859faaa39b2fe7be1bc2672..b0d7c7c18f3852dd40395b0f699749fc885cc14b 100644 (file)
@@ -44,6 +44,7 @@ private:
 
        int screenNameGroupFile(set<string>);
        int screenGroupFile(set<string>);
+    int screenCountFile(set<string>);
        int screenAlignReport(set<string>);
        int screenQual(set<string>);
        int screenTaxonomy(set<string>);
@@ -56,7 +57,7 @@ private:
        #endif
 
        bool abort;
-       string fastafile, namefile, groupfile, alignreport, outputDir, qualfile, taxonomy;
+       string fastafile, namefile, groupfile, alignreport, outputDir, qualfile, taxonomy, countfile;
        int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, criteria;
        vector<string> outputNames;
        vector<string> optimize;
index af3ca665b5791d0001141f27192b78aab171d71a..f3c6cd9e2b7a5e5308bc6624bb4489062f71b2a8 100644 (file)
 #include "splitgroupscommand.h"
 #include "sharedutilities.h"
 #include "sequenceparser.h"
+#include "counttable.h"
 
 //**********************************************************************************************************************
 vector<string> SplitGroupCommand::setParameters(){     
        try {           
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pgroup);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "CountGroup", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "CountGroup", "none",false,false); parameters.push_back(pgroup);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -34,9 +36,9 @@ vector<string> SplitGroupCommand::setParameters(){
 string SplitGroupCommand::getHelpString(){     
        try {
                string helpString = "";
-               helpString += "The split.group command reads a group file, and parses your fasta and names files by groups. \n";
-               helpString += "The split.group command parameters are fasta, name, group and groups.\n";
-               helpString += "The fasta and group parameters are required.\n";
+               helpString += "The split.group command reads a group or count file, and parses your fasta and names or count files by groups. \n";
+               helpString += "The split.group command parameters are fasta, name, group, count and groups.\n";
+               helpString += "The fasta and group or count parameters are required.\n";
                helpString += "The groups parameter allows you to select groups to create files for.  \n";
                helpString += "For example if you set groups=A-B-C, you will get a .A.fasta, .A.names, .B.fasta, .B.names, .C.fasta, .C.names files.  \n";
                helpString += "If you want .fasta and .names files for all groups, set groups=all.  \n";
@@ -62,6 +64,7 @@ string SplitGroupCommand::getOutputFileNameTag(string type, string inputName="")
         else {
             if (type == "fasta")            {   outputFileName =  "fasta";   }
             else if (type == "name")        {   outputFileName =  "names";   }
+            else if (type == "count")        {   outputFileName =  "count_table";   }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
         return outputFileName;
@@ -79,6 +82,7 @@ SplitGroupCommand::SplitGroupCommand(){
                vector<string> tempOutNames;
                outputTypes["fasta"] = tempOutNames;
                outputTypes["name"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "SplitGroupCommand", "SplitGroupCommand");
@@ -112,6 +116,7 @@ SplitGroupCommand::SplitGroupCommand(string option)  {
                        vector<string> tempOutNames;
                        outputTypes["fasta"] = tempOutNames;
                        outputTypes["name"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -141,7 +146,14 @@ SplitGroupCommand::SplitGroupCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
-
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        
@@ -160,23 +172,56 @@ SplitGroupCommand::SplitGroupCommand(string option)  {
                        
                        groupfile = validParameter.validFile(parameters, "group", true);
                        if (groupfile == "not open") {  groupfile = ""; abort = true; } 
-                       else if (groupfile == "not found") {                    
-                               groupfile = m->getGroupFile(); 
-                               if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
-                               else {  m->mothurOut("You have no current groupfile and the group parameter is required."); m->mothurOutEndLine(); abort = true; }
+                       else if (groupfile == "not found") { groupfile = "";
                        }else {  m->setGroupFile(groupfile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { countfile = ""; abort = true; }
+                       else if (countfile == "not found") { countfile = ""; }  
+                       else { m->setCountTableFile(countfile); }
+            
+            if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+            
+            if ((countfile != "") && (groupfile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
+            
+            if ((countfile == "") && (groupfile == "")) {
+                if (namefile == "") { //check for count then group
+                    countfile = m->getCountTableFile(); 
+                                       if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                                       else { 
+                                               groupfile = m->getGroupFile(); 
+                        if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+                        else { 
+                            m->mothurOut("You need to provide a count or group file."); m->mothurOutEndLine(); 
+                            abort = true; 
+                        }      
+                                       }       
+                }else { //check for group
+                    groupfile = m->getGroupFile(); 
+                    if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+                    else { 
+                        m->mothurOut("You need to provide a count or group file."); m->mothurOutEndLine(); 
+                        abort = true; 
+                    }  
+                }
+            }
                        
                        groups = validParameter.validFile(parameters, "groups", false);         
                        if (groups == "not found") { groups = ""; }
                        else { m->splitAtDash(groups, Groups);  }
                                                
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
-                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(groupfile);      }
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                if (groupfile != "") { outputDir = m->hasPath(groupfile); }
+                else { outputDir = m->hasPath(countfile);  }
+            }
                        
-                       if (namefile == "") {
-                               vector<string> files; files.push_back(fastafile);
-                               parser.getNameFile(files);
-                       }
+            if (countfile == "") {
+                if (namefile == "") {
+                    vector<string> files; files.push_back(fastafile);
+                    parser.getNameFile(files);
+                }
+            }
                }
 
        }
@@ -191,13 +236,48 @@ int SplitGroupCommand::execute(){
        
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
-               SequenceParser* parser;
+        if (countfile == "" ) {  runNameGroup();  }
+        else { runCount();  }
+                               
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } return 0; }
+               
+               string current = "";
+               itTypes = outputTypes.find("fasta");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
+               }
+               
+               itTypes = outputTypes.find("name");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
+               }
+        
+        itTypes = outputTypes.find("count");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+               }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
                
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SplitGroupCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SplitGroupCommand::runNameGroup(){
+       try {
+        SequenceParser* parser;
                if (namefile == "") {   parser = new SequenceParser(groupfile, fastafile);                              }
                else                            {       parser = new SequenceParser(groupfile, fastafile, namefile);    }
                
                if (m->control_pressed) { delete parser; return 0; }
-
+        
                vector<string> namesGroups = parser->getNamesOfGroups();
                SharedUtil util;  util.setGroups(Groups, namesGroups);  
                
@@ -215,7 +295,7 @@ int SplitGroupCommand::execute(){
                        parser->getSeqs(Groups[i], newFasta, false);
                        outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta);
                        if (m->control_pressed) { delete parser; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]);        } return 0; }
-
+            
                        if (namefile != "") { 
                                parser->getNameMap(Groups[i], newName); 
                                outputNames.push_back(newName); outputTypes["name"].push_back(newName);
@@ -225,29 +305,77 @@ int SplitGroupCommand::execute(){
                }
                
                delete parser;
-               
-               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } return 0; }
-               
-               string current = "";
-               itTypes = outputTypes.find("fasta");
-               if (itTypes != outputTypes.end()) {
-                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
-               }
-               
-               itTypes = outputTypes.find("name");
-               if (itTypes != outputTypes.end()) {
-                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
-               }
-               
-               m->mothurOutEndLine();
-               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
-               m->mothurOutEndLine();
-               
-               return 0;
+        
+        return 0;
+
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SplitGroupCommand", "runNameGroup");
+               exit(1);
        }
+}
+//**********************************************************************************************************************
+int SplitGroupCommand::runCount(){
+       try {
+        
+        CountTable ct;
+        ct.readTable(countfile);
+        if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, cannot split by group.\n"); m->control_pressed = true; }
+        
+        if (m->control_pressed) { return 0; }
+        
+        vector<string> namesGroups = ct.getNamesOfGroups();
+        SharedUtil util;  util.setGroups(Groups, namesGroups); 
+        
+        //fill filehandles with neccessary ofstreams
+        map<string, ofstream*> ffiles;
+        map<string, ofstream*> cfiles;
+        ofstream* temp;
+        for (int i=0; i<Groups.size(); i++) {
+            temp = new ofstream;
+            ffiles[Groups[i]] = temp;
+            string newFasta = outputDir + m->getRootName(m->getSimpleName(fastafile)) + Groups[i] + "." + getOutputFileNameTag("fasta");
+            outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta);
+            m->openOutputFile(newFasta, (*temp));
+            temp = new ofstream;
+            cfiles[Groups[i]] = temp;
+            string newCount = outputDir + m->getRootName(m->getSimpleName(countfile)) + Groups[i] + "." + getOutputFileNameTag("count");
+            m->openOutputFile(newCount, (*temp));
+            outputNames.push_back(newCount); outputTypes["count"].push_back(newCount);
+            (*temp) << "Representative_Sequence\ttotal\t" << Groups[i] << endl;
+        }
+        
+        ifstream in; 
+        m->openInputFile(fastafile, in);
+        
+        while (!in.eof()) {
+            Sequence seq(in); m->gobble(in);
+            
+            if (m->control_pressed) { break; }
+            if (seq.getName() != "") {
+                vector<string> thisSeqsGroups = ct.getGroups(seq.getName());
+                for (int i = 0; i < thisSeqsGroups.size(); i++) {
+                    if (m->inUsersGroups(thisSeqsGroups[i], Groups)) { //if this sequence belongs to a group we want them print
+                        seq.printSequence(*(ffiles[thisSeqsGroups[i]]));
+                        int numSeqs = ct.getGroupCount(seq.getName(), Groups[i]);
+                        (*(cfiles[thisSeqsGroups[i]])) << seq.getName() << '\t' << numSeqs << '\t' << numSeqs << endl;
+                    }
+                }
+            }
+        }
+        in.close();
+        
+        //close and delete ofstreams
+        for (int i=0; i<Groups.size(); i++) {  
+            (*ffiles[Groups[i]]).close(); delete ffiles[Groups[i]];
+            (*cfiles[Groups[i]]).close(); delete cfiles[Groups[i]];
+        }
+        
+        return 0;
+
+    }
        catch(exception& e) {
-               m->errorOut(e, "SplitGroupCommand", "execute");
+               m->errorOut(e, "SplitGroupCommand", "runCount");
                exit(1);
        }
 }
index a8dc9a19e656651be58416c99e802e1b98b72a56..62e063d8a0fe93c3942959527cde646996e3430d 100644 (file)
@@ -42,9 +42,12 @@ public:
 private:
        vector<string> outputNames;
                
-       string outputDir, namefile, groupfile, groups, fastafile;
+       string outputDir, namefile, groupfile, countfile, groups, fastafile;
        vector<string> Groups;
        bool abort;
+    
+    int runNameGroup();
+    int runCount();
 };
 
 /***************************************************************************************/
index fb4887c73da88c0c631895e82f05090e97645761..bba6289ada54856c25d683126b0d181badab4c15 100644 (file)
@@ -16,8 +16,9 @@ vector<string> TreeGroupCommand::setParameters(){
        try {
                CommandParameter pshared("shared", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pshared);
                CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pphylip);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
-               CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName",false,false); parameters.push_back(pcolumn);  
+               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName",false,false); parameters.push_back(pname);
+               CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "countcolumn",false,false); parameters.push_back(pcount);
+        CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName-countcolumn",false,false); parameters.push_back(pcolumn);             
         CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
         CommandParameter psubsample("subsample", "String", "", "", "", "", "",false,false); parameters.push_back(psubsample);
         CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
@@ -160,6 +161,14 @@ TreeGroupCommand::TreeGroupCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
                        
                        //check for required parameters
@@ -182,6 +191,11 @@ TreeGroupCommand::TreeGroupCommand(string option)  {
                        if (namefile == "not open") { abort = true; }   
                        else if (namefile == "not found") { namefile = ""; }
                        else { m->setNameFile(namefile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; countfile = ""; }  
+                       else if (countfile == "not found") { countfile = ""; }
+                       else { m->setCountTableFile(countfile); }
                        
                        if ((phylipfile == "") && (columnfile == "") && (sharedfile == "")) { 
                                //is there are current file available for either of these?
@@ -204,15 +218,20 @@ TreeGroupCommand::TreeGroupCommand(string option)  {
                        else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When running the tree.shared command with a distance file you may not use both the column and the phylip parameters."); m->mothurOutEndLine(); abort = true; }
                        
                        if (columnfile != "") {
-                               if (namefile == "") 
+                               if ((namefile == "") && (countfile == ""))
                                        namefile = m->getNameFile(); 
                                        if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
                                        else { 
-                                               m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); 
-                                               abort = true; 
+                                               countfile = m->getCountTableFile();
+                        if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                        else { 
+                            m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format."); m->mothurOutEndLine(); 
+                            abort = true; 
+                        }      
                                        }       
                                }
                        }
+
                        
                        //check for optional parameter and set defaults
                        // ...at some point should added some additional type checking...
@@ -433,17 +452,22 @@ int TreeGroupCommand::execute(){
                                
                        readMatrix->setCutoff(cutoff);
        
-                       if(namefile != ""){     
-                               nameMap = new NameAssignment(namefile);
-                               nameMap->readMap();
-                       }
-                       else{ nameMap = NULL; }
-       
-                       readMatrix->read(nameMap);
+            ct = NULL;
+            if(namefile != ""){        
+                nameMap = new NameAssignment(namefile);
+                nameMap->readMap();
+                readMatrix->read(nameMap);
+            }else if (countfile != "") {
+                ct = new CountTable();
+                ct->readTable(countfile);
+                readMatrix->read(ct);
+            }
+
                        list = readMatrix->getListVector();
                        SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix();
 
                        //make treemap
+            if (ct != NULL) { delete ct; }
                        ct = new CountTable();
             set<string> nameMap;
             map<string, string> groupMap;
@@ -461,7 +485,7 @@ int TreeGroupCommand::execute(){
                
                        //clear globaldatas old tree names if any
                        m->Treenames.clear();
-               
+            
                        //fills globaldatas tree names
                        m->Treenames = m->getGroups();
                        
index 7342b36584ce76b3b22a96d0e3ba885cb2ab172a..b29670ae614205a7ee286116379ddda86f0e7cf9 100644 (file)
@@ -110,7 +110,7 @@ private:
        vector<Calculator*> treeCalculators;
        vector<SharedRAbundVector*> lookup;
        string lastLabel;
-       string format, groupNames, filename, sharedfile, inputfile;
+       string format, groupNames, filename, sharedfile, countfile, inputfile;
        int numGroups, subsampleSize, iters, processors;
        ofstream out;
        float precision, cutoff;
index 06d05d4fbad15cbc111609d42e5d825b3bcc0f88..47b7cf343635e2d104db75114aa77fbe8007c466 100644 (file)
@@ -246,26 +246,6 @@ string TreeMap::getGroup(string sequenceName) {
                return "not found";
        }
                
-}
-/************************************************************
-void TreeMap::setIndex(string seq, int index) {
-       it = treemap.find(seq);
-       if (it != treemap.end()) { //sequence name was in group file
-               treemap[seq].vectorIndex = index;       
-       }else {
-               treemap[seq].vectorIndex = index;
-               treemap[seq].groupname = "not found";
-       }
-}
-/************************************************************
-int TreeMap::getIndex(string seq) {
-       
-       it = treemap.find(seq);
-       // if it is a valid sequence name then return index
-       if (it != treemap.end()) { return treemap[seq].vectorIndex; }
-       // if not return error code
-       else { return -1; }
-       
 }
 /************************************************************/
 
index 29b8733721a2b710468f960ac474c99734a181df..bbb0b367a59de85b94e6dd9e561b697d37832b00 100644 (file)
@@ -114,7 +114,7 @@ string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
             else if (type == "fasta")            {   outputFileName =  "fasta";   }
             else if (type == "group")            {   outputFileName =  "groups";   }
             else if (type == "name")            {   outputFileName =  "names";   }
-            else if (type == "count")            {   outputFileName =  "count.table";   }
+            else if (type == "count")            {   outputFileName =  "count_table";   }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
         return outputFileName;