git.donarmstrong.com Git - mothur.git/commitdiff
added count file to trim.seqs, get.groups, get.lineage, get.seqs, heatmap.sim, list...
author Sarah Westcott <mothur.westcott@gmail.com>
Fri, 14 Sep 2012 19:17:37 +0000 (15:17 -0400)
committer Sarah Westcott <mothur.westcott@gmail.com>
Fri, 14 Sep 2012 19:17:37 +0000 (15:17 -0400)
26 files changed:
classify.cpp
counttable.cpp
counttable.h
getgroupscommand.cpp
getlineagecommand.cpp
getlineagecommand.h
getseqscommand.cpp
getseqscommand.h
heatmapsimcommand.cpp
heatmapsimcommand.h
listseqscommand.cpp
listseqscommand.h
makefile
removegroupscommand.cpp
removelineagecommand.cpp
removelineagecommand.h
removeseqscommand.cpp
removeseqscommand.h
secondarystructurecommand.cpp
secondarystructurecommand.h
seqsummarycommand.cpp
seqsummarycommand.h
summaryqualcommand.cpp
summaryqualcommand.h
trimseqscommand.cpp
trimseqscommand.h

index ace89b9cbe9a4c0350ee14396b70340b1b7bc6b3..459c90f9ddd769628b0fc9061e7bd504a88b2900 100644 (file)
@@ -61,7 +61,8 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                        names.push_back(temp.getName());
                                        database->addSequence(temp);    
                                }
                                        names.push_back(temp.getName());
                                        database->addSequence(temp);    
                                }
-                               database->generateDB();
+                               if ((method == "kmer") && (!shortcuts)) {;} //don't print
+                else {database->generateDB(); }
                        }else if ((method == "kmer") && (!needToGenerate)) {    
                                ifstream kmerFileTest(kmerDBName.c_str());
                                database->readKmerDB(kmerFileTest);     
                        }else if ((method == "kmer") && (!needToGenerate)) {    
                                ifstream kmerFileTest(kmerDBName.c_str());
                                database->readKmerDB(kmerFileTest);     
@@ -200,7 +201,8 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                }
                                fastaFile.close();
 
                                }
                                fastaFile.close();
 
-                database->generateDB(); 
+                if ((method == "kmer") && (!shortcuts)) {;} //don't print
+                else {database->generateDB(); } 
                                
                        }else if ((method == "kmer") && (!needToGenerate)) {    
                                ifstream kmerFileTest(kmerDBName.c_str());
                                
                        }else if ((method == "kmer") && (!needToGenerate)) {    
                                ifstream kmerFileTest(kmerDBName.c_str());
index bc9d4da05966b6b3fc3ec5fe0e751c21be7f180f..cd623ecbcf5fbea1cdd25fa8c9c2b872746da162 100644 (file)
@@ -661,6 +661,22 @@ vector<string> CountTable::getNamesOfSeqs() {
        }
 }
 /************************************************************/
        }
 }
 /************************************************************/
+//returns a map holding, for every key of indexNameMap, the entry of totals at that key's stored index (original note: names of all unique sequences in file mapped to their seqCounts)
+map<string, int> CountTable::getNameMap() {
+    try {
+        map<string, int> names; // result is built locally and returned by value
+        for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
+            names[it->first] = totals[it->second]; // it->second is used as an index into totals - presumably the sequence's row; confirm against the code that fills indexNameMap
+        }
+        
+        return names;
+    }
+       catch(exception& e) { // any std::exception is reported through m->errorOut and the process exits with status 1
+               m->errorOut(e, "CountTable", "getNameMap");
+               exit(1);
+       }
+}
+/************************************************************/
 //returns the names of all unique sequences in file
 vector<string> CountTable::getNamesOfSeqs(string group) {
     try {
 //returns the names of all unique sequences in file
 vector<string> CountTable::getNamesOfSeqs(string group) {
     try {
index 68ba8d211cae1a12aa1d39a76f720a0998a87ad9..b66c4c6ef5e90007025935c4f0820cedb11c624d 100644 (file)
@@ -83,6 +83,7 @@ class CountTable {
         vector<string> getNamesOfSeqs(string);
         int mergeCounts(string, string); //combines counts for 2 seqs, saving under the first name passed in.
         ListVector getListVector();
         vector<string> getNamesOfSeqs(string);
         int mergeCounts(string, string); //combines counts for 2 seqs, saving under the first name passed in.
         ListVector getListVector();
+        map<string, int> getNameMap();
     
     private:
         string filename;
     
     private:
         string filename;
index 7585c122eeb3a212fab404240f5b6f6568574634..910a8720bc3ec88079d2663ebf819408436ca02a 100644 (file)
@@ -72,7 +72,7 @@ string GetGroupsCommand::getOutputFileNameTag(string type, string inputName=""){
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
-            else if (type == "count")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
+            else if (type == "count")       {   outputFileName =  "pick.count.table";   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "shared")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "design")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "shared")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "design")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
index 1aba0fed4e6e772de07718a544335b6e6bb58db9..1cd139b5213733e759a7a32a1fbdf8221d09dfb6 100644 (file)
@@ -15,8 +15,9 @@
 vector<string> GetLineageCommand::setParameters(){     
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
 vector<string> GetLineageCommand::setParameters(){     
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
                CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(ptaxonomy);
                CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
                CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(ptaxonomy);
                CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
@@ -38,9 +39,9 @@ vector<string> GetLineageCommand::setParameters(){
 string GetLineageCommand::getHelpString(){     
        try {
                string helpString = "";
 string GetLineageCommand::getHelpString(){     
        try {
                string helpString = "";
-               helpString += "The get.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, list or alignreport file.\n";
+               helpString += "The get.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, count, list or alignreport file.\n";
                helpString += "It outputs a file containing only the sequences from the taxonomy file that are from the taxon requested.\n";
                helpString += "It outputs a file containing only the sequences from the taxonomy file that are from the taxon requested.\n";
-               helpString += "The get.lineage command parameters are taxon, fasta, name, group, list, taxonomy, alignreport and dups.  You must provide taxonomy unless you have a valid current taxonomy file.\n";
+               helpString += "The get.lineage command parameters are taxon, fasta, name, group, count, list, taxonomy, alignreport and dups.  You must provide taxonomy unless you have a valid current taxonomy file.\n";
                helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
                helpString += "The taxon parameter allows you to select the taxons you would like to get and is required.\n";
                helpString += "You may enter your taxons with confidence scores, doing so will get only those sequences that belong to the taxonomy and whose cofidence scores is above the scores you give.\n";
                helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
                helpString += "The taxon parameter allows you to select the taxons you would like to get and is required.\n";
                helpString += "You may enter your taxons with confidence scores, doing so will get only those sequences that belong to the taxonomy and whose cofidence scores is above the scores you give.\n";
@@ -70,6 +71,7 @@ string GetLineageCommand::getOutputFileNameTag(string type, string inputName="")
             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
+            else if (type == "count")       {   outputFileName =  "pick.count.table";                    }  
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "alignreport") {   outputFileName =  "pick.align.report";   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "alignreport") {   outputFileName =  "pick.align.report";   }
@@ -94,6 +96,7 @@ GetLineageCommand::GetLineageCommand(){
                outputTypes["group"] = tempOutNames;
                outputTypes["alignreport"] = tempOutNames;
                outputTypes["list"] = tempOutNames;
                outputTypes["group"] = tempOutNames;
                outputTypes["alignreport"] = tempOutNames;
                outputTypes["list"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
        }
        catch(exception& e) {
                m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
@@ -131,6 +134,7 @@ GetLineageCommand::GetLineageCommand(string option)  {
                        outputTypes["group"] = tempOutNames;
                        outputTypes["alignreport"] = tempOutNames;
                        outputTypes["list"] = tempOutNames;
                        outputTypes["group"] = tempOutNames;
                        outputTypes["alignreport"] = tempOutNames;
                        outputTypes["list"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
 
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
 
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
@@ -187,6 +191,14 @@ GetLineageCommand::GetLineageCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                }
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        
                        }
 
                        
@@ -230,6 +242,19 @@ GetLineageCommand::GetLineageCommand(string option)  {
                                else                            {  temp = "false"; usedDups = "";       }
                        }
                        dups = m->isTrue(temp);
                                else                            {  temp = "false"; usedDups = "";       }
                        }
                        dups = m->isTrue(temp);
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+            if (countfile == "not open") { countfile = ""; abort = true; }
+            else if (countfile == "not found") { countfile = "";  }    
+            else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+            
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
                        
                        taxons = validParameter.validFile(parameters, "taxon", false);  
                        if (taxons == "not found") { taxons = "";  m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine();  abort = true;  }
                        
                        taxons = validParameter.validFile(parameters, "taxon", false);  
                        if (taxons == "not found") { taxons = "";  m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine();  abort = true;  }
@@ -240,12 +265,14 @@ GetLineageCommand::GetLineageCommand(string option)  {
                        }
                        m->splitAtChar(taxons, listOfTaxons, '-');
                        
                        }
                        m->splitAtChar(taxons, listOfTaxons, '-');
                        
-                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
+                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
                
                
-                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
-                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
-                               parser.getNameFile(files);
-                       }
+            if (countfile == "") {
+                if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                    vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                    parser.getNameFile(files);
+                }
+            }
                }
 
        }
                }
 
        }
@@ -262,11 +289,18 @@ int GetLineageCommand::execute(){
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
                if (m->control_pressed) { return 0; }
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
                if (m->control_pressed) { return 0; }
+        
+        if (countfile != "") {
+            if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
+                m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
+            }
+        }
                
                //read through the correct file and output lines you want to keep
                if (taxfile != "")                      {               readTax();              }  //fills the set of names to get
                if (namefile != "")                     {               readName();             }
                if (fastafile != "")            {               readFasta();    }
                
                //read through the correct file and output lines you want to keep
                if (taxfile != "")                      {               readTax();              }  //fills the set of names to get
                if (namefile != "")                     {               readName();             }
                if (fastafile != "")            {               readFasta();    }
+        if (countfile != "")           {               readCount();    }
                if (groupfile != "")            {               readGroup();    }
                if (alignfile != "")            {               readAlign();    }
                if (listfile != "")                     {               readList();             }
                if (groupfile != "")            {               readGroup();    }
                if (alignfile != "")            {               readAlign();    }
                if (listfile != "")                     {               readList();             }
@@ -305,7 +339,12 @@ int GetLineageCommand::execute(){
                        itTypes = outputTypes.find("taxonomy");
                        if (itTypes != outputTypes.end()) {
                                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
                        itTypes = outputTypes.find("taxonomy");
                        if (itTypes != outputTypes.end()) {
                                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
-                       }                       
+                       }
+                       
+            itTypes = outputTypes.find("count");
+                       if (itTypes != outputTypes.end()) {
+                               if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+                       }
                }
                
                return 0;               
                }
                
                return 0;               
@@ -353,7 +392,7 @@ int GetLineageCommand::readFasta(){
                in.close();     
                out.close();
                
                in.close();     
                out.close();
                
-               if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
+               if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
                outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName);
                
                return 0;
                outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName);
                
                return 0;
@@ -365,6 +404,52 @@ int GetLineageCommand::readFasta(){
        }
 }
 //**********************************************************************************************************************
        }
 }
 //**********************************************************************************************************************
+int GetLineageCommand::readCount(){ // copies the header line and every row of countfile whose first field is in names to a "pick.count.table" output file; returns 0
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  } // no outputdir set: use the path portion of countfile instead
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+               // open the filtered output file and the input count file
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               ifstream in;
+               m->openInputFile(countfile, in);
+               
+               bool wroteSomething = false; // set once at least one row has been written
+               // the first line of the count file is treated as the header and copied through unchanged
+        string headers = m->getline(in); m->gobble(in);
+        out << headers << endl;
+        
+        string name, rest; int thisTotal;
+        while (!in.eof()) { // NOTE(review): only eof is tested; if extracting thisTotal fails on a malformed row the stream may never reach eof - confirm this cannot loop forever
+            // user interrupt: close both streams, delete the partial output file and return early
+            if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+            // each row is read as: name, integer total, then the remainder of the line
+            in >> name; m->gobble(in); 
+            in >> thisTotal; m->gobble(in);
+            rest = m->getline(in); m->gobble(in);
+            if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); } // debug output prints name and rest only; thisTotal is not included
+            
+            if (names.count(name) != 0) { // keep the row only when its name is in the names set
+                out << name << '\t' << thisTotal << '\t' << rest << endl;
+                wroteSomething = true;
+            }
+        }
+        in.close();
+               out.close();
+               
+               if (wroteSomething == false) {  m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  } // NOTE(review): message reads "contains does not contain" - likely a typo; left unchanged here
+               outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName); // the output file is recorded even when no row matched
+                      
+               return 0;
+       }
+       catch(exception& e) { // any std::exception is reported through m->errorOut and the process exits with status 1
+               m->errorOut(e, "GetLineageCommand", "readCount");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 int GetLineageCommand::readList(){
        try {
                string thisOutputDir = outputDir;
 int GetLineageCommand::readList(){
        try {
                string thisOutputDir = outputDir;
@@ -425,7 +510,7 @@ int GetLineageCommand::readList(){
                in.close();     
                out.close();
                
                in.close();     
                out.close();
                
-               if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
+               if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
                outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
                
                return 0;
                outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
                
                return 0;
@@ -510,7 +595,7 @@ int GetLineageCommand::readName(){
                in.close();
                out.close();
                
                in.close();
                out.close();
                
-               if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
+               if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
                outputNames.push_back(outputFileName);  outputTypes["name"].push_back(outputFileName);
                
                return 0;
                outputNames.push_back(outputFileName);  outputTypes["name"].push_back(outputFileName);
                
                return 0;
@@ -558,7 +643,7 @@ int GetLineageCommand::readGroup(){
                in.close();
                out.close();
                
                in.close();
                out.close();
                
-               if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
+               if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
                outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
                
                return 0;
                outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
                
                return 0;
@@ -814,7 +899,7 @@ int GetLineageCommand::readAlign(){
                in.close();
                out.close();
                
                in.close();
                out.close();
                
-               if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
+               if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
                outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
                
                return 0;
                outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
                
                return 0;
index 0ab042bac365b64840e32212a6705f62d46d5ff1..99bc0fa347313bb023e8bd1b41a5c3e2be5c93ed 100644 (file)
@@ -36,11 +36,12 @@ class GetLineageCommand : public Command {
        private:
                set<string> names;
                vector<string> outputNames, listOfTaxons;
        private:
                set<string> names;
                vector<string> outputNames, listOfTaxons;
-               string fastafile, namefile, groupfile, alignfile, listfile, taxfile, outputDir, taxons;
+               string fastafile, namefile, groupfile, alignfile, countfile, listfile, taxfile, outputDir, taxons;
                bool abort, dups;
                
                int readFasta();
                int readName();
                bool abort, dups;
                
                int readFasta();
                int readName();
+        int readCount();
                int readGroup();
                int readAlign();
                int readList();
                int readGroup();
                int readAlign();
                int readList();
index ccabafb6d19c008eb6957a35e79b5cd0623021d0..e0faef4cc07bd3fc6626fe186ec67fe05c512023 100644 (file)
@@ -15,8 +15,9 @@
 vector<string> GetSeqsCommand::setParameters(){        
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
 vector<string> GetSeqsCommand::setParameters(){        
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
                CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
                CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
                CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
                CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
@@ -40,7 +41,7 @@ vector<string> GetSeqsCommand::setParameters(){
 string GetSeqsCommand::getHelpString(){        
        try {
                string helpString = "";
 string GetSeqsCommand::getHelpString(){        
        try {
                string helpString = "";
-               helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n";
+               helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n";
                helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
                helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
                helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
                helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
                helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
                helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
@@ -68,6 +69,7 @@ GetSeqsCommand::GetSeqsCommand(){
                outputTypes["alignreport"] = tempOutNames;
                outputTypes["list"] = tempOutNames;
                outputTypes["qfile"] = tempOutNames;
                outputTypes["alignreport"] = tempOutNames;
                outputTypes["list"] = tempOutNames;
                outputTypes["qfile"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
                outputTypes["accnosreport"] = tempOutNames;
        }
        catch(exception& e) {
                outputTypes["accnosreport"] = tempOutNames;
        }
        catch(exception& e) {
@@ -88,6 +90,7 @@ string GetSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
+            else if (type == "count")       {   outputFileName =  "pick.count.table";   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "qfile")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "qfile")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
@@ -135,6 +138,7 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                        outputTypes["list"] = tempOutNames;
                        outputTypes["qfile"] = tempOutNames;
                        outputTypes["accnosreport"] = tempOutNames;
                        outputTypes["list"] = tempOutNames;
                        outputTypes["qfile"] = tempOutNames;
                        outputTypes["accnosreport"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
@@ -215,6 +219,14 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
                                }
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        
                        }
 
                        
@@ -270,17 +282,32 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                        if (accnosfile2 == "not open") { abort = true; }
                        else if (accnosfile2 == "not found") {  accnosfile2 = "";  }
                        
                        if (accnosfile2 == "not open") { abort = true; }
                        else if (accnosfile2 == "not found") {  accnosfile2 = "";  }
                        
+            countfile = validParameter.validFile(parameters, "count", true);
+            if (countfile == "not open") { countfile = ""; abort = true; }
+            else if (countfile == "not found") { countfile = "";  }    
+            else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+            
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+
                        
                        string usedDups = "true";
                        string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "true"; usedDups = ""; }
                        dups = m->isTrue(temp);
                        
                        
                        string usedDups = "true";
                        string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "true"; usedDups = ""; }
                        dups = m->isTrue(temp);
                        
-                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
-               
-                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
-                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
-                               parser.getNameFile(files);
-                       }
+                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
+            
+            if (countfile == "") {
+                if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                    vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                    parser.getNameFile(files);
+                }
+            }
                }
 
        }
                }
 
        }
@@ -300,11 +327,18 @@ int GetSeqsCommand::execute(){
                names = m->readAccnos(accnosfile);
                
                if (m->control_pressed) { return 0; }
                names = m->readAccnos(accnosfile);
                
                if (m->control_pressed) { return 0; }
+        
+        if (countfile != "") {
+            if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
+                m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
+            }
+        }
                
                //read through the correct file and output lines you want to keep
                if (namefile != "")                     {               readName();                     }
                if (fastafile != "")            {               readFasta();            }
                if (groupfile != "")            {               readGroup();            }
                
                //read through the correct file and output lines you want to keep
                if (namefile != "")                     {               readName();                     }
                if (fastafile != "")            {               readFasta();            }
                if (groupfile != "")            {               readGroup();            }
+        if (countfile != "")           {               readCount();            }
                if (alignfile != "")            {               readAlign();            }
                if (listfile != "")                     {               readList();                     }
                if (taxfile != "")                      {               readTax();                      }
                if (alignfile != "")            {               readAlign();            }
                if (listfile != "")                     {               readList();                     }
                if (taxfile != "")                      {               readTax();                      }
@@ -354,6 +388,10 @@ int GetSeqsCommand::execute(){
                                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
                        }
                        
                                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
                        }
                        
+            itTypes = outputTypes.find("count");
+                       if (itTypes != outputTypes.end()) {
+                               if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+                       }
                }
                
                return 0;               
                }
                
                return 0;               
@@ -493,6 +531,57 @@ int GetSeqsCommand::readQual(){
                exit(1);
        }
 }
                exit(1);
        }
 }
+//**********************************************************************************************************************
+int GetSeqsCommand::readCount(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               ifstream in;
+               m->openInputFile(countfile, in);
+               
+               bool wroteSomething = false;
+               int selectedCount = 0;
+               
+        string headers = m->getline(in); m->gobble(in);
+        out << headers << endl;
+        
+        string name, rest; int thisTotal;
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+            
+            in >> name; m->gobble(in); 
+            in >> thisTotal; m->gobble(in);
+            rest = m->getline(in); m->gobble(in);
+            if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
+            
+            if (names.count(name) != 0) {
+                out << name << '\t' << thisTotal << '\t' << rest << endl;
+                wroteSomething = true;
+                selectedCount+= thisTotal;
+            }
+        }
+        in.close();
+               out.close();
+               
+               if (wroteSomething == false) {  m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
+               outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
+               
+               m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file."); m->mothurOutEndLine();
+        
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetSeqsCommand", "readCount");
+               exit(1);
+       }
+}
+
 //**********************************************************************************************************************
 int GetSeqsCommand::readList(){
        try {
 //**********************************************************************************************************************
 int GetSeqsCommand::readList(){
        try {
index c71b5f2804a71190c6919f0dc80bf80d32f15508..60e471eebf7865d5924406ef7c720c3bdcdf4648 100644 (file)
@@ -35,7 +35,7 @@ class GetSeqsCommand : public Command {
        private:
                set<string> names;
                vector<string> outputNames;
        private:
                set<string> names;
                vector<string> outputNames;
-               string accnosfile, accnosfile2, fastafile, namefile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
+               string accnosfile, accnosfile2, fastafile, namefile, countfile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
                bool abort, dups;
     
         //for debug
                bool abort, dups;
     
         //for debug
@@ -44,6 +44,7 @@ class GetSeqsCommand : public Command {
                int readFasta();
                int readName();
                int readGroup();
                int readFasta();
                int readName();
                int readGroup();
+        int readCount();
                int readAlign();
                int readList();
                int readTax();
                int readAlign();
                int readList();
                int readTax();
index 3de10e6a8cd9411d6829ac699ffd11991af81007..8a4a12bc70f2d707fb923e74036043f12c1dcd3f 100644 (file)
@@ -25,7 +25,8 @@ vector<string> HeatMapSimCommand::setParameters(){
        try {
                CommandParameter pshared("shared", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pshared);        
                CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pphylip);
        try {
                CommandParameter pshared("shared", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pshared);        
                CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pphylip);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
                CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName",false,false); parameters.push_back(pcolumn);          
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName",false,false); parameters.push_back(pcolumn);          
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
@@ -48,9 +49,8 @@ string HeatMapSimCommand::getHelpString(){
        try {
                string helpString = "";
                ValidCalculators validCalculator;
        try {
                string helpString = "";
                ValidCalculators validCalculator;
-               helpString += "The heatmap.sim command parameters are shared, phylip, column, name, groups, calc, fontsize and label.  shared or phylip or column and name are required unless valid current files exist.\n";
-               helpString += "There are two ways to use the heatmap.sim command. The first is with the read.otu command. \n";
-               helpString += "With the read.otu command you may use the groups, label and calc parameters. \n";
+               helpString += "The heatmap.sim command parameters are shared, phylip, column, name, count, groups, calc, fontsize and label.  shared or phylip or column and name are required unless valid current files exist.\n";
+               helpString += "There are two ways to use the heatmap.sim command. The first is with a shared file, and you may use the groups, label and calc parameter. \n";
                helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your heatmap.\n";
                helpString += "The group names are separated by dashes. The label parameter allows you to select what distance levels you would like a heatmap created for, and is also separated by dashes.\n";
                helpString += "The fontsize parameter allows you to adjust the font size of the picture created, default=24.\n";
                helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your heatmap.\n";
                helpString += "The group names are separated by dashes. The label parameter allows you to select what distance levels you would like a heatmap created for, and is also separated by dashes.\n";
                helpString += "The fontsize parameter allows you to adjust the font size of the picture created, default=24.\n";
@@ -174,6 +174,14 @@ HeatMapSimCommand::HeatMapSimCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["shared"] = inputDir + it->second;           }
                                }
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["shared"] = inputDir + it->second;           }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        //required parameters
                        }
 
                        //required parameters
@@ -197,6 +205,12 @@ HeatMapSimCommand::HeatMapSimCommand(string option)  {
                        else if (namefile == "not found") { namefile = ""; }
                        else { m->setNameFile(namefile); }
                        
                        else if (namefile == "not found") { namefile = ""; }
                        else { m->setNameFile(namefile); }
                        
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; countfile = ""; }  
+                       else if (countfile == "not found") { countfile = ""; }
+                       else { m->setCountTableFile(countfile); }
+                       
+            if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
                        
                        //error checking on files                       
                        if ((sharedfile == "") && ((phylipfile == "") && (columnfile == "")))   { 
                        
                        //error checking on files                       
                        if ((sharedfile == "") && ((phylipfile == "") && (columnfile == "")))   { 
@@ -224,8 +238,12 @@ HeatMapSimCommand::HeatMapSimCommand(string option)  {
                                        namefile = m->getNameFile(); 
                                        if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
                                        else { 
                                        namefile = m->getNameFile(); 
                                        if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
                                        else { 
-                                               m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); 
-                                               abort = true; 
+                        countfile = m->getCountTableFile(); 
+                                               if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                        else { 
+                            m->mothurOut("You need to provide a name or count file if you are going to use the column format."); m->mothurOutEndLine(); 
+                            abort = true; 
+                        }      
                                        }       
                                }
                        }
                                        }       
                                }
                        }
@@ -520,20 +538,28 @@ int HeatMapSimCommand::runCommandDist() {
                        in.close();
                }else {
                        //read names file
                        in.close();
                }else {
                        //read names file
-                       NameAssignment* nameMap = new NameAssignment(namefile);
-                       nameMap->readMap();
-                       
-                       //put names in order in vector
-                       for (int i = 0; i < nameMap->size(); i++) {
-                               names.push_back(nameMap->get(i));
-                       }
-                       
-                       //resize matrix
-                       matrix.resize(nameMap->size());
-                       for (int i = 0; i < nameMap->size(); i++) {
-                               matrix[i].resize(nameMap->size(), 0.0);
-                       }
-                       
+                       NameAssignment* nameMap;
+            CountTable ct; 
+            if (namefile != "") { 
+                nameMap = new NameAssignment(namefile);
+                nameMap->readMap();
+                
+                //put names in order in vector
+                for (int i = 0; i < nameMap->size(); i++) {
+                    names.push_back(nameMap->get(i));
+                }
+             }else if (countfile != "") {
+                nameMap = NULL;
+                ct.readTable(countfile);
+                names = ct.getNamesOfSeqs();
+            }
+                       
+            //resize matrix
+            matrix.resize(names.size());
+            for (int i = 0; i < names.size(); i++) {
+                matrix[i].resize(names.size(), 0.0);
+            }
+                                               
                        //read column file
                        string first, second;
                        double dist;
                        //read column file
                        string first, second;
                        double dist;
@@ -544,19 +570,26 @@ int HeatMapSimCommand::runCommandDist() {
                                
                                if (m->control_pressed) { return 0; }
                                
                                
                                if (m->control_pressed) { return 0; }
                                
-                               map<string, int>::iterator itA = nameMap->find(first);
-                               map<string, int>::iterator itB = nameMap->find(second);
-                               
-                               if(itA == nameMap->end()){  m->mothurOut("AAError: Sequence '" + first + "' was not found in the names file, please correct\n"); exit(1);  }
-                               if(itB == nameMap->end()){  m->mothurOut("ABError: Sequence '" + second + "' was not found in the names file, please correct\n"); exit(1);  }
-                               
-                               //save distance
-                               matrix[itA->second][itB->second] = dist;
-                               matrix[itB->second][itA->second] = dist;
+                if (namefile != "") {
+                    map<string, int>::iterator itA = nameMap->find(first);
+                    map<string, int>::iterator itB = nameMap->find(second);
+                    
+                    if(itA == nameMap->end()){  m->mothurOut("AAError: Sequence '" + first + "' was not found in the names file, please correct\n"); exit(1);  }
+                    if(itB == nameMap->end()){  m->mothurOut("ABError: Sequence '" + second + "' was not found in the names file, please correct\n"); exit(1);  }
+                    
+                    //save distance
+                    matrix[itA->second][itB->second] = dist;
+                    matrix[itB->second][itA->second] = dist;
+                }else if (countfile != "") {
+                    int itA = ct.get(first);
+                    int itB = ct.get(second);
+                    matrix[itA][itB] = dist;
+                    matrix[itB][itA] = dist;
+                }
                        }
                        in.close();
                        
                        }
                        in.close();
                        
-                       delete nameMap;
+                       if (namefile != "") { delete nameMap; }
                }
                
                
                }
                
                
index 7b74880955594da0bb863c4b1755cf32c7b429e7..2c3a470761d719af0bff13bc4d194e5164bd42e0 100644 (file)
@@ -43,7 +43,7 @@ private:
        OptionParser* parser;
        bool abort, allLines;
        set<string> labels; //holds labels to be used
        OptionParser* parser;
        bool abort, allLines;
        set<string> labels; //holds labels to be used
-       string format, groups, label, calc, sharedfile, phylipfile, columnfile, namefile, outputDir, inputfile;
+       string format, groups, label, calc, sharedfile, phylipfile, columnfile, countfile, namefile, outputDir, inputfile;
        vector<string> Estimators, Groups, outputNames;
        int fontsize;
        
        vector<string> Estimators, Groups, outputNames;
        int fontsize;
        
index bfbb0788c5092382f22bea643c5405674361dd67..7c3f07f96e092d81c307673192dad29a31fee091 100644 (file)
@@ -10,6 +10,7 @@
 #include "listseqscommand.h"
 #include "sequence.hpp"
 #include "listvector.hpp"
 #include "listseqscommand.h"
 #include "sequence.hpp"
 #include "listvector.hpp"
+#include "counttable.h"
 
 
 //**********************************************************************************************************************
 
 
 //**********************************************************************************************************************
@@ -17,6 +18,7 @@ vector<string> ListSeqsCommand::setParameters(){
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pfasta);
                CommandParameter pname("name", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pname);
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pfasta);
                CommandParameter pname("name", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pcount);
                CommandParameter pgroup("group", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pgroup);
                CommandParameter plist("list", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
                CommandParameter pgroup("group", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pgroup);
                CommandParameter plist("list", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
@@ -37,8 +39,8 @@ vector<string> ListSeqsCommand::setParameters(){
 string ListSeqsCommand::getHelpString(){       
        try {
                string helpString = "";
 string ListSeqsCommand::getHelpString(){       
        try {
                string helpString = "";
-               helpString += "The list.seqs command reads a fasta, name, group, list, taxonomy or alignreport file and outputs a .accnos file containing sequence names.\n";
-               helpString += "The list.seqs command parameters are fasta, name, group, list, taxonomy and alignreport.  You must provide one of these parameters.\n";
+               helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy or alignreport file and outputs a .accnos file containing sequence names.\n";
+               helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy and alignreport.  You must provide one of these parameters.\n";
                helpString += "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n";
                helpString += "Example list.seqs(fasta=amazon.fasta).\n";
                helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
                helpString += "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n";
                helpString += "Example list.seqs(fasta=amazon.fasta).\n";
                helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
@@ -164,6 +166,14 @@ ListSeqsCommand::ListSeqsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                }
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a count file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        //check for required parameters
                        }
 
                        //check for required parameters
@@ -195,8 +205,13 @@ ListSeqsCommand::ListSeqsCommand(string option)  {
                        if (taxfile == "not open") { abort = true; }
                        else if (taxfile == "not found") {  taxfile = "";  }
                        else { m->setTaxonomyFile(taxfile); }
                        if (taxfile == "not open") { abort = true; }
                        else if (taxfile == "not found") {  taxfile = "";  }
                        else { m->setTaxonomyFile(taxfile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; }
+                       else if (countfile == "not found") {  countfile = "";  }
+                       else { m->setCountTableFile(countfile); }
                        
                        
-                       if ((fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == ""))  { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }
+                       if ((countfile == "") && (fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == ""))  { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }
                        
                        int okay = 1;
                        if (outputDir != "") { okay++; }
                        
                        int okay = 1;
                        if (outputDir != "") { okay++; }
@@ -225,6 +240,7 @@ int ListSeqsCommand::execute(){
                else if (alignfile != "")       {       inputFileName = alignfile;      readAlign();    }
                else if (listfile != "")        {       inputFileName = listfile;       readList();             }
                else if (taxfile != "")         {       inputFileName = taxfile;        readTax();              }
                else if (alignfile != "")       {       inputFileName = alignfile;      readAlign();    }
                else if (listfile != "")        {       inputFileName = listfile;       readList();             }
                else if (taxfile != "")         {       inputFileName = taxfile;        readTax();              }
+        else if (countfile != "")      {       inputFileName = countfile;      readCount();    }
                
                if (m->control_pressed) { outputTypes.clear();  return 0; }
                
                
                if (m->control_pressed) { outputTypes.clear();  return 0; }
                
@@ -293,12 +309,6 @@ int ListSeqsCommand::readFasta(){
                        
                        Sequence currSeq(in);
                        name = currSeq.getName();
                        
                        Sequence currSeq(in);
                        name = currSeq.getName();
-                       //if (lastName == "") { lastName = name; }
-                       //if (name != lastName) { count = 1; }
-               //      lastName = name;
-                       
-                       //Sequence newSeq(name+"_"+toString(count), currSeq.getAligned());
-                       //newSeq.printSequence(out);
                        
                        if (name != "") {  names.push_back(name);  }
                        
                        
                        if (name != "") {  names.push_back(name);  }
                        
@@ -404,7 +414,24 @@ int ListSeqsCommand::readGroup(){
                exit(1);
        }
 }
                exit(1);
        }
 }
-
+//**********************************************************************************************************************
+int ListSeqsCommand::readCount(){
+       try {
+               CountTable ct;
+               ct.readTable(countfile);
+        
+        if (m->control_pressed) { return 0; }
+        
+        names = ct.getNamesOfSeqs();
+        
+        return 0;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ListSeqsCommand", "readCount");
+               exit(1);
+       }
+}
 //**********************************************************************************************************************
 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
 int ListSeqsCommand::readAlign(){
 //**********************************************************************************************************************
 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
 int ListSeqsCommand::readAlign(){
index 1a31a6dd432e837159be8d916f7ce2a25e4d4dec..8e4cce3c932100ff1ab88d49b346ff87e7791a06 100644 (file)
@@ -34,7 +34,7 @@ class ListSeqsCommand : public Command {
        
        private:
                vector<string> names, outputNames;
        
        private:
                vector<string> names, outputNames;
-               string fastafile, namefile, groupfile, alignfile, inputFileName, outputDir, listfile, taxfile;
+               string fastafile, namefile, groupfile, countfile, alignfile, inputFileName, outputDir, listfile, taxfile;
                bool abort;
                
                int readFasta();
                bool abort;
                
                int readFasta();
@@ -43,6 +43,7 @@ class ListSeqsCommand : public Command {
                int readAlign();
                int readList();
                int readTax();
                int readAlign();
                int readList();
                int readTax();
+        int readCount();
                
 };
 
                
 };
 
index 32ede6e5fa70a2ab42f4aa6ebcc0382b32dc8d76..bc5a569e31cd2e12526bb20244adb75e58333ece 100644 (file)
--- a/makefile
+++ b/makefile
@@ -17,7 +17,7 @@ USECOMPRESSION ?= no
 MOTHUR_FILES="\"Enter_your_default_path_here\""
 RELEASE_DATE = "\"7/9/2012\""
 VERSION = "\"1.26.0\""
 MOTHUR_FILES="\"Enter_your_default_path_here\""
 RELEASE_DATE = "\"7/9/2012\""
 VERSION = "\"1.26.0\""
-FORTAN_COMPILER = gfortran
+FORTAN_COMPILER = /usr/local/gfortran/bin/gfortran
 FORTRAN_FLAGS = 
 
 # Optimize to level 3:
 FORTRAN_FLAGS = 
 
 # Optimize to level 3:
index 86ddf946a9d51c1df7e43783e0470bf5aead6301..80aeb3fbad0495b3788e518812d6740f31ac71f8 100644 (file)
@@ -71,7 +71,7 @@ string RemoveGroupsCommand::getOutputFileNameTag(string type, string inputName="
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
-            else if (type == "count")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
+            else if (type == "count")       {   outputFileName =  "pick.count.table";   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "shared")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "design")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "shared")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "design")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
index 4cec90f567c764e8770987c150b67b5ececc0d35..a3fd1b83ee5025adb340922df226895230a464ed 100644 (file)
@@ -15,8 +15,9 @@
 vector<string> RemoveLineageCommand::setParameters(){  
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
 vector<string> RemoveLineageCommand::setParameters(){  
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
                CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(ptaxonomy);
                CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
                CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(ptaxonomy);
                CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
@@ -38,9 +39,9 @@ vector<string> RemoveLineageCommand::setParameters(){
 string RemoveLineageCommand::getHelpString(){  
        try {
                string helpString = "";
 string RemoveLineageCommand::getHelpString(){  
        try {
                string helpString = "";
-               helpString += "The remove.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, list or alignreport file.\n";
+               helpString += "The remove.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, count, list or alignreport file.\n";
                helpString += "It outputs a file containing only the sequences from the taxonomy file that are not from the taxon you requested to be removed.\n";
                helpString += "It outputs a file containing only the sequences from the taxonomy file that are not from the taxon you requested to be removed.\n";
-               helpString += "The remove.lineage command parameters are taxon, fasta, name, group, list, taxonomy, alignreport and dups.  You must provide taxonomy unless you have a valid current taxonomy file.\n";
+               helpString += "The remove.lineage command parameters are taxon, fasta, name, group, list, taxonomy, count, alignreport and dups.  You must provide taxonomy unless you have a valid current taxonomy file.\n";
                helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
                helpString += "The taxon parameter allows you to select the taxons you would like to remove, and is required.\n";
                helpString += "You may enter your taxons with confidence scores, doing so will remove only those sequences that belong to the taxonomy and whose cofidence scores fall below the scores you give.\n";
                helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
                helpString += "The taxon parameter allows you to select the taxons you would like to remove, and is required.\n";
                helpString += "You may enter your taxons with confidence scores, doing so will remove only those sequences that belong to the taxonomy and whose cofidence scores fall below the scores you give.\n";
@@ -72,6 +73,7 @@ string RemoveLineageCommand::getOutputFileNameTag(string type, string inputName=
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
+            else if (type == "count")       {   outputFileName =  "pick.count.table";   }
             else if (type == "alignreport")      {   outputFileName =  "pick.align.report";   }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
             else if (type == "alignreport")      {   outputFileName =  "pick.align.report";   }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
@@ -94,6 +96,7 @@ RemoveLineageCommand::RemoveLineageCommand(){
                outputTypes["group"] = tempOutNames;
                outputTypes["alignreport"] = tempOutNames;
                outputTypes["list"] = tempOutNames;
                outputTypes["group"] = tempOutNames;
                outputTypes["alignreport"] = tempOutNames;
                outputTypes["list"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
        }
        catch(exception& e) {
                m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
@@ -131,6 +134,7 @@ RemoveLineageCommand::RemoveLineageCommand(string option)  {
                        outputTypes["group"] = tempOutNames;
                        outputTypes["alignreport"] = tempOutNames;
                        outputTypes["list"] = tempOutNames;
                        outputTypes["group"] = tempOutNames;
                        outputTypes["alignreport"] = tempOutNames;
                        outputTypes["list"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
@@ -187,6 +191,14 @@ RemoveLineageCommand::RemoveLineageCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                }
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a count file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        
                        }
 
                        
@@ -223,6 +235,19 @@ RemoveLineageCommand::RemoveLineageCommand(string option)  {
                                else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
                        }else { m->setTaxonomyFile(taxfile); }
                        
                                else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
                        }else { m->setTaxonomyFile(taxfile); }
                        
+            countfile = validParameter.validFile(parameters, "count", true);
+            if (countfile == "not open") { countfile = ""; abort = true; }
+            else if (countfile == "not found") { countfile = "";  }    
+            else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+            
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+            
                        string usedDups = "true";
                        string temp = validParameter.validFile(parameters, "dups", false);      
                        if (temp == "not found") { 
                        string usedDups = "true";
                        string temp = validParameter.validFile(parameters, "dups", false);      
                        if (temp == "not found") { 
@@ -240,14 +265,16 @@ RemoveLineageCommand::RemoveLineageCommand(string option)  {
                        }
                        m->splitAtChar(taxons, listOfTaxons, '-');
                        
                        }
                        m->splitAtChar(taxons, listOfTaxons, '-');
                        
-                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
+                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
                
                        if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }                       
                        
                
                        if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }                       
                        
-                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
-                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
-                               parser.getNameFile(files);
-                       }
+                       if (countfile == "") {
+                if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                    vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                    parser.getNameFile(files);
+                }
+            }
                        
                }
 
                        
                }
 
@@ -265,6 +292,12 @@ int RemoveLineageCommand::execute(){
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
                if (m->control_pressed) { return 0; }
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
                if (m->control_pressed) { return 0; }
+        
+        if (countfile != "") {
+            if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
+                m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
+            }
+        }
                
                //read through the correct file and output lines you want to keep
                if (taxfile != "")                      {               readTax();              }  //fills the set of names to remove
                
                //read through the correct file and output lines you want to keep
                if (taxfile != "")                      {               readTax();              }  //fills the set of names to remove
@@ -273,6 +306,7 @@ int RemoveLineageCommand::execute(){
                if (groupfile != "")            {               readGroup();    }
                if (alignfile != "")            {               readAlign();    }
                if (listfile != "")                     {               readList();             }
                if (groupfile != "")            {               readGroup();    }
                if (alignfile != "")            {               readAlign();    }
                if (listfile != "")                     {               readList();             }
+        if (countfile != "")           {               readCount();    }
                
                
                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
                
                
                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
@@ -309,6 +343,11 @@ int RemoveLineageCommand::execute(){
                        if (itTypes != outputTypes.end()) {
                                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
                        }
                        if (itTypes != outputTypes.end()) {
                                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
                        }
+            
+            itTypes = outputTypes.find("count");
+                       if (itTypes != outputTypes.end()) {
+                               if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+                       }
                }
                
                return 0;               
                }
                
                return 0;               
@@ -511,7 +550,52 @@ int RemoveLineageCommand::readName(){
                exit(1);
        }
 }
                exit(1);
        }
 }
-
+//**********************************************************************************************************************
+int RemoveLineageCommand::readCount(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               ifstream in;
+               m->openInputFile(countfile, in);
+               
+               bool wroteSomething = false;
+               
+        string headers = m->getline(in); m->gobble(in);
+        out << headers << endl;
+        
+        string name, rest; int thisTotal;
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+            
+            in >> name; m->gobble(in); 
+            in >> thisTotal; m->gobble(in);
+            rest = m->getline(in); m->gobble(in);
+            if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
+            
+            if (names.count(name) == 0) {
+                out << name << '\t' << thisTotal << '\t' << rest << endl;
+                wroteSomething = true;
+            }
+        }
+        in.close();
+               out.close();
+               
+               if (wroteSomething == false) {  m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
+               outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
+        
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveLineageCommand", "readCount");
+               exit(1);
+       }
+}
 //**********************************************************************************************************************
 int RemoveLineageCommand::readGroup(){
        try {
 //**********************************************************************************************************************
 int RemoveLineageCommand::readGroup(){
        try {
index a5caec8e4c3e0f04ec7744489c3f27c0a495e38d..a756d24f583be82eac03634ebcd8496c4fd687b6 100644 (file)
@@ -34,12 +34,13 @@ class RemoveLineageCommand : public Command {
        private:
                set<string> names;
                vector<string> outputNames, listOfTaxons;
        private:
                set<string> names;
                vector<string> outputNames, listOfTaxons;
-               string fastafile, namefile, groupfile, alignfile, listfile, taxfile, outputDir, taxons;
+               string fastafile, namefile, groupfile, alignfile, listfile, countfile, taxfile, outputDir, taxons;
                bool abort, dups;
                
                int readFasta();
                int readName();
                int readGroup();
                bool abort, dups;
                
                int readFasta();
                int readName();
                int readGroup();
+        int readCount();
                int readAlign();
                int readList();
                int readTax();  
                int readAlign();
                int readList();
                int readTax();  
index 0d53c1a95ed5b968d0a8281f1f3d612d7d4c2047..1fb94462ac48fa13ac871c6b9d3cf4f3ae2ceff6 100644 (file)
@@ -15,8 +15,9 @@
 vector<string> RemoveSeqsCommand::setParameters(){     
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
 vector<string> RemoveSeqsCommand::setParameters(){     
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
                CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
                CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
                CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
                CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
@@ -39,9 +40,9 @@ vector<string> RemoveSeqsCommand::setParameters(){
 string RemoveSeqsCommand::getHelpString(){     
        try {
                string helpString = "";
 string RemoveSeqsCommand::getHelpString(){     
        try {
                string helpString = "";
-               helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n";
+               helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n";
                helpString += "It outputs a file containing the sequences NOT in the .accnos file.\n";
                helpString += "It outputs a file containing the sequences NOT in the .accnos file.\n";
-               helpString += "The remove.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos and at least one of the file parameters.\n";
+               helpString += "The remove.seqs command parameters are accnos, fasta, name, group, count, list, taxonomy, qfile, alignreport and dups.  You must provide accnos and at least one of the file parameters.\n";
                helpString += "The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n";
                helpString += "The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
                helpString += "Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
                helpString += "The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n";
                helpString += "The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
                helpString += "Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
@@ -70,6 +71,7 @@ string RemoveSeqsCommand::getOutputFileNameTag(string type, string inputName="")
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "qfile")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "alignreport") {   outputFileName =  "pick.align.report";                   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "qfile")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "alignreport") {   outputFileName =  "pick.align.report";                   }
+            else if (type == "count")       {   outputFileName =  "pick.count.table";   }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
         return outputFileName;
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
         return outputFileName;
@@ -93,6 +95,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(){
                outputTypes["alignreport"] = tempOutNames;
                outputTypes["list"] = tempOutNames;
                outputTypes["qfile"] = tempOutNames;
                outputTypes["alignreport"] = tempOutNames;
                outputTypes["list"] = tempOutNames;
                outputTypes["qfile"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
        }
        catch(exception& e) {
                m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
@@ -131,6 +134,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                        outputTypes["alignreport"] = tempOutNames;
                        outputTypes["list"] = tempOutNames;
                        outputTypes["qfile"] = tempOutNames;
                        outputTypes["alignreport"] = tempOutNames;
                        outputTypes["list"] = tempOutNames;
                        outputTypes["qfile"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
@@ -203,6 +207,14 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
                                }
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a count file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        
                        }
 
                        
@@ -259,13 +271,28 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                                else                            {  temp = "false"; usedDups = "";       }
                        }
                        dups = m->isTrue(temp);
                                else                            {  temp = "false"; usedDups = "";       }
                        }
                        dups = m->isTrue(temp);
-                       
-                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
-                       
-                       if ((fastafile != "") && (namefile == "")) {
-                               vector<string> files; files.push_back(fastafile);
-                               parser.getNameFile(files);
-                       }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+            if (countfile == "not open") { countfile = ""; abort = true; }
+            else if (countfile == "not found") { countfile = "";  }    
+            else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+            
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+                       
+                       if ((countfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
+                       
+            if (countfile == "") {
+                if ((fastafile != "") && (namefile == "")) {
+                    vector<string> files; files.push_back(fastafile);
+                    parser.getNameFile(files);
+                }
+            }
                }
 
        }
                }
 
        }
@@ -285,6 +312,12 @@ int RemoveSeqsCommand::execute(){
                names = m->readAccnos(accnosfile);
                
                if (m->control_pressed) { return 0; }
                names = m->readAccnos(accnosfile);
                
                if (m->control_pressed) { return 0; }
+        
+        if (countfile != "") {
+            if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
+                m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
+            }
+        }
                
                //read through the correct file and output lines you want to keep
                if (namefile != "")                     {               readName();             }
                
                //read through the correct file and output lines you want to keep
                if (namefile != "")                     {               readName();             }
@@ -294,6 +327,7 @@ int RemoveSeqsCommand::execute(){
                if (listfile != "")                     {               readList();             }
                if (taxfile != "")                      {               readTax();              }
                if (qualfile != "")                     {               readQual();             }
                if (listfile != "")                     {               readList();             }
                if (taxfile != "")                      {               readTax();              }
                if (qualfile != "")                     {               readQual();             }
+        if (countfile != "")           {               readCount();            }
                
                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
        
                
                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
        
@@ -333,7 +367,12 @@ int RemoveSeqsCommand::execute(){
                        itTypes = outputTypes.find("qfile");
                        if (itTypes != outputTypes.end()) {
                                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
                        itTypes = outputTypes.find("qfile");
                        if (itTypes != outputTypes.end()) {
                                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
-                       }                       
+                       }       
+            
+            itTypes = outputTypes.find("count");
+                       if (itTypes != outputTypes.end()) {
+                               if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+                       }
                }
                
                return 0;               
                }
                
                return 0;               
@@ -463,6 +502,56 @@ int RemoveSeqsCommand::readQual(){
        }
 }
 //**********************************************************************************************************************
        }
 }
 //**********************************************************************************************************************
+// Copies countfile to a new count table, leaving out every row whose sequence
+// name is present in names (the set filled from the .accnos file in execute()).
+// The output name is built from outputDir (or the count file's own path when
+// outputDir is empty), the root name of countfile and the "count" output tag.
+// The header line is copied through unchanged. For each remaining row the name
+// and total are read as separate fields and the rest of the line is kept verbatim.
+// The totals of the removed rows are summed and reported to the user.
+// Returns 0 in all cases; an unparsable row or a user interrupt removes the
+// partial output file first. Exceptions are logged through m->errorOut and
+// the program exits.
+int RemoveSeqsCommand::readCount(){
+       try {
+        
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               ifstream in;
+               m->openInputFile(countfile, in);
+               
+               bool wroteSomething = false;
+               int removedCount = 0;
+               
+        //first line holds the column headers, pass it through untouched
+        string headers = m->getline(in); m->gobble(in);
+        out << headers << endl;
+        
+        //NOTE(review): if m->gobble also skips line breaks, a table with no columns after
+        //the total would pull the following row into rest - confirm against MothurOut::gobble.
+        string name, rest; int thisTotal = 0;
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+            
+            in >> name; m->gobble(in); 
+            in >> thisTotal;
+            
+            //a failed extraction sets failbit, not eofbit, and every later read becomes a no-op,
+            //so without this check the loop above would never terminate on a malformed row.
+            if (in.fail()) {
+                m->mothurOut("[ERROR]: your count file contains a line that could not be parsed, quitting."); m->mothurOutEndLine();
+                m->control_pressed = true;
+                in.close();  out.close();  m->mothurRemove(outputFileName);
+                return 0;
+            }
+            m->gobble(in);
+            
+            rest = m->getline(in); m->gobble(in);
+            if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
+            
+            if (names.count(name) == 0) {
+                //not listed in the accnos file, keep the row
+                out << name << '\t' << thisTotal << '\t' << rest << endl;
+                wroteSomething = true;
+            }else { removedCount += thisTotal; }
+        }
+        in.close();
+               out.close();
+               
+               if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
+               outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
+               
+               m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
+        
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveSeqsCommand", "readCount");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 int RemoveSeqsCommand::readList(){
        try {
                string thisOutputDir = outputDir;
 int RemoveSeqsCommand::readList(){
        try {
                string thisOutputDir = outputDir;
index 474951a9feaee7d497417cf2a03f206367056241..151b413070bc64bafd80834e72228dabd1037c43 100644 (file)
@@ -34,13 +34,14 @@ class RemoveSeqsCommand : public Command {
        
        private:
                set<string> names;
        
        private:
                set<string> names;
-               string accnosfile, fastafile, namefile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
+               string accnosfile, fastafile, namefile, groupfile, countfile, alignfile, listfile, taxfile, qualfile, outputDir;
                bool abort, dups;
                vector<string> outputNames;
                
                int readFasta();
                int readName();
                int readGroup();
                bool abort, dups;
                vector<string> outputNames;
                
                int readFasta();
                int readName();
                int readGroup();
+        int readCount();
                int readAlign();
                int readList();
                int readTax();
                int readAlign();
                int readList();
                int readTax();
index 869df029fdd35efa6c3e9a9a22d46c6c4fea6667..4d04270ce649f8b2e579625eae7c074b322d10cb 100644 (file)
@@ -9,13 +9,15 @@
 
 #include "secondarystructurecommand.h"
 #include "sequence.hpp"
 
 #include "secondarystructurecommand.h"
 #include "sequence.hpp"
+#include "counttable.h"
 
 //**********************************************************************************************************************
 vector<string> AlignCheckCommand::setParameters(){     
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
                CommandParameter pmap("map", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pmap);
 
 //**********************************************************************************************************************
 vector<string> AlignCheckCommand::setParameters(){     
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
                CommandParameter pmap("map", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pmap);
-        CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
@@ -32,7 +34,7 @@ vector<string> AlignCheckCommand::setParameters(){
 string AlignCheckCommand::getHelpString(){     
        try {
                string helpString = "";
 string AlignCheckCommand::getHelpString(){     
        try {
                string helpString = "";
-               helpString += "The align.check command reads a fasta file and map file as well as an optional name file.\n";
+               helpString += "The align.check command reads a fasta file and map file as well as an optional name or count file.\n";
                helpString += "It outputs a file containing the secondary structure matches in the .align.check file.\n";
                helpString += "The align.check command parameters are fasta and map, both are required.\n";
                helpString += "The align.check command should be in the following format: align.check(fasta=yourFasta, map=yourMap).\n";
                helpString += "It outputs a file containing the secondary structure matches in the .align.check file.\n";
                helpString += "The align.check command parameters are fasta and map, both are required.\n";
                helpString += "The align.check command should be in the following format: align.check(fasta=yourFasta, map=yourMap).\n";
@@ -135,6 +137,14 @@ AlignCheckCommand::AlignCheckCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a count file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        //check for required parameters
                        }
 
                        //check for required parameters
@@ -155,16 +165,25 @@ AlignCheckCommand::AlignCheckCommand(string option)  {
                        else if (namefile == "not found") { namefile = "";  }   
                        else { m->setNameFile(namefile); }
                        
                        else if (namefile == "not found") { namefile = "";  }   
                        else { m->setNameFile(namefile); }
                        
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; countfile = ""; }  
+                       else if (countfile == "not found") { countfile = ""; }
+                       else { m->setCountTableFile(countfile); }
+                       
+            if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+            
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
                                outputDir = ""; 
                                outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it       
                        }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
                                outputDir = ""; 
                                outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it       
                        }
                        
-                       if ((namefile == "") && (fastafile != "")){
-                               vector<string> files; files.push_back(fastafile); 
-                               parser.getNameFile(files);
-                       }
+            if (countfile == "") {
+                if ((namefile == "") && (fastafile != "")){
+                    vector<string> files; files.push_back(fastafile); 
+                    parser.getNameFile(files);
+                }
+            }
                }
 
        }
                }
 
        }
@@ -184,6 +203,11 @@ int AlignCheckCommand::execute(){
                readMap();
                
                if (namefile != "") { nameMap = m->readNames(namefile); }
                readMap();
                
                if (namefile != "") { nameMap = m->readNames(namefile); }
+        else if (countfile != "") {
+            CountTable ct;
+            ct.readTable(countfile);
+            nameMap = ct.getNameMap();
+        }
                
                if (m->control_pressed) { return 0; }
                
                
                if (m->control_pressed) { return 0; }
                
@@ -217,7 +241,7 @@ int AlignCheckCommand::execute(){
                                if (haderror == 1) { m->control_pressed = true; break; }
                                
                                int num = 1;
                                if (haderror == 1) { m->control_pressed = true; break; }
                                
                                int num = 1;
-                               if (namefile != "") {
+                               if ((namefile != "") || (countfile != "")) {
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = nameMap.find(seq.getName());
                                        
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = nameMap.find(seq.getName());
                                        
@@ -274,7 +298,7 @@ int AlignCheckCommand::execute(){
                m->mothurOut("75%-tile:\t" + toString(pound[ptile75]) + "\t" + toString(dash[ptile75]) + "\t" + toString(plus[ptile75]) + "\t" + toString(equal[ptile75]) + "\t" + toString(loop[ptile75]) + "\t" + toString(tilde[ptile75]) + "\t" + toString(total[ptile75])); m->mothurOutEndLine();
                m->mothurOut("97.5%-tile:\t" + toString(pound[ptile97_5]) + "\t" + toString(dash[ptile97_5]) + "\t" + toString(plus[ptile97_5]) + "\t" + toString(equal[ptile97_5]) + "\t" + toString(loop[ptile97_5]) + "\t" + toString(tilde[ptile97_5]) + "\t" + toString(total[ptile97_5])); m->mothurOutEndLine();
                m->mothurOut("Maximum:\t" + toString(pound[ptile100]) + "\t" + toString(dash[ptile100]) + "\t" + toString(plus[ptile100]) + "\t" + toString(equal[ptile100]) + "\t" + toString(loop[ptile100]) + "\t" + toString(tilde[ptile100]) + "\t" + toString(total[ptile100])); m->mothurOutEndLine();
                m->mothurOut("75%-tile:\t" + toString(pound[ptile75]) + "\t" + toString(dash[ptile75]) + "\t" + toString(plus[ptile75]) + "\t" + toString(equal[ptile75]) + "\t" + toString(loop[ptile75]) + "\t" + toString(tilde[ptile75]) + "\t" + toString(total[ptile75])); m->mothurOutEndLine();
                m->mothurOut("97.5%-tile:\t" + toString(pound[ptile97_5]) + "\t" + toString(dash[ptile97_5]) + "\t" + toString(plus[ptile97_5]) + "\t" + toString(equal[ptile97_5]) + "\t" + toString(loop[ptile97_5]) + "\t" + toString(tilde[ptile97_5]) + "\t" + toString(total[ptile97_5])); m->mothurOutEndLine();
                m->mothurOut("Maximum:\t" + toString(pound[ptile100]) + "\t" + toString(dash[ptile100]) + "\t" + toString(plus[ptile100]) + "\t" + toString(equal[ptile100]) + "\t" + toString(loop[ptile100]) + "\t" + toString(tilde[ptile100]) + "\t" + toString(total[ptile100])); m->mothurOutEndLine();
-               if (namefile == "") {  m->mothurOut("# of Seqs:\t" + toString(count)); m->mothurOutEndLine(); }
+               if ((namefile == "") && (countfile == "")) {  m->mothurOut("# of Seqs:\t" + toString(count)); m->mothurOutEndLine(); }
                else { m->mothurOut("# of unique seqs:\t" + toString(count)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(size)); m->mothurOutEndLine(); }
                
                
                else { m->mothurOut("# of unique seqs:\t" + toString(count)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(size)); m->mothurOutEndLine(); }
                
                
index 110f019f3beeafdd2a877c423b250232a488e62c..becafc5bb3f29073d0e72859e495f0364c351dfb 100644 (file)
@@ -50,7 +50,7 @@ class AlignCheckCommand : public Command {
        
        private:
                vector<int> structMap;
        
        private:
                vector<int> structMap;
-               string mapfile, fastafile, outputDir, namefile;
+               string mapfile, fastafile, outputDir, namefile, countfile;
                bool abort;
                int seqLength, haderror;
                vector<string> outputNames;
                bool abort;
                int seqLength, haderror;
                vector<string> outputNames;
index 830643d12f2fc03d9f2985f2c722a891f0b95371..a9bb5737eac832a203969460f7f4e2759849a975 100644 (file)
@@ -8,13 +8,14 @@
  */
 
 #include "seqsummarycommand.h"
  */
 
 #include "seqsummarycommand.h"
-
+#include "counttable.h"
 
 //**********************************************************************************************************************
 vector<string> SeqSummaryCommand::setParameters(){     
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
 
 //**********************************************************************************************************************
 vector<string> SeqSummaryCommand::setParameters(){     
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -33,8 +34,9 @@ string SeqSummaryCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The summary.seqs command reads a fastafile and summarizes the sequences.\n";
        try {
                string helpString = "";
                helpString += "The summary.seqs command reads a fastafile and summarizes the sequences.\n";
-               helpString += "The summary.seqs command parameters are fasta, name and processors, fasta is required, unless you have a valid current fasta file.\n";
+               helpString += "The summary.seqs command parameters are fasta, name, count and processors, fasta is required, unless you have a valid current fasta file.\n";
                helpString += "The name parameter allows you to enter a name file associated with your fasta file. \n";
                helpString += "The name parameter allows you to enter a name file associated with your fasta file. \n";
+        helpString += "The count parameter allows you to enter a count file associated with your fasta file. \n";
                helpString += "The summary.seqs command should be in the following format: \n";
                helpString += "summary.seqs(fasta=yourFastaFile, processors=2) \n";
                helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
                helpString += "The summary.seqs command should be in the following format: \n";
                helpString += "summary.seqs(fasta=yourFastaFile, processors=2) \n";
                helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
@@ -123,6 +125,14 @@ SeqSummaryCommand::SeqSummaryCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a count file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
                        
                        //initialize outputTypes
                        }
                        
                        //initialize outputTypes
@@ -142,6 +152,13 @@ SeqSummaryCommand::SeqSummaryCommand(string option)  {
                        if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") { namefile = "";  }   
                        else { m->setNameFile(namefile); }
                        if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") { namefile = "";  }   
                        else { m->setNameFile(namefile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; countfile = ""; }  
+                       else if (countfile == "not found") { countfile = ""; }
+                       else { m->setCountTableFile(countfile); }
+                       
+            if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
@@ -153,11 +170,12 @@ SeqSummaryCommand::SeqSummaryCommand(string option)  {
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);
                        
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);
                        
-                       if (namefile == "") {
-                               vector<string> files; files.push_back(fastafile);
-                               parser.getNameFile(files);
-                       }
-                       
+            if (countfile == "") {
+                if (namefile == "") {
+                    vector<string> files; files.push_back(fastafile);
+                    parser.getNameFile(files);
+                }
+            }
                }
        }
        catch(exception& e) {
                }
        }
        catch(exception& e) {
@@ -186,6 +204,11 @@ int SeqSummaryCommand::execute(){
                vector<int> longHomoPolymer;
                
                if (namefile != "") { nameMap = m->readNames(namefile); }
                vector<int> longHomoPolymer;
                
                if (namefile != "") { nameMap = m->readNames(namefile); }
+        else if (countfile != "") {
+            CountTable ct;
+            ct.readTable(countfile);
+            nameMap = ct.getNameMap();
+        }
                
                if (m->control_pressed) { return 0; }
                        
                
                if (m->control_pressed) { return 0; }
                        
@@ -344,7 +367,7 @@ int SeqSummaryCommand::execute(){
                int size = startPosition.size();
                
                //find means
                int size = startPosition.size();
                
                //find means
-               float meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer;
+               double meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer;
                meanStartPosition = 0; meanEndPosition = 0; meanSeqLength = 0; meanAmbigBases = 0; meanLongHomoPolymer = 0;
                for (int i = 0; i < size; i++) {
                        meanStartPosition += startPosition[i];
                meanStartPosition = 0; meanEndPosition = 0; meanSeqLength = 0; meanAmbigBases = 0; meanLongHomoPolymer = 0;
                for (int i = 0; i < size; i++) {
                        meanStartPosition += startPosition[i];
@@ -353,6 +376,7 @@ int SeqSummaryCommand::execute(){
                        meanAmbigBases += ambigBases[i];
                        meanLongHomoPolymer += longHomoPolymer[i];
                }
                        meanAmbigBases += ambigBases[i];
                        meanLongHomoPolymer += longHomoPolymer[i];
                }
+                
                //this is an int divide so the remainder is lost
                meanStartPosition /= (float) size; meanEndPosition /= (float) size; meanLongHomoPolymer /= (float) size; meanSeqLength /= (float) size; meanAmbigBases /= (float) size;
                                
                //this is an int divide so the remainder is lost
                meanStartPosition /= (float) size; meanEndPosition /= (float) size; meanLongHomoPolymer /= (float) size; meanSeqLength /= (float) size; meanAmbigBases /= (float) size;
                                
@@ -380,7 +404,7 @@ int SeqSummaryCommand::execute(){
                m->mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100]) + "\t" + toString(ptile100+1)); m->mothurOutEndLine();
                m->mothurOut("Mean:\t" + toString(meanStartPosition) + "\t" + toString(meanEndPosition) + "\t" + toString(meanSeqLength) + "\t" + toString(meanAmbigBases) + "\t" + toString(meanLongHomoPolymer)); m->mothurOutEndLine();
 
                m->mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100]) + "\t" + toString(ptile100+1)); m->mothurOutEndLine();
                m->mothurOut("Mean:\t" + toString(meanStartPosition) + "\t" + toString(meanEndPosition) + "\t" + toString(meanSeqLength) + "\t" + toString(meanAmbigBases) + "\t" + toString(meanLongHomoPolymer)); m->mothurOutEndLine();
 
-               if (namefile == "") {  m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); }
+               if ((namefile == "") && (countfile == "")) {  m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); }
                else { m->mothurOut("# of unique seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(startPosition.size())); m->mothurOutEndLine(); }
                
                if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
                else { m->mothurOut("# of unique seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(startPosition.size())); m->mothurOutEndLine(); }
                
                if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
@@ -430,11 +454,11 @@ int SeqSummaryCommand::driverCreateSummary(vector<int>& startPosition, vector<in
                        if (current.getName() != "") {
                                
                                int num = 1;
                        if (current.getName() != "") {
                                
                                int num = 1;
-                               if (namefile != "") {
+                               if ((namefile != "") || (countfile != "")) {
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = nameMap.find(current.getName());
                                        
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = nameMap.find(current.getName());
                                        
-                                       if (it == nameMap.end()) { m->mothurOut("[ERROR]: '" + current.getName() + "' is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                       if (it == nameMap.end()) { m->mothurOut("[ERROR]: '" + current.getName() + "' is not in your name or count file, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
                                        else { num = it->second; }
                                }
                                
                                        else { num = it->second; }
                                }
                                
@@ -505,11 +529,11 @@ int SeqSummaryCommand::MPICreateSummary(int start, int num, vector<int>& startPo
                        if (current.getName() != "") {
                                
                                int num = 1;
                        if (current.getName() != "") {
                                
                                int num = 1;
-                               if (namefile != "") {
+                               if ((namefile != "") || (countfile != "")) {
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = nameMap.find(current.getName());
                                        
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = nameMap.find(current.getName());
                                        
-                                       if (it == nameMap.end()) { cout << "[ERROR]: " << current.getName() << " is not in your namefile, please correct." << endl; m->control_pressed = true; }
+                                       if (it == nameMap.end()) { cout << "[ERROR]: " << current.getName() << " is not in your name or count file, please correct." << endl; m->control_pressed = true; }
                                        else { num = it->second; }
                                }
                                
                                        else { num = it->second; }
                                }
                                
@@ -626,14 +650,17 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector<int>& startPosition,
                vector<seqSumData*> pDataArray; 
                DWORD   dwThreadIdArray[processors-1];
                HANDLE  hThreadArray[processors-1]; 
                vector<seqSumData*> pDataArray; 
                DWORD   dwThreadIdArray[processors-1];
                HANDLE  hThreadArray[processors-1]; 
-               
+        
+               bool hasNameMap = false;
+        if ((namefile !="") || (countfile != "")) { hasNameMap = true; }
+        
                //Create processor worker threads.
                for( int i=0; i<processors-1; i++ ){
             
             string extension = "";
             if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
                        // Allocate memory for thread data.
                //Create processor worker threads.
                for( int i=0; i<processors-1; i++ ){
             
             string extension = "";
             if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
                        // Allocate memory for thread data.
-                       seqSumData* tempSum = new seqSumData(filename, (sumFile+extension), m, lines[i]->start, lines[i]->end, namefile, nameMap);
+                       seqSumData* tempSum = new seqSumData(filename, (sumFile+extension), m, lines[i]->start, lines[i]->end, hasNameMap, nameMap);
                        pDataArray.push_back(tempSum);
                        
                        //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
                        pDataArray.push_back(tempSum);
                        
                        //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
index 79e8be96974141da315d1038a51096695212ad5c..3926e25f3c2ebb484b357464d1514dfc957c4000 100644 (file)
@@ -34,7 +34,7 @@ public:
        void help() { m->mothurOut(getHelpString()); }          
 private:
        bool abort;
        void help() { m->mothurOut(getHelpString()); }          
 private:
        bool abort;
-       string fastafile, outputDir, namefile;
+       string fastafile, outputDir, namefile, countfile;
        int processors;
        vector<string> outputNames;
        map<string, int> nameMap;
        int processors;
        vector<string> outputNames;
        map<string, int> nameMap;
@@ -74,18 +74,18 @@ struct seqSumData {
        unsigned long long end;
        int count;
        MothurOut* m;
        unsigned long long end;
        int count;
        MothurOut* m;
-       string namefile;
+       bool hasNameMap;
        map<string, int> nameMap;
        
        
        seqSumData(){}
        map<string, int> nameMap;
        
        
        seqSumData(){}
-       seqSumData(string f, string sf, MothurOut* mout, unsigned long long st, unsigned long long en, string na, map<string, int> nam) {
+       seqSumData(string f, string sf, MothurOut* mout, unsigned long long st, unsigned long long en, bool na, map<string, int> nam) {
                filename = f;
                sumFile = sf;
                m = mout;
                start = st;
                end = en;
                filename = f;
                sumFile = sf;
                m = mout;
                start = st;
                end = en;
-               namefile = na;
+               hasNameMap = na;
                nameMap = nam;
                count = 0;
        }
                nameMap = nam;
                count = 0;
        }
@@ -123,11 +123,11 @@ static DWORD WINAPI MySeqSumThreadFunction(LPVOID lpParam){
                        if (current.getName() != "") {
                                
                                int num = 1;
                        if (current.getName() != "") {
                                
                                int num = 1;
-                               if (pDataArray->namefile != "") {
+                               if (pDataArray->hasNameMap){
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
                                        
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
                                        
-                                       if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
+                                       if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your name or count file, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
                                        else { num = it->second; }
                                }
                                
                                        else { num = it->second; }
                                }
                                
index 5d7971349656d4400e5d41a24c15e576ccdf57f7..5a073677804ae2e3176f08a3d4ff1e26a1bc5c03 100644 (file)
@@ -8,13 +8,14 @@
  */
 
 #include "summaryqualcommand.h"
  */
 
 #include "summaryqualcommand.h"
-
+#include "counttable.h"
 
 //**********************************************************************************************************************
 vector<string> SummaryQualCommand::setParameters(){    
        try {
                CommandParameter pqual("qfile", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pqual);
 
 //**********************************************************************************************************************
 vector<string> SummaryQualCommand::setParameters(){    
        try {
                CommandParameter pqual("qfile", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pqual);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -32,9 +33,10 @@ vector<string> SummaryQualCommand::setParameters(){
 string SummaryQualCommand::getHelpString(){    
        try {
                string helpString = "";
 string SummaryQualCommand::getHelpString(){    
        try {
                string helpString = "";
-               helpString += "The summary.qual command reads a quality file and an optional name file, and summarizes the quality information.\n";
-               helpString += "The summary.tax command parameters are qfile, name and processors. qfile is required, unless you have a valid current quality file.\n";
+               helpString += "The summary.qual command reads a quality file and an optional name or count file, and summarizes the quality information.\n";
+               helpString += "The summary.tax command parameters are qfile, name, count and processors. qfile is required, unless you have a valid current quality file.\n";
                helpString += "The name parameter allows you to enter a name file associated with your quality file. \n";
                helpString += "The name parameter allows you to enter a name file associated with your quality file. \n";
+        helpString += "The count parameter allows you to enter a count file associated with your quality file. \n";
                helpString += "The summary.qual command should be in the following format: \n";
                helpString += "summary.qual(qfile=yourQualityFile) \n";
                helpString += "Note: No spaces between parameter labels (i.e. qfile), '=' and parameters (i.e.yourQualityFile).\n";     
                helpString += "The summary.qual command should be in the following format: \n";
                helpString += "summary.qual(qfile=yourQualityFile) \n";
                helpString += "Note: No spaces between parameter labels (i.e. qfile), '=' and parameters (i.e.yourQualityFile).\n";     
@@ -122,6 +124,14 @@ SummaryQualCommand::SummaryQualCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
                        
                        //initialize outputTypes
                        }
                        
                        //initialize outputTypes
@@ -141,6 +151,13 @@ SummaryQualCommand::SummaryQualCommand(string option)  {
                        if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") { namefile = "";  }   
                        else { m->setNameFile(namefile); }
                        if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") { namefile = "";  }   
                        else { m->setNameFile(namefile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; countfile = ""; }  
+                       else if (countfile == "not found") { countfile = ""; }
+                       else { m->setCountTableFile(countfile); }
+                       
+            if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
@@ -152,10 +169,13 @@ SummaryQualCommand::SummaryQualCommand(string option)  {
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);     
                        
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);     
                        
-                       if (namefile == "") {
-                               vector<string> files; files.push_back(qualfile);
-                               parser.getNameFile(files);
-                       }
+            
+                       if (countfile == "") {
+                if (namefile == "") {
+                    vector<string> files; files.push_back(qualfile);
+                    parser.getNameFile(files);
+                }
+            }
                }
        }
        catch(exception& e) {
                }
        }
        catch(exception& e) {
@@ -179,7 +199,12 @@ int SummaryQualCommand::execute(){
                if (m->control_pressed) { return 0; }
                
                if (namefile != "") { nameMap = m->readNames(namefile); }
                if (m->control_pressed) { return 0; }
                
                if (namefile != "") { nameMap = m->readNames(namefile); }
-               
+               else if (countfile != "") {
+            CountTable ct;
+            ct.readTable(countfile);
+            nameMap = ct.getNameMap();
+        }
+        
                vector<unsigned long long> positions; 
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                positions = m->divideFile(qualfile, processors);
                vector<unsigned long long> positions; 
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                positions = m->divideFile(qualfile, processors);
@@ -257,7 +282,7 @@ int SummaryQualCommand::driverCreateSummary(vector<int>& position, vector<int>&
                        if (current.getName() != "") {
                                
                                int num = 1;
                        if (current.getName() != "") {
                                
                                int num = 1;
-                               if (namefile != "") {
+                               if ((namefile != "") || (countfile != "")) {
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = nameMap.find(current.getName());
                                        
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = nameMap.find(current.getName());
                                        
@@ -400,11 +425,14 @@ int SummaryQualCommand::createProcessesCreateSummary(vector<int>& position, vect
                DWORD   dwThreadIdArray[processors];
                HANDLE  hThreadArray[processors]; 
                
                DWORD   dwThreadIdArray[processors];
                HANDLE  hThreadArray[processors]; 
                
+        bool hasNameMap = false;
+        if ((namefile !="") || (countfile != "")) { hasNameMap = true; }
+        
                //Create processor worker threads.
                for( int i=0; i<processors; i++ ){
                        
                        // Allocate memory for thread data.
                //Create processor worker threads.
                for( int i=0; i<processors; i++ ){
                        
                        // Allocate memory for thread data.
-                       seqSumQualData* tempSum = new seqSumQualData(filename, m, lines[i].start, lines[i].end, namefile, nameMap);
+                       seqSumQualData* tempSum = new seqSumQualData(filename, m, lines[i].start, lines[i].end, hasNameMap, nameMap);
                        pDataArray.push_back(tempSum);
                        processIDS.push_back(i);
         
                        pDataArray.push_back(tempSum);
                        processIDS.push_back(i);
         
@@ -457,7 +485,7 @@ int SummaryQualCommand::printQual(string sumFile, vector<int>& position, vector<
                        
                        if (m->control_pressed) { out.close(); return 0; }
                        
                        
                        if (m->control_pressed) { out.close(); return 0; }
                        
-                       float average = averageQ[i] / (float) position[i];
+                       double average = averageQ[i] / (float) position[i];
                        out << i << '\t' << position[i] << '\t' << average << '\t';
                        
                        for (int j = 0; j < 41; j++) {
                        out << i << '\t' << position[i] << '\t' << average << '\t';
                        
                        for (int j = 0; j < 41; j++) {
index 31390b4296ff7659fdfb2946d3f37ff33b2877b7..ac65938bc23075fc7db7cb40b9ce6d05d0d5d64e 100644 (file)
@@ -35,7 +35,7 @@ public:
        
 private:
        bool abort;
        
 private:
        bool abort;
-       string qualfile, outputDir, namefile;
+       string qualfile, outputDir, namefile, countfile;
        vector<string> outputNames;
        map<string, int> nameMap;
        int processors;
        vector<string> outputNames;
        map<string, int> nameMap;
        int processors;
@@ -62,20 +62,21 @@ struct seqSumQualData {
        vector<int> position;
        vector<int> averageQ;
        vector< vector<int> > scores; 
        vector<int> position;
        vector<int> averageQ;
        vector< vector<int> > scores; 
-       string filename, namefile
+       string filename; 
        unsigned long long start;
        unsigned long long end;
        int count;
        MothurOut* m;
        unsigned long long start;
        unsigned long long end;
        int count;
        MothurOut* m;
+    bool hasNameMap;
        map<string, int> nameMap;
        
        ~seqSumQualData(){}
        map<string, int> nameMap;
        
        ~seqSumQualData(){}
-       seqSumQualData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string n, map<string, int> nam) {
+       seqSumQualData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, bool n, map<string, int> nam) {
                filename = f;
                m = mout;
                start = st;
                end = en;
                filename = f;
                m = mout;
                start = st;
                end = en;
-               namefile = n;
+               hasNameMap = n;
                nameMap = nam;
                count = 0;
        }
                nameMap = nam;
                count = 0;
        }
@@ -109,7 +110,7 @@ static DWORD WINAPI MySeqSumQualThreadFunction(LPVOID lpParam){
                        if (current.getName() != "") {
                        
                                int num = 1;
                        if (current.getName() != "") {
                        
                                int num = 1;
-                               if (pDataArray->namefile != "") {
+                               if (pDataArray->hasNameMap) {
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
                                        
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
                                        
index b29723c032814891f2f84dc8ba437ab789908773..29b8733721a2b710468f960ac474c99734a181df 100644 (file)
 #include "needlemanoverlap.hpp"
 #include "trimoligos.h"
 
 #include "needlemanoverlap.hpp"
 #include "trimoligos.h"
 
+
 //**********************************************************************************************************************
 vector<string> TrimSeqsCommand::setParameters(){       
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
                CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
                CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
 //**********************************************************************************************************************
 vector<string> TrimSeqsCommand::setParameters(){       
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
                CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
                CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
                CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
                CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
                CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
                CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
                CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
                CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
@@ -58,11 +60,12 @@ string TrimSeqsCommand::getHelpString(){
                string helpString = "";
                helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
                helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
                string helpString = "";
                helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
                helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
-               helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
+               helpString += "The trim.seqs command parameters are fasta, name, count, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
                helpString += "The fasta parameter is required.\n";
                helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
                helpString += "The oligos parameter allows you to provide an oligos file.\n";
                helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
                helpString += "The fasta parameter is required.\n";
                helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
                helpString += "The oligos parameter allows you to provide an oligos file.\n";
                helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
+        helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
                helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
                helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
                helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
                helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
                helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
                helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
@@ -111,6 +114,7 @@ string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
             else if (type == "fasta")            {   outputFileName =  "fasta";   }
             else if (type == "group")            {   outputFileName =  "groups";   }
             else if (type == "name")            {   outputFileName =  "names";   }
             else if (type == "fasta")            {   outputFileName =  "fasta";   }
             else if (type == "group")            {   outputFileName =  "groups";   }
             else if (type == "name")            {   outputFileName =  "names";   }
+            else if (type == "count")            {   outputFileName =  "count.table";   }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
         return outputFileName;
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
         return outputFileName;
@@ -133,6 +137,7 @@ TrimSeqsCommand::TrimSeqsCommand(){
                outputTypes["qfile"] = tempOutNames;
                outputTypes["group"] = tempOutNames;
                outputTypes["name"] = tempOutNames;
                outputTypes["qfile"] = tempOutNames;
                outputTypes["group"] = tempOutNames;
                outputTypes["name"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
        }
        catch(exception& e) {
                m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
@@ -171,6 +176,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        outputTypes["qfile"] = tempOutNames;
                        outputTypes["group"] = tempOutNames;
                        outputTypes["name"] = tempOutNames;
                        outputTypes["qfile"] = tempOutNames;
                        outputTypes["group"] = tempOutNames;
                        outputTypes["name"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -208,6 +214,14 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a count file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                                
                        }
 
                                
                        }
 
@@ -279,6 +293,13 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        if (temp == "not found")        {       nameFile = "";          }
                        else if(temp == "not open")     {       nameFile = "";  abort = true;           }
                        else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
                        if (temp == "not found")        {       nameFile = "";          }
                        else if(temp == "not open")     {       nameFile = "";  abort = true;           }
                        else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; countfile = ""; }  
+                       else if (countfile == "not found") { countfile = ""; }
+                       else { m->setCountTableFile(countfile); }
+                       
+            if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
                        
                        temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
                        m->mothurConvert(temp, qThreshold);
                        
                        temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
                        m->mothurConvert(temp, qThreshold);
@@ -331,10 +352,12 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                                abort = true;
                        }
                        
                                abort = true;
                        }
                        
-                       if (nameFile == "") {
-                               vector<string> files; files.push_back(fastaFile);
-                               parser.getNameFile(files);
-                       }
+            if (countfile == "") {
+                if (nameFile == "") {
+                    vector<string> files; files.push_back(fastaFile);
+                    parser.getNameFile(files);
+                }
+            }
                }
 
        }
                }
 
        }
@@ -385,13 +408,27 @@ int TrimSeqsCommand::execute(){
                        outputTypes["name"].push_back(trimNameFile);
                        outputTypes["name"].push_back(scrapNameFile); 
                }
                        outputTypes["name"].push_back(trimNameFile);
                        outputTypes["name"].push_back(scrapNameFile); 
                }
+        
+        string trimCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + "trim." + getOutputFileNameTag("count");
+               string scrapCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + "scrap." + getOutputFileNameTag("count");
+               
+               if (countfile != "") {
+            CountTable ct;
+            ct.readTable(countfile);
+            nameCount = ct.getNameMap();
+                       outputNames.push_back(trimCountFile);
+                       outputNames.push_back(scrapCountFile);
+                       outputTypes["count"].push_back(trimCountFile);
+                       outputTypes["count"].push_back(scrapCountFile); 
+               }
+
                
                if (m->control_pressed) { return 0; }
                
                string outputGroupFileName;
                if(oligoFile != ""){
                        createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
                
                if (m->control_pressed) { return 0; }
                
                string outputGroupFileName;
                if(oligoFile != ""){
                        createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
-                       if (createGroup) {
+                       if ((createGroup) && (countfile == "")){
                                outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
                                outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
                        }
                                outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
                                outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
                        }
@@ -401,9 +438,9 @@ int TrimSeqsCommand::execute(){
                setLines(fastaFile, qFileName);
                
         if(processors == 1){
                setLines(fastaFile, qFileName);
                
         if(processors == 1){
-            driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
+            driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
         }else{
         }else{
-            createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
+            createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
         }      
                
                
         }      
                
                
@@ -446,35 +483,62 @@ int TrimSeqsCommand::execute(){
                        for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
                        outputNames = outputNames2;
                        
                        for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
                        outputNames = outputNames2;
                        
-                       for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
-                               ifstream in;
-                               m->openInputFile(it->first, in);
-                               
-                               ofstream out;
-                               string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + getOutputFileNameTag("group");
-                               outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
-                               m->openOutputFile(thisGroupName, out);
-                               
-                               while (!in.eof()){
-                                       if (m->control_pressed) { break; }
-                                       
-                                       Sequence currSeq(in); m->gobble(in);
-                                       out << currSeq.getName() << '\t' << it->second << endl;
+            for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
+                ifstream in;
+                m->openInputFile(it->first, in);
+                
+                ofstream out;
+                string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first));
+                if (countfile == "") { thisGroupName += getOutputFileNameTag("group"); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
+                else {  thisGroupName += getOutputFileNameTag("count"); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName);  }
+                m->openOutputFile(thisGroupName, out);
+                
+                if (countfile != "") {  out << "Representative_Sequence\ttotal\t" << it->second << endl;  }
+                
+                while (!in.eof()){
+                    if (m->control_pressed) { break; }
                     
                     
-                    if (nameFile != "") {
-                        map<string, string>::iterator itName = nameMap.find(currSeq.getName());
-                        if (itName != nameMap.end()) { 
-                            vector<string> thisSeqsNames; 
-                            m->splitAtChar(itName->second, thisSeqsNames, ',');
-                            for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
-                                out << thisSeqsNames[k] << '\t' << it->second << endl;
-                            }
-                        }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                  
+                    Sequence currSeq(in); m->gobble(in);
+                    if (countfile == "") {  
+                        out << currSeq.getName() << '\t' << it->second << endl;  
+                        
+                        if (nameFile != "") {
+                            map<string, string>::iterator itName = nameMap.find(currSeq.getName());
+                            if (itName != nameMap.end()) { 
+                                vector<string> thisSeqsNames; 
+                                m->splitAtChar(itName->second, thisSeqsNames, ',');
+                                for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
+                                    out << thisSeqsNames[k] << '\t' << it->second << endl;
+                                }
+                            }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                      
+                        }
+                    }else { 
+                        map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
+                        if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
+                        else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
                     }
                     }
-                               }
-                               in.close();
-                               out.close();
-                       }
+                }
+                in.close();
+                out.close();
+            }
+            
+            if (countfile != "") { //create countfile with group info included
+                CountTable* ct = new CountTable();
+                ct->readTable(trimCountFile);
+                map<string, int> justTrimmedNames = ct->getNameMap();
+                delete ct;
+                
+                CountTable newCt;
+                for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
+                vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
+                for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
+                    newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
+                    map<string, string>::iterator it2 = groupMap.find(itNames->first);
+                    if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
+                    else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                }
+                newCt.printTable(trimCountFile);
+            }
                }
                
                if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
                }
                
                if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
@@ -511,6 +575,11 @@ int TrimSeqsCommand::execute(){
                if (itTypes != outputTypes.end()) {
                        if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
                }
                if (itTypes != outputTypes.end()) {
                        if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
                }
+        
+        itTypes = outputTypes.find("count");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+               }
 
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
 
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -527,8 +596,7 @@ int TrimSeqsCommand::execute(){
 }
                
 /**************************************************************************************/
 }
                
 /**************************************************************************************/
-
-int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {     
+int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {        
                
        try {
                
                
        try {
                
@@ -552,9 +620,16 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                        m->openOutputFile(scrapNFileName, scrapNameFile);
                }
                
                        m->openOutputFile(scrapNFileName, scrapNameFile);
                }
                
+        ofstream trimCountFile;
+               ofstream scrapCountFile;
+               if(countfile != ""){
+                       m->openOutputFile(trimCFileName, trimCountFile);
+                       m->openOutputFile(scrapCFileName, scrapCountFile);
+            if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
+               }
                
                ofstream outGroupsFile;
                
                ofstream outGroupsFile;
-               if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
+               if ((createGroup) && (countfile == "")){        m->openOutputFile(groupFileName, outGroupsFile);   }
                if(allFiles){
                        for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
                                for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
                if(allFiles){
                        for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
                                for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
@@ -591,14 +666,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                
                        if (m->control_pressed) { 
                                inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
                                
                        if (m->control_pressed) { 
                                inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
-                               if (createGroup) {       outGroupsFile.close();   }
-
-                               if(qFileName != ""){
-                                       qFile.close();
-                               }
-                               for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
-
-                               return 0;
+                               if ((createGroup) && (countfile == "")) {        outGroupsFile.close();   }
+                if(qFileName != "")    {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
+                if(nameFile != "")     {       scrapNameFile.close(); trimNameFile.close();    }
+                if(countfile != "")    {       scrapCountFile.close(); trimCountFile.close();  }
+                               for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;
                        }
                        
                        int success = 1;
                        }
                        
                        int success = 1;
@@ -720,6 +792,15 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
                                                else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
                                        }
                                                if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
                                                else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
                                        }
+                    
+                    int numRedundants = 0;
+                    if (countfile != "") {
+                        map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
+                        if (itCount != nameCount.end()) { 
+                            trimCountFile << itCount->first << '\t' << itCount->second << endl;
+                            numRedundants = itCount->second-1;
+                        }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
+                    }
                                        
                                        if (createGroup) {
                                                if(barcodes.size() != 0){
                                        
                                        if (createGroup) {
                                                if(barcodes.size() != 0){
@@ -736,9 +817,9 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                        
                             if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
                             
                                                        
                             if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
                             
-                                                       outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
+                                                       if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
+                            else {   groupMap[currSeq.getName()] = thisGroup; }
                                                        
                                                        
-                            int numRedundants = 0;
                                                        if (nameFile != "") {
                                                                map<string, string>::iterator itName = nameMap.find(currSeq.getName());
                                                                if (itName != nameMap.end()) { 
                                                        if (nameFile != "") {
                                                                map<string, string>::iterator itName = nameMap.find(currSeq.getName());
                                                                if (itName != nameMap.end()) { 
@@ -786,6 +867,13 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
                                                else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
                                        }
                                                if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
                                                else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
                                        }
+                    if (countfile != "") { //scrapped read: its abundance belongs in the scrap count table, mirroring the scrapNameFile write above
+                        map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
+                        if (itCount != nameCount.end()) { 
+                            scrapCountFile << itCount->first << '\t' << itCount->second << endl;
+                        }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
+                    }
+                    
                                        currSeq.setName(currSeq.getName() + '|' + trashCode);
                                        currSeq.setUnaligned(origSeq);
                                        currSeq.setAligned(origSeq);
                                        currSeq.setName(currSeq.getName() + '|' + trashCode);
                                        currSeq.setUnaligned(origSeq);
                                        currSeq.setAligned(origSeq);
@@ -819,6 +907,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                if (createGroup) {       outGroupsFile.close();   }
                if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
                if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
                if (createGroup) {       outGroupsFile.close();   }
                if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
                if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
+        if(countfile != "")    {       scrapCountFile.close(); trimCountFile.close();  }
                
                return count;
        }
                
                return count;
        }
@@ -830,7 +919,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
 
 /**************************************************************************************************/
 
 
 /**************************************************************************************************/
 
-int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
+int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
        try {
         
         int process = 1;
        try {
         
         int process = 1;
@@ -881,6 +970,8 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                                                 (scrapQualFileName + toString(getpid()) + ".temp"),
                                                                 (trimNameFileName + toString(getpid()) + ".temp"),
                                                                 (scrapNameFileName + toString(getpid()) + ".temp"),
                                                                 (scrapQualFileName + toString(getpid()) + ".temp"),
                                                                 (trimNameFileName + toString(getpid()) + ".temp"),
                                                                 (scrapNameFileName + toString(getpid()) + ".temp"),
+                                 (trimCountFileName + toString(getpid()) + ".temp"),
+                                                                (scrapCountFileName + toString(getpid()) + ".temp"),
                                                                 (groupFile + toString(getpid()) + ".temp"),
                                                                 tempFASTAFileNames,
                                                                 tempPrimerQualFileNames,
                                                                 (groupFile + toString(getpid()) + ".temp"),
                                                                 tempFASTAFileNames,
                                                                 tempPrimerQualFileNames,
@@ -901,6 +992,11 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                        for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
                                                out << it->first << '\t' << it->second << endl;
                                        }
                                        for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
                                                out << it->first << '\t' << it->second << endl;
                                        }
+                    
+                    out << groupMap.size() << endl;
+                    for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
+                                               out << it->first << '\t' << it->second << endl;
+                                       }
                                        out.close();
                                }
                                exit(0);
                                        out.close();
                                }
                                exit(0);
@@ -923,8 +1019,12 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                        m->openOutputFile(trimNameFileName, temp);              temp.close();
                        m->openOutputFile(scrapNameFileName, temp);             temp.close();
                }
                        m->openOutputFile(trimNameFileName, temp);              temp.close();
                        m->openOutputFile(scrapNameFileName, temp);             temp.close();
                }
+        if (countfile != "") {
+                       m->openOutputFile(trimCountFileName, temp);             temp.close();
+                       m->openOutputFile(scrapCountFileName, temp);            temp.close();
+               }
 
 
-               driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
+               driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
                
                //force parent to wait until all the processes are done
                for (int i=0;i<processIDS.size();i++) { 
                
                //force parent to wait until all the processes are done
                for (int i=0;i<processIDS.size();i++) { 
@@ -974,13 +1074,15 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
 
             
                        trimData* tempTrim = new trimData(filename,
 
             
                        trimData* tempTrim = new trimData(filename,
-                                              qFileName, nameFile,
+                                              qFileName, nameFile, countfile,
                                               (trimFASTAFileName+extension),
                                               (scrapFASTAFileName+extension),
                                               (trimQualFileName+extension),
                                               (scrapQualFileName+extension),
                                               (trimNameFileName+extension),
                                               (scrapNameFileName+extension),
                                               (trimFASTAFileName+extension),
                                               (scrapFASTAFileName+extension),
                                               (trimQualFileName+extension),
                                               (scrapQualFileName+extension),
                                               (trimNameFileName+extension),
                                               (scrapNameFileName+extension),
+                                              (trimCountFileName+extension),
+                                              (scrapCountFileName+extension),
                                               (groupFile+extension),
                                               tempFASTAFileNames,
                                               tempPrimerQualFileNames,
                                               (groupFile+extension),
                                               tempFASTAFileNames,
                                               tempPrimerQualFileNames,
@@ -989,7 +1091,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer, 
                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer, 
                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
-                                             minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
+                                             minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap, nameCount);
                        pDataArray.push_back(tempTrim);
             
                        hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
                        pDataArray.push_back(tempTrim);
             
                        hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
@@ -1008,7 +1110,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                        m->openOutputFile(scrapNameFileName, temp);             temp.close();
                }
         
                        m->openOutputFile(scrapNameFileName, temp);             temp.close();
                }
         
-               driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
+               driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
         processIDS.push_back(processors-1);
 
         
         processIDS.push_back(processors-1);
 
         
@@ -1022,6 +1124,11 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                 if (it2 == groupCounts.end()) {        groupCounts[it->first] = it->second; }
                 else { groupCounts[it->first] += it->second; }
             }
                 if (it2 == groupCounts.end()) {        groupCounts[it->first] = it->second; }
                 else { groupCounts[it->first] += it->second; }
             }
+            for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
+                map<string, string>::iterator it2 = groupMap.find(it->first);
+                if (it2 == groupMap.end()) {   groupMap[it->first] = it->second; }
+                else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
+            }
             CloseHandle(hThreadArray[i]);
                        delete pDataArray[i];
                }
             CloseHandle(hThreadArray[i]);
                        delete pDataArray[i];
                }
@@ -1052,8 +1159,15 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
                                m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
                        }
                                m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
                                m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
                        }
+            
+            if(countfile != ""){
+                               m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
+                               m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
+                               m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
+                               m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
+                       }
                        
                        
-                       if(createGroup){
+                       if((createGroup)&&(countfile == "")){
                                m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
                                m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
                        }
                                m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
                                m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
                        }
@@ -1091,14 +1205,27 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                in >> tempNum; m->gobble(in);
                                
                                if (tempNum != 0) {
                                in >> tempNum; m->gobble(in);
                                
                                if (tempNum != 0) {
-                                       while (!in.eof()) { 
-                                               in >> group >> tempNum; m->gobble(in);
+                                       for (int i = 0; i < tempNum; i++) { 
+                        int groupNum;
+                                               in >> group >> groupNum; m->gobble(in);
                         
                                                map<string, int>::iterator it = groupCounts.find(group);
                         
                                                map<string, int>::iterator it = groupCounts.find(group);
-                                               if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
-                                               else { groupCounts[it->first] += tempNum; }
+                                               if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
+                                               else { groupCounts[it->first] += groupNum; }
                                        }
                                }
                                        }
                                }
+                in >> tempNum; m->gobble(in);
+                if (tempNum != 0) {
+                                       for (int i = 0; i < tempNum; i++) { 
+                        string group, seqName;
+                                               in >> seqName >> group; m->gobble(in);
+                        
+                                               map<string, string>::iterator it = groupMap.find(seqName);
+                                               if (it == groupMap.end()) {     groupMap[seqName] = group; }
+                                               else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
+                                       }
+                               }
+                
                                in.close(); m->mothurRemove(tempFile);
                        }
             #endif
                                in.close(); m->mothurRemove(tempFile);
                        }
             #endif
@@ -1387,6 +1514,7 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                        string fastaFileName = "";
                                        string qualFileName = "";
                                        string nameFileName = "";
                                        string fastaFileName = "";
                                        string qualFileName = "";
                                        string nameFileName = "";
+                    string countFileName = "";
                                        
                                        if(primerName == ""){
                                                comboGroupName = barcodeNameVector[itBar->second];
                                        
                                        if(primerName == ""){
                                                comboGroupName = barcodeNameVector[itBar->second];
@@ -1433,7 +1561,6 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                                nameFileNames[itBar->second][itPrimer->second] = nameFileName;
                                                m->openOutputFile(nameFileName, temp);          temp.close();
                                        }
                                                nameFileNames[itBar->second][itPrimer->second] = nameFileName;
                                                m->openOutputFile(nameFileName, temp);          temp.close();
                                        }
-                                       
                                }
                        }
                }
                                }
                        }
                }
index 957f37a65bfb1c177f000c9c6d031ddfc3784c81..8d9a57a54ffd0db053956200602f3aa06fb4de5b 100644 (file)
@@ -14,8 +14,8 @@
 #include "command.hpp"
 #include "sequence.hpp"
 #include "qualityscores.h"
 #include "command.hpp"
 #include "sequence.hpp"
 #include "qualityscores.h"
-#include "groupmap.h"
 #include "trimoligos.h"
 #include "trimoligos.h"
+#include "counttable.h"
 
 
 class TrimSeqsCommand : public Command {
 
 
 class TrimSeqsCommand : public Command {
@@ -36,16 +36,13 @@ public:
        void help() { m->mothurOut(getHelpString()); }  
        
 private:
        void help() { m->mothurOut(getHelpString()); }  
        
 private:
-       
-       GroupMap* groupMap;
-    
     struct linePair {
         unsigned long long start;
         unsigned long long end;
         linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
         linePair() {}
     };
     struct linePair {
         unsigned long long start;
         unsigned long long end;
         linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
         linePair() {}
     };
-
+    
        bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
        bool keepFirstTrim(Sequence&, QualityScores&);
        bool removeLastTrim(Sequence&, QualityScores&);
        bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
        bool keepFirstTrim(Sequence&, QualityScores&);
        bool removeLastTrim(Sequence&, QualityScores&);
@@ -55,7 +52,7 @@ private:
     string reverseOligo(string);
 
        bool abort, createGroup;
     string reverseOligo(string);
 
        bool abort, createGroup;
-       string fastaFile, oligoFile, qFileName, groupfile, nameFile, outputDir;
+       string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
        
        bool flip, allFiles, qtrim, keepforward;
        int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
        
        bool flip, allFiles, qtrim, keepforward;
        int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
@@ -75,13 +72,15 @@ private:
        vector<string> barcodeNameVector;       //needed here?
        map<string, int> groupCounts;  
        map<string, string> nameMap;
        vector<string> barcodeNameVector;       //needed here?
        map<string, int> groupCounts;  
        map<string, string> nameMap;
+    map<string, int> nameCount; //for countfile name -> repCount
+    map<string, string> groupMap; //for countfile name -> group
 
        vector<int> processIDS;   //processid
        vector<linePair> lines;
        vector<linePair> qLines;
        
 
        vector<int> processIDS;   //processid
        vector<linePair> lines;
        vector<linePair> qLines;
        
-       int driverCreateTrim(string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair, linePair);    
-       int createProcessesCreateTrim(string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >);
+       int driverCreateTrim(string, string, string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair, linePair);    
+       int createProcessesCreateTrim(string, string, string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >);
        int setLines(string, string);
 };
 
        int setLines(string, string);
 };
 
@@ -92,7 +91,7 @@ private:
 struct trimData {
     unsigned long long start, end;
     MothurOut* m;
 struct trimData {
     unsigned long long start, end;
     MothurOut* m;
-    string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, groupFileName, nameFile;
+    string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile;
        vector<vector<string> > fastaFileNames;
     vector<vector<string> > qualFileNames;
     vector<vector<string> > nameFileNames;
        vector<vector<string> > fastaFileNames;
     vector<vector<string> > qualFileNames;
     vector<vector<string> > nameFileNames;
@@ -105,6 +104,7 @@ struct trimData {
        map<string, int> barcodes;
     map<string, int> rbarcodes;
        map<string, int> primers;
        map<string, int> barcodes;
     map<string, int> rbarcodes;
        map<string, int> primers;
+    map<string, int> nameCount;
     vector<string>  linker;
     vector<string>  spacer;
        map<string, int> combos;
     vector<string>  linker;
     vector<string>  spacer;
        map<string, int> combos;
@@ -112,22 +112,26 @@ struct trimData {
        vector<string> barcodeNameVector;       
        map<string, int> groupCounts;  
        map<string, string> nameMap;
        vector<string> barcodeNameVector;       
        map<string, int> groupCounts;  
        map<string, string> nameMap;
+    map<string, string> groupMap;
     
        trimData(){}
     
        trimData(){}
-       trimData(string fn, string qn, string nf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
+       trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
                       int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, map<string, int> rbar, vector<string> revP, vector<string> li, vector<string> spa, 
                       vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
                       int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
                       int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, map<string, int> rbar, vector<string> revP, vector<string> li, vector<string> spa, 
                       vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
                       int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
-                      int minL, int maxA, int maxH, int maxL, bool fli, map<string, string> nm) {
+                      int minL, int maxA, int maxH, int maxL, bool fli, map<string, string> nm, map<string, int> ncount) {
         filename = fn;
         qFileName = qn;
         nameFile = nf;
         filename = fn;
         qFileName = qn;
         nameFile = nf;
+        countfile = cf;
         trimFileName = tn;
         scrapFileName = sn;
         trimQFileName = tqn;
         scrapQFileName = sqn;
         trimNFileName = tnn;
         scrapNFileName = snn;
         trimFileName = tn;
         scrapFileName = sn;
         trimQFileName = tqn;
         scrapQFileName = sqn;
         trimNFileName = tnn;
         scrapNFileName = snn;
+        trimCFileName = tcn;
+        scrapCFileName = scn;
         groupFileName = gn;
         fastaFileNames = ffn;
         qualFileNames = qfn;
         groupFileName = gn;
         fastaFileNames = ffn;
         qualFileNames = qfn;
@@ -137,6 +141,7 @@ struct trimData {
         qlineStart = qstart;
         qlineEnd = qend;
                m = mout;
         qlineStart = qstart;
         qlineEnd = qend;
                m = mout;
+        nameCount = ncount;
         
         pdiffs = pd;
         bdiffs = bd;
         
         pdiffs = pd;
         bdiffs = bd;
@@ -203,7 +208,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                
                
                ofstream outGroupsFile;
                
                
                ofstream outGroupsFile;
-               if (pDataArray->createGroup){   pDataArray->m->openOutputFile(pDataArray->groupFileName, outGroupsFile);   }
+               if ((pDataArray->createGroup) && (pDataArray->countfile == "")){        pDataArray->m->openOutputFile(pDataArray->groupFileName, outGroupsFile);   }
                if(pDataArray->allFiles){
                        for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file
                                for (int j = 0; j < pDataArray->fastaFileNames[i].size(); j++) { //clears old file
                if(pDataArray->allFiles){
                        for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file
                                for (int j = 0; j < pDataArray->fastaFileNames[i].size(); j++) { //clears old file
@@ -222,6 +227,14 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                        }
                }
                
                        }
                }
                
+        ofstream trimCountFile;
+               ofstream scrapCountFile;
+               if(pDataArray->countfile != ""){
+                       pDataArray->m->openOutputFile(pDataArray->trimCFileName, trimCountFile);
+                       pDataArray->m->openOutputFile(pDataArray->scrapCFileName, scrapCountFile);
+            if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
+               }
+        
                ifstream inFASTA;
                pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
                if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) {
                ifstream inFASTA;
                pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
                if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) {
@@ -248,7 +261,11 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                   
                        if (pDataArray->m->control_pressed) { 
                                inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
                                   
                        if (pDataArray->m->control_pressed) { 
                                inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
-                               if (pDataArray->createGroup) {   outGroupsFile.close();   }
+                               if ((pDataArray->createGroup) && (pDataArray->countfile == "")) {        outGroupsFile.close();   }
+                if(pDataArray->qFileName != "")        {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
+                if(pDataArray->nameFile != "") {       scrapNameFile.close(); trimNameFile.close();    }
+                if(pDataArray->countfile != "")        {       scrapCountFile.close(); trimCountFile.close();  }
+
                                if(pDataArray->qFileName != ""){ qFile.close(); }
                                return 0;
                        }
                                if(pDataArray->qFileName != ""){ qFile.close(); }
                                return 0;
                        }
@@ -399,6 +416,15 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                                else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
                                        }
                                        
                                                else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
                                        }
                                        
+                    int numRedundants = 0;
+                    if (pDataArray->countfile != "") {
+                        map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
+                        if (itCount != pDataArray->nameCount.end()) { 
+                            trimCountFile << itCount->first << '\t' << itCount->second << endl;
+                            numRedundants = itCount->second-1;
+                        }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
+                    }
+                                       
                                        if (pDataArray->createGroup) {
                                                if(pDataArray->barcodes.size() != 0){
                                                        string thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
                                        if (pDataArray->createGroup) {
                                                if(pDataArray->barcodes.size() != 0){
                                                        string thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
@@ -412,9 +438,9 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                                                } 
                                                        }
                                                        
                                                                } 
                                                        }
                                                        
-                                                       outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
+                                                       if (pDataArray->countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
+                            else {   pDataArray->groupMap[currSeq.getName()] = thisGroup; }
                                                        
                                                        
-                            int numRedundants = 0;
                                                        if (pDataArray->nameFile != "") {
                                                                map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
                                                                if (itName != pDataArray->nameMap.end()) { 
                                                        if (pDataArray->nameFile != "") {
                                                                map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
                                                                if (itName != pDataArray->nameMap.end()) { 
@@ -462,6 +488,12 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                                if (itName != pDataArray->nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
                                                else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
                                        }
                                                if (itName != pDataArray->nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
                                                else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
                                        }
+                    if (pDataArray->countfile != "") {
+                        map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
+                        if (itCount != pDataArray->nameCount.end()) { 
+                            trimCountFile << itCount->first << '\t' << itCount->second << endl;
+                        }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
+                    }
                                        currSeq.setName(currSeq.getName() + '|' + trashCode);
                                        currSeq.setUnaligned(origSeq);
                                        currSeq.setAligned(origSeq);
                                        currSeq.setName(currSeq.getName() + '|' + trashCode);
                                        currSeq.setUnaligned(origSeq);
                                        currSeq.setAligned(origSeq);