]> git.donarmstrong.com Git - mothur.git/commitdiff
added count file to trim.seqs, get.groups, get.lineage, get.seqs, heatmap.sim, list...
authorSarah Westcott <mothur.westcott@gmail.com>
Fri, 14 Sep 2012 19:17:37 +0000 (15:17 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Fri, 14 Sep 2012 19:17:37 +0000 (15:17 -0400)
26 files changed:
classify.cpp
counttable.cpp
counttable.h
getgroupscommand.cpp
getlineagecommand.cpp
getlineagecommand.h
getseqscommand.cpp
getseqscommand.h
heatmapsimcommand.cpp
heatmapsimcommand.h
listseqscommand.cpp
listseqscommand.h
makefile
removegroupscommand.cpp
removelineagecommand.cpp
removelineagecommand.h
removeseqscommand.cpp
removeseqscommand.h
secondarystructurecommand.cpp
secondarystructurecommand.h
seqsummarycommand.cpp
seqsummarycommand.h
summaryqualcommand.cpp
summaryqualcommand.h
trimseqscommand.cpp
trimseqscommand.h

index ace89b9cbe9a4c0350ee14396b70340b1b7bc6b3..459c90f9ddd769628b0fc9061e7bd504a88b2900 100644 (file)
@@ -61,7 +61,8 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                        names.push_back(temp.getName());
                                        database->addSequence(temp);    
                                }
-                               database->generateDB();
+                               if ((method == "kmer") && (!shortcuts)) {;} //don't print
+                else {database->generateDB(); }
                        }else if ((method == "kmer") && (!needToGenerate)) {    
                                ifstream kmerFileTest(kmerDBName.c_str());
                                database->readKmerDB(kmerFileTest);     
@@ -200,7 +201,8 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                }
                                fastaFile.close();
 
-                database->generateDB(); 
+                if ((method == "kmer") && (!shortcuts)) {;} //don't print
+                else {database->generateDB(); } 
                                
                        }else if ((method == "kmer") && (!needToGenerate)) {    
                                ifstream kmerFileTest(kmerDBName.c_str());
index bc9d4da05966b6b3fc3ec5fe0e751c21be7f180f..cd623ecbcf5fbea1cdd25fa8c9c2b872746da162 100644 (file)
@@ -661,6 +661,22 @@ vector<string> CountTable::getNamesOfSeqs() {
        }
 }
 /************************************************************/
+//returns the names of all unique sequences in file mapped to their seqCounts
+map<string, int> CountTable::getNameMap() {
+    try {
+        map<string, int> names;
+        for (map<string, int>::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) {
+            names[it->first] = totals[it->second];
+        }
+        
+        return names;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CountTable", "getNameMap");
+               exit(1);
+       }
+}
+/************************************************************/
 //returns the names of all unique sequences in file
 vector<string> CountTable::getNamesOfSeqs(string group) {
     try {
index 68ba8d211cae1a12aa1d39a76f720a0998a87ad9..b66c4c6ef5e90007025935c4f0820cedb11c624d 100644 (file)
@@ -83,6 +83,7 @@ class CountTable {
         vector<string> getNamesOfSeqs(string);
         int mergeCounts(string, string); //combines counts for 2 seqs, saving under the first name passed in.
         ListVector getListVector();
+        map<string, int> getNameMap();
     
     private:
         string filename;
index 7585c122eeb3a212fab404240f5b6f6568574634..910a8720bc3ec88079d2663ebf819408436ca02a 100644 (file)
@@ -72,7 +72,7 @@ string GetGroupsCommand::getOutputFileNameTag(string type, string inputName=""){
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
-            else if (type == "count")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
+            else if (type == "count")       {   outputFileName =  "pick.count.table";   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "shared")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "design")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
index 1aba0fed4e6e772de07718a544335b6e6bb58db9..1cd139b5213733e759a7a32a1fbdf8221d09dfb6 100644 (file)
@@ -15,8 +15,9 @@
 vector<string> GetLineageCommand::setParameters(){     
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
                CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(ptaxonomy);
                CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
@@ -38,9 +39,9 @@ vector<string> GetLineageCommand::setParameters(){
 string GetLineageCommand::getHelpString(){     
        try {
                string helpString = "";
-               helpString += "The get.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, list or alignreport file.\n";
+               helpString += "The get.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, count, list or alignreport file.\n";
                helpString += "It outputs a file containing only the sequences from the taxonomy file that are from the taxon requested.\n";
-               helpString += "The get.lineage command parameters are taxon, fasta, name, group, list, taxonomy, alignreport and dups.  You must provide taxonomy unless you have a valid current taxonomy file.\n";
+               helpString += "The get.lineage command parameters are taxon, fasta, name, group, count, list, taxonomy, alignreport and dups.  You must provide taxonomy unless you have a valid current taxonomy file.\n";
                helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
                helpString += "The taxon parameter allows you to select the taxons you would like to get and is required.\n";
                helpString += "You may enter your taxons with confidence scores, doing so will get only those sequences that belong to the taxonomy and whose cofidence scores is above the scores you give.\n";
@@ -70,6 +71,7 @@ string GetLineageCommand::getOutputFileNameTag(string type, string inputName="")
             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
+            else if (type == "count")       {   outputFileName =  "pick.count.table";                    }  
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "alignreport") {   outputFileName =  "pick.align.report";   }
@@ -94,6 +96,7 @@ GetLineageCommand::GetLineageCommand(){
                outputTypes["group"] = tempOutNames;
                outputTypes["alignreport"] = tempOutNames;
                outputTypes["list"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
@@ -131,6 +134,7 @@ GetLineageCommand::GetLineageCommand(string option)  {
                        outputTypes["group"] = tempOutNames;
                        outputTypes["alignreport"] = tempOutNames;
                        outputTypes["list"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
 
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
@@ -187,6 +191,14 @@ GetLineageCommand::GetLineageCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        
@@ -230,6 +242,19 @@ GetLineageCommand::GetLineageCommand(string option)  {
                                else                            {  temp = "false"; usedDups = "";       }
                        }
                        dups = m->isTrue(temp);
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+            if (countfile == "not open") { countfile = ""; abort = true; }
+            else if (countfile == "not found") { countfile = "";  }    
+            else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+            
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
                        
                        taxons = validParameter.validFile(parameters, "taxon", false);  
                        if (taxons == "not found") { taxons = "";  m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine();  abort = true;  }
@@ -240,12 +265,14 @@ GetLineageCommand::GetLineageCommand(string option)  {
                        }
                        m->splitAtChar(taxons, listOfTaxons, '-');
                        
-                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
+                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
                
-                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
-                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
-                               parser.getNameFile(files);
-                       }
+            if (countfile == "") {
+                if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                    vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                    parser.getNameFile(files);
+                }
+            }
                }
 
        }
@@ -262,11 +289,18 @@ int GetLineageCommand::execute(){
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
                if (m->control_pressed) { return 0; }
+        
+        if (countfile != "") {
+            if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
+                m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
+            }
+        }
                
                //read through the correct file and output lines you want to keep
                if (taxfile != "")                      {               readTax();              }  //fills the set of names to get
                if (namefile != "")                     {               readName();             }
                if (fastafile != "")            {               readFasta();    }
+        if (countfile != "")           {               readCount();    }
                if (groupfile != "")            {               readGroup();    }
                if (alignfile != "")            {               readAlign();    }
                if (listfile != "")                     {               readList();             }
@@ -305,7 +339,12 @@ int GetLineageCommand::execute(){
                        itTypes = outputTypes.find("taxonomy");
                        if (itTypes != outputTypes.end()) {
                                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
-                       }                       
+                       }
+                       
+            itTypes = outputTypes.find("count");
+                       if (itTypes != outputTypes.end()) {
+                               if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+                       }
                }
                
                return 0;               
@@ -353,7 +392,7 @@ int GetLineageCommand::readFasta(){
                in.close();     
                out.close();
                
-               if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
+               if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
                outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName);
                
                return 0;
@@ -365,6 +404,52 @@ int GetLineageCommand::readFasta(){
        }
 }
 //**********************************************************************************************************************
+int GetLineageCommand::readCount(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               ifstream in;
+               m->openInputFile(countfile, in);
+               
+               bool wroteSomething = false;
+               
+        string headers = m->getline(in); m->gobble(in);
+        out << headers << endl;
+        
+        string name, rest; int thisTotal;
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+            
+            in >> name; m->gobble(in); 
+            in >> thisTotal; m->gobble(in);
+            rest = m->getline(in); m->gobble(in);
+            if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
+            
+            if (names.count(name) != 0) {
+                out << name << '\t' << thisTotal << '\t' << rest << endl;
+                wroteSomething = true;
+            }
+        }
+        in.close();
+               out.close();
+               
+               if (wroteSomething == false) {  m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
+               outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
+                      
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetLineageCommand", "readCount");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 int GetLineageCommand::readList(){
        try {
                string thisOutputDir = outputDir;
@@ -425,7 +510,7 @@ int GetLineageCommand::readList(){
                in.close();     
                out.close();
                
-               if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
+               if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
                outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
                
                return 0;
@@ -510,7 +595,7 @@ int GetLineageCommand::readName(){
                in.close();
                out.close();
                
-               if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
+               if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
                outputNames.push_back(outputFileName);  outputTypes["name"].push_back(outputFileName);
                
                return 0;
@@ -558,7 +643,7 @@ int GetLineageCommand::readGroup(){
                in.close();
                out.close();
                
-               if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
+               if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
                outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
                
                return 0;
@@ -814,7 +899,7 @@ int GetLineageCommand::readAlign(){
                in.close();
                out.close();
                
-               if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
+               if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
                outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
                
                return 0;
index 0ab042bac365b64840e32212a6705f62d46d5ff1..99bc0fa347313bb023e8bd1b41a5c3e2be5c93ed 100644 (file)
@@ -36,11 +36,12 @@ class GetLineageCommand : public Command {
        private:
                set<string> names;
                vector<string> outputNames, listOfTaxons;
-               string fastafile, namefile, groupfile, alignfile, listfile, taxfile, outputDir, taxons;
+               string fastafile, namefile, groupfile, alignfile, countfile, listfile, taxfile, outputDir, taxons;
                bool abort, dups;
                
                int readFasta();
                int readName();
+        int readCount();
                int readGroup();
                int readAlign();
                int readList();
index ccabafb6d19c008eb6957a35e79b5cd0623021d0..e0faef4cc07bd3fc6626fe186ec67fe05c512023 100644 (file)
@@ -15,8 +15,9 @@
 vector<string> GetSeqsCommand::setParameters(){        
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
                CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
                CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
@@ -40,7 +41,7 @@ vector<string> GetSeqsCommand::setParameters(){
 string GetSeqsCommand::getHelpString(){        
        try {
                string helpString = "";
-               helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n";
+               helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n";
                helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
                helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
                helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
@@ -68,6 +69,7 @@ GetSeqsCommand::GetSeqsCommand(){
                outputTypes["alignreport"] = tempOutNames;
                outputTypes["list"] = tempOutNames;
                outputTypes["qfile"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
                outputTypes["accnosreport"] = tempOutNames;
        }
        catch(exception& e) {
@@ -88,6 +90,7 @@ string GetSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
+            else if (type == "count")       {   outputFileName =  "pick.count.table";   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "qfile")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
@@ -135,6 +138,7 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                        outputTypes["list"] = tempOutNames;
                        outputTypes["qfile"] = tempOutNames;
                        outputTypes["accnosreport"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
@@ -215,6 +219,14 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        
@@ -270,17 +282,32 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                        if (accnosfile2 == "not open") { abort = true; }
                        else if (accnosfile2 == "not found") {  accnosfile2 = "";  }
                        
+            countfile = validParameter.validFile(parameters, "count", true);
+            if (countfile == "not open") { countfile = ""; abort = true; }
+            else if (countfile == "not found") { countfile = "";  }    
+            else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+            
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+
                        
                        string usedDups = "true";
                        string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "true"; usedDups = ""; }
                        dups = m->isTrue(temp);
                        
-                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
-               
-                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
-                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
-                               parser.getNameFile(files);
-                       }
+                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
+            
+            if (countfile == "") {
+                if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                    vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                    parser.getNameFile(files);
+                }
+            }
                }
 
        }
@@ -300,11 +327,18 @@ int GetSeqsCommand::execute(){
                names = m->readAccnos(accnosfile);
                
                if (m->control_pressed) { return 0; }
+        
+        if (countfile != "") {
+            if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
+                m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
+            }
+        }
                
                //read through the correct file and output lines you want to keep
                if (namefile != "")                     {               readName();                     }
                if (fastafile != "")            {               readFasta();            }
                if (groupfile != "")            {               readGroup();            }
+        if (countfile != "")           {               readCount();            }
                if (alignfile != "")            {               readAlign();            }
                if (listfile != "")                     {               readList();                     }
                if (taxfile != "")                      {               readTax();                      }
@@ -354,6 +388,10 @@ int GetSeqsCommand::execute(){
                                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
                        }
                        
+            itTypes = outputTypes.find("count");
+                       if (itTypes != outputTypes.end()) {
+                               if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+                       }
                }
                
                return 0;               
@@ -493,6 +531,57 @@ int GetSeqsCommand::readQual(){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+int GetSeqsCommand::readCount(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               ifstream in;
+               m->openInputFile(countfile, in);
+               
+               bool wroteSomething = false;
+               int selectedCount = 0;
+               
+        string headers = m->getline(in); m->gobble(in);
+        out << headers << endl;
+        
+        string name, rest; int thisTotal;
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+            
+            in >> name; m->gobble(in); 
+            in >> thisTotal; m->gobble(in);
+            rest = m->getline(in); m->gobble(in);
+            if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
+            
+            if (names.count(name) != 0) {
+                out << name << '\t' << thisTotal << '\t' << rest << endl;
+                wroteSomething = true;
+                selectedCount+= thisTotal;
+            }
+        }
+        in.close();
+               out.close();
+               
+               if (wroteSomething == false) {  m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
+               outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
+               
+               m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file."); m->mothurOutEndLine();
+        
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetSeqsCommand", "readCount");
+               exit(1);
+       }
+}
+
 //**********************************************************************************************************************
 int GetSeqsCommand::readList(){
        try {
index c71b5f2804a71190c6919f0dc80bf80d32f15508..60e471eebf7865d5924406ef7c720c3bdcdf4648 100644 (file)
@@ -35,7 +35,7 @@ class GetSeqsCommand : public Command {
        private:
                set<string> names;
                vector<string> outputNames;
-               string accnosfile, accnosfile2, fastafile, namefile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
+               string accnosfile, accnosfile2, fastafile, namefile, countfile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
                bool abort, dups;
     
         //for debug
@@ -44,6 +44,7 @@ class GetSeqsCommand : public Command {
                int readFasta();
                int readName();
                int readGroup();
+        int readCount();
                int readAlign();
                int readList();
                int readTax();
index 3de10e6a8cd9411d6829ac699ffd11991af81007..8a4a12bc70f2d707fb923e74036043f12c1dcd3f 100644 (file)
@@ -25,7 +25,8 @@ vector<string> HeatMapSimCommand::setParameters(){
        try {
                CommandParameter pshared("shared", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pshared);        
                CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "none",false,false); parameters.push_back(pphylip);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
                CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnShared", "PhylipColumnShared", "ColumnName",false,false); parameters.push_back(pcolumn);          
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
@@ -48,9 +49,8 @@ string HeatMapSimCommand::getHelpString(){
        try {
                string helpString = "";
                ValidCalculators validCalculator;
-               helpString += "The heatmap.sim command parameters are shared, phylip, column, name, groups, calc, fontsize and label.  shared or phylip or column and name are required unless valid current files exist.\n";
-               helpString += "There are two ways to use the heatmap.sim command. The first is with the read.otu command. \n";
-               helpString += "With the read.otu command you may use the groups, label and calc parameters. \n";
+               helpString += "The heatmap.sim command parameters are shared, phylip, column, name, count, groups, calc, fontsize and label.  shared or phylip or column and name are required unless valid current files exist.\n";
+               helpString += "There are two ways to use the heatmap.sim command. The first is with a shared file, and you may use the groups, label and calc parameter. \n";
                helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your heatmap.\n";
                helpString += "The group names are separated by dashes. The label parameter allows you to select what distance levels you would like a heatmap created for, and is also separated by dashes.\n";
                helpString += "The fontsize parameter allows you to adjust the font size of the picture created, default=24.\n";
@@ -174,6 +174,14 @@ HeatMapSimCommand::HeatMapSimCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["shared"] = inputDir + it->second;           }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        //required parameters
@@ -197,6 +205,12 @@ HeatMapSimCommand::HeatMapSimCommand(string option)  {
                        else if (namefile == "not found") { namefile = ""; }
                        else { m->setNameFile(namefile); }
                        
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; countfile = ""; }  
+                       else if (countfile == "not found") { countfile = ""; }
+                       else { m->setCountTableFile(countfile); }
+                       
+            if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
                        
                        //error checking on files                       
                        if ((sharedfile == "") && ((phylipfile == "") && (columnfile == "")))   { 
@@ -224,8 +238,12 @@ HeatMapSimCommand::HeatMapSimCommand(string option)  {
                                        namefile = m->getNameFile(); 
                                        if (namefile != "") {  m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
                                        else { 
-                                               m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); 
-                                               abort = true; 
+                        countfile = m->getCountTableFile(); 
+                                               if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                        else { 
+                            m->mothurOut("You need to provide a name or count file if you are going to use the column format."); m->mothurOutEndLine(); 
+                            abort = true; 
+                        }      
                                        }       
                                }
                        }
@@ -520,20 +538,28 @@ int HeatMapSimCommand::runCommandDist() {
                        in.close();
                }else {
                        //read names file
-                       NameAssignment* nameMap = new NameAssignment(namefile);
-                       nameMap->readMap();
-                       
-                       //put names in order in vector
-                       for (int i = 0; i < nameMap->size(); i++) {
-                               names.push_back(nameMap->get(i));
-                       }
-                       
-                       //resize matrix
-                       matrix.resize(nameMap->size());
-                       for (int i = 0; i < nameMap->size(); i++) {
-                               matrix[i].resize(nameMap->size(), 0.0);
-                       }
-                       
+                       NameAssignment* nameMap;
+            CountTable ct; 
+            if (namefile != "") { 
+                nameMap = new NameAssignment(namefile);
+                nameMap->readMap();
+                
+                //put names in order in vector
+                for (int i = 0; i < nameMap->size(); i++) {
+                    names.push_back(nameMap->get(i));
+                }
+             }else if (countfile != "") {
+                nameMap = NULL;
+                ct.readTable(countfile);
+                names = ct.getNamesOfSeqs();
+            }
+                       
+            //resize matrix
+            matrix.resize(names.size());
+            for (int i = 0; i < names.size(); i++) {
+                matrix[i].resize(names.size(), 0.0);
+            }
+                                               
                        //read column file
                        string first, second;
                        double dist;
@@ -544,19 +570,26 @@ int HeatMapSimCommand::runCommandDist() {
                                
                                if (m->control_pressed) { return 0; }
                                
-                               map<string, int>::iterator itA = nameMap->find(first);
-                               map<string, int>::iterator itB = nameMap->find(second);
-                               
-                               if(itA == nameMap->end()){  m->mothurOut("AAError: Sequence '" + first + "' was not found in the names file, please correct\n"); exit(1);  }
-                               if(itB == nameMap->end()){  m->mothurOut("ABError: Sequence '" + second + "' was not found in the names file, please correct\n"); exit(1);  }
-                               
-                               //save distance
-                               matrix[itA->second][itB->second] = dist;
-                               matrix[itB->second][itA->second] = dist;
+                if (namefile != "") {
+                    map<string, int>::iterator itA = nameMap->find(first);
+                    map<string, int>::iterator itB = nameMap->find(second);
+                    
+                    if(itA == nameMap->end()){  m->mothurOut("AAError: Sequence '" + first + "' was not found in the names file, please correct\n"); exit(1);  }
+                    if(itB == nameMap->end()){  m->mothurOut("ABError: Sequence '" + second + "' was not found in the names file, please correct\n"); exit(1);  }
+                    
+                    //save distance
+                    matrix[itA->second][itB->second] = dist;
+                    matrix[itB->second][itA->second] = dist;
+                }else if (countfile != "") {
+                    int itA = ct.get(first);
+                    int itB = ct.get(second);
+                    matrix[itA][itB] = dist;
+                    matrix[itB][itA] = dist;
+                }
                        }
                        in.close();
                        
-                       delete nameMap;
+                       if (namefile != "") { delete nameMap; }
                }
                
                
index 7b74880955594da0bb863c4b1755cf32c7b429e7..2c3a470761d719af0bff13bc4d194e5164bd42e0 100644 (file)
@@ -43,7 +43,7 @@ private:
        OptionParser* parser;
        bool abort, allLines;
        set<string> labels; //holds labels to be used
-       string format, groups, label, calc, sharedfile, phylipfile, columnfile, namefile, outputDir, inputfile;
+       string format, groups, label, calc, sharedfile, phylipfile, columnfile, countfile, namefile, outputDir, inputfile;
        vector<string> Estimators, Groups, outputNames;
        int fontsize;
        
index bfbb0788c5092382f22bea643c5405674361dd67..7c3f07f96e092d81c307673192dad29a31fee091 100644 (file)
@@ -10,6 +10,7 @@
 #include "listseqscommand.h"
 #include "sequence.hpp"
 #include "listvector.hpp"
+#include "counttable.h"
 
 
 //**********************************************************************************************************************
@@ -17,6 +18,7 @@ vector<string> ListSeqsCommand::setParameters(){
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pfasta);
                CommandParameter pname("name", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pcount);
                CommandParameter pgroup("group", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(pgroup);
                CommandParameter plist("list", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "FNGLT", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
@@ -37,8 +39,8 @@ vector<string> ListSeqsCommand::setParameters(){
 string ListSeqsCommand::getHelpString(){       
        try {
                string helpString = "";
-               helpString += "The list.seqs command reads a fasta, name, group, list, taxonomy or alignreport file and outputs a .accnos file containing sequence names.\n";
-               helpString += "The list.seqs command parameters are fasta, name, group, list, taxonomy and alignreport.  You must provide one of these parameters.\n";
+               helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy or alignreport file and outputs a .accnos file containing sequence names.\n";
+               helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy and alignreport.  You must provide one of these parameters.\n";
                helpString += "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n";
                helpString += "Example list.seqs(fasta=amazon.fasta).\n";
                helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
@@ -164,6 +166,14 @@ ListSeqsCommand::ListSeqsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        //check for required parameters
@@ -195,8 +205,13 @@ ListSeqsCommand::ListSeqsCommand(string option)  {
                        if (taxfile == "not open") { abort = true; }
                        else if (taxfile == "not found") {  taxfile = "";  }
                        else { m->setTaxonomyFile(taxfile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; }
+                       else if (countfile == "not found") {  countfile = "";  }
+                       else { m->setCountTableFile(countfile); }
                        
-                       if ((fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == ""))  { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }
+                       if ((countfile == "") && (fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == ""))  { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }
                        
                        int okay = 1;
                        if (outputDir != "") { okay++; }
@@ -225,6 +240,7 @@ int ListSeqsCommand::execute(){
                else if (alignfile != "")       {       inputFileName = alignfile;      readAlign();    }
                else if (listfile != "")        {       inputFileName = listfile;       readList();             }
                else if (taxfile != "")         {       inputFileName = taxfile;        readTax();              }
+        else if (countfile != "")      {       inputFileName = countfile;      readCount();    }
                
                if (m->control_pressed) { outputTypes.clear();  return 0; }
                
@@ -293,12 +309,6 @@ int ListSeqsCommand::readFasta(){
                        
                        Sequence currSeq(in);
                        name = currSeq.getName();
-                       //if (lastName == "") { lastName = name; }
-                       //if (name != lastName) { count = 1; }
-               //      lastName = name;
-                       
-                       //Sequence newSeq(name+"_"+toString(count), currSeq.getAligned());
-                       //newSeq.printSequence(out);
                        
                        if (name != "") {  names.push_back(name);  }
                        
@@ -404,7 +414,24 @@ int ListSeqsCommand::readGroup(){
                exit(1);
        }
 }
-
+//**********************************************************************************************************************
+int ListSeqsCommand::readCount(){
+       try {
+               CountTable ct;
+               ct.readTable(countfile);
+        
+        if (m->control_pressed) { return 0; }
+        
+        names = ct.getNamesOfSeqs();
+        
+        return 0;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ListSeqsCommand", "readCount");
+               exit(1);
+       }
+}
 //**********************************************************************************************************************
 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
 int ListSeqsCommand::readAlign(){
index 1a31a6dd432e837159be8d916f7ce2a25e4d4dec..8e4cce3c932100ff1ab88d49b346ff87e7791a06 100644 (file)
@@ -34,7 +34,7 @@ class ListSeqsCommand : public Command {
        
        private:
                vector<string> names, outputNames;
-               string fastafile, namefile, groupfile, alignfile, inputFileName, outputDir, listfile, taxfile;
+               string fastafile, namefile, groupfile, countfile, alignfile, inputFileName, outputDir, listfile, taxfile;
                bool abort;
                
                int readFasta();
@@ -43,6 +43,7 @@ class ListSeqsCommand : public Command {
                int readAlign();
                int readList();
                int readTax();
+        int readCount();
                
 };
 
index 32ede6e5fa70a2ab42f4aa6ebcc0382b32dc8d76..bc5a569e31cd2e12526bb20244adb75e58333ece 100644 (file)
--- a/makefile
+++ b/makefile
@@ -17,7 +17,7 @@ USECOMPRESSION ?= no
 MOTHUR_FILES="\"Enter_your_default_path_here\""
 RELEASE_DATE = "\"7/9/2012\""
 VERSION = "\"1.26.0\""
-FORTAN_COMPILER = gfortran
+FORTAN_COMPILER = /usr/local/gfortran/bin/gfortran
 FORTRAN_FLAGS = 
 
 # Optimize to level 3:
index 86ddf946a9d51c1df7e43783e0470bf5aead6301..80aeb3fbad0495b3788e518812d6740f31ac71f8 100644 (file)
@@ -71,7 +71,7 @@ string RemoveGroupsCommand::getOutputFileNameTag(string type, string inputName="
             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
-            else if (type == "count")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
+            else if (type == "count")       {   outputFileName =  "pick.count.table";   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "shared")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "design")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
index 4cec90f567c764e8770987c150b67b5ececc0d35..a3fd1b83ee5025adb340922df226895230a464ed 100644 (file)
@@ -15,8 +15,9 @@
 vector<string> RemoveLineageCommand::setParameters(){  
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
                CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(ptaxonomy);
                CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
@@ -38,9 +39,9 @@ vector<string> RemoveLineageCommand::setParameters(){
 string RemoveLineageCommand::getHelpString(){  
        try {
                string helpString = "";
-               helpString += "The remove.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, list or alignreport file.\n";
+               helpString += "The remove.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, count, list or alignreport file.\n";
                helpString += "It outputs a file containing only the sequences from the taxonomy file that are not from the taxon you requested to be removed.\n";
-               helpString += "The remove.lineage command parameters are taxon, fasta, name, group, list, taxonomy, alignreport and dups.  You must provide taxonomy unless you have a valid current taxonomy file.\n";
+               helpString += "The remove.lineage command parameters are taxon, fasta, name, group, list, taxonomy, count, alignreport and dups.  You must provide taxonomy unless you have a valid current taxonomy file.\n";
                helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
                helpString += "The taxon parameter allows you to select the taxons you would like to remove, and is required.\n";
                helpString += "You may enter your taxons with confidence scores, doing so will remove only those sequences that belong to the taxonomy and whose cofidence scores fall below the scores you give.\n";
@@ -72,6 +73,7 @@ string RemoveLineageCommand::getOutputFileNameTag(string type, string inputName=
             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
+            else if (type == "count")       {   outputFileName =  "pick.count.table";   }
             else if (type == "alignreport")      {   outputFileName =  "pick.align.report";   }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
@@ -94,6 +96,7 @@ RemoveLineageCommand::RemoveLineageCommand(){
                outputTypes["group"] = tempOutNames;
                outputTypes["alignreport"] = tempOutNames;
                outputTypes["list"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
@@ -131,6 +134,7 @@ RemoveLineageCommand::RemoveLineageCommand(string option)  {
                        outputTypes["group"] = tempOutNames;
                        outputTypes["alignreport"] = tempOutNames;
                        outputTypes["list"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
@@ -187,6 +191,14 @@ RemoveLineageCommand::RemoveLineageCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        
@@ -223,6 +235,19 @@ RemoveLineageCommand::RemoveLineageCommand(string option)  {
                                else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
                        }else { m->setTaxonomyFile(taxfile); }
                        
+            countfile = validParameter.validFile(parameters, "count", true);
+            if (countfile == "not open") { countfile = ""; abort = true; }
+            else if (countfile == "not found") { countfile = "";  }    
+            else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+            
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+            
                        string usedDups = "true";
                        string temp = validParameter.validFile(parameters, "dups", false);      
                        if (temp == "not found") { 
@@ -240,14 +265,16 @@ RemoveLineageCommand::RemoveLineageCommand(string option)  {
                        }
                        m->splitAtChar(taxons, listOfTaxons, '-');
                        
-                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
+                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
                
                        if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }                       
                        
-                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
-                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
-                               parser.getNameFile(files);
-                       }
+                       if (countfile == "") {
+                if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                    vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                    parser.getNameFile(files);
+                }
+            }
                        
                }
 
@@ -265,6 +292,12 @@ int RemoveLineageCommand::execute(){
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
                if (m->control_pressed) { return 0; }
+        
+        if (countfile != "") {
+            if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
+                m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
+            }
+        }
                
                //read through the correct file and output lines you want to keep
                if (taxfile != "")                      {               readTax();              }  //fills the set of names to remove
@@ -273,6 +306,7 @@ int RemoveLineageCommand::execute(){
                if (groupfile != "")            {               readGroup();    }
                if (alignfile != "")            {               readAlign();    }
                if (listfile != "")                     {               readList();             }
+        if (countfile != "")           {               readCount();    }
                
                
                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
@@ -309,6 +343,11 @@ int RemoveLineageCommand::execute(){
                        if (itTypes != outputTypes.end()) {
                                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
                        }
+            
+            itTypes = outputTypes.find("count");
+                       if (itTypes != outputTypes.end()) {
+                               if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+                       }
                }
                
                return 0;               
@@ -511,7 +550,52 @@ int RemoveLineageCommand::readName(){
                exit(1);
        }
 }
-
+//**********************************************************************************************************************
+int RemoveLineageCommand::readCount(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               ifstream in;
+               m->openInputFile(countfile, in);
+               
+               bool wroteSomething = false;
+               
+        string headers = m->getline(in); m->gobble(in);
+        out << headers << endl;
+        
+        string name, rest; int thisTotal;
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+            
+            in >> name; m->gobble(in); 
+            in >> thisTotal; m->gobble(in);
+            rest = m->getline(in); m->gobble(in);
+            if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
+            
+            if (names.count(name) == 0) {
+                out << name << '\t' << thisTotal << '\t' << rest << endl;
+                wroteSomething = true;
+            }
+        }
+        in.close();
+               out.close();
+               
+               if (wroteSomething == false) {  m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
+               outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
+        
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveLineageCommand", "readCount");
+               exit(1);
+       }
+}
 //**********************************************************************************************************************
 int RemoveLineageCommand::readGroup(){
        try {
index a5caec8e4c3e0f04ec7744489c3f27c0a495e38d..a756d24f583be82eac03634ebcd8496c4fd687b6 100644 (file)
@@ -34,12 +34,13 @@ class RemoveLineageCommand : public Command {
        private:
                set<string> names;
                vector<string> outputNames, listOfTaxons;
-               string fastafile, namefile, groupfile, alignfile, listfile, taxfile, outputDir, taxons;
+               string fastafile, namefile, groupfile, alignfile, listfile, countfile, taxfile, outputDir, taxons;
                bool abort, dups;
                
                int readFasta();
                int readName();
                int readGroup();
+        int readCount();
                int readAlign();
                int readList();
                int readTax();  
index 0d53c1a95ed5b968d0a8281f1f3d612d7d4c2047..1fb94462ac48fa13ac871c6b9d3cf4f3ae2ceff6 100644 (file)
@@ -15,8 +15,9 @@
 vector<string> RemoveSeqsCommand::setParameters(){     
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
+        CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
+               CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
                CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
                CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
                CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
@@ -39,9 +40,9 @@ vector<string> RemoveSeqsCommand::setParameters(){
 string RemoveSeqsCommand::getHelpString(){     
        try {
                string helpString = "";
-               helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n";
+               helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n";
                helpString += "It outputs a file containing the sequences NOT in the .accnos file.\n";
-               helpString += "The remove.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos and at least one of the file parameters.\n";
+               helpString += "The remove.seqs command parameters are accnos, fasta, name, group, count, list, taxonomy, qfile, alignreport and dups.  You must provide accnos and at least one of the file parameters.\n";
                helpString += "The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n";
                helpString += "The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
                helpString += "Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
@@ -70,6 +71,7 @@ string RemoveSeqsCommand::getOutputFileNameTag(string type, string inputName="")
             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "qfile")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
             else if (type == "alignreport") {   outputFileName =  "pick.align.report";                   }
+            else if (type == "count")       {   outputFileName =  "pick.count.table";   }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
         return outputFileName;
@@ -93,6 +95,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(){
                outputTypes["alignreport"] = tempOutNames;
                outputTypes["list"] = tempOutNames;
                outputTypes["qfile"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
@@ -131,6 +134,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                        outputTypes["alignreport"] = tempOutNames;
                        outputTypes["list"] = tempOutNames;
                        outputTypes["qfile"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
@@ -203,6 +207,14 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        
@@ -259,13 +271,28 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                                else                            {  temp = "false"; usedDups = "";       }
                        }
                        dups = m->isTrue(temp);
-                       
-                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
-                       
-                       if ((fastafile != "") && (namefile == "")) {
-                               vector<string> files; files.push_back(fastafile);
-                               parser.getNameFile(files);
-                       }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+            if (countfile == "not open") { countfile = ""; abort = true; }
+            else if (countfile == "not found") { countfile = "";  }    
+            else { m->setCountTableFile(countfile); }
+            
+            if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+            
+            if ((groupfile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+            }
+                       
+                       if ((countfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
+                       
+            if (countfile == "") {
+                if ((fastafile != "") && (namefile == "")) {
+                    vector<string> files; files.push_back(fastafile);
+                    parser.getNameFile(files);
+                }
+            }
                }
 
        }
@@ -285,6 +312,12 @@ int RemoveSeqsCommand::execute(){
                names = m->readAccnos(accnosfile);
                
                if (m->control_pressed) { return 0; }
+        
+        if (countfile != "") {
+            if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
+                m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
+            }
+        }
                
                //read through the correct file and output lines you want to keep
                if (namefile != "")                     {               readName();             }
@@ -294,6 +327,7 @@ int RemoveSeqsCommand::execute(){
                if (listfile != "")                     {               readList();             }
                if (taxfile != "")                      {               readTax();              }
                if (qualfile != "")                     {               readQual();             }
+        if (countfile != "")           {               readCount();            }
                
                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
        
@@ -333,7 +367,12 @@ int RemoveSeqsCommand::execute(){
                        itTypes = outputTypes.find("qfile");
                        if (itTypes != outputTypes.end()) {
                                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
-                       }                       
+                       }       
+            
+            itTypes = outputTypes.find("count");
+                       if (itTypes != outputTypes.end()) {
+                               if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+                       }
                }
                
                return 0;               
@@ -463,6 +502,56 @@ int RemoveSeqsCommand::readQual(){
        }
 }
 //**********************************************************************************************************************
+int RemoveSeqsCommand::readCount(){
+       try {
+        
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               ifstream in;
+               m->openInputFile(countfile, in);
+               
+               bool wroteSomething = false;
+               int removedCount = 0;
+               
+        string headers = m->getline(in); m->gobble(in);
+        out << headers << endl;
+        
+        string name, rest; int thisTotal;
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
+            
+            in >> name; m->gobble(in); 
+            in >> thisTotal; m->gobble(in);
+            rest = m->getline(in); m->gobble(in);
+            if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
+            
+            if (names.count(name) == 0) {
+                out << name << '\t' << thisTotal << '\t' << rest << endl;
+                wroteSomething = true;
+            }else { removedCount += thisTotal; }
+        }
+        in.close();
+               out.close();
+               
+               if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
+               outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
+               
+               m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
+        
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveSeqsCommand", "readCount");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 int RemoveSeqsCommand::readList(){
        try {
                string thisOutputDir = outputDir;
index 474951a9feaee7d497417cf2a03f206367056241..151b413070bc64bafd80834e72228dabd1037c43 100644 (file)
@@ -34,13 +34,14 @@ class RemoveSeqsCommand : public Command {
        
        private:
                set<string> names;
-               string accnosfile, fastafile, namefile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
+               string accnosfile, fastafile, namefile, groupfile, countfile, alignfile, listfile, taxfile, qualfile, outputDir;
                bool abort, dups;
                vector<string> outputNames;
                
                int readFasta();
                int readName();
                int readGroup();
+        int readCount();
                int readAlign();
                int readList();
                int readTax();
index 869df029fdd35efa6c3e9a9a22d46c6c4fea6667..4d04270ce649f8b2e579625eae7c074b322d10cb 100644 (file)
@@ -9,13 +9,15 @@
 
 #include "secondarystructurecommand.h"
 #include "sequence.hpp"
+#include "counttable.h"
 
 //**********************************************************************************************************************
 vector<string> AlignCheckCommand::setParameters(){     
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
                CommandParameter pmap("map", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pmap);
-        CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
@@ -32,7 +34,7 @@ vector<string> AlignCheckCommand::setParameters(){
 string AlignCheckCommand::getHelpString(){     
        try {
                string helpString = "";
-               helpString += "The align.check command reads a fasta file and map file as well as an optional name file.\n";
+               helpString += "The align.check command reads a fasta file and map file as well as an optional name or count file.\n";
                helpString += "It outputs a file containing the secondary structure matches in the .align.check file.\n";
                helpString += "The align.check command parameters are fasta and map, both are required.\n";
                helpString += "The align.check command should be in the following format: align.check(fasta=yourFasta, map=yourMap).\n";
@@ -135,6 +137,14 @@ AlignCheckCommand::AlignCheckCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        //check for required parameters
@@ -155,16 +165,25 @@ AlignCheckCommand::AlignCheckCommand(string option)  {
                        else if (namefile == "not found") { namefile = "";  }   
                        else { m->setNameFile(namefile); }
                        
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; countfile = ""; }  
+                       else if (countfile == "not found") { countfile = ""; }
+                       else { m->setCountTableFile(countfile); }
+                       
+            if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+            
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
                                outputDir = ""; 
                                outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it       
                        }
                        
-                       if ((namefile == "") && (fastafile != "")){
-                               vector<string> files; files.push_back(fastafile); 
-                               parser.getNameFile(files);
-                       }
+            if (countfile == "") {
+                if ((namefile == "") && (fastafile != "")){
+                    vector<string> files; files.push_back(fastafile); 
+                    parser.getNameFile(files);
+                }
+            }
                }
 
        }
@@ -184,6 +203,11 @@ int AlignCheckCommand::execute(){
                readMap();
                
                if (namefile != "") { nameMap = m->readNames(namefile); }
+        else if (countfile != "") {
+            CountTable ct;
+            ct.readTable(countfile);
+            nameMap = ct.getNameMap();
+        }
                
                if (m->control_pressed) { return 0; }
                
@@ -217,7 +241,7 @@ int AlignCheckCommand::execute(){
                                if (haderror == 1) { m->control_pressed = true; break; }
                                
                                int num = 1;
-                               if (namefile != "") {
+                               if ((namefile != "") || (countfile != "")) {
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = nameMap.find(seq.getName());
                                        
@@ -274,7 +298,7 @@ int AlignCheckCommand::execute(){
                m->mothurOut("75%-tile:\t" + toString(pound[ptile75]) + "\t" + toString(dash[ptile75]) + "\t" + toString(plus[ptile75]) + "\t" + toString(equal[ptile75]) + "\t" + toString(loop[ptile75]) + "\t" + toString(tilde[ptile75]) + "\t" + toString(total[ptile75])); m->mothurOutEndLine();
                m->mothurOut("97.5%-tile:\t" + toString(pound[ptile97_5]) + "\t" + toString(dash[ptile97_5]) + "\t" + toString(plus[ptile97_5]) + "\t" + toString(equal[ptile97_5]) + "\t" + toString(loop[ptile97_5]) + "\t" + toString(tilde[ptile97_5]) + "\t" + toString(total[ptile97_5])); m->mothurOutEndLine();
                m->mothurOut("Maximum:\t" + toString(pound[ptile100]) + "\t" + toString(dash[ptile100]) + "\t" + toString(plus[ptile100]) + "\t" + toString(equal[ptile100]) + "\t" + toString(loop[ptile100]) + "\t" + toString(tilde[ptile100]) + "\t" + toString(total[ptile100])); m->mothurOutEndLine();
-               if (namefile == "") {  m->mothurOut("# of Seqs:\t" + toString(count)); m->mothurOutEndLine(); }
+               if ((namefile == "") && (countfile == "")) {  m->mothurOut("# of Seqs:\t" + toString(count)); m->mothurOutEndLine(); }
                else { m->mothurOut("# of unique seqs:\t" + toString(count)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(size)); m->mothurOutEndLine(); }
                
                
index 110f019f3beeafdd2a877c423b250232a488e62c..becafc5bb3f29073d0e72859e495f0364c351dfb 100644 (file)
@@ -50,7 +50,7 @@ class AlignCheckCommand : public Command {
        
        private:
                vector<int> structMap;
-               string mapfile, fastafile, outputDir, namefile;
+               string mapfile, fastafile, outputDir, namefile, countfile;
                bool abort;
                int seqLength, haderror;
                vector<string> outputNames;
index 830643d12f2fc03d9f2985f2c722a891f0b95371..a9bb5737eac832a203969460f7f4e2759849a975 100644 (file)
@@ -8,13 +8,14 @@
  */
 
 #include "seqsummarycommand.h"
-
+#include "counttable.h"
 
 //**********************************************************************************************************************
 vector<string> SeqSummaryCommand::setParameters(){     
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -33,8 +34,9 @@ string SeqSummaryCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The summary.seqs command reads a fastafile and summarizes the sequences.\n";
-               helpString += "The summary.seqs command parameters are fasta, name and processors, fasta is required, unless you have a valid current fasta file.\n";
+               helpString += "The summary.seqs command parameters are fasta, name, count and processors, fasta is required, unless you have a valid current fasta file.\n";
                helpString += "The name parameter allows you to enter a name file associated with your fasta file. \n";
+        helpString += "The count parameter allows you to enter a count file associated with your fasta file. \n";
                helpString += "The summary.seqs command should be in the following format: \n";
                helpString += "summary.seqs(fasta=yourFastaFile, processors=2) \n";
                helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
@@ -123,6 +125,14 @@ SeqSummaryCommand::SeqSummaryCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
                        
                        //initialize outputTypes
@@ -142,6 +152,13 @@ SeqSummaryCommand::SeqSummaryCommand(string option)  {
                        if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") { namefile = "";  }   
                        else { m->setNameFile(namefile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; countfile = ""; }  
+                       else if (countfile == "not found") { countfile = ""; }
+                       else { m->setCountTableFile(countfile); }
+                       
+            if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
@@ -153,11 +170,12 @@ SeqSummaryCommand::SeqSummaryCommand(string option)  {
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);
                        
-                       if (namefile == "") {
-                               vector<string> files; files.push_back(fastafile);
-                               parser.getNameFile(files);
-                       }
-                       
+            if (countfile == "") {
+                if (namefile == "") {
+                    vector<string> files; files.push_back(fastafile);
+                    parser.getNameFile(files);
+                }
+            }
                }
        }
        catch(exception& e) {
@@ -186,6 +204,11 @@ int SeqSummaryCommand::execute(){
                vector<int> longHomoPolymer;
                
                if (namefile != "") { nameMap = m->readNames(namefile); }
+        else if (countfile != "") {
+            CountTable ct;
+            ct.readTable(countfile);
+            nameMap = ct.getNameMap();
+        }
                
                if (m->control_pressed) { return 0; }
                        
@@ -344,7 +367,7 @@ int SeqSummaryCommand::execute(){
                int size = startPosition.size();
                
                //find means
-               float meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer;
+               double meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer;
                meanStartPosition = 0; meanEndPosition = 0; meanSeqLength = 0; meanAmbigBases = 0; meanLongHomoPolymer = 0;
                for (int i = 0; i < size; i++) {
                        meanStartPosition += startPosition[i];
@@ -353,6 +376,7 @@ int SeqSummaryCommand::execute(){
                        meanAmbigBases += ambigBases[i];
                        meanLongHomoPolymer += longHomoPolymer[i];
                }
+                
                //this is an int divide so the remainder is lost
                meanStartPosition /= (float) size; meanEndPosition /= (float) size; meanLongHomoPolymer /= (float) size; meanSeqLength /= (float) size; meanAmbigBases /= (float) size;
                                
@@ -380,7 +404,7 @@ int SeqSummaryCommand::execute(){
                m->mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100]) + "\t" + toString(ptile100+1)); m->mothurOutEndLine();
                m->mothurOut("Mean:\t" + toString(meanStartPosition) + "\t" + toString(meanEndPosition) + "\t" + toString(meanSeqLength) + "\t" + toString(meanAmbigBases) + "\t" + toString(meanLongHomoPolymer)); m->mothurOutEndLine();
 
-               if (namefile == "") {  m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); }
+               if ((namefile == "") && (countfile == "")) {  m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); }
                else { m->mothurOut("# of unique seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(startPosition.size())); m->mothurOutEndLine(); }
                
                if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
@@ -430,11 +454,11 @@ int SeqSummaryCommand::driverCreateSummary(vector<int>& startPosition, vector<in
                        if (current.getName() != "") {
                                
                                int num = 1;
-                               if (namefile != "") {
+                               if ((namefile != "") || (countfile != "")) {
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = nameMap.find(current.getName());
                                        
-                                       if (it == nameMap.end()) { m->mothurOut("[ERROR]: '" + current.getName() + "' is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                       if (it == nameMap.end()) { m->mothurOut("[ERROR]: '" + current.getName() + "' is not in your name or count file, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
                                        else { num = it->second; }
                                }
                                
@@ -505,11 +529,11 @@ int SeqSummaryCommand::MPICreateSummary(int start, int num, vector<int>& startPo
                        if (current.getName() != "") {
                                
                                int num = 1;
-                               if (namefile != "") {
+                               if ((namefile != "") || (countfile != "")) {
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = nameMap.find(current.getName());
                                        
-                                       if (it == nameMap.end()) { cout << "[ERROR]: " << current.getName() << " is not in your namefile, please correct." << endl; m->control_pressed = true; }
+                                       if (it == nameMap.end()) { cout << "[ERROR]: " << current.getName() << " is not in your name or count file, please correct." << endl; m->control_pressed = true; }
                                        else { num = it->second; }
                                }
                                
@@ -626,14 +650,17 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector<int>& startPosition,
                vector<seqSumData*> pDataArray; 
                DWORD   dwThreadIdArray[processors-1];
                HANDLE  hThreadArray[processors-1]; 
-               
+        
+               bool hasNameMap = false;
+        if ((namefile !="") || (countfile != "")) { hasNameMap = true; }
+        
                //Create processor worker threads.
                for( int i=0; i<processors-1; i++ ){
             
             string extension = "";
             if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
                        // Allocate memory for thread data.
-                       seqSumData* tempSum = new seqSumData(filename, (sumFile+extension), m, lines[i]->start, lines[i]->end, namefile, nameMap);
+                       seqSumData* tempSum = new seqSumData(filename, (sumFile+extension), m, lines[i]->start, lines[i]->end, hasNameMap, nameMap);
                        pDataArray.push_back(tempSum);
                        
                        //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
index 79e8be96974141da315d1038a51096695212ad5c..3926e25f3c2ebb484b357464d1514dfc957c4000 100644 (file)
@@ -34,7 +34,7 @@ public:
        void help() { m->mothurOut(getHelpString()); }          
 private:
        bool abort;
-       string fastafile, outputDir, namefile;
+       string fastafile, outputDir, namefile, countfile;
        int processors;
        vector<string> outputNames;
        map<string, int> nameMap;
@@ -74,18 +74,18 @@ struct seqSumData {
        unsigned long long end;
        int count;
        MothurOut* m;
-       string namefile;
+       bool hasNameMap;
        map<string, int> nameMap;
        
        
        seqSumData(){}
-       seqSumData(string f, string sf, MothurOut* mout, unsigned long long st, unsigned long long en, string na, map<string, int> nam) {
+       seqSumData(string f, string sf, MothurOut* mout, unsigned long long st, unsigned long long en, bool na, map<string, int> nam) {
                filename = f;
                sumFile = sf;
                m = mout;
                start = st;
                end = en;
-               namefile = na;
+               hasNameMap = na;
                nameMap = nam;
                count = 0;
        }
@@ -123,11 +123,11 @@ static DWORD WINAPI MySeqSumThreadFunction(LPVOID lpParam){
                        if (current.getName() != "") {
                                
                                int num = 1;
-                               if (pDataArray->namefile != "") {
+                               if (pDataArray->hasNameMap){
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
                                        
-                                       if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
+                                       if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your name or count file, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
                                        else { num = it->second; }
                                }
                                
index 5d7971349656d4400e5d41a24c15e576ccdf57f7..5a073677804ae2e3176f08a3d4ff1e26a1bc5c03 100644 (file)
@@ -8,13 +8,14 @@
  */
 
 #include "summaryqualcommand.h"
-
+#include "counttable.h"
 
 //**********************************************************************************************************************
 vector<string> SummaryQualCommand::setParameters(){    
        try {
                CommandParameter pqual("qfile", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pqual);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -32,9 +33,10 @@ vector<string> SummaryQualCommand::setParameters(){
 string SummaryQualCommand::getHelpString(){    
        try {
                string helpString = "";
-               helpString += "The summary.qual command reads a quality file and an optional name file, and summarizes the quality information.\n";
-               helpString += "The summary.tax command parameters are qfile, name and processors. qfile is required, unless you have a valid current quality file.\n";
+               helpString += "The summary.qual command reads a quality file and an optional name or count file, and summarizes the quality information.\n";
+               helpString += "The summary.tax command parameters are qfile, name, count and processors. qfile is required, unless you have a valid current quality file.\n";
                helpString += "The name parameter allows you to enter a name file associated with your quality file. \n";
+        helpString += "The count parameter allows you to enter a count file associated with your quality file. \n";
                helpString += "The summary.qual command should be in the following format: \n";
                helpString += "summary.qual(qfile=yourQualityFile) \n";
                helpString += "Note: No spaces between parameter labels (i.e. qfile), '=' and parameters (i.e.yourQualityFile).\n";     
@@ -122,6 +124,14 @@ SummaryQualCommand::SummaryQualCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
                        
                        //initialize outputTypes
@@ -141,6 +151,13 @@ SummaryQualCommand::SummaryQualCommand(string option)  {
                        if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") { namefile = "";  }   
                        else { m->setNameFile(namefile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; countfile = ""; }  
+                       else if (countfile == "not found") { countfile = ""; }
+                       else { m->setCountTableFile(countfile); }
+                       
+            if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
@@ -152,10 +169,13 @@ SummaryQualCommand::SummaryQualCommand(string option)  {
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);     
                        
-                       if (namefile == "") {
-                               vector<string> files; files.push_back(qualfile);
-                               parser.getNameFile(files);
-                       }
+            
+                       if (countfile == "") {
+                if (namefile == "") {
+                    vector<string> files; files.push_back(qualfile);
+                    parser.getNameFile(files);
+                }
+            }
                }
        }
        catch(exception& e) {
@@ -179,7 +199,12 @@ int SummaryQualCommand::execute(){
                if (m->control_pressed) { return 0; }
                
                if (namefile != "") { nameMap = m->readNames(namefile); }
-               
+               else if (countfile != "") {
+            CountTable ct;
+            ct.readTable(countfile);
+            nameMap = ct.getNameMap();
+        }
+        
                vector<unsigned long long> positions; 
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                positions = m->divideFile(qualfile, processors);
@@ -257,7 +282,7 @@ int SummaryQualCommand::driverCreateSummary(vector<int>& position, vector<int>&
                        if (current.getName() != "") {
                                
                                int num = 1;
-                               if (namefile != "") {
+                               if ((namefile != "") || (countfile != "")) {
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = nameMap.find(current.getName());
                                        
@@ -400,11 +425,14 @@ int SummaryQualCommand::createProcessesCreateSummary(vector<int>& position, vect
                DWORD   dwThreadIdArray[processors];
                HANDLE  hThreadArray[processors]; 
                
+        bool hasNameMap = false;
+        if ((namefile !="") || (countfile != "")) { hasNameMap = true; }
+        
                //Create processor worker threads.
                for( int i=0; i<processors; i++ ){
                        
                        // Allocate memory for thread data.
-                       seqSumQualData* tempSum = new seqSumQualData(filename, m, lines[i].start, lines[i].end, namefile, nameMap);
+                       seqSumQualData* tempSum = new seqSumQualData(filename, m, lines[i].start, lines[i].end, hasNameMap, nameMap);
                        pDataArray.push_back(tempSum);
                        processIDS.push_back(i);
         
@@ -457,7 +485,7 @@ int SummaryQualCommand::printQual(string sumFile, vector<int>& position, vector<
                        
                        if (m->control_pressed) { out.close(); return 0; }
                        
-                       float average = averageQ[i] / (float) position[i];
+                       double average = averageQ[i] / (float) position[i];
                        out << i << '\t' << position[i] << '\t' << average << '\t';
                        
                        for (int j = 0; j < 41; j++) {
index 31390b4296ff7659fdfb2946d3f37ff33b2877b7..ac65938bc23075fc7db7cb40b9ce6d05d0d5d64e 100644 (file)
@@ -35,7 +35,7 @@ public:
        
 private:
        bool abort;
-       string qualfile, outputDir, namefile;
+       string qualfile, outputDir, namefile, countfile;
        vector<string> outputNames;
        map<string, int> nameMap;
        int processors;
@@ -62,20 +62,21 @@ struct seqSumQualData {
        vector<int> position;
        vector<int> averageQ;
        vector< vector<int> > scores; 
-       string filename, namefile
+       string filename; 
        unsigned long long start;
        unsigned long long end;
        int count;
        MothurOut* m;
+    bool hasNameMap;
        map<string, int> nameMap;
        
        ~seqSumQualData(){}
-       seqSumQualData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string n, map<string, int> nam) {
+       seqSumQualData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, bool n, map<string, int> nam) {
                filename = f;
                m = mout;
                start = st;
                end = en;
-               namefile = n;
+               hasNameMap = n;
                nameMap = nam;
                count = 0;
        }
@@ -109,7 +110,7 @@ static DWORD WINAPI MySeqSumQualThreadFunction(LPVOID lpParam){
                        if (current.getName() != "") {
                        
                                int num = 1;
-                               if (pDataArray->namefile != "") {
+                               if (pDataArray->hasNameMap) {
                                        //make sure this sequence is in the namefile, else error 
                                        map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
                                        
index b29723c032814891f2f84dc8ba437ab789908773..29b8733721a2b710468f960ac474c99734a181df 100644 (file)
 #include "needlemanoverlap.hpp"
 #include "trimoligos.h"
 
+
 //**********************************************************************************************************************
 vector<string> TrimSeqsCommand::setParameters(){       
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
                CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
                CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
                CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
                CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
                CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
@@ -58,11 +60,12 @@ string TrimSeqsCommand::getHelpString(){
                string helpString = "";
                helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
                helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
-               helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
+               helpString += "The trim.seqs command parameters are fasta, name, count, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
                helpString += "The fasta parameter is required.\n";
                helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
                helpString += "The oligos parameter allows you to provide an oligos file.\n";
                helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
+        helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
                helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
                helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
                helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
@@ -111,6 +114,7 @@ string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){
             else if (type == "fasta")            {   outputFileName =  "fasta";   }
             else if (type == "group")            {   outputFileName =  "groups";   }
             else if (type == "name")            {   outputFileName =  "names";   }
+            else if (type == "count")            {   outputFileName =  "count.table";   }
             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
         }
         return outputFileName;
@@ -133,6 +137,7 @@ TrimSeqsCommand::TrimSeqsCommand(){
                outputTypes["qfile"] = tempOutNames;
                outputTypes["group"] = tempOutNames;
                outputTypes["name"] = tempOutNames;
+        outputTypes["count"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
@@ -171,6 +176,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        outputTypes["qfile"] = tempOutNames;
                        outputTypes["group"] = tempOutNames;
                        outputTypes["name"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -208,6 +214,14 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                                
                        }
 
@@ -279,6 +293,13 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        if (temp == "not found")        {       nameFile = "";          }
                        else if(temp == "not open")     {       nameFile = "";  abort = true;           }
                        else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true; countfile = ""; }  
+                       else if (countfile == "not found") { countfile = ""; }
+                       else { m->setCountTableFile(countfile); }
+                       
+            if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
                        
                        temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
                        m->mothurConvert(temp, qThreshold);
@@ -331,10 +352,12 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                                abort = true;
                        }
                        
-                       if (nameFile == "") {
-                               vector<string> files; files.push_back(fastaFile);
-                               parser.getNameFile(files);
-                       }
+            if (countfile == "") {
+                if (nameFile == "") {
+                    vector<string> files; files.push_back(fastaFile);
+                    parser.getNameFile(files);
+                }
+            }
                }
 
        }
@@ -385,13 +408,27 @@ int TrimSeqsCommand::execute(){
                        outputTypes["name"].push_back(trimNameFile);
                        outputTypes["name"].push_back(scrapNameFile); 
                }
+        
+        string trimCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + "trim." + getOutputFileNameTag("count");
+               string scrapCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + "scrap." + getOutputFileNameTag("count");
+               
+               if (countfile != "") {
+            CountTable ct;
+            ct.readTable(countfile);
+            nameCount = ct.getNameMap();
+                       outputNames.push_back(trimCountFile);
+                       outputNames.push_back(scrapCountFile);
+                       outputTypes["count"].push_back(trimCountFile);
+                       outputTypes["count"].push_back(scrapCountFile); 
+               }
+
                
                if (m->control_pressed) { return 0; }
                
                string outputGroupFileName;
                if(oligoFile != ""){
                        createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
-                       if (createGroup) {
+                       if ((createGroup) && (countfile == "")){
                                outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
                                outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
                        }
@@ -401,9 +438,9 @@ int TrimSeqsCommand::execute(){
                setLines(fastaFile, qFileName);
                
         if(processors == 1){
-            driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
+            driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
         }else{
-            createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
+            createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
         }      
                
                
@@ -446,35 +483,62 @@ int TrimSeqsCommand::execute(){
                        for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
                        outputNames = outputNames2;
                        
-                       for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
-                               ifstream in;
-                               m->openInputFile(it->first, in);
-                               
-                               ofstream out;
-                               string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + getOutputFileNameTag("group");
-                               outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
-                               m->openOutputFile(thisGroupName, out);
-                               
-                               while (!in.eof()){
-                                       if (m->control_pressed) { break; }
-                                       
-                                       Sequence currSeq(in); m->gobble(in);
-                                       out << currSeq.getName() << '\t' << it->second << endl;
+            for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
+                ifstream in;
+                m->openInputFile(it->first, in);
+                
+                ofstream out;
+                string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first));
+                if (countfile == "") { thisGroupName += getOutputFileNameTag("group"); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
+                else {  thisGroupName += getOutputFileNameTag("count"); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName);  }
+                m->openOutputFile(thisGroupName, out);
+                
+                if (countfile != "") {  out << "Representative_Sequence\ttotal\t" << it->second << endl;  }
+                
+                while (!in.eof()){
+                    if (m->control_pressed) { break; }
                     
-                    if (nameFile != "") {
-                        map<string, string>::iterator itName = nameMap.find(currSeq.getName());
-                        if (itName != nameMap.end()) { 
-                            vector<string> thisSeqsNames; 
-                            m->splitAtChar(itName->second, thisSeqsNames, ',');
-                            for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
-                                out << thisSeqsNames[k] << '\t' << it->second << endl;
-                            }
-                        }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                  
+                    Sequence currSeq(in); m->gobble(in);
+                    if (countfile == "") {  
+                        out << currSeq.getName() << '\t' << it->second << endl;  
+                        
+                        if (nameFile != "") {
+                            map<string, string>::iterator itName = nameMap.find(currSeq.getName());
+                            if (itName != nameMap.end()) { 
+                                vector<string> thisSeqsNames; 
+                                m->splitAtChar(itName->second, thisSeqsNames, ',');
+                                for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
+                                    out << thisSeqsNames[k] << '\t' << it->second << endl;
+                                }
+                            }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                      
+                        }
+                    }else { 
+                        map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
+                        if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
+                        else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
                     }
-                               }
-                               in.close();
-                               out.close();
-                       }
+                }
+                in.close();
+                out.close();
+            }
+            
+            if (countfile != "") { //create countfile with group info included
+                CountTable* ct = new CountTable();
+                ct->readTable(trimCountFile);
+                map<string, int> justTrimmedNames = ct->getNameMap();
+                delete ct;
+                
+                CountTable newCt;
+                for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
+                vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
+                for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
+                    newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
+                    map<string, string>::iterator it2 = groupMap.find(itNames->first);
+                    if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
+                    else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                }
+                newCt.printTable(trimCountFile);
+            }
                }
                
                if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
@@ -511,6 +575,11 @@ int TrimSeqsCommand::execute(){
                if (itTypes != outputTypes.end()) {
                        if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
                }
+        
+        itTypes = outputTypes.find("count");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+               }
 
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -527,8 +596,7 @@ int TrimSeqsCommand::execute(){
 }
                
 /**************************************************************************************/
-
-int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {     
+int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {        
                
        try {
                
@@ -552,9 +620,16 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                        m->openOutputFile(scrapNFileName, scrapNameFile);
                }
                
+        ofstream trimCountFile;
+               ofstream scrapCountFile;
+               if(countfile != ""){
+                       m->openOutputFile(trimCFileName, trimCountFile);
+                       m->openOutputFile(scrapCFileName, scrapCountFile);
+            if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
+               }
                
                ofstream outGroupsFile;
-               if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
+               if ((createGroup) && (countfile == "")){        m->openOutputFile(groupFileName, outGroupsFile);   }
                if(allFiles){
                        for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
                                for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
@@ -591,14 +666,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                
                        if (m->control_pressed) { 
                                inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
-                               if (createGroup) {       outGroupsFile.close();   }
-
-                               if(qFileName != ""){
-                                       qFile.close();
-                               }
-                               for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
-
-                               return 0;
+                               if ((createGroup) && (countfile == "")) {        outGroupsFile.close();   }
+                if(qFileName != "")    {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
+                if(nameFile != "")     {       scrapNameFile.close(); trimNameFile.close();    }
+                if(countfile != "")    {       scrapCountFile.close(); trimCountFile.close();  }
+                               for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;
                        }
                        
                        int success = 1;
@@ -720,6 +792,15 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
                                                else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
                                        }
+                    
+                    int numRedundants = 0;
+                    if (countfile != "") {
+                        map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
+                        if (itCount != nameCount.end()) { 
+                            trimCountFile << itCount->first << '\t' << itCount->second << endl;
+                            numRedundants = itCount->second-1;
+                        }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
+                    }
                                        
                                        if (createGroup) {
                                                if(barcodes.size() != 0){
@@ -736,9 +817,9 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                        
                             if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
                             
-                                                       outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
+                                                       if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
+                            else {   groupMap[currSeq.getName()] = thisGroup; }
                                                        
-                            int numRedundants = 0;
                                                        if (nameFile != "") {
                                                                map<string, string>::iterator itName = nameMap.find(currSeq.getName());
                                                                if (itName != nameMap.end()) { 
@@ -786,6 +867,13 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
                                                else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
                                        }
+                    if (countfile != "") {
+                        map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
+                        if (itCount != nameCount.end()) { 
+                            trimCountFile << itCount->first << '\t' << itCount->second << endl;
+                        }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
+                    }
+                    
                                        currSeq.setName(currSeq.getName() + '|' + trashCode);
                                        currSeq.setUnaligned(origSeq);
                                        currSeq.setAligned(origSeq);
@@ -819,6 +907,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                if (createGroup) {       outGroupsFile.close();   }
                if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
                if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
+        if(countfile != "")    {       scrapCountFile.close(); trimCountFile.close();  }
                
                return count;
        }
@@ -830,7 +919,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
 
 /**************************************************************************************************/
 
-int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
+int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
        try {
         
         int process = 1;
@@ -881,6 +970,8 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                                                 (scrapQualFileName + toString(getpid()) + ".temp"),
                                                                 (trimNameFileName + toString(getpid()) + ".temp"),
                                                                 (scrapNameFileName + toString(getpid()) + ".temp"),
+                                 (trimCountFileName + toString(getpid()) + ".temp"),
+                                                                (scrapCountFileName + toString(getpid()) + ".temp"),
                                                                 (groupFile + toString(getpid()) + ".temp"),
                                                                 tempFASTAFileNames,
                                                                 tempPrimerQualFileNames,
@@ -901,6 +992,11 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                        for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
                                                out << it->first << '\t' << it->second << endl;
                                        }
+                    
+                    out << groupMap.size() << endl;
+                    for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
+                                               out << it->first << '\t' << it->second << endl;
+                                       }
                                        out.close();
                                }
                                exit(0);
@@ -923,8 +1019,12 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                        m->openOutputFile(trimNameFileName, temp);              temp.close();
                        m->openOutputFile(scrapNameFileName, temp);             temp.close();
                }
+        if (countfile != "") {
+                       m->openOutputFile(trimCountFileName, temp);             temp.close();
+                       m->openOutputFile(scrapCountFileName, temp);            temp.close();
+               }
 
-               driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
+               driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
                
                //force parent to wait until all the processes are done
                for (int i=0;i<processIDS.size();i++) { 
@@ -974,13 +1074,15 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
 
             
                        trimData* tempTrim = new trimData(filename,
-                                              qFileName, nameFile,
+                                              qFileName, nameFile, countfile,
                                               (trimFASTAFileName+extension),
                                               (scrapFASTAFileName+extension),
                                               (trimQualFileName+extension),
                                               (scrapQualFileName+extension),
                                               (trimNameFileName+extension),
                                               (scrapNameFileName+extension),
+                                              (trimCountFileName+extension),
+                                              (scrapCountFileName+extension),
                                               (groupFile+extension),
                                               tempFASTAFileNames,
                                               tempPrimerQualFileNames,
@@ -989,7 +1091,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer, 
                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
-                                             minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
+                                             minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap, nameCount);
                        pDataArray.push_back(tempTrim);
             
                        hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
@@ -1008,7 +1110,7 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                        m->openOutputFile(scrapNameFileName, temp);             temp.close();
                }
         
-               driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
+               driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
         processIDS.push_back(processors-1);
 
         
@@ -1022,6 +1124,11 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                 if (it2 == groupCounts.end()) {        groupCounts[it->first] = it->second; }
                 else { groupCounts[it->first] += it->second; }
             }
+            for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
+                map<string, string>::iterator it2 = groupMap.find(it->first);
+                if (it2 == groupMap.end()) {   groupMap[it->first] = it->second; }
+                else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
+            }
             CloseHandle(hThreadArray[i]);
                        delete pDataArray[i];
                }
@@ -1052,8 +1159,15 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
                                m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
                        }
+            
+            if(countfile != ""){
+                               m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
+                               m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
+                               m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
+                               m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
+                       }
                        
-                       if(createGroup){
+                       if((createGroup)&&(countfile == "")){
                                m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
                                m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
                        }
@@ -1091,14 +1205,27 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                in >> tempNum; m->gobble(in);
                                
                                if (tempNum != 0) {
-                                       while (!in.eof()) { 
-                                               in >> group >> tempNum; m->gobble(in);
+                                       for (int i = 0; i < tempNum; i++) { 
+                        int groupNum;
+                                               in >> group >> groupNum; m->gobble(in);
                         
                                                map<string, int>::iterator it = groupCounts.find(group);
-                                               if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
-                                               else { groupCounts[it->first] += tempNum; }
+                                               if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
+                                               else { groupCounts[it->first] += groupNum; }
                                        }
                                }
+                in >> tempNum; m->gobble(in);
+                if (tempNum != 0) {
+                                       for (int i = 0; i < tempNum; i++) { 
+                        string group, seqName;
+                                               in >> seqName >> group; m->gobble(in);
+                        
+                                               map<string, string>::iterator it = groupMap.find(seqName);
+                                               if (it == groupMap.end()) {     groupMap[seqName] = group; }
+                                               else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
+                                       }
+                               }
+                
                                in.close(); m->mothurRemove(tempFile);
                        }
             #endif
@@ -1387,6 +1514,7 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                        string fastaFileName = "";
                                        string qualFileName = "";
                                        string nameFileName = "";
+                    string countFileName = "";
                                        
                                        if(primerName == ""){
                                                comboGroupName = barcodeNameVector[itBar->second];
@@ -1433,7 +1561,6 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                                nameFileNames[itBar->second][itPrimer->second] = nameFileName;
                                                m->openOutputFile(nameFileName, temp);          temp.close();
                                        }
-                                       
                                }
                        }
                }
index 957f37a65bfb1c177f000c9c6d031ddfc3784c81..8d9a57a54ffd0db053956200602f3aa06fb4de5b 100644 (file)
@@ -14,8 +14,8 @@
 #include "command.hpp"
 #include "sequence.hpp"
 #include "qualityscores.h"
-#include "groupmap.h"
 #include "trimoligos.h"
+#include "counttable.h"
 
 
 class TrimSeqsCommand : public Command {
@@ -36,16 +36,13 @@ public:
        void help() { m->mothurOut(getHelpString()); }  
        
 private:
-       
-       GroupMap* groupMap;
-    
     struct linePair {
         unsigned long long start;
         unsigned long long end;
         linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
         linePair() {}
     };
-
+    
        bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
        bool keepFirstTrim(Sequence&, QualityScores&);
        bool removeLastTrim(Sequence&, QualityScores&);
@@ -55,7 +52,7 @@ private:
     string reverseOligo(string);
 
        bool abort, createGroup;
-       string fastaFile, oligoFile, qFileName, groupfile, nameFile, outputDir;
+       string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
        
        bool flip, allFiles, qtrim, keepforward;
        int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
@@ -75,13 +72,15 @@ private:
        vector<string> barcodeNameVector;       //needed here?
        map<string, int> groupCounts;  
        map<string, string> nameMap;
+    map<string, int> nameCount; //for countfile name -> repCount
+    map<string, string> groupMap; //for countfile name -> group
 
        vector<int> processIDS;   //processid
        vector<linePair> lines;
        vector<linePair> qLines;
        
-       int driverCreateTrim(string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair, linePair);    
-       int createProcessesCreateTrim(string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >);
+       int driverCreateTrim(string, string, string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >, linePair, linePair);    
+       int createProcessesCreateTrim(string, string, string, string, string, string, string, string, string, string, string, vector<vector<string> >, vector<vector<string> >, vector<vector<string> >);
        int setLines(string, string);
 };
 
@@ -92,7 +91,7 @@ private:
 struct trimData {
     unsigned long long start, end;
     MothurOut* m;
-    string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, groupFileName, nameFile;
+    string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile;
        vector<vector<string> > fastaFileNames;
     vector<vector<string> > qualFileNames;
     vector<vector<string> > nameFileNames;
@@ -105,6 +104,7 @@ struct trimData {
        map<string, int> barcodes;
     map<string, int> rbarcodes;
        map<string, int> primers;
+    map<string, int> nameCount;
     vector<string>  linker;
     vector<string>  spacer;
        map<string, int> combos;
@@ -112,22 +112,26 @@ struct trimData {
        vector<string> barcodeNameVector;       
        map<string, int> groupCounts;  
        map<string, string> nameMap;
+    map<string, string> groupMap;
     
        trimData(){}
-       trimData(string fn, string qn, string nf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
+       trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
                       int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, map<string, int> rbar, vector<string> revP, vector<string> li, vector<string> spa, 
                       vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
                       int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
-                      int minL, int maxA, int maxH, int maxL, bool fli, map<string, string> nm) {
+                      int minL, int maxA, int maxH, int maxL, bool fli, map<string, string> nm, map<string, int> ncount) {
         filename = fn;
         qFileName = qn;
         nameFile = nf;
+        countfile = cf;
         trimFileName = tn;
         scrapFileName = sn;
         trimQFileName = tqn;
         scrapQFileName = sqn;
         trimNFileName = tnn;
         scrapNFileName = snn;
+        trimCFileName = tcn;
+        scrapCFileName = scn;
         groupFileName = gn;
         fastaFileNames = ffn;
         qualFileNames = qfn;
@@ -137,6 +141,7 @@ struct trimData {
         qlineStart = qstart;
         qlineEnd = qend;
                m = mout;
+        nameCount = ncount;
         
         pdiffs = pd;
         bdiffs = bd;
@@ -203,7 +208,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                
                
                ofstream outGroupsFile;
-               if (pDataArray->createGroup){   pDataArray->m->openOutputFile(pDataArray->groupFileName, outGroupsFile);   }
+               if ((pDataArray->createGroup) && (pDataArray->countfile == "")){        pDataArray->m->openOutputFile(pDataArray->groupFileName, outGroupsFile);   }
                if(pDataArray->allFiles){
                        for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file
                                for (int j = 0; j < pDataArray->fastaFileNames[i].size(); j++) { //clears old file
@@ -222,6 +227,14 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                        }
                }
                
+        ofstream trimCountFile;
+               ofstream scrapCountFile;
+               if(pDataArray->countfile != ""){
+                       pDataArray->m->openOutputFile(pDataArray->trimCFileName, trimCountFile);
+                       pDataArray->m->openOutputFile(pDataArray->scrapCFileName, scrapCountFile);
+            if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
+               }
+        
                ifstream inFASTA;
                pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
                if ((pDataArray->lineStart == 0) || (pDataArray->lineStart == 1)) {
@@ -248,7 +261,11 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                   
                        if (pDataArray->m->control_pressed) { 
                                inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
-                               if (pDataArray->createGroup) {   outGroupsFile.close();   }
+                               if ((pDataArray->createGroup) && (pDataArray->countfile == "")) {        outGroupsFile.close();   }
+                if(pDataArray->qFileName != "")        {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
+                if(pDataArray->nameFile != "") {       scrapNameFile.close(); trimNameFile.close();    }
+                if(pDataArray->countfile != "")        {       scrapCountFile.close(); trimCountFile.close();  }
+
                                if(pDataArray->qFileName != ""){ qFile.close(); }
                                return 0;
                        }
@@ -399,6 +416,15 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                                else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
                                        }
                                        
+                    int numRedundants = 0;
+                    if (pDataArray->countfile != "") {
+                        map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
+                        if (itCount != pDataArray->nameCount.end()) { 
+                            trimCountFile << itCount->first << '\t' << itCount->second << endl;
+                            numRedundants = itCount->second-1;
+                        }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
+                    }
+                                       
                                        if (pDataArray->createGroup) {
                                                if(pDataArray->barcodes.size() != 0){
                                                        string thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
@@ -412,9 +438,9 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                                                } 
                                                        }
                                                        
-                                                       outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
+                                                       if (pDataArray->countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
+                            else {   pDataArray->groupMap[currSeq.getName()] = thisGroup; }
                                                        
-                            int numRedundants = 0;
                                                        if (pDataArray->nameFile != "") {
                                                                map<string, string>::iterator itName = pDataArray->nameMap.find(currSeq.getName());
                                                                if (itName != pDataArray->nameMap.end()) { 
@@ -462,6 +488,12 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                                if (itName != pDataArray->nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
                                                else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); }
                                        }
+                    if (pDataArray->countfile != "") {
+                        map<string, int>::iterator itCount = pDataArray->nameCount.find(currSeq.getName());
+                        if (itCount != pDataArray->nameCount.end()) { 
+                            trimCountFile << itCount->first << '\t' << itCount->second << endl;
+                        }else { pDataArray->m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); pDataArray->m->mothurOutEndLine(); }
+                    }
                                        currSeq.setName(currSeq.getName() + '|' + trashCode);
                                        currSeq.setUnaligned(origSeq);
                                        currSeq.setAligned(origSeq);