]> git.donarmstrong.com Git - mothur.git/commitdiff
added primer.design command. fixed bug with linux unifrac subsampling, metastats...
authorSarah Westcott <mothur.westcott@gmail.com>
Fri, 25 Jan 2013 20:46:33 +0000 (15:46 -0500)
committerSarah Westcott <mothur.westcott@gmail.com>
Fri, 25 Jan 2013 20:46:33 +0000 (15:46 -0500)
29 files changed:
Mothur.xcodeproj/project.pbxproj
classifyseqscommand.cpp
commandfactory.cpp
consensusseqscommand.cpp
countseqscommand.cpp
counttable.cpp
engine.cpp
flowdata.cpp
flowdata.h
groupmap.cpp
makecontigscommand.cpp
makecontigscommand.h
matrixoutputcommand.cpp
metastatscommand.cpp
mothurout.cpp
mothurout.h
newcommandtemplate.cpp
primerdesigncommand.cpp [new file with mode: 0644]
primerdesigncommand.h [new file with mode: 0644]
qualityscores.cpp
qualityscores.h
screenseqscommand.cpp
sequence.cpp
sequence.hpp
summarysharedcommand.cpp
treegroupscommand.cpp
unifracunweightedcommand.cpp
unifracweightedcommand.cpp
venn.cpp

index ea4f7edffad3749bd6c87275a3f94d5d61ce8264..6fed2b9d42591e6709f9316961652e61c725b813 100644 (file)
@@ -39,6 +39,7 @@
                A741FAD215D1688E0067BCC5 /* sequencecountparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */; };
                A7496D2E167B531B00CC7D7C /* kruskalwalliscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */; };
                A74A9A9F148E881E00AB5E3E /* spline.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74A9A9E148E881E00AB5E3E /* spline.cpp */; };
+               A74C06E916A9C0A9008390A3 /* primerdesigncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74C06E816A9C0A8008390A3 /* primerdesigncommand.cpp */; };
                A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; };
                A74D59A4159A1E2000043046 /* counttable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D59A3159A1E2000043046 /* counttable.cpp */; };
                A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A754149614840CF7005850D1 /* summaryqualcommand.cpp */; };
                A7496D2D167B531B00CC7D7C /* kruskalwalliscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kruskalwalliscommand.h; sourceTree = "<group>"; };
                A74A9A9D148E881E00AB5E3E /* spline.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = spline.h; sourceTree = "<group>"; };
                A74A9A9E148E881E00AB5E3E /* spline.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = spline.cpp; sourceTree = "<group>"; };
+               A74C06E616A9C097008390A3 /* primerdesigncommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = primerdesigncommand.h; sourceTree = "<group>"; };
+               A74C06E816A9C0A8008390A3 /* primerdesigncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = primerdesigncommand.cpp; sourceTree = "<group>"; };
                A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerauchimecommand.h; sourceTree = "<group>"; };
                A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerauchimecommand.cpp; sourceTree = "<group>"; };
                A74D59A3159A1E2000043046 /* counttable.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = counttable.cpp; sourceTree = "<group>"; };
                                A7E9B79512D37EC400DA6239 /* pipelinepdscommand.cpp */,
                                A7E9B79812D37EC400DA6239 /* preclustercommand.h */,
                                A7E9B79712D37EC400DA6239 /* preclustercommand.cpp */,
+                               A74C06E616A9C097008390A3 /* primerdesigncommand.h */,
+                               A74C06E816A9C0A8008390A3 /* primerdesigncommand.cpp */,
                                A7E9B7A212D37EC400DA6239 /* quitcommand.h */,
                                A7E9B7A112D37EC400DA6239 /* quitcommand.cpp */,
                                A7E9B7AC12D37EC400DA6239 /* rarefactcommand.h */,
                                834D9D5C1656DEC800E7FAB9 /* regularizeddecisiontree.cpp in Sources */,
                                A7496D2E167B531B00CC7D7C /* kruskalwalliscommand.cpp in Sources */,
                                A79EEF8616971D4A0006DEC1 /* filtersharedcommand.cpp in Sources */,
+                               A74C06E916A9C0A9008390A3 /* primerdesigncommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index 6ec2d3886a218405f8132a03fe1c59e0b8c29c20..114036b2e89ea47a6a3aed1c9e08b514835ccd24 100644 (file)
@@ -782,7 +782,7 @@ int ClassifySeqsCommand::execute(){
                        }
 #endif
                        
-                       if (!m->isBlank(newaccnosFile)) { m->mothurOutEndLine(); m->mothurOut("[WARNING]: mothur suspects some of your sequences may be reversed, please check " + newaccnosFile + " for the list of the sequences."); m->mothurOutEndLine(); 
+                       if (!m->isBlank(newaccnosFile)) { m->mothurOutEndLine(); m->mothurOut("[WARNING]: mothur reversed some your sequences for a better classification.  If you would like to take a closer look, please check " + newaccnosFile + " for the list of the sequences."); m->mothurOutEndLine(); 
                 outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile);
             }else { m->mothurRemove(newaccnosFile); }
 
index c8421d29c513f65bfe8684fe10a09e8c439d7915..ce51d7228b415228da4c6c15121001989d03b75a 100644 (file)
 #include "sffmultiplecommand.h"
 #include "classifysharedcommand.h"
 #include "filtersharedcommand.h"
+#include "primerdesigncommand.h"
 
 /*******************************************************/
 
@@ -297,6 +298,7 @@ CommandFactory::CommandFactory(){
        commands["quit"]                                = "MPIEnabled"; 
     commands["classify.shared"]                = "classify.shared"; 
     commands["filter.shared"]          = "filter.shared"; 
+    commands["primer.design"]          = "primer.design"; 
     
 
 }
@@ -513,6 +515,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
         else if(commandName == "sff.multiple")          {      command = new SffMultipleCommand(optionString);             }
         else if(commandName == "classify.shared")       {      command = new ClassifySharedCommand(optionString);          }
         else if(commandName == "filter.shared")         {      command = new FilterSharedCommand(optionString);            }
+        else if(commandName == "primer.design")         {      command = new PrimerDesignCommand(optionString);            }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -670,6 +673,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
         else if(commandName == "sff.multiple")          {      pipecommand = new SffMultipleCommand(optionString);             }
         else if(commandName == "classify.shared")       {      pipecommand = new ClassifySharedCommand(optionString);          }
         else if(commandName == "filter.shared")         {      pipecommand = new FilterSharedCommand(optionString);            }
+        else if(commandName == "primer.design")         {      pipecommand = new PrimerDesignCommand(optionString);            }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -813,6 +817,7 @@ Command* CommandFactory::getCommand(string commandName){
         else if(commandName == "sff.multiple")          {      shellcommand = new SffMultipleCommand();            }
         else if(commandName == "classify.shared")       {      shellcommand = new ClassifySharedCommand();         }
         else if(commandName == "filter.shared")         {      shellcommand = new FilterSharedCommand();           }
+        else if(commandName == "primer.design")         {      shellcommand = new PrimerDesignCommand();           }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
index f0fc6bff9da6d6d8b0ed8b368ad2422f25cd732a..9bfdc9caba1e6bef2fe14d6eb469af2218bd29d8 100644 (file)
@@ -219,6 +219,8 @@ int ConsensusSeqsCommand::execute(){
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
+        int start = time(NULL);
+        
                readFasta();
                
                if (m->control_pressed) { return 0; }
@@ -391,6 +393,8 @@ int ConsensusSeqsCommand::execute(){
                        delete input;
                }
                
+        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to find the consensus sequences.");
+        
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
index 3bafbcd71752b48e675e0d85fcf037cc896056dc..dfa012eeff0f0f49c778c00b2098348a489f1855 100644 (file)
@@ -236,7 +236,11 @@ int CountSeqsCommand::processSmall(string outputFileName){
                        
                        string firstCol, secondCol;
                        in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
-                       
+            //cout << firstCol << '\t' << secondCol << endl;
+            m->checkName(firstCol);
+            m->checkName(secondCol);
+                       //cout << firstCol << '\t' << secondCol << endl;
+           
                        vector<string> names;
                        m->splitAtChar(secondCol, names, ',');
                        
@@ -435,6 +439,8 @@ map<int, string> CountSeqsCommand::processNameFile(string name) {
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    m->checkName(firstCol);
+                    m->checkName(secondCol);
                     //parse names into vector
                     vector<string> theseNames;
                     m->splitAtComma(secondCol, theseNames);
@@ -456,6 +462,8 @@ map<int, string> CountSeqsCommand::processNameFile(string name) {
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    m->checkName(firstCol);
+                    m->checkName(secondCol);
                     //parse names into vector
                     vector<string> theseNames;
                     m->splitAtComma(secondCol, theseNames);
@@ -507,6 +515,7 @@ map<int, string> CountSeqsCommand::getGroupNames(string filename, set<string>& n
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    m->checkName(firstCol);
                     it = groupIndex.find(secondCol);
                     if (it == groupIndex.end()) { //add group, assigning the group and number so we can use vectors above
                         groupIndex[secondCol] = count;
@@ -529,6 +538,7 @@ map<int, string> CountSeqsCommand::getGroupNames(string filename, set<string>& n
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    m->checkName(firstCol);
                     it = groupIndex.find(secondCol);
                     if (it == groupIndex.end()) { //add group, assigning the group and number so we can use vectors above
                         groupIndex[secondCol] = count;
index 2ab0e345dbd110cc5c0402d0caa36becc1820bd2..ad0b2dadfcba7eb2f866b44d3d9b3011242b096a 100644 (file)
@@ -131,6 +131,9 @@ int CountTable::createTable(string namefile, string groupfile, bool createGroup)
             string firstCol, secondCol;
             in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
             
+            m->checkName(firstCol);
+            m->checkName(secondCol);
+            
             vector<string> names;
             m->splitAtChar(secondCol, names, ',');
             
index 48f782ab2a590d71a292e7b83125ba8ce0f3fffe..e4e107168c5723c82b90a7f103ccb8d9ee83cb60 100644 (file)
@@ -65,7 +65,9 @@ bool InteractEngine::getInput(){
                                if (pid == 0) {
                                
                        #endif
-                       
+                    
+                       if (mout->changedSeqNames) { mout->mothurOut("[WARNING]: your sequence names contained ':'.  I changed them to '_' to avoid problems in your downstream analysis.\n"); }
+                    
                        mout->mothurOutEndLine();
                        
                        input = getCommand();   
@@ -115,6 +117,7 @@ bool InteractEngine::getInput(){
                                        //cout << pid << " is in execute " << commandName << endl;
                                        #endif
                                        //executes valid command
+                    mout->changedSeqNames = false;
                                        mout->runParse = true;
                                        mout->clearGroups();
                                        mout->clearAllGroups();
@@ -276,7 +279,7 @@ bool BatchEngine::getInput(){
                        
                        
                        if (input[0] != '#') {
-                               
+                               if (mout->changedSeqNames) { mout->mothurOut("[WARNING]: your sequence names contained ':'.  I changed them to '_' to avoid problems in your downstream analysis.\n"); }
                                mout->mothurOutEndLine();
                                mout->mothurOut("mothur > " + input);
                                mout->mothurOutEndLine();
@@ -300,6 +303,7 @@ bool BatchEngine::getInput(){
                                                if ((cFactory->MPIEnabled(commandName)) || (pid == 0)) {
                                        #endif
                                        //executes valid command
+                    mout->changedSeqNames = false;
                                        mout->runParse = true;
                                        mout->clearGroups();
                                        mout->clearAllGroups();
@@ -413,6 +417,8 @@ bool ScriptEngine::getInput(){
                        input = getNextCommand(listOfCommands); 
                        
                        if (input == "") { input = "quit()"; }
+                    
+            if (mout->changedSeqNames) { mout->mothurOut("[WARNING]: your sequence names contained ':'.  I changed them to '_' to avoid problems in your downstream analysis.\n"); }
                        
                        if (mout->gui) {
                                if ((input.find("quit") != string::npos) || (input.find("set.logfile") != string::npos)) {}
@@ -468,6 +474,7 @@ bool ScriptEngine::getInput(){
                                                        //cout << pid << " is in execute" << endl;      
                                        #endif
                                        //executes valid command
+                    mout->changedSeqNames = false;
                                        mout->runParse = true;
                                        mout->clearGroups();
                                        mout->clearAllGroups();
index 1fe7d7faf1e31ee486890980c4731ddebfdca8fb..5dc7dc3e6c780171f1ee5cacdb1d766864f18a18 100644 (file)
@@ -42,15 +42,14 @@ FlowData::FlowData(int numFlows, float signal, float noise, int maxHomoP, string
 bool FlowData::getNext(ifstream& flowFile){
        
        try {
-               flowFile >> seqName >> endFlow; 
-        if (seqName.length() != 0) {
-            //cout << "in Flowdata " + seqName << endl;
+        seqName = getSequenceName(flowFile);
+               flowFile >> endFlow;    
+        if (!m->control_pressed) {
             for(int i=0;i<numFlows;i++)        {       flowFile >> flowData[i];        }
-            //cout << "in Flowdata read " << seqName + " done" << endl;
             updateEndFlow(); 
             translateFlow();
             m->gobble(flowFile);
-               }else{ m->mothurOut("Error in reading your flowfile, at position " + toString(flowFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+               }
             
                if(flowFile){   return 1;       }
                else            {       return 0;       }
@@ -61,6 +60,26 @@ bool FlowData::getNext(ifstream& flowFile){
        }
        
 }
+//********************************************************************************************************************
+string FlowData::getSequenceName(ifstream& flowFile) {
+       try {
+               string name = "";
+               
+        flowFile >> name;
+               
+               if (name.length() != 0) { 
+            for (int i = 0; i < name.length(); i++) {
+                if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
+            }
+        }else{ m->mothurOut("Error in reading your flowfile, at position " + toString(flowFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;  }
+        
+               return name;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "FlowData", "getSequenceName");
+               exit(1);
+       }
+}
 
 //**********************************************************************************************************************
 
index 100765370ca5429e75b8688a6abc51d79f17edc5..c7fd08a0a0100841e2805062ae5f44f7e5821d6d 100644 (file)
@@ -38,6 +38,7 @@ private:
        string seqName, locationString, sequence, baseFlow;
        int numFlows, maxFlows, endFlow;
        vector<float> flowData;
+    string getSequenceName(ifstream&);
 };
 
 #endif
index fb2495c13fe8c09095221fbff3fb79c29c9d129a..e5d8427200aed0a61aaf04a0e7084ed0e17a0a78 100644 (file)
@@ -20,7 +20,6 @@
 
 /************************************************************/
  GroupMap::~GroupMap(){}
-
 /************************************************************/
 int GroupMap::readMap() {
     try {
@@ -45,6 +44,7 @@ int GroupMap::readMap() {
                     setNamesOfGroups(seqGroup);
                     
                     if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
+                    m->checkName(seqName);
                     it = groupmap.find(seqName);
                     
                     if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
@@ -69,7 +69,7 @@ int GroupMap::readMap() {
                     setNamesOfGroups(seqGroup);
                     
                     if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-                    
+                    m->checkName(seqName);
                     it = groupmap.find(seqName);
                     
                     if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
@@ -114,7 +114,7 @@ int GroupMap::readDesignMap() {
                     setNamesOfGroups(seqGroup);
                     
                     if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-                    
+                    m->checkName(seqName);
                     it = groupmap.find(seqName);
                     
                     if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
@@ -139,7 +139,7 @@ int GroupMap::readDesignMap() {
                     setNamesOfGroups(seqGroup);
                     
                     if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-                    
+                    m->checkName(seqName);
                     it = groupmap.find(seqName);
                     
                     if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
@@ -188,7 +188,7 @@ int GroupMap::readMap(string filename) {
                     setNamesOfGroups(seqGroup);
                     
                     if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-                    
+                    m->checkName(seqName);
                     it = groupmap.find(seqName);
                     
                     if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
@@ -213,7 +213,7 @@ int GroupMap::readMap(string filename) {
                     setNamesOfGroups(seqGroup);
                     
                     if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-                    
+                    m->checkName(seqName);
                     it = groupmap.find(seqName);
                     
                     if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
@@ -261,7 +261,7 @@ int GroupMap::readDesignMap(string filename) {
                     setNamesOfGroups(seqGroup);
                     
                     if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-                    
+                    m->checkName(seqName);
                     it = groupmap.find(seqName);
                     
                     if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
@@ -286,7 +286,7 @@ int GroupMap::readDesignMap(string filename) {
                     setNamesOfGroups(seqGroup);
                     
                     if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
-                    
+                    m->checkName(seqName);
                     it = groupmap.find(seqName);
                     
                     if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
@@ -325,7 +325,7 @@ string GroupMap::getGroup(string sequenceName) {
 
 void GroupMap::setGroup(string sequenceName, string groupN) {
        setNamesOfGroups(groupN);
-       
+       m->checkName(sequenceName);
        it = groupmap.find(sequenceName);
        
        if (it != groupmap.end()) {  m->mothurOut("Your groupfile contains more than 1 sequence named " + sequenceName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
index 32e2d68a3d1ebe51e0acc8fa64b7a499324fd60f..42406998133eba4ef7c9a6e6cc0d049e3cdcbe19 100644 (file)
@@ -33,6 +33,7 @@ vector<string> MakeContigsCommand::setParameters(){
                CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "","",false,false); parameters.push_back(pgapextend);
         CommandParameter pthreshold("threshold", "Number", "", "40", "", "", "","",false,false); parameters.push_back(pthreshold);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+        CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa", "sanger", "", "", "","",false,false,true); parameters.push_back(pformat);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
@@ -51,13 +52,14 @@ string MakeContigsCommand::getHelpString(){
                string helpString = "";
                helpString += "The make.contigs command reads a file, forward fastq file and a reverse fastq file or forward fasta and reverse fasta files and outputs new fasta.  It will also provide new quality files if the fastq or file parameter is used.\n";
         helpString += "If an oligos file is provided barcodes and primers will be trimmed, and a group file will be created.\n";
-               helpString += "The make.contigs command parameters are ffastq, rfastq, oligos, tdiffs, bdiffs, ldiffs, sdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, allfiles and processors.\n";
+               helpString += "The make.contigs command parameters are ffastq, rfastq, oligos, format, tdiffs, bdiffs, ldiffs, sdiffs, pdiffs, align, match, mismatch, gapopen, gapextend, allfiles and processors.\n";
                helpString += "The ffastq and rfastq, file, or ffasta and rfasta parameters are required.\n";
         helpString += "The file parameter is 2 column file containing the forward fastq files in the first column and their matching reverse fastq files in the second column.  Mothur will process each pair and create a combined fasta and qual file with all the sequences.\n";
         helpString += "The ffastq and rfastq parameters are used to provide a forward fastq and reverse fastq file to process.  If you provide one, you must provide the other.\n";
         helpString += "The ffasta and rfasta parameters are used to provide a forward fasta and reverse fasta file to process.  If you provide one, you must provide the other.\n";
         helpString += "The fqfile and rqfile parameters are used to provide a forward quality and reverse quality files to process with the ffasta and rfasta parameters.  If you provide one, you must provide the other.\n";
-               helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh and needleman. The default is needleman.\n";
+               helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa or illumina, default=sanger.\n";
+        helpString += "The align parameter allows you to specify the alignment method to use.  Your options are: gotoh and needleman. The default is needleman.\n";
         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
                helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
                helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
@@ -317,6 +319,19 @@ MakeContigsCommand::MakeContigsCommand(string option)  {
                        
                        align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
                        if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
+            
+            format = validParameter.validFile(parameters, "format", false);            if (format == "not found"){     format = "sanger";      }
+            
+            if ((format != "sanger") && (format != "illumina") && (format != "solexa"))  { 
+                               m->mothurOut(format + " is not a valid format. Your format choices are sanger, solexa and illumina, aborting." ); m->mothurOutEndLine();
+                               abort=true;
+                       }
+            
+            //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
+            for (int i = -64; i < 65; i++) { 
+                char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
+                convertTable.push_back(temp);
+            }
         }
                
        }
@@ -1508,15 +1523,8 @@ fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){
         if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
         if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
         
-        vector<int> qualScores;
-               int controlChar = int('!');
-               for (int i = 0; i < quality.length(); i++) { 
-                       int temp = int(quality[i]);
-                       temp -= controlChar;
-                       
-                       qualScores.push_back(temp);
-               }
-    
+        vector<int> qualScores = convertQual(quality);
+        
         read.name = name;
         read.sequence = sequence;
         read.scores = qualScores;
@@ -1912,6 +1920,34 @@ string MakeContigsCommand::reverseOligo(string oligo){
        }
 }
 //**********************************************************************************************************************
+vector<int> MakeContigsCommand::convertQual(string qual) {
+       try {
+               vector<int> qualScores;
+               
+               for (int i = 0; i < qual.length(); i++) { 
+            
+            int temp = 0;
+            temp = int(qual[i]);
+            if (format == "illumina") {
+                temp -= 64; //char '@'
+            }else if (format == "solexa") {
+                temp = int(convertTable[temp]); //convert to sanger
+                temp -= int('!'); //char '!'
+            }else {
+                temp -= int('!'); //char '!'
+            }
+                       qualScores.push_back(temp);
+               }
+               
+               return qualScores;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "convertQual");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
 
 
 
index 2732d68a959934425430853ac67b5c8414aeffd5..43105b8236371f6e2feb3c09140e9b71b8e721bf 100644 (file)
@@ -60,7 +60,7 @@ public:
     
 private:
     bool abort, allFiles, createGroup;
-    string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, file;
+    string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, file, format;
        float match, misMatch, gapOpen, gapExtend;
        int processors, longestBase, threshold, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
     vector<string> outputNames;
@@ -70,11 +70,13 @@ private:
     vector<string>  linker;
     vector<string>  spacer;
        vector<string> primerNameVector;        
-       vector<string> barcodeNameVector;       
+       vector<string> barcodeNameVector;
+       vector<char> convertTable;
     
        map<string, int> groupCounts; 
     map<string, string> groupMap;
     
+    vector<int> convertQual(string);
     fastqRead readFastq(ifstream&, bool&);
     vector< vector< vector<string> > > preProcessData(unsigned long int&);
     vector< vector<string> > readFileNames(string);
index d2c29bdee9b6fc84481f89775f497b3165b72b4b..3844e18c1e687ef47fae1dda09d10e61973ab798 100644 (file)
@@ -691,70 +691,10 @@ int MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
                
         if (iters != 0) {
             //we need to find the average distance and standard deviation for each groups distance
+            vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals, mode);
             
-            vector< vector<seqDist>  > calcAverages; calcAverages.resize(matrixCalculators.size()); 
-            for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
-                calcAverages[i].resize(calcDistsTotals[0][i].size());
-                
-                for (int j = 0; j < calcAverages[i].size(); j++) {
-                    calcAverages[i][j].seq1 = calcDistsTotals[0][i][j].seq1;
-                    calcAverages[i][j].seq2 = calcDistsTotals[0][i][j].seq2;
-                    calcAverages[i][j].dist = 0.0;
-                }
-            }
-            if (mode == "average") {
-                for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
-                    for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
-                        for (int j = 0; j < calcAverages[i].size(); j++) {
-                            calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
-                            if (m->debug) {  m->mothurOut("[DEBUG]: Totaling for average calc: iter = " + toString(thisIter) + ", " + thisLookup[calcDistsTotals[thisIter][i][j].seq1]->getGroup() + " - " + thisLookup[calcDistsTotals[thisIter][i][j].seq2]->getGroup() + " distance = " + toString(calcDistsTotals[thisIter][i][j].dist) + ". New total = " + toString(calcAverages[i][j].dist) + ".\n");  }
-                        }
-                    }
-                }
-                
-                for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
-                    for (int j = 0; j < calcAverages[i].size(); j++) {
-                        calcAverages[i][j].dist /= (float) iters;
-                    }
-                }
-            }else { //find median
-                for (int i = 0; i < calcAverages.size(); i++) { //for each calc
-                    for (int j = 0; j < calcAverages[i].size(); j++) {  //for each comparison
-                        vector<double> dists;
-                        for (int thisIter = 0; thisIter < iters; thisIter++) { //for each subsample
-                            dists.push_back(calcDistsTotals[thisIter][i][j].dist);
-                        }
-                        sort(dists.begin(), dists.end());
-                        calcAverages[i][j].dist = dists[(iters/2)];
-                    }
-                }
-            }
             //find standard deviation
-            vector< vector<seqDist>  > stdDev; stdDev.resize(matrixCalculators.size());
-            for (int i = 0; i < stdDev.size(); i++) {  //initialize sums to zero.
-                stdDev[i].resize(calcDistsTotals[0][i].size());
-                
-                for (int j = 0; j < stdDev[i].size(); j++) {
-                    stdDev[i][j].seq1 = calcDistsTotals[0][i][j].seq1;
-                    stdDev[i][j].seq2 = calcDistsTotals[0][i][j].seq2;
-                    stdDev[i][j].dist = 0.0;
-                }
-            }
-            
-            for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
-                for (int i = 0; i < stdDev.size(); i++) {  
-                    for (int j = 0; j < stdDev[i].size(); j++) {
-                        stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
-                    }
-                }
-            }
-
-            for (int i = 0; i < stdDev.size(); i++) {  //finds average.
-                for (int j = 0; j < stdDev[i].size(); j++) {
-                    stdDev[i][j].dist /= (float) iters;
-                    stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
-                }
-            }
+            vector< vector<seqDist>  > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages);
             
             //print results
             for (int i = 0; i < calcDists.size(); i++) {
index 3eaee9652fa230281e47d77bfcf0430b3be4afa5..8e61b37ee0e5639c76d3adf3f23ae0b4eefd234f 100644 (file)
@@ -65,7 +65,7 @@ string MetaStatsCommand::getOutputPattern(string type) {
     try {
         string pattern = "";
         
-        if (type == "metastats") {  pattern = "[filename],[distance],[groups],metastats"; } 
+        if (type == "metastats") {  pattern = "[filename],[distance],[group],metastats"; } 
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
         return pattern;
index 240e7ec2e5ac24ec3e637b78ae28dd7608a0389d..54cdd33bbe72cfecb241ed0d24d537bf4654af06 100644 (file)
@@ -1606,6 +1606,7 @@ int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
                     //are there confidence scores, if so remove them
                     if (secondCol.find_first_of('(') != -1) {  removeConfidences(secondCol);   }
                     map<string, string>::iterator itTax = taxMap.find(firstCol);
@@ -1633,6 +1634,7 @@ int MothurOut::readTax(string namefile, map<string, string>& taxMap) {
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
                     //are there confidence scores, if so remove them
                     if (secondCol.find_first_of('(') != -1) {  removeConfidences(secondCol);   }
                     map<string, string>::iterator itTax = taxMap.find(firstCol);
@@ -1684,6 +1686,9 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool red
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    
                     //parse names into vector
                     vector<string> theseNames;
                     splitAtComma(secondCol, theseNames);
@@ -1702,10 +1707,13 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, bool red
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    
                     //parse names into vector
                     vector<string> theseNames;
                     splitAtComma(secondCol, theseNames);
-                    for (int i = 0; i < theseNames.size(); i++) {  nameMap[theseNames[i]] = firstCol;  }
+                    for (int i = 0; i < theseNames.size(); i++) {   nameMap[theseNames[i]] = firstCol;  }
                     pairDone = false; 
                 }
             }  
@@ -1743,6 +1751,8 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     nameMap[secondCol] = firstCol;
                     pairDone = false; 
                 }
@@ -1758,6 +1768,8 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, int flip
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     nameMap[secondCol] = firstCol;
                     pairDone = false; 
                 }
@@ -1797,6 +1809,8 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<stri
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     //parse names into vector
                     vector<string> theseNames;
                     splitAtComma(secondCol, theseNames);
@@ -1816,6 +1830,8 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap, map<stri
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     //parse names into vector
                     vector<string> theseNames;
                     splitAtComma(secondCol, theseNames);
@@ -1857,7 +1873,10 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
-                if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    nameMap[firstCol] = secondCol; pairDone = false; }
             }
                }
                in.close();
@@ -1869,7 +1888,10 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
                 if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
-                if (pairDone) { nameMap[firstCol] = secondCol; pairDone = false; }
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    nameMap[firstCol] = secondCol; pairDone = false; }
             }
         }
                
@@ -1905,6 +1927,8 @@ int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap)
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     vector<string> temp;
                     splitAtComma(secondCol, temp);
                     nameMap[firstCol] = temp;
@@ -1922,6 +1946,8 @@ int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap)
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     vector<string> temp;
                     splitAtComma(secondCol, temp);
                     nameMap[firstCol] = temp;
@@ -1963,9 +1989,73 @@ map<string, int> MothurOut::readNames(string namefile) {
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    int num = getNumNames(secondCol);
+                    nameMap[firstCol] = num;
+                    pairDone = false;  
+                } 
+            }
+               }
+        in.close();
+        
+        if (rest != "") {
+            vector<string> pieces = splitWhiteSpace(rest);
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
+                    int num = getNumNames(secondCol);
+                    nameMap[firstCol] = num;
+                    pairDone = false;  
+                } 
+            }
+        }
+               
+               return nameMap;
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "readNames");
+               exit(1);
+       }
+}
+/**********************************************************************************************************************/
+map<string, int> MothurOut::readNames(string namefile, unsigned long int& numSeqs) { 
+       try {
+               map<string, int> nameMap;
+        numSeqs = 0;
+               
+               //open input file
+               ifstream in;
+               openInputFile(namefile, in);
+               
+        string rest = "";
+        char buffer[4096];
+        bool pairDone = false;
+        bool columnOne = true;
+        string firstCol, secondCol;
+        
+               while (!in.eof()) {
+                       if (control_pressed) { break; }
+                       
+            in.read(buffer, 4096);
+            vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
+            
+            for (int i = 0; i < pieces.size(); i++) {
+                if (columnOne) {  firstCol = pieces[i]; columnOne=false; }
+                else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
+                
+                if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     int num = getNumNames(secondCol);
                     nameMap[firstCol] = num;
                     pairDone = false;  
+                    numSeqs += num;
                 } 
             }
                }
@@ -1978,9 +2068,12 @@ map<string, int> MothurOut::readNames(string namefile) {
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     int num = getNumNames(secondCol);
                     nameMap[firstCol] = num;
                     pairDone = false;  
+                    numSeqs += num;
                 } 
             }
         }
@@ -1993,6 +2086,19 @@ map<string, int> MothurOut::readNames(string namefile) {
                exit(1);
        }
 }
+/************************************************************/
+int MothurOut::checkName(string& name) {
+    try {
+        for (int i = 0; i < name.length(); i++) {
+            if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
+        }        
+        return 0;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "checkName");
+               exit(1);
+       }
+}
 /**********************************************************************************************************************/
 int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) { 
        try {
@@ -2019,6 +2125,8 @@ int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, m
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     int num = getNumNames(secondCol);
                     
                     map<string, string>::iterator it = fastamap.find(firstCol);
@@ -2044,6 +2152,8 @@ int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, m
                 else  { secondCol = pieces[i]; pairDone = true; columnOne=true; }
                 
                 if (pairDone) { 
+                    checkName(firstCol);
+                    checkName(secondCol);
                     int num = getNumNames(secondCol);
                     
                     map<string, string>::iterator it = fastamap.find(firstCol);
@@ -2083,13 +2193,13 @@ set<string> MothurOut::readAccnos(string accnosfile){
             in.read(buffer, 4096);
             vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
             
-            for (int i = 0; i < pieces.size(); i++) {  names.insert(pieces[i]);  }
+            for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.insert(pieces[i]);  }
         }
                in.close();     
                
         if (rest != "") {
             vector<string> pieces = splitWhiteSpace(rest);
-            for (int i = 0; i < pieces.size(); i++) {  names.insert(pieces[i]);  } 
+            for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.insert(pieces[i]);  } 
         }
                return names;
        }
@@ -2115,13 +2225,13 @@ int MothurOut::readAccnos(string accnosfile, vector<string>& names){
             in.read(buffer, 4096);
             vector<string> pieces = splitWhiteSpace(rest, buffer, in.gcount());
             
-            for (int i = 0; i < pieces.size(); i++) {  names.push_back(pieces[i]);  }
+            for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.push_back(pieces[i]);  }
         }
                in.close();     
         
         if (rest != "") {
             vector<string> pieces = splitWhiteSpace(rest);
-            for (int i = 0; i < pieces.size(); i++) {  names.push_back(pieces[i]);  }
+            for (int i = 0; i < pieces.size(); i++) {  checkName(pieces[i]); names.push_back(pieces[i]);  }
         }
                
                return 0;
@@ -2997,6 +3107,180 @@ vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists,
                exit(1);
        }
 }
+/**************************************************************************************************/
+vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals, string mode) {
+       try{
+        
+        vector< vector<seqDist>  > calcAverages; //calcAverages.resize(calcDistsTotals[0].size()); 
+        for (int i = 0; i < calcDistsTotals[0].size(); i++) {  //initialize sums to zero.
+            //calcAverages[i].resize(calcDistsTotals[0][i].size());
+            vector<seqDist> temp;
+            for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            calcAverages.push_back(temp);
+        }
+        
+        if (mode == "average") {
+            for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
+                for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
+                    for (int j = 0; j < calcAverages[i].size(); j++) {
+                        calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+                    }
+                }
+            }
+            
+            for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
+                for (int j = 0; j < calcAverages[i].size(); j++) {
+                    calcAverages[i][j].dist /= (float) calcDistsTotals.size();
+                }
+            }
+        }else { //find median
+            for (int i = 0; i < calcAverages.size(); i++) { //for each calc
+                for (int j = 0; j < calcAverages[i].size(); j++) {  //for each comparison
+                    vector<double> dists;
+                    for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //for each subsample
+                        dists.push_back(calcDistsTotals[thisIter][i][j].dist);
+                    }
+                    sort(dists.begin(), dists.end());
+                    calcAverages[i][j].dist = dists[(calcDistsTotals.size()/2)];
+                }
+            }
+        }
+
+        return calcAverages;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getAverages");                
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector< vector<seqDist> > MothurOut::getAverages(vector< vector< vector<seqDist> > >& calcDistsTotals) {
+       try{
+        
+        vector< vector<seqDist>  > calcAverages; //calcAverages.resize(calcDistsTotals[0].size()); 
+        for (int i = 0; i < calcDistsTotals[0].size(); i++) {  //initialize sums to zero.
+            //calcAverages[i].resize(calcDistsTotals[0][i].size());
+            vector<seqDist> temp;
+            for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            calcAverages.push_back(temp);
+        }
+        
+        
+        for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //sum all groups dists for each calculator
+                for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
+                    for (int j = 0; j < calcAverages[i].size(); j++) {
+                        calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
+                    }
+                }
+        }
+            
+        for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
+                for (int j = 0; j < calcAverages[i].size(); j++) {
+                    calcAverages[i][j].dist /= (float) calcDistsTotals.size();
+                }
+        }
+        
+        return calcAverages;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getAverages");                
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals) {
+       try{
+        
+        vector< vector<seqDist> > calcAverages = getAverages(calcDistsTotals);
+        
+        //find standard deviation
+        vector< vector<seqDist>  > stdDev;  
+        for (int i = 0; i < calcDistsTotals[0].size(); i++) {  //initialize sums to zero.
+            vector<seqDist> temp;
+            for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            stdDev.push_back(temp);
+        }
+        
+        for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+            for (int i = 0; i < stdDev.size(); i++) {  
+                for (int j = 0; j < stdDev[i].size(); j++) {
+                    stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
+                }
+            }
+        }
+        
+        for (int i = 0; i < stdDev.size(); i++) {  //finds average.
+            for (int j = 0; j < stdDev[i].size(); j++) {
+                stdDev[i][j].dist /= (float) calcDistsTotals.size();
+                stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
+            }
+        }
+
+        return stdDev;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getAverages");                
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector< vector<seqDist> > MothurOut::getStandardDeviation(vector< vector< vector<seqDist> > >& calcDistsTotals, vector< vector<seqDist> >& calcAverages) {
+       try{
+        //find standard deviation
+        vector< vector<seqDist>  > stdDev;  
+        for (int i = 0; i < calcDistsTotals[0].size(); i++) {  //initialize sums to zero.
+            vector<seqDist> temp;
+            for (int j = 0; j < calcDistsTotals[0][i].size(); j++) {
+                seqDist tempDist;
+                tempDist.seq1 = calcDistsTotals[0][i][j].seq1;
+                tempDist.seq2 = calcDistsTotals[0][i][j].seq2;
+                tempDist.dist = 0.0;
+                temp.push_back(tempDist);
+            }
+            stdDev.push_back(temp);
+        }
+        
+        for (int thisIter = 0; thisIter < calcDistsTotals.size(); thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+            for (int i = 0; i < stdDev.size(); i++) {  
+                for (int j = 0; j < stdDev[i].size(); j++) {
+                    stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
+                }
+            }
+        }
+        
+        for (int i = 0; i < stdDev.size(); i++) {  //finds average.
+            for (int j = 0; j < stdDev[i].size(); j++) {
+                stdDev[i][j].dist /= (float) calcDistsTotals.size();
+                stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
+            }
+        }
+        
+        return stdDev;
+    }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "getAverages");                
+               exit(1);
+       }
+}
+
 /**************************************************************************************************/
 bool MothurOut::isContainingOnlyDigits(string input) {
        try{
index 84f9be31f82ebc2c27aa19563fb5366519e1cba8..ffdcaf63d704d087eccf27892bf6797765955a0e 100644 (file)
@@ -69,7 +69,7 @@ class MothurOut {
                vector<string> binLabelsInFile;
                vector<string> currentBinLabels;
                string saveNextLabel, argv, sharedHeaderMode, groupMode;
-               bool printedHeaders, commandInputsConvertError;
+               bool printedHeaders, commandInputsConvertError, changedSeqNames;
                
                //functions from mothur.h
                //file operations
@@ -102,6 +102,7 @@ class MothurOut {
         set<string> readAccnos(string);
         int readAccnos(string, vector<string>&);
         map<string, int> readNames(string);
+        map<string, int> readNames(string, unsigned long int&);
         int readTax(string, map<string, string>&);
         int readNames(string, map<string, string>&, map<string, int>&);
                int readNames(string, map<string, string>&);
@@ -146,6 +147,7 @@ class MothurOut {
         string removeQuotes(string);
         string makeList(vector<string>&);
         bool isSubset(vector<string>, vector<string>); //bigSet, subset
+        int checkName(string&);
                
                //math operation
                int factorial(int num);
@@ -158,6 +160,10 @@ class MothurOut {
         vector<double> getStandardDeviation(vector< vector<double> >&);
         vector<double> getStandardDeviation(vector< vector<double> >&, vector<double>&);
         vector<double> getAverages(vector< vector<double> >&);
+        vector< vector<seqDist> > getStandardDeviation(vector< vector< vector<seqDist> > >&);
+        vector< vector<seqDist> > getStandardDeviation(vector< vector< vector<seqDist> > >&, vector< vector<seqDist> >&);
+        vector< vector<seqDist> > getAverages(vector< vector< vector<seqDist> > >&, string);
+        vector< vector<seqDist> > getAverages(vector< vector< vector<seqDist> > >&);
 
                int control_pressed;
                bool executing, runParse, jumble, gui, mothurCalling, debug;
@@ -252,6 +258,7 @@ class MothurOut {
             debug = false;
                        sharedHeaderMode = "";
             groupMode = "group";
+            changedSeqNames = false;
                }
                ~MothurOut();
 
index b2426f5c1bf0cf38306bea7bf756c01743a21f5a..3c893d8b44a04feddefb5ec18bebdc9350064656 100644 (file)
@@ -183,7 +183,7 @@ NewCommand::NewCommand(string option)  {
             
             ///variables for examples below that you will most likely want to put in the header for 
             //use by the other class functions.
-            string phylipfile, columnfile, namefile, fastafile, sharedfile, method;
+            string phylipfile, columnfile, namefile, fastafile, sharedfile, method, countfile;
             int processors;
             bool useTiming, allLines;
             vector<string> Estimators, Groups;
@@ -304,10 +304,13 @@ NewCommand::NewCommand(string option)  {
             //saved by mothur that is associated with the other files you are using as inputs.  
             //You can do so by adding the files associated with the namefile to the files vector and then asking parser to check.  
             //This saves our users headaches over file mismatches because they forgot to include the namefile, :)
-            if (namefile == "") {
-                               vector<string> files; files.push_back(fastafile);
-                               parser.getNameFile(files);
-                       }
+            if (countfile == "") { 
+                if (namefile == "") {
+                    vector<string> files; files.push_back(fastafile);
+                    parser.getNameFile(files);
+                }
+            }
+
                        
                }
                
diff --git a/primerdesigncommand.cpp b/primerdesigncommand.cpp
new file mode 100644 (file)
index 0000000..f6ec7b5
--- /dev/null
@@ -0,0 +1,1239 @@
+//
+//  primerdesigncommand.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 1/18/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "primerdesigncommand.h"
+
+//**********************************************************************************************************************
+vector<string> PrimerDesignCommand::setParameters(){   
+       try {
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+        CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","summary-list",false,true,true); parameters.push_back(plist);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","",false,true, true); parameters.push_back(pfasta);
+               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount);
+        CommandParameter plength("length", "Number", "", "18", "", "", "","",false,false); parameters.push_back(plength);
+        CommandParameter pmintm("mintm", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmintm);
+        CommandParameter pmaxtm("maxtm", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxtm);
+        CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pprocessors);
+        CommandParameter potunumber("otunumber", "Number", "", "-1", "", "", "","",false,true,true); parameters.push_back(potunumber);
+        CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
+        CommandParameter pcutoff("cutoff", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pcutoff);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string PrimerDesignCommand::getHelpString(){   
+       try {
+               string helpString = "";
+               helpString += "The primer.design allows you to design sequence fragments that are specific to particular OTUs.\n";
+               helpString += "The primer.design command parameters are: list, fasta, name, count, otunumber, cutoff, length, pdiffs, mintm, maxtm, processors and label.\n";
+               helpString += "The list parameter allows you to provide a list file and is required.\n";
+        helpString += "The fasta parameter allows you to provide a fasta file and is required.\n";
+        helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n";
+        helpString += "The count parameter allows you to provide a count file associated with your fasta file.\n";
+        helpString += "The label parameter is used to indicate the label you want to use from your list file.\n";
+        helpString += "The otunumber parameter is used to indicate the otu you want to use from your list file. It is required.\n";
+        helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+        helpString += "The length parameter is used to indicate the length of the primer. The default is 18.\n";
+        helpString += "The mintm parameter is used to indicate minimum melting temperature.\n";
+        helpString += "The maxtm parameter is used to indicate maximum melting temperature.\n";
+        helpString += "The processors parameter allows you to indicate the number of processors you want to use. Default=1.\n";
+        helpString += "The cutoff parameter allows you set a percentage of sequences that support the base. For example: cutoff=97 would only return a sequence that only showed ambiguities for bases that were not supported by at least 97% of sequences.\n";
+               helpString += "The primer.desing command should be in the following format: primer.design(list=yourListFile, fasta=yourFastaFile, name=yourNameFile)\n";
+               helpString += "primer.design(list=final.an.list, fasta=final.fasta, name=final.names, label=0.03)\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string PrimerDesignCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        
+        if (type == "fasta") {  pattern = "[filename],[distance],otu.cons.fasta"; } 
+        else if (type == "summary") {  pattern = "[filename],[distance],primer.summary"; }
+        else if (type == "list") {  pattern = "[filename],pick,[extension]"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "PrimerDesignCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+PrimerDesignCommand::PrimerDesignCommand(){    
+       try {
+               abort = true; calledHelp = true;
+               setParameters();
+        vector<string> tempOutNames;
+               outputTypes["summary"] = tempOutNames; 
+        outputTypes["fasta"] = tempOutNames;
+        outputTypes["list"] = tempOutNames;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "PrimerDesignCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+PrimerDesignCommand::PrimerDesignCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;   
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+               
+               else {
+                       //valid paramters for this command
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+            vector<string> tempOutNames;
+            outputTypes["summary"] = tempOutNames; 
+            outputTypes["fasta"] = tempOutNames;
+            outputTypes["list"] = tempOutNames;
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("fasta");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
+                               }
+                
+                               it = parameters.find("name");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["name"] = inputDir + it->second;             }
+                               }
+                
+                it = parameters.find("list");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["list"] = inputDir + it->second;             }
+                               }
+            }
+                        
+                       //check for parameters
+                       namefile = validParameter.validFile(parameters, "name", true);
+                       if (namefile == "not open") { abort = true; }   
+                       else if (namefile == "not found") { namefile = ""; }
+                       else { m->setNameFile(namefile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { countfile = ""; abort = true; }
+                       else if (countfile == "not found") { countfile = "";  } 
+                       else { m->setCountTableFile(countfile); }
+            
+            //get fastafile - it is required
+            fastafile = validParameter.validFile(parameters, "fasta", true);
+                       if (fastafile == "not open") { fastafile = ""; abort=true;  }
+                       else if (fastafile == "not found") {  
+                fastafile = m->getFastaFile(); 
+                               if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
+            }else  { m->setFastaFile(fastafile); }
+            
+            //get listfile - it is required
+            listfile = validParameter.validFile(parameters, "list", true);
+                       if (listfile == "not open") { listfile = ""; abort=true;  }
+                       else if (listfile == "not found") {  
+                listfile = m->getListFile(); 
+                               if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
+            }else  { m->setListFile(listfile); }
+
+            
+                       if ((namefile != "") && (countfile != "")) {
+                m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+            }
+                       
+            
+            //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = m->hasPath(listfile); //if user entered a file with a path then preserve it 
+                       }
+            
+            string temp = validParameter.validFile(parameters, "cutoff", false);  if (temp == "not found") { temp = "100"; }
+                       m->mothurConvert(temp, cutoff); 
+            
+            temp = validParameter.validFile(parameters, "pdiffs", false);  if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, pdiffs); 
+            
+            temp = validParameter.validFile(parameters, "length", false);  if (temp == "not found") { temp = "18"; }
+                       m->mothurConvert(temp, length); 
+            
+            temp = validParameter.validFile(parameters, "mintm", false);  if (temp == "not found") { temp = "-1"; }
+                       m->mothurConvert(temp, minTM); 
+            
+            temp = validParameter.validFile(parameters, "maxtm", false);  if (temp == "not found") { temp = "-1"; }
+                       m->mothurConvert(temp, maxTM); 
+            
+            temp = validParameter.validFile(parameters, "otunumber", false);  if (temp == "not found") { temp = "-1"; }
+                       m->mothurConvert(temp, otunumber); 
+            if (otunumber < 1) {  m->mothurOut("[ERROR]: You must provide an OTU number, aborting.\n"); abort = true; }
+            
+            temp = validParameter.validFile(parameters, "processors", false);  if (temp == "not found"){       temp = m->getProcessors();      }
+                       m->setProcessors(temp);
+                       m->mothurConvert(temp, processors);
+            
+            label = validParameter.validFile(parameters, "label", false);                      
+                       if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
+        
+            if (countfile == "") { 
+                if (namefile == "") {
+                    vector<string> files; files.push_back(fastafile);
+                    parser.getNameFile(files);
+                }
+            }
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "PrimerDesignCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int PrimerDesignCommand::execute(){
+       try {
+               
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        int start = time(NULL);
+        //////////////////////////////////////////////////////////////////////////////
+        //              get file inputs                                             //
+        //////////////////////////////////////////////////////////////////////////////
+        
+        //reads list file and selects the label the users specified or the first label
+        getListVector();
+        if (otunumber > list->getNumBins()) { m->mothurOut("[ERROR]: You selected an OTU number larger than the number of OTUs you have in your list file, quitting.\n"); return 0; }
+        
+        map<string, int> nameMap;
+        unsigned long int numSeqs;  //used to sanity check the files. numSeqs = total seqs for namefile and uniques for count.
+                                    //list file should have all seqs if namefile was used to create it and only uniques in count file was used.
+        
+        if (namefile != "")         {  nameMap = m->readNames(namefile, numSeqs);       }
+        else if (countfile != "")   {  nameMap = readCount(numSeqs);                    }
+        else { numSeqs = list->getNumSeqs();  }
+        
+        //sanity check
+        if (numSeqs != list->getNumSeqs()) {
+            if (namefile != "")         {  m->mothurOut("[ERROR]: Your list file contains " + toString(list->getNumSeqs()) + " sequences, and your name file contains " + toString(numSeqs) + " sequences, aborting. Do you have the correct files? Perhaps you forgot to include the name file when you clustered? \n");   }
+            else if (countfile != "") {
+                m->mothurOut("[ERROR]: Your list file contains " + toString(list->getNumSeqs()) + " sequences, and your count file contains " + toString(numSeqs) + " unique sequences, aborting. Do you have the correct files? Perhaps you forgot to include the count file when you clustered? \n");  
+            }
+            m->control_pressed = true;
+        }
+        
+        if (m->control_pressed) { delete list; return 0; }
+        
+        //////////////////////////////////////////////////////////////////////////////
+        //              process data                                                //
+        //////////////////////////////////////////////////////////////////////////////
+        m->mothurOut("\nFinding consensus sequences for each otu..."); cout.flush();
+        
+        vector<Sequence> conSeqs = createProcessesConSeqs(nameMap, numSeqs);
+        
+        map<string, string> variables; 
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
+        variables["[distance]"] = list->getLabel();
+        string consFastaFile = getOutputFileName("fasta", variables);
+        outputNames.push_back(consFastaFile); outputTypes["fasta"].push_back(consFastaFile);
+        ofstream out;
+        m->openOutputFile(consFastaFile, out);
+        for (int i = 0; i < conSeqs.size(); i++) {  conSeqs[i].printSequence(out);  }
+        out.close();
+        
+        m->mothurOut("Done.\n\n");
+        
+        set<string> primers = getPrimer(conSeqs[otunumber-1]);  
+        
+        if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0; }
+        
+        string consSummaryFile = getOutputFileName("summary", variables);
+        outputNames.push_back(consSummaryFile); outputTypes["summary"].push_back(consSummaryFile);
+        ofstream outSum;
+        m->openOutputFile(consSummaryFile, outSum);
+        
+        outSum << "PrimerOtu: " << otunumber << " Members: " << list->get(otunumber-1) << endl << "Primers\tminTm\tmaxTm" << endl;
+        
+        //find min and max melting points
+        vector<double> minTms;
+        vector<double> maxTms;
+        string primerString = "";
+        for (set<string>::iterator it = primers.begin(); it != primers.end();) {
+            
+            double minTm, maxTm;
+            findMeltingPoint(*it, minTm, maxTm);
+            if ((minTM == -1) && (maxTM == -1)) { //user did not set min or max Tm so save this primer
+                minTms.push_back(minTm);
+                maxTms.push_back(maxTm);
+                outSum << *it << '\t' << minTm << '\t' << maxTm << endl;
+                it++;
+            }else if ((minTM == -1) && (maxTm <= maxTM)){ //user set max and no min, keep if below max
+                minTms.push_back(minTm);
+                maxTms.push_back(maxTm);
+                outSum << *it << '\t' << minTm << '\t' << maxTm << endl;
+                it++;
+            }else if ((maxTM == -1) && (minTm >= minTM)){ //user set min and no max, keep if above min
+                minTms.push_back(minTm);
+                maxTms.push_back(maxTm);
+                outSum << *it << '\t' << minTm << '\t' << maxTm << endl;
+                it++;
+            }else if ((maxTm <= maxTM) && (minTm >= minTM)) { //keep if above min and below max
+                minTms.push_back(minTm);
+                maxTms.push_back(maxTm);
+                outSum << *it << '\t' << minTm << '\t' << maxTm << endl;
+                it++;
+            }else { primers.erase(it++);  } //erase because it didn't qualify
+        }
+        
+        outSum << "\nOTUNumber\tPrimer\tStart\tEnd\tLength\tMismatches\tminTm\tmaxTm\n";
+        outSum.close();
+        
+        //check each otu's conseq for each primer in otunumber
+        set<int> otuToRemove = createProcesses(consSummaryFile, minTms, maxTms, primers, conSeqs);
+        
+        if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0; }
+        
+        //print new list file
+        map<string, string> mvariables; 
+        mvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
+        mvariables["[extension]"] = m->getExtension(listfile);
+        string newListFile = getOutputFileName("list", mvariables);
+        outputNames.push_back(newListFile); outputTypes["list"].push_back(newListFile);
+        ofstream outList;
+        m->openOutputFile(newListFile, outList);
+        
+        outList << list->getLabel() << '\t' << (list->getNumBins()-otuToRemove.size()) << '\t';
+        for (int j = 0; j < list->getNumBins(); j++) {
+            if (m->control_pressed) { break; }
+            //good otus
+            if (otuToRemove.count(j) == 0) {  
+                string bin = list->get(j);
+                if (bin != "") {  outList << bin << '\t';  } 
+            }
+        }
+        outList << endl;
+        outList.close();
+        
+        if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0; }
+        
+        delete list;
+        
+        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(list->getNumBins()) + " OTUs.\n");
+        
+        
+        //output files created by command
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+        
+        return 0;
+               
+    }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "execute");
+               exit(1);
+       }
+}
+//********************************************************************/
+//used http://www.biophp.org/minitools/melting_temperature/ as a reference to substitute degenerate bases 
+// in order to find the min and max Tm values.
+//Tm =  64.9°C + 41°C x (number of G’s and C’s in the primer â€“ 16.4)/N
+
+/* A = adenine
+ * C = cytosine
+ * G = guanine
+ * T = thymine
+ * R = G A (purine)
+ * Y = T C (pyrimidine)
+ * K = G T (keto)
+ * M = A C (amino)
+ * S = G C (strong bonds)
+ * W = A T (weak bonds)
+ * B = G T C (all but A)
+ * D = G A T (all but C)
+ * H = A C T (all but G)
+ * V = G C A (all but T)
+ * N = A G C T (any) */
+
+int PrimerDesignCommand::findMeltingPoint(string primer, double& minTm, double& maxTm){
+    try {
+        string minTmprimer = primer;
+        string maxTmprimer = primer;
+        
+        //find minimum Tm string substituting for degenerate bases
+        for (int i = 0; i < minTmprimer.length(); i++) {
+            minTmprimer[i] = toupper(minTmprimer[i]);
+            
+            if (minTmprimer[i] == 'Y') { minTmprimer[i] = 'A'; }
+            else if (minTmprimer[i] == 'R') { minTmprimer[i] = 'A'; }
+            else if (minTmprimer[i] == 'W') { minTmprimer[i] = 'A'; }
+            else if (minTmprimer[i] == 'K') { minTmprimer[i] = 'A'; }
+            else if (minTmprimer[i] == 'M') { minTmprimer[i] = 'A'; }
+            else if (minTmprimer[i] == 'D') { minTmprimer[i] = 'A'; }
+            else if (minTmprimer[i] == 'V') { minTmprimer[i] = 'A'; }
+            else if (minTmprimer[i] == 'H') { minTmprimer[i] = 'A'; }
+            else if (minTmprimer[i] == 'B') { minTmprimer[i] = 'A'; }
+            else if (minTmprimer[i] == 'N') { minTmprimer[i] = 'A'; }
+            else if (minTmprimer[i] == 'S') { minTmprimer[i] = 'G'; }
+        }
+        
+        //find maximum Tm string substituting for degenerate bases
+        for (int i = 0; i < maxTmprimer.length(); i++) {
+            maxTmprimer[i] = toupper(maxTmprimer[i]);
+            
+            if (maxTmprimer[i] == 'Y') { maxTmprimer[i] = 'G'; }
+            else if (maxTmprimer[i] == 'R') { maxTmprimer[i] = 'G'; }
+            else if (maxTmprimer[i] == 'W') { maxTmprimer[i] = 'A'; }
+            else if (maxTmprimer[i] == 'K') { maxTmprimer[i] = 'G'; }
+            else if (maxTmprimer[i] == 'M') { maxTmprimer[i] = 'G'; }
+            else if (maxTmprimer[i] == 'D') { maxTmprimer[i] = 'G'; }
+            else if (maxTmprimer[i] == 'V') { maxTmprimer[i] = 'G'; }
+            else if (maxTmprimer[i] == 'H') { maxTmprimer[i] = 'G'; }
+            else if (maxTmprimer[i] == 'B') { maxTmprimer[i] = 'G'; }
+            else if (maxTmprimer[i] == 'N') { maxTmprimer[i] = 'G'; }
+            else if (maxTmprimer[i] == 'S') { maxTmprimer[i] = 'G'; }
+        }
+        
+        int numGC = 0;
+        for (int i = 0; i < minTmprimer.length(); i++) {
+            if (minTmprimer[i] == 'G')       { numGC++; }
+            else if (minTmprimer[i] == 'C')  { numGC++; }
+        }
+        
+        minTm = 64.9 + 41 * (numGC - 16.4) / (double) minTmprimer.length();
+        
+        numGC = 0;
+        for (int i = 0; i < maxTmprimer.length(); i++) {
+            if (maxTmprimer[i] == 'G')       { numGC++; }
+            else if (maxTmprimer[i] == 'C')  { numGC++; }
+        }
+        
+        maxTm = 64.9 + 41 * (numGC - 16.4) / (double) maxTmprimer.length();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "findMeltingPoint");
+               exit(1);
+       }
+}
+//********************************************************************/
+//search for a primer over the sequence string
+bool PrimerDesignCommand::findPrimer(string rawSequence, string primer, vector<int>& primerStart, vector<int>& primerEnd, vector<int>& mismatches){
+       try {
+        bool foundAtLeastOne = false;  //innocent til proven guilty
+        
+        //look for exact match
+        if(rawSequence.length() < primer.length()) {  return false;  }
+                       
+        //search for primer
+        for (int j = 0; j < rawSequence.length()-length; j++){
+            
+            if (m->control_pressed) {  return foundAtLeastOne; }
+            
+            string rawChunk = rawSequence.substr(j, length);
+            
+            int numDiff = countDiffs(primer, rawChunk);
+           
+            if(numDiff <= pdiffs){
+                primerStart.push_back(j);
+                primerEnd.push_back(j+length);
+                mismatches.push_back(numDiff);
+                foundAtLeastOne = true;
+            }
+        }
+               
+               return foundAtLeastOne;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "findPrimer");
+               exit(1);
+       }
+}
+//********************************************************************/
+//find all primers for the given sequence
+set<string> PrimerDesignCommand::getPrimer(Sequence primerSeq){
+       try {
+        set<string> primers;
+        
+        string rawSequence = primerSeq.getUnaligned();
+        
+        for (int j = 0; j < rawSequence.length()-length; j++){
+            if (m->control_pressed) { break; }
+            
+            string primer = rawSequence.substr(j, length);
+            primers.insert(primer);
+        }
+        
+        return primers;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "getPrimer");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+set<int> PrimerDesignCommand::createProcesses(string newSummaryFile, vector<double>& minTms, vector<double>& maxTms, set<string>& primers, vector<Sequence>& conSeqs) {
+       try {
+               
+               vector<int> processIDS;
+               int process = 1;
+        set<int> otusToRemove;
+        int numBinsProcessed = 0;
+               
+               //sanity check
+        int numBins = conSeqs.size();
+               if (numBins < processors) { processors = numBins; }
+               
+               //divide the otus between the processors
+               vector<linePair> lines;
+               int numOtusPerProcessor = numBins / processors;
+               for (int i = 0; i < processors; i++) {
+                       int startIndex =  i * numOtusPerProcessor;
+                       int endIndex = (i+1) * numOtusPerProcessor;
+                       if(i == (processors - 1)){      endIndex = numBins;     }
+                       lines.push_back(linePair(startIndex, endIndex));
+               }
+               
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                //clear old file because we append in driver
+                m->mothurRemove(newSummaryFile + toString(getpid()) + ".temp");
+                
+                               otusToRemove = driver(newSummaryFile + toString(getpid()) + ".temp", minTms, maxTms, primers, conSeqs, lines[process].start, lines[process].end, numBinsProcessed);
+                
+                string tempFile = toString(getpid()) + ".otus2Remove.temp";
+                ofstream outTemp;
+                m->openOutputFile(tempFile, outTemp);
+                
+                outTemp << numBinsProcessed << endl;
+                outTemp << otusToRemove.size() << endl;
+                for (set<int>::iterator it = otusToRemove.begin(); it != otusToRemove.end(); it++) { outTemp << *it << endl; }
+                outTemp.close();
+                
+                               exit(0);
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
+                       }
+               }
+               
+               //do my part
+               otusToRemove = driver(newSummaryFile, minTms, maxTms, primers, conSeqs, lines[0].start, lines[0].end, numBinsProcessed);
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+        
+        for (int i = 0; i < processIDS.size(); i++) {
+            string tempFile = toString(processIDS[i]) +  ".otus2Remove.temp";
+            ifstream intemp;
+            m->openInputFile(tempFile, intemp);
+            
+            int num;
+            intemp >> num; m->gobble(intemp);
+            if (num != (lines[i+1].end - lines[i+1].start)) { m->mothurOut("[ERROR]: process " + toString(processIDS[i]) + " did not complete processing all OTUs assigned to it, quitting.\n"); m->control_pressed = true; }
+            intemp >> num; m->gobble(intemp);
+            for (int k = 0; k < num; k++) {
+                int otu;
+                intemp >> otu; m->gobble(intemp);
+                otusToRemove.insert(otu); 
+            }
+            intemp.close();
+            m->mothurRemove(tempFile);
+        }
+
+        
+    #else
+               
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the primerDesignData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<primerDesignData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=1; i<processors; i++ ){
+                       // Allocate memory for thread data.
+                       string extension = toString(i) + ".temp";
+                       m->mothurRemove(newSummaryFile+extension);
+            
+                       primerDesignData* tempPrimer = new primerDesignData((newSummaryFile+extension), m, lines[i].start, lines[i].end, minTms, maxTms, primers, conSeqs, pdiffs, otunumber, length, i);
+                       pDataArray.push_back(tempPrimer);
+                       processIDS.push_back(i);
+                       
+                       //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
+                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+                       hThreadArray[i-1] = CreateThread(NULL, 0, MyPrimerThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
+               }
+               
+        
+               //using the main process as a worker saves time and memory
+               otusToRemove = driver(newSummaryFile, minTms, maxTms, primers, conSeqs, lines[0].start, lines[0].end, numBinsProcessed);
+               
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+            for (set<int>::iterator it = pDataArray[i]->otusToRemove.begin(); it != pDataArray[i]->otusToRemove.end(); it++) { 
+                               otusToRemove.insert(*it);  
+                       }
+            int num = pDataArray[i]->numBinsProcessed;
+            if (num != (lines[processIDS[i]].end - lines[processIDS[i]].start)) { m->mothurOut("[ERROR]: process " + toString(processIDS[i]) + " did not complete processing all OTUs assigned to it, quitting.\n"); m->control_pressed = true; }
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+               
+#endif         
+               
+               //append output files
+               for(int i=0;i<processIDS.size();i++){
+                       m->appendFiles((newSummaryFile + toString(processIDS[i]) + ".temp"), newSummaryFile);
+                       m->mothurRemove((newSummaryFile + toString(processIDS[i]) + ".temp"));
+               }
+               
+               return otusToRemove;    
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "createProcesses");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+set<int> PrimerDesignCommand::driver(string summaryFileName, vector<double>& minTms, vector<double>& maxTms, set<string>& primers, vector<Sequence>& conSeqs, int start, int end, int& numBinsProcessed){
+       try {
+        set<int> otuToRemove;
+        
+        ofstream outSum;
+        m->openOutputFileAppend(summaryFileName, outSum);
+        
+        for (int i = start; i < end; i++) {
+        
+            if (m->control_pressed) { break; }
+            
+            if (i != (otunumber-1)) {
+                int primerIndex = 0;
+                for (set<string>::iterator it = primers.begin(); it != primers.end(); it++) {
+                    vector<int> primerStarts;
+                    vector<int> primerEnds;
+                    vector<int> mismatches;
+                    
+                    bool found = findPrimer(conSeqs[i].getUnaligned(), (*it), primerStarts, primerEnds, mismatches);
+                    
+                    //if we found it report to the table
+                    if (found) {
+                        for (int j = 0; j < primerStarts.size(); j++) {
+                            outSum << (i+1) << '\t' << *it << '\t' << primerStarts[j] << '\t' << primerEnds[j] << '\t' << length << '\t' << mismatches[j] << '\t' << minTms[primerIndex] << '\t' << maxTms[primerIndex] << endl;
+                        }
+                        otuToRemove.insert(i);
+                    }
+                    primerIndex++;
+                }
+            }
+            numBinsProcessed++;
+        }
+        outSum.close();
+        
+        
+        return otuToRemove;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "driver");
+               exit(1);
+       }
+}
+/**************************************************************************************************/ 
+vector< vector< vector<unsigned int> > > PrimerDesignCommand::driverGetCounts(map<string, int>& nameMap, unsigned long int& fastaCount, vector<unsigned int>& otuCounts, unsigned long long& start, unsigned long long& end){
+    try {
+        vector< vector< vector<unsigned int> > > counts;
+        map<string, int> seq2Bin;
+        alignedLength = 0;
+        
+        ifstream in;
+               m->openInputFile(fastafile, in);
+        
+               in.seekg(start);
+        
+               bool done = false;
+               fastaCount = 0;
+        
+               while (!done) {
+            if (m->control_pressed) { in.close(); return counts; }
+            
+                       Sequence seq(in); m->gobble(in);
+            
+                       if (seq.getName() != "") {
+                if (fastaCount == 0) { alignedLength = seq.getAligned().length(); initializeCounts(counts, alignedLength, seq2Bin, nameMap, otuCounts); }
+                else if (alignedLength != seq.getAligned().length()) {
+                    m->mothurOut("[ERROR]: your sequences are not all the same length. primer.design requires sequences to be aligned."); m->mothurOutEndLine(); m->control_pressed = true; break;
+                }
+                
+                int num = 1;
+                map<string, int>::iterator itCount;
+                if (namefile != "") { 
+                    itCount = nameMap.find(seq.getName());
+                    if (itCount == nameMap.end()) {  m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your name file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; break; }
+                    else { num = itCount->second; }
+                    fastaCount+=num;
+                }else if (countfile != "") {
+                    itCount = nameMap.find(seq.getName());
+                    if (itCount == nameMap.end()) {  m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your count file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; break; }
+                    else { num = itCount->second; }
+                    fastaCount++;
+                }else {
+                    fastaCount++;
+                }
+                
+                //increment counts
+                itCount = seq2Bin.find(seq.getName());
+                if (itCount == seq2Bin.end()) {
+                    if ((namefile != "") || (countfile != "")) {
+                        m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your list file, aborting. Perhaps you forgot to include your name or count file while clustering.\n"); m->mothurOutEndLine(); m->control_pressed = true; break;
+                    }else{
+                        m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your list file, aborting."); m->mothurOutEndLine(); m->control_pressed = true; break;
+                    }
+                }else {
+                    otuCounts[itCount->second] += num;
+                    string aligned = seq.getAligned();
+                    for (int i = 0; i < alignedLength; i++) {
+                        char base = toupper(aligned[i]);
+                        if (base == 'A') { counts[itCount->second][i][0]+=num; }
+                        else if (base == 'T') { counts[itCount->second][i][1]+=num; }
+                        else if (base == 'G') { counts[itCount->second][i][2]+=num; }
+                        else if (base == 'C') { counts[itCount->second][i][3]+=num; }
+                        else { counts[itCount->second][i][4]+=num; }
+                    }
+                }
+
+            }
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            unsigned long long pos = in.tellg();
+            if ((pos == -1) || (pos >= end)) { break; }
+#else
+            if (in.eof()) { break; }
+#endif
+               }
+                               
+               in.close();
+        
+        return counts;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "driverGetCounts");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+vector<Sequence> PrimerDesignCommand::createProcessesConSeqs(map<string, int>& nameMap, unsigned long int& numSeqs) {
+       try {
+        vector< vector< vector<unsigned int> > > counts;
+        vector<unsigned int> otuCounts;
+               vector<int> processIDS;
+               int process = 1;
+        unsigned long int fastaCount = 0;
+               
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
+               
+        vector<unsigned long long> positions; 
+        vector<fastaLinePair> lines;
+        positions = m->divideFile(fastafile, processors);
+        for (int i = 0; i < (positions.size()-1); i++) {       lines.push_back(fastaLinePair(positions[i], positions[(i+1)])); }
+
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               counts = driverGetCounts(nameMap, fastaCount, otuCounts, lines[process].start, lines[process].end);
+                
+                string tempFile = toString(getpid()) + ".cons_counts.temp";
+                ofstream outTemp;
+                m->openOutputFile(tempFile, outTemp);
+                
+                outTemp << fastaCount << endl;
+                //pass counts
+                outTemp << counts.size() << endl;
+                for (int i = 0; i < counts.size(); i++) { 
+                    outTemp << counts[i].size() << endl;
+                    for (int j = 0; j < counts[i].size(); j++) { 
+                        for (int k = 0; k < 5; k++) {  outTemp << counts[i][j][k] << '\t'; }
+                        outTemp << endl;
+                    }
+                }
+                //pass otuCounts
+                outTemp << otuCounts.size() << endl;
+                for (int i = 0; i < otuCounts.size(); i++) { outTemp << otuCounts[i] << '\t'; }
+                outTemp << endl;
+                outTemp.close();
+                
+                               exit(0);
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
+                       }
+               }
+               
+               //do my part
+               counts = driverGetCounts(nameMap, fastaCount, otuCounts, lines[0].start, lines[0].end);
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+        
+        for (int i = 0; i < processIDS.size(); i++) {
+            string tempFile = toString(processIDS[i]) +  ".cons_counts.temp";
+            ifstream intemp;
+            m->openInputFile(tempFile, intemp);
+            
+            unsigned long int num;
+            intemp >> num; m->gobble(intemp); fastaCount += num;
+            intemp >> num; m->gobble(intemp);
+            if (num != counts.size()) { m->mothurOut("[ERROR]: " + tempFile + " was not built correctly by the child process, quitting.\n"); m->control_pressed = true; }
+            else {
+                //read counts
+                for (int k = 0; k < num; k++) {
+                    int alength;
+                    intemp >> alength; m->gobble(intemp);
+                    if (alength != alignedLength) {  m->mothurOut("[ERROR]: your sequences are not all the same length. primer.design requires sequences to be aligned."); m->mothurOutEndLine(); m->control_pressed = true; }
+                    else {
+                        for (int j = 0; j < alength; j++) {
+                            for (int l = 0; l < 5; l++) {  unsigned int numTemp; intemp >> numTemp; m->gobble(intemp); counts[k][j][l] += numTemp;  }
+                        }
+                    }
+                }
+                //read otuCounts
+                intemp >> num; m->gobble(intemp);
+                for (int k = 0; k < num; k++) {  
+                    unsigned int numTemp; intemp >> numTemp; m->gobble(intemp); 
+                    otuCounts[k] += numTemp; 
+                }
+            }
+            intemp.close();
+            m->mothurRemove(tempFile);
+        }
+        
+        
+#else
+               counts = driverGetCounts(nameMap, fastaCount, otuCounts, 0, 1000);      
+#endif         
+        
+        //you will have a nameMap error if there is a namefile or countfile, but if those aren't given we want to make sure the fasta and list file match.
+        if (fastaCount != numSeqs) {
+            if ((namefile == "") && (countfile == ""))   {  m->mothurOut("[ERROR]: Your list file contains " + toString(list->getNumSeqs()) + " sequences, and your fasta file contains " + toString(fastaCount) + " sequences, aborting. Do you have the correct files? Perhaps you forgot to include the name or count file? \n");   }
+            m->control_pressed = true;
+        }
+        
+               vector<Sequence> conSeqs;
+        
+        if (m->control_pressed) { return conSeqs; }
+        
+               //build consensus seqs
+        string snumBins = toString(counts.size());
+        for (int i = 0; i < counts.size(); i++) {
+            if (m->control_pressed) { break; }
+            
+            string otuLabel = "Otu";
+            string sbinNumber = toString(i+1);
+            if (sbinNumber.length() < snumBins.length()) { 
+                int diff = snumBins.length() - sbinNumber.length();
+                for (int h = 0; h < diff; h++) { otuLabel += "0"; }
+            }
+            otuLabel += sbinNumber; 
+            
+            string cons = "";
+            for (int j = 0; j < counts[i].size(); j++) {
+                cons += getBase(counts[i][j], otuCounts[i]);
+            }
+            Sequence consSeq(otuLabel, cons);
+            conSeqs.push_back(consSeq);
+        }
+        
+        if (m->control_pressed) { conSeqs.clear(); return conSeqs; }
+        
+        return conSeqs;
+       
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "createProcessesConSeqs");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+
+char PrimerDesignCommand::getBase(vector<unsigned int> counts, int size){  //A,T,G,C,Gap
+       try{
+               /* A = adenine
+         * C = cytosine
+         * G = guanine
+         * T = thymine
+         * R = G A (purine)
+         * Y = T C (pyrimidine)
+         * K = G T (keto)
+         * M = A C (amino)
+         * S = G C (strong bonds)
+         * W = A T (weak bonds)
+         * B = G T C (all but A)
+         * D = G A T (all but C)
+         * H = A C T (all but G)
+         * V = G C A (all but T)
+         * N = A G C T (any) */
+               
+               char conBase = 'N';
+               
+               //zero out counts that don't make the cutoff
+               float percentage = (100.0 - cutoff) / 100.0;
+        
+               for (int i = 0; i < counts.size(); i++) {
+            float countPercentage = counts[i] / (float) size;
+                       if (countPercentage < percentage) { counts[i] = 0; }
+               }
+               
+               //any
+               if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'n'; }
+               //any no gap
+               else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'N'; }
+               //all but T
+               else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'v'; }  
+               //all but T no gap
+               else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'V'; }  
+               //all but G
+               else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'h'; }  
+               //all but G no gap
+               else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'H'; }  
+               //all but C
+               else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'd'; }  
+               //all but C no gap
+               else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'D'; }  
+               //all but A
+               else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'b'; }  
+               //all but A no gap
+               else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'B'; }  
+               //W = A T (weak bonds)
+               else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'w'; }  
+               //W = A T (weak bonds) no gap
+               else if ((counts[0] != 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'W'; }  
+               //S = G C (strong bonds)
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 's'; }  
+               //S = G C (strong bonds) no gap
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'S'; }  
+               //M = A C (amino)
+               else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'm'; }  
+               //M = A C (amino) no gap
+               else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'M'; }  
+               //K = G T (keto)
+               else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'k'; }  
+               //K = G T (keto) no gap
+               else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'K'; }  
+               //Y = T C (pyrimidine)
+               else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'y'; }  
+               //Y = T C (pyrimidine) no gap
+               else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'Y'; }  
+               //R = G A (purine)
+               else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'r'; }  
+               //R = G A (purine) no gap
+               else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'R'; }  
+               //only A
+               else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'a'; }  
+               //only A no gap
+               else if ((counts[0] != 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'A'; }  
+               //only T
+               else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 't'; }  
+               //only T no gap
+               else if ((counts[0] == 0) && (counts[1] != 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'T'; }  
+               //only G
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = 'g'; }  
+               //only G no gap
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] != 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'G'; }  
+               //only C
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] != 0)) {  conBase = 'c'; }  
+               //only C no gap
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] != 0) && (counts[4] == 0)) {  conBase = 'C'; }  
+               //only gap
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] != 0)) {  conBase = '-'; }
+               //cutoff removed all counts
+               else if ((counts[0] == 0) && (counts[1] == 0) && (counts[2] == 0) && (counts[3] == 0) && (counts[4] == 0)) {  conBase = 'N'; }
+               else{ m->mothurOut("[ERROR]: cannot find consensus base."); m->mothurOutEndLine(); }
+               
+               return conBase;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "getBase");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+int PrimerDesignCommand::initializeCounts(vector< vector< vector<unsigned int> > >& counts, int length, map<string, int>& seq2Bin, map<string, int>& nameMap, vector<unsigned int>& otuCounts){
+       try {
+        counts.clear();
+        otuCounts.clear();
+        seq2Bin.clear();
+        
+        //vector< vector< vector<unsigned int> > > counts - otu < spot_in_alignment < counts_for_A,T,G,C,Gap > > >
+        for (int i = 0; i < list->getNumBins(); i++) {
+            string binNames = list->get(i);
+            vector<string> names;
+            m->splitAtComma(binNames, names);
+            otuCounts.push_back(0);
+            
+            //lets be smart and only map the unique names if a name or count file was given to save search time and memory
+            if ((namefile != "") || (countfile != "")) {
+                for (int j = 0; j < names.size(); j++) {
+                    map<string, int>::iterator itNames = nameMap.find(names[j]);
+                    if (itNames != nameMap.end()) { //add name because its a unique one
+                        seq2Bin[names[j]] = i;
+                    }
+                }
+            }else { //map everyone
+                for (int j = 0; j < names.size(); j++) { seq2Bin[names[j]] = i;  }
+            }
+            
+            vector<unsigned int> temp; temp.resize(5, 0); //A,T,G,C,Gap
+            vector< vector<unsigned int> > temp2;
+            for (int j = 0; j < length; j++) {
+                temp2.push_back(temp);
+            }
+            counts.push_back(temp2);
+        }
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "initializeCounts");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+map<string, int> PrimerDesignCommand::readCount(unsigned long int& numSeqs){
+       try {
+        map<string, int> nameMap;
+        
+        CountTable ct;
+        ct.readTable(countfile);
+        vector<string> namesOfSeqs = ct.getNamesOfSeqs();
+        numSeqs = ct.getNumUniqueSeqs();
+        
+        for (int i = 0; i < namesOfSeqs.size(); i++) {
+            if (m->control_pressed) { break; }
+            
+            nameMap[namesOfSeqs[i]] = ct.getNumSeqs(namesOfSeqs[i]);
+        }
+        
+        return nameMap;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "readCount");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int PrimerDesignCommand::getListVector(){
+       try {
+               InputData input(listfile, "list");
+               list = input.getListVector();
+               string lastLabel = list->getLabel();
+               
+               if (label == "") { label = lastLabel;  return 0; }
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> labels; labels.insert(label);
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+               
+               //as long as you are not at the end of the file or done wih the lines you want
+               while((list != NULL) && (userLabels.size() != 0)) {
+                       if (m->control_pressed) {  return 0;  }
+                       
+                       if(labels.count(list->getLabel()) == 1){
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               break;
+                       }
+                       
+                       if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = list->getLabel();
+                               
+                               delete list;
+                               list = input.getListVector(lastLabel);
+                               
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               list->setLabel(saveLabel);
+                               break;
+                       }
+                       
+                       lastLabel = list->getLabel();                   
+                       
+                       //get next line to process
+                       //prevent memory leak
+                       delete list; 
+                       list = input.getListVector();
+               }
+               
+               
+               if (m->control_pressed) {  return 0;  }
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       delete list; 
+                       list = input.getListVector(lastLabel);
+               }       
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "getListVector"); 
+               exit(1);
+       }
+}
+//********************************************************************/
+/* A = adenine
+ * C = cytosine
+ * G = guanine
+ * T = thymine
+ * R = G A (purine)
+ * Y = T C (pyrimidine)
+ * K = G T (keto)
+ * M = A C (amino)
+ * S = G C (strong bonds)
+ * W = A T (weak bonds)
+ * B = G T C (all but A)
+ * D = G A T (all but C)
+ * H = A C T (all but G)
+ * V = G C A (all but T)
+ * N = A G C T (any) */
+int PrimerDesignCommand::countDiffs(string oligo, string seq){
+       try {
+               
+               int length = oligo.length();
+               int countDiffs = 0;
+               
+               for(int i=0;i<length;i++){
+            
+                       oligo[i] = toupper(oligo[i]);
+            seq[i] = toupper(seq[i]);
+            
+                       if(oligo[i] != seq[i]){
+                if(oligo[i] == 'A' && (seq[i] != 'A' && seq[i] != 'M' && seq[i] != 'R' && seq[i] != 'W' && seq[i] != 'D' && seq[i] != 'H' && seq[i] != 'V'))       {   countDiffs++;   }
+                else if(oligo[i] == 'C' && (seq[i] != 'C' && seq[i] != 'Y' && seq[i] != 'M' && seq[i] != 'S' && seq[i] != 'B' && seq[i] != 'H' && seq[i] != 'V'))       {      countDiffs++;   }
+                else if(oligo[i] == 'G' && (seq[i] != 'G' && seq[i] != 'R' && seq[i] != 'K' && seq[i] != 'S' && seq[i] != 'B' && seq[i] != 'D' && seq[i] != 'V'))       {      countDiffs++;   }
+                else if(oligo[i] == 'T' && (seq[i] != 'T' && seq[i] != 'Y' && seq[i] != 'K' && seq[i] != 'W' && seq[i] != 'B' && seq[i] != 'D' && seq[i] != 'H'))       {      countDiffs++;   }
+                else if((oligo[i] == '.' || oligo[i] == '-'))           {      countDiffs++;   }
+                               else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                         {      countDiffs++;   }
+                               else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                        {   countDiffs++;   }
+                               else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                        {   countDiffs++;   }
+                               else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                        {   countDiffs++;   }
+                               else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                        {   countDiffs++;   }
+                               else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                        {   countDiffs++;   }
+                               else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                        {   countDiffs++;   }
+                               else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))       {   countDiffs++;   }
+                               else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))       {   countDiffs++;   }
+                               else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))       {   countDiffs++;   }
+                               else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))       {   countDiffs++;   }       
+            }
+                       
+               }
+               
+               return countDiffs;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "countDiffs");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+
+
diff --git a/primerdesigncommand.h b/primerdesigncommand.h
new file mode 100644 (file)
index 0000000..78c12de
--- /dev/null
@@ -0,0 +1,219 @@
+//
+//  primerdesigncommand.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 1/18/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_primerdesigncommand_h
+#define Mothur_primerdesigncommand_h
+
+#include "command.hpp"
+#include "listvector.hpp"
+#include "inputdata.h"
+#include "sequence.hpp"
+#include "alignment.hpp"
+#include "needlemanoverlap.hpp"
+
+/**************************************************************************************************/
+
+class PrimerDesignCommand : public Command {
+public:
+    PrimerDesignCommand(string);
+    PrimerDesignCommand();
+    ~PrimerDesignCommand(){}
+    
+    vector<string> setParameters();
+    string getCommandName()                    { return "primer.design";               }
+    string getCommandCategory()                { return "OTU-Based Approaches";                } 
+    
+    string getOutputPattern(string);
+       string getHelpString(); 
+    string getCitation() { return "http://www.mothur.org/wiki/Primer.design"; }
+    string getDescription()            { return "design sequence fragments that are specific to particular OTUs"; }
+    
+    int execute(); 
+    void help() { m->mothurOut(getHelpString()); }     
+    
+private:
+    
+    struct linePair {
+               int start;
+               int end;
+               linePair(int i, int j) : start(i), end(j) {}
+       };
+    struct fastaLinePair {
+               unsigned long long start;
+               unsigned long long end;
+               fastaLinePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
+       };
+    
+    bool abort, allLines, large;
+    int cutoff, pdiffs, length, otunumber, processors, alignedLength;
+    string outputDir, listfile, namefile, countfile, fastafile, label;
+    double minTM, maxTM;
+    ListVector* list;
+    vector<string> outputNames;
+
+    int initializeCounts(vector< vector< vector<unsigned int> > >& counts, int length, map<string, int>&, map<string, int>&, vector<unsigned int>&);
+    map<string, int> readCount(unsigned long int&);
+    char getBase(vector<unsigned int> counts, int size);
+    int getListVector();
+    int countDiffs(string, string);
+    set<string> getPrimer(Sequence);
+    bool findPrimer(string, string, vector<int>&, vector<int>&, vector<int>&);
+    int findMeltingPoint(string primer, double&, double&);
+    
+    set<int> createProcesses(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&);
+    set<int> driver(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&, int, int, int&);
+    vector< vector< vector<unsigned int> > > driverGetCounts(map<string, int>&, unsigned long int&, vector<unsigned int>&, unsigned long long&, unsigned long long&);
+    vector<Sequence> createProcessesConSeqs(map<string, int>&, unsigned long int&);
+    
+};
+
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct primerDesignData {
+       string summaryFileName;
+       MothurOut* m;
+       int start;
+       int end;
+       int pdiffs, threadID, otunumber, length;
+       set<string> primers;
+       vector<double> minTms, maxTms;
+    set<int> otusToRemove;
+    vector<Sequence> consSeqs;
+    int numBinsProcessed;
+       
+       primerDesignData(){}
+       primerDesignData(string sf, MothurOut* mout, int st, int en, vector<double> min, vector<double> max, set<string> pri, vector<Sequence> seqs, int d, int otun, int l, int tid) {
+               summaryFileName = sf;
+               m = mout;
+               start = st;
+               end = en;
+               pdiffs = d;
+        minTms = min;
+        maxTms = max;
+        primers = pri;
+        consSeqs = seqs;
+        otunumber = otun;
+        length = l;
+               threadID = tid;
+        numBinsProcessed = 0;
+       }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyPrimerThreadFunction(LPVOID lpParam){ 
+       primerDesignData* pDataArray;
+       pDataArray = (primerDesignData*)lpParam;
+       
+       try {
+               ofstream outSum;
+        pDataArray->m->openOutputFileAppend(pDataArray->summaryFileName, outSum);
+        
+        for (int i = pDataArray->start; i < pDataArray->end; i++) {
+            
+            if (pDataArray->m->control_pressed) { break; }
+            
+            if (i != (pDataArray->otunumber-1)) {
+                int primerIndex = 0;
+                for (set<string>::iterator it = pDataArray->primers.begin(); it != pDataArray->primers.end(); it++) {
+                    vector<int> primerStarts;
+                    vector<int> primerEnds;
+                    vector<int> mismatches;
+                    
+                    //bool found = findPrimer(conSeqs[i].getUnaligned(), (*it), primerStarts, primerEnds, mismatches);
+                    ///////////////////////////////////////////////////////////////////////////////////////////////////
+                    bool found = false;  //innocent til proven guilty
+                    
+                    string rawSequence = pDataArray->consSeqs[i].getUnaligned();
+                    string primer = *it;
+                    
+                    //look for exact match
+                    if(rawSequence.length() < primer.length()) {  found = false;  }
+                    else {
+                        //search for primer
+                        for (int j = 0; j < rawSequence.length()-pDataArray->length; j++){
+                            
+                            if (pDataArray->m->control_pressed) {  found = false; break; }
+                            
+                            string rawChunk = rawSequence.substr(j, pDataArray->length);
+                            
+                            //int numDiff = countDiffs(primer, rawchuck);
+                            ///////////////////////////////////////////////////////////////////////
+                            int numDiff = 0;
+                            string oligo = primer;
+                            string seq = rawChunk;
+                            
+                            for(int k=0;k<pDataArray->length;k++){
+                                
+                                oligo[k] = toupper(oligo[k]);
+                                seq[k] = toupper(seq[k]);
+                               
+                                if(oligo[k] != seq[k]){
+            
+                                    if((oligo[k] == 'N' || oligo[k] == 'I') && (seq[k] == 'N'))                                {       numDiff++;      }
+                                    else if(oligo[k] == 'R' && (seq[k] != 'A' && seq[k] != 'G'))                                       {       numDiff++;      }
+                                    else if(oligo[k] == 'Y' && (seq[k] != 'C' && seq[k] != 'T'))                                       {       numDiff++;      }
+                                    else if(oligo[k] == 'M' && (seq[k] != 'C' && seq[k] != 'A'))                                       {       numDiff++;      }
+                                    else if(oligo[k] == 'K' && (seq[k] != 'T' && seq[k] != 'G'))                                       {       numDiff++;      }
+                                    else if(oligo[k] == 'W' && (seq[k] != 'T' && seq[k] != 'A'))                                       {       numDiff++;      }
+                                    else if(oligo[k] == 'S' && (seq[k] != 'C' && seq[k] != 'G'))                                       {       numDiff++;      }
+                                    else if(oligo[k] == 'B' && (seq[k] != 'C' && seq[k] != 'T' && seq[k] != 'G'))      {       numDiff++;      }
+                                    else if(oligo[k] == 'D' && (seq[k] != 'A' && seq[k] != 'T' && seq[k] != 'G'))      {       numDiff++;      }
+                                    else if(oligo[k] == 'H' && (seq[k] != 'A' && seq[k] != 'T' && seq[k] != 'C'))      {       numDiff++;      }
+                                    else if(oligo[k] == 'V' && (seq[k] != 'A' && seq[k] != 'C' && seq[k] != 'G'))      {       numDiff++;      }
+                                    else if(oligo[k] == 'A' && (seq[k] != 'A' && seq[k] != 'M' && seq[k] != 'R' && seq[k] != 'W' && seq[k] != 'D' && seq[k] != 'H' && seq[k] != 'V'))       {  numDiff++;      }
+                                    else if(oligo[k] == 'C' && (seq[k] != 'C' && seq[k] != 'Y' && seq[k] != 'M' && seq[k] != 'S' && seq[k] != 'B' && seq[k] != 'H' && seq[k] != 'V'))       {  numDiff++;      }
+                                    else if(oligo[k] == 'G' && (seq[k] != 'G' && seq[k] != 'R' && seq[k] != 'K' && seq[k] != 'S' && seq[k] != 'B' && seq[k] != 'D' && seq[k] != 'V'))       {  numDiff++;      }
+                                    else if(oligo[k] == 'T' && (seq[k] != 'T' && seq[k] != 'Y' && seq[k] != 'K' && seq[k] != 'W' && seq[k] != 'B' && seq[k] != 'D' && seq[k] != 'H'))       {  numDiff++;      }
+                                    else if((oligo[k] == '.' || oligo[k] == '-'))           {  numDiff++;      }
+                                }
+                            }
+                            ///////////////////////////////////////////////////////////////////////
+                            
+                            if(numDiff <= pDataArray->pdiffs){
+                                primerStarts.push_back(j);
+                                primerEnds.push_back(j+pDataArray->length);
+                                mismatches.push_back(numDiff);
+                                found = true;
+                            }
+                        }
+                    }
+                    ///////////////////////////////////////////////////////////////////////////////////////////////////
+                    
+                    //if we found it report to the table
+                    if (found) {
+                        for (int j = 0; j < primerStarts.size(); j++) {
+                            outSum << (i+1) << '\t' << *it << '\t' << primerStarts[j] << '\t' << primerEnds[j] << '\t' << pDataArray->length << '\t' << mismatches[j] << '\t' << pDataArray->minTms[primerIndex] << '\t' << pDataArray->maxTms[primerIndex] << endl;
+                        }
+                        pDataArray->otusToRemove.insert(i);
+                    }
+                    primerIndex++;
+                }
+            }
+            pDataArray->numBinsProcessed++;
+        }
+        outSum.close();
+        
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "PrimerDesignCommand", "MyPrimerThreadFunction");
+               exit(1);
+       }
+} 
+#endif
+
+/**************************************************************************************************/
+
+
+
+
+
+#endif
index 0b7bd0625b914d8a69b298a1b0e88187ddf6757c..4998b3d1c4dabd18b97fb8f33e36860127e757e5 100644 (file)
@@ -30,89 +30,37 @@ QualityScores::QualityScores(ifstream& qFile){
        try {
                
                m = MothurOut::getInstance();
-               
-               seqName = "";
+
                int score;
+               seqName = getSequenceName(qFile);
+               
+               if (!m->control_pressed) {
+            string qScoreString = m->getline(qFile);
+            //cout << qScoreString << endl;
+            while(qFile.peek() != '>' && qFile.peek() != EOF){
+                if (m->control_pressed) { break; }
+                string temp = m->getline(qFile);
+                //cout << temp << endl;
+                qScoreString +=  ' ' + temp;
+            }
+            //cout << "done reading " << endl; 
+            istringstream qScoreStringStream(qScoreString);
+            int count = 0;
+            while(!qScoreStringStream.eof()){
+                if (m->control_pressed) { break; }
+                string temp;
+                qScoreStringStream >> temp;  m->gobble(qScoreStringStream);
+                
+                //check temp to make sure its a number
+                if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
+                convert(temp, score);
+                
+                //cout << count << '\t' << score << endl;
+                qScores.push_back(score);
+                count++;
+            }
+        }
                
-               qFile >> seqName; 
-               m->getline(qFile);
-               //cout << seqName << endl;      
-               if (seqName == "")      {
-                       m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
-                       m->mothurOutEndLine(); 
-               }
-               else{
-                       seqName = seqName.substr(1);
-               }
-               
-               string qScoreString = m->getline(qFile);
-               //cout << qScoreString << endl;
-               while(qFile.peek() != '>' && qFile.peek() != EOF){
-                       if (m->control_pressed) { break; }
-                       string temp = m->getline(qFile);
-                       //cout << temp << endl;
-                       qScoreString +=  ' ' + temp;
-               }
-               //cout << "done reading " << endl;      
-               istringstream qScoreStringStream(qScoreString);
-               int count = 0;
-               while(!qScoreStringStream.eof()){
-                       if (m->control_pressed) { break; }
-                       string temp;
-                       qScoreStringStream >> temp;  m->gobble(qScoreStringStream);
-                       
-                       //check temp to make sure its a number
-                       if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
-                       convert(temp, score);
-                       
-                       //cout << count << '\t' << score << endl;
-                       qScores.push_back(score);
-                       count++;
-               }
-               //qScores.pop_back();
-               
-//             string scores = "";
-//             
-//             while(!qFile.eof()){    
-//                     
-//                     qFile >> seqName; 
-//                     
-//                     //get name
-//                     if (seqName.length() != 0) { 
-//                             seqName = seqName.substr(1);
-//                             while (!qFile.eof())    {       
-//                                     char c = qFile.get(); 
-//                                     //gobble junk on line
-//                                     if (c == 10 || c == 13){        break;  }
-//                             } 
-//                             m->gobble(qFile);
-//                     }
-//                     
-//                     //get scores
-//                     while(qFile){
-//                             char letter=qFile.get();
-//                             if((letter == '>')){    qFile.putback(letter);  break;  }
-//                             else if (isprint(letter)) { scores += letter; }
-//                     }
-//                     m->gobble(qFile);
-//                     
-//                     break;
-//             }
-//             
-//             //convert scores string to qScores
-//             istringstream qScoreStringStream(scores);
-//             
-//             int score;
-//             while(!qScoreStringStream.eof()){
-//                     
-//                     if (m->control_pressed) { break; }
-//                     
-//                     qScoreStringStream >> score;
-//                     qScores.push_back(score);
-//             }
-//             
-//             qScores.pop_back();
-
                seqLength = qScores.size();
                //cout << "seqlength = " << seqLength << '\t' << count << endl;
                
@@ -123,7 +71,46 @@ QualityScores::QualityScores(ifstream& qFile){
        }                                                       
        
 }
-
+//********************************************************************************************************************
+string QualityScores::getSequenceName(ifstream& qFile) {
+       try {
+               string name = "";
+               
+        qFile >> name;
+        m->getline(qFile);
+               
+               if (name.length() != 0) { 
+            
+                       name = name.substr(1); 
+            
+            for (int i = 0; i < name.length(); i++) {
+                if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
+            }
+            
+        }else{ m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;  }
+        
+               return name;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "QualityScores", "getSequenceName");
+               exit(1);
+       }
+}
+//********************************************************************************************************************
+void QualityScores::setName(string name) {
+       try {
+      
+        for (int i = 0; i < name.length(); i++) {
+            if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
+        }     
+    
+        seqName = name;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "QualityScores", "setName");
+               exit(1);
+       }
+}
 /**************************************************************************************************/
 
 string QualityScores::getName(){
index 77c3ee0d52beefad645e7ecd87ff19327b137c2d..87699739d34ad012b121a295be53f976b122a5b7 100644 (file)
@@ -36,7 +36,7 @@ public:
        void updateQScoreErrorMap(map<char, vector<int> >&, string, int, int, int);
        void updateForwardMap(vector<vector<int> >&, int, int, int);
        void updateReverseMap(vector<vector<int> >&, int, int, int);
-    void setName(string n) { seqName = n; }
+    void setName(string n)
     void setScores(vector<int> qs) { qScores = qs; seqLength = qScores.size(); }
     
        
@@ -48,6 +48,8 @@ private:
        
        string seqName;
        int seqLength;
+    
+    string getSequenceName(ifstream&);
 };
        
 /**************************************************************************************************/
index 5a9c0c8320b7305834cdf4bb544effd515728417..2524e652667bcacf8c76e89635e3c4e3f89ccc51 100644 (file)
@@ -588,7 +588,7 @@ int ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
                while(!inputNames.eof()){
                        if (m->control_pressed) { goodNameOut.close();  inputNames.close(); m->mothurRemove(goodNameFile);  return 0; }
 
-                       inputNames >> seqName >> seqList;
+                       inputNames >> seqName; m->gobble(inputNames); inputNames >> seqList;
                        it = badSeqNames.find(seqName);
                                
                        if(it != badSeqNames.end()){
@@ -636,7 +636,7 @@ int ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
                        while(!inputGroups.eof()){
                                if (m->control_pressed) { goodGroupOut.close(); inputGroups.close(); m->mothurRemove(goodNameFile);  m->mothurRemove(goodGroupFile); return 0; }
 
-                               inputGroups >> seqName >> group;
+                               inputGroups >> seqName; m->gobble(inputGroups); inputGroups >> group;
                                
                                it = badSeqGroups.find(seqName);
                                
@@ -970,7 +970,7 @@ int ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
                while(!inputGroups.eof()){
                        if (m->control_pressed) { goodGroupOut.close(); inputGroups.close(); m->mothurRemove(goodGroupFile); return 0; }
 
-                       inputGroups >> seqName >> group;
+                       inputGroups >> seqName; m->gobble(inputGroups); inputGroups >> group;
                        it = badSeqNames.find(seqName);
                        
                        if(it != badSeqNames.end()){
@@ -1157,7 +1157,7 @@ int ScreenSeqsCommand::screenTaxonomy(set<string> badSeqNames){
                while(!input.eof()){
                        if (m->control_pressed) { goodTaxOut.close(); input.close(); m->mothurRemove(goodTaxFile); return 0; }
                        
-                       input >> seqName >> tax;
+                       input >> seqName; m->gobble(input); input >> tax;
                        it = badSeqNames.find(seqName);
                        
                        if(it != badSeqNames.end()){ badSeqNames.erase(it); }
index 96662bc36d4b77e1cb8b7a884f4e8460969033f4..93ecb70f066fd8a1da6415453347aac66c57d0dd 100644 (file)
@@ -20,6 +20,10 @@ Sequence::Sequence(string newName, string sequence) {
                m = MothurOut::getInstance();
                initialize();   
                name = newName;
+        
+        for (int i = 0; i < name.length(); i++) {
+            if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
+        }
                
                //setUnaligned removes any gap characters for us
                setUnaligned(sequence);
@@ -36,6 +40,10 @@ Sequence::Sequence(string newName, string sequence, string justUnAligned) {
                m = MothurOut::getInstance();
                initialize();   
                name = newName;
+        
+        for (int i = 0; i < name.length(); i++) {
+            if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
+        }
                
                //setUnaligned removes any gap characters for us
                setUnaligned(sequence);
@@ -53,11 +61,9 @@ Sequence::Sequence(istringstream& fastaString){
                m = MothurOut::getInstance();
        
                initialize();
-               fastaString >> name;
-               
-               if (name.length() != 0) { 
+        name = getSequenceName(fastaString);
                
-                       name = name.substr(1);
+               if (!m->control_pressed) { 
                        string sequence;
                
                        //read comments
@@ -84,8 +90,7 @@ Sequence::Sequence(istringstream& fastaString){
                        setUnaligned(sequence); 
                        
                        if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
-               
-               }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+               }
                
        }
        catch(exception& e) {
@@ -100,11 +105,9 @@ Sequence::Sequence(istringstream& fastaString, string JustUnaligned){
                m = MothurOut::getInstance();
        
                initialize();
-               fastaString >> name;
-               
-               if (name.length() != 0) { 
+               name = getSequenceName(fastaString);
                
-                       name = name.substr(1);
+               if (!m->control_pressed) { 
                        string sequence;
                
                        //read comments
@@ -131,7 +134,7 @@ Sequence::Sequence(istringstream& fastaString, string JustUnaligned){
                        
                        if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
                        
-               }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaString.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+               }
                
        }
        catch(exception& e) {
@@ -147,11 +150,9 @@ Sequence::Sequence(ifstream& fastaFile){
        try {
                m = MothurOut::getInstance();
                initialize();
-               fastaFile >> name;
-               
-               if (name.length() != 0) { 
+               name = getSequenceName(fastaFile);
                
-                       name = name.substr(1); 
+               if (!m->control_pressed) { 
                        
                        string sequence;
                
@@ -181,7 +182,7 @@ Sequence::Sequence(ifstream& fastaFile){
                        
                        if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
                        
-               }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+               }
 
        }
        catch(exception& e) {
@@ -195,13 +196,11 @@ Sequence::Sequence(ifstream& fastaFile, string& extraInfo, bool getInfo){
        try {
                m = MothurOut::getInstance();
                initialize();
-               fastaFile >> name;
         extraInfo = "";
                
-               if (name.length() != 0) { 
-            
-                       name = name.substr(1); 
-                       
+               name = getSequenceName(fastaFile);
+               
+               if (!m->control_pressed) {                      
                        string sequence;
             
                        //read comments
@@ -233,8 +232,7 @@ Sequence::Sequence(ifstream& fastaFile, string& extraInfo, bool getInfo){
                        setUnaligned(sequence); 
                        
                        if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
-                       
-               }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+               }
         
        }
        catch(exception& e) {
@@ -248,10 +246,9 @@ Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
        try {
                m = MothurOut::getInstance();
                initialize();
-               fastaFile >> name;
+               name = getSequenceName(fastaFile);
                
-               if (name.length() != 0) { 
-                       name = name.substr(1);
+               if (!m->control_pressed) { 
                        string sequence;
                        
                        //read comments
@@ -279,7 +276,7 @@ Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
                        
                        if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
                        
-               }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+               }
                
        }
        catch(exception& e) {
@@ -287,7 +284,54 @@ Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
                exit(1);
        }                                                       
 }
-
+//********************************************************************************************************************
+string Sequence::getSequenceName(ifstream& fastaFile) {
+       try {
+               string name = "";
+               
+        fastaFile >> name;
+               
+               if (name.length() != 0) { 
+            
+                       name = name.substr(1); 
+            
+            for (int i = 0; i < name.length(); i++) {
+                if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
+            }
+            
+        }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;  }
+        
+               return name;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Sequence", "getSequenceName");
+               exit(1);
+       }
+}
+//********************************************************************************************************************
+string Sequence::getSequenceName(istringstream& fastaFile) {
+       try {
+               string name = "";
+               
+        fastaFile >> name;
+               
+               if (name.length() != 0) { 
+            
+                       name = name.substr(1); 
+            
+            for (int i = 0; i < name.length(); i++) {
+                if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
+            }
+            
+        }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;  }
+        
+               return name;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Sequence", "getSequenceName");
+               exit(1);
+       }
+}
 //********************************************************************************************************************
 string Sequence::getSequenceString(ifstream& fastaFile, int& numAmbig) {
        try {
index db4c4f32b9992a27f63e15908d881a77d5507980..312e49dcce47e7373b782a110030b46a9617be0a 100644 (file)
@@ -69,6 +69,8 @@ private:
        string getCommentString(ifstream&);
        string getSequenceString(istringstream&, int&);
        string getCommentString(istringstream&);
+    string getSequenceName(ifstream&);
+    string getSequenceName(istringstream&);
        string name;
        string unaligned;
        string aligned;
index fdec2ec483c67f1d37c23477d1411b5c97e8a215..9893ea747a1a58799280b259c0950f47837b1af7 100644 (file)
@@ -809,58 +809,10 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
 
         if (iters != 0) {
             //we need to find the average distance and standard deviation for each groups distance
-            
-            vector< vector<seqDist>  > calcAverages; calcAverages.resize(sumCalculators.size()); 
-            for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
-                calcAverages[i].resize(calcDistsTotals[0][i].size());
-                
-                for (int j = 0; j < calcAverages[i].size(); j++) {
-                    calcAverages[i][j].seq1 = calcDists[i][j].seq1;
-                    calcAverages[i][j].seq2 = calcDists[i][j].seq2;
-                    calcAverages[i][j].dist = 0.0;
-                }
-            }
-            
-            for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
-                for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
-                    for (int j = 0; j < calcAverages[i].size(); j++) {
-                        calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
-                    }
-                }
-            }
-            
-            for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
-                for (int j = 0; j < calcAverages[i].size(); j++) {
-                    calcAverages[i][j].dist /= (float) iters;
-                }
-            }
+            vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals);
             
             //find standard deviation
-            vector< vector<seqDist>  > stdDev; stdDev.resize(sumCalculators.size());
-            for (int i = 0; i < stdDev.size(); i++) {  //initialize sums to zero.
-                stdDev[i].resize(calcDistsTotals[0][i].size());
-                
-                for (int j = 0; j < stdDev[i].size(); j++) {
-                    stdDev[i][j].seq1 = calcDists[i][j].seq1;
-                    stdDev[i][j].seq2 = calcDists[i][j].seq2;
-                    stdDev[i][j].dist = 0.0;
-                }
-            }
-            
-            for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
-                for (int i = 0; i < stdDev.size(); i++) {  
-                    for (int j = 0; j < stdDev[i].size(); j++) {
-                        stdDev[i][j].dist += ((calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist) * (calcDistsTotals[thisIter][i][j].dist - calcAverages[i][j].dist));
-                    }
-                }
-            }
-            
-            for (int i = 0; i < stdDev.size(); i++) {  //finds average.
-                for (int j = 0; j < stdDev[i].size(); j++) {
-                    stdDev[i][j].dist /= (float) iters;
-                    stdDev[i][j].dist = sqrt(stdDev[i][j].dist);
-                }
-            }
+            vector< vector<seqDist>  > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages); 
             
             //print results
             for (int i = 0; i < calcDists.size(); i++) {
index 0d014599fa794217233694f8a28ed707b99c86eb..7803d89497a7fface1856dff7d374cb563a6328d 100644 (file)
@@ -917,31 +917,7 @@ int TreeGroupCommand::process(vector<SharedRAbundVector*> thisLookup) {
                
         if (iters != 1) {
             //we need to find the average distance and standard deviation for each groups distance
-            
-            vector< vector<seqDist>  > calcAverages; calcAverages.resize(treeCalculators.size()); 
-            for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
-                calcAverages[i].resize(calcDistsTotals[0][i].size());
-                
-                for (int j = 0; j < calcAverages[i].size(); j++) {
-                    calcAverages[i][j].seq1 = calcDists[i][j].seq1;
-                    calcAverages[i][j].seq2 = calcDists[i][j].seq2;
-                    calcAverages[i][j].dist = 0.0;
-                }
-            }
-            
-            for (int thisIter = 0; thisIter < iters; thisIter++) { //sum all groups dists for each calculator
-                for (int i = 0; i < calcAverages.size(); i++) {  //initialize sums to zero.
-                    for (int j = 0; j < calcAverages[i].size(); j++) {
-                        calcAverages[i][j].dist += calcDistsTotals[thisIter][i][j].dist;
-                    }
-                }
-            }
-            
-            for (int i = 0; i < calcAverages.size(); i++) {  //finds average.
-                for (int j = 0; j < calcAverages[i].size(); j++) {
-                    calcAverages[i][j].dist /= (float) iters;
-                }
-            }
+            vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals);  
             
             //create average tree for each calc
             for (int i = 0; i < calcDists.size(); i++) {
index 19a2ece3bb4253e6da6199515d9c203d2d656400..ab8afd07ffd96728e6204d2f9b2bbd2adc719787 100644 (file)
@@ -482,39 +482,10 @@ int UnifracUnweightedCommand::getAverageSTDMatrices(vector< vector<double> >& di
        try {
         //we need to find the average distance and standard deviation for each groups distance
         //finds sum
-        vector<double> averages; //averages.resize(numComp, 0.0);
-        for (int i = 0; i < numComp; i++) { averages.push_back(0.0); }
-        
-        if (m->debug) { m->mothurOut("[DEBUG]: numcomparisons = " + toString(numComp) + ", subsampleIters = " + toString(subsampleIters) + "\n"); }
-        
-        for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
-            for (int i = 0; i < dists[thisIter].size(); i++) {  
-                averages[i] += dists[thisIter][i];
-            }
-        }
-        
-        if (m->debug) { m->mothurOut("[DEBUG]: numcomparisons = " + toString(numComp) + ", subsampleIters = " + toString(subsampleIters) + "\n"); }
-        
-        //finds average.
-        for (int i = 0; i < averages.size(); i++) {  
-            averages[i] /= (float) subsampleIters; 
-            if (m->debug) { m->mothurOut("[DEBUG]: i = " + toString(i) + ", averages[i] = " + toString(averages[i]) + "\n"); }
-        }
+        vector<double> averages = m->getAverages(dists);
         
         //find standard deviation
-        vector<double> stdDev; //stdDev.resize(numComp, 0.0);
-        for (int i = 0; i < numComp; i++) { stdDev.push_back(0.0); }
-        
-        for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
-            for (int j = 0; j < dists[thisIter].size(); j++) {
-                stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
-            }
-        }
-        for (int i = 0; i < stdDev.size(); i++) {  
-            stdDev[i] /= (float) subsampleIters; 
-            stdDev[i] = sqrt(stdDev[i]);
-            if (m->debug) { m->mothurOut("[DEBUG]: i = " + toString(i) + ", stdDev[i] = " + toString(stdDev[i]) + "\n"); }
-        }
+        vector<double> stdDev = m->getStandardDeviation(dists, averages);
         
         //make matrix with scores in it
         vector< vector<double> > avedists;     //avedists.resize(m->getNumGroups());
index 47adc9a55b4ba929796fca2993138b89584bdee1..94ae125962c85c2cc0b7f100bf5178b89da89bdd 100644 (file)
@@ -465,40 +465,28 @@ int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector<double> >& dist
         //we need to find the average distance and standard deviation for each groups distance
         
         //finds sum
-        vector<double> averages; averages.resize(numComp, 0); 
-        for (int thisIter = 0; thisIter < subsampleIters; thisIter++) {
-            for (int i = 0; i < dists[thisIter].size(); i++) {  
-                averages[i] += dists[thisIter][i];
-            }
-        }
-        
-        //finds average.
-        for (int i = 0; i < averages.size(); i++) {  averages[i] /= (float) subsampleIters; }
+        vector<double> averages = m->getAverages(dists);        
         
         //find standard deviation
-        vector<double> stdDev; stdDev.resize(numComp, 0);
-                
-        for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
-            for (int j = 0; j < dists[thisIter].size(); j++) {
-                stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
-            }
-        }
-        for (int i = 0; i < stdDev.size(); i++) {  
-            stdDev[i] /= (float) subsampleIters; 
-            stdDev[i] = sqrt(stdDev[i]);
-        }
+        vector<double> stdDev = m->getStandardDeviation(dists, averages);
         
         //make matrix with scores in it
-        vector< vector<double> > avedists;     avedists.resize(m->getNumGroups());
+        vector< vector<double> > avedists;     //avedists.resize(m->getNumGroups());
         for (int i = 0; i < m->getNumGroups(); i++) {
-            avedists[i].resize(m->getNumGroups(), 0.0);
+            vector<double> temp;
+            for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
+            avedists.push_back(temp);
         }
         
         //make matrix with scores in it
-        vector< vector<double> > stddists;     stddists.resize(m->getNumGroups());
+        vector< vector<double> > stddists;     //stddists.resize(m->getNumGroups());
         for (int i = 0; i < m->getNumGroups(); i++) {
-            stddists[i].resize(m->getNumGroups(), 0.0);
+            vector<double> temp;
+            for (int j = 0; j < m->getNumGroups(); j++) { temp.push_back(0.0); }
+            //stddists[i].resize(m->getNumGroups(), 0.0);
+            stddists.push_back(temp);
         }
+
         
         //flip it so you can print it
         int count = 0;
index 2824ca8f6921c52993857472399beced96d4c26f..66dbb8eac3ee13c069b85374503dd87cc0ae538f 100644 (file)
--- a/venn.cpp
+++ b/venn.cpp
@@ -161,7 +161,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                //in essence you want to run it like a single 
                                if (vCalcs[i]->getName() == "sharedsobs") {
                                        singleCalc = new Sobs();
-                    if (sharedOtus) {
+                    if (sharedOtus &&  (labels.size() != 0)) {
                         string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + ".sharedotus";
                         
                         outputNames.push_back(filenameShared);
@@ -482,7 +482,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                        subset.push_back(lookup[0]); subset.push_back(lookup[1]);
                     vector<string> labels;
                                        vector<double> sharedab =  vCalcs[i]->getValues(subset, labels);
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                         outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup() << '\t' << labels.size() << '\t';
                         for (int k = 0; k < labels.size()-1; k++) {
                             outShared << labels[k] << ",";
@@ -494,7 +494,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                        subset.clear(); 
                                        subset.push_back(lookup[0]); subset.push_back(lookup[2]);
                                        vector<double> sharedac =  vCalcs[i]->getValues(subset, labels);
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                         outShared << lookup[0]->getGroup() + "-" + lookup[2]->getGroup() << '\t'<< labels.size() << '\t';
                         for (int k = 0; k < labels.size()-1; k++) {
                             outShared << labels[k] << ",";
@@ -506,7 +506,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                        subset.clear(); 
                                        subset.push_back(lookup[1]); subset.push_back(lookup[2]);
                                        vector<double> sharedbc =  vCalcs[i]->getValues(subset, labels);
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                         outShared << lookup[1]->getGroup() + "-" + lookup[2]->getGroup() << '\t'<< labels.size() << '\t';
                         for (int k = 0; k < labels.size()-1; k++) {
                             outShared << labels[k] << ",";
@@ -519,7 +519,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                        subset.clear(); 
                                        subset.push_back(lookup[0]); subset.push_back(lookup[1]); subset.push_back(lookup[2]);
                                        vector<double> sharedabc =  vCalcs[i]->getValues(subset, labels);
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                         outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup() + "-" + lookup[2]->getGroup() << '\t'<< labels.size() << '\t';
                         for (int k = 0; k < labels.size()-1; k++) {
                             outShared << labels[k] << ",";
@@ -674,7 +674,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                        subset.push_back(lookup[0]); subset.push_back(lookup[1]);
                                        data = vCalcs[i]->getValues(subset, labels);
                                        sharedAB = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                         outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup() << '\t'<< labels.size() << '\t';
                         for (int k = 0; k < labels.size()-1; k++) {
                             outShared << labels[k] << ",";
@@ -687,7 +687,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                        subset.push_back(lookup[0]); subset.push_back(lookup[2]);
                                        data = vCalcs[i]->getValues(subset, labels);
                                        sharedAC = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                         outShared << lookup[0]->getGroup() + "-" + lookup[2]->getGroup() << '\t'<< labels.size() << '\t';
                         for (int k = 0; k < labels.size()-1; k++) {
                             outShared << labels[k] << ",";
@@ -700,7 +700,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                        subset.push_back(lookup[0]); subset.push_back(lookup[3]);
                                        data = vCalcs[i]->getValues(subset, labels);
                                        sharedAD = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                         outShared << lookup[0]->getGroup() + "-" + lookup[3]->getGroup() << '\t'<< labels.size() << '\t';
                         for (int k = 0; k < labels.size()-1; k++) {
                             outShared << labels[k] << ",";
@@ -713,7 +713,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                        subset.push_back(lookup[1]); subset.push_back(lookup[2]);
                                        data = vCalcs[i]->getValues(subset, labels);
                                        sharedBC = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                         outShared << lookup[1]->getGroup() + "-" + lookup[2]->getGroup() << '\t'<< labels.size() << '\t';
                         for (int k = 0; k < labels.size()-1; k++) {
                             outShared << labels[k] << ",";
@@ -726,7 +726,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                        subset.push_back(lookup[1]); subset.push_back(lookup[3]);
                                        data = vCalcs[i]->getValues(subset, labels);
                                        sharedBD = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                         outShared << lookup[1]->getGroup() + "-" + lookup[3]->getGroup() << '\t'<< labels.size() << '\t';
                         for (int k = 0; k < labels.size()-1; k++) {
                             outShared << labels[k] << ",";
@@ -739,7 +739,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                        subset.push_back(lookup[2]); subset.push_back(lookup[3]);
                                        data = vCalcs[i]->getValues(subset, labels);
                                        sharedCD = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                         outShared << lookup[2]->getGroup() + "-" + lookup[3]->getGroup() << '\t'<< labels.size() << '\t';
                         for (int k = 0; k < labels.size()-1; k++) {
                             outShared << labels[k] << ",";
@@ -754,7 +754,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                        subset.push_back(lookup[0]); subset.push_back(lookup[1]); subset.push_back(lookup[2]);
                                        data = vCalcs[i]->getValues(subset, labels);
                                        sharedABC = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                         outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup()+ "-" + lookup[2]->getGroup()<< '\t'<< labels.size() << '\t';
                         for (int k = 0; k < labels.size()-1; k++) {
                             outShared << labels[k] << ",";
@@ -767,7 +767,7 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                        subset.push_back(lookup[0]); subset.push_back(lookup[2]); subset.push_back(lookup[3]);
                                        data = vCalcs[i]->getValues(subset, labels);
                                        sharedACD = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                         outShared << lookup[0]->getGroup() + "-" + lookup[2]->getGroup()+ "-" + lookup[3]->getGroup()<< '\t'<< labels.size() << '\t';
                         for (int k = 0; k < labels.size()-1; k++) {
                             outShared << labels[k] << ",";
@@ -780,12 +780,12 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                        subset.push_back(lookup[1]); subset.push_back(lookup[2]); subset.push_back(lookup[3]);
                                        data = vCalcs[i]->getValues(subset,labels);
                                        sharedBCD = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                         outShared << lookup[1]->getGroup() + "-" + lookup[2]->getGroup()+ "-" + lookup[3]->getGroup()<< '\t'<< labels.size() << '\t';
                         for (int k = 0; k < labels.size()-1; k++) {
                             outShared << labels[k] << ",";
                         }
-                        if (labels.size() != 0) { outShared << labels[labels.size()-1]; }
+                        outShared << labels[labels.size()-1]; 
                         outShared << endl;
                     }
                //cout << "num bcd = " << sharedBCD << endl;            
@@ -793,19 +793,19 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                        subset.push_back(lookup[0]); subset.push_back(lookup[1]); subset.push_back(lookup[3]);
                                        data = vCalcs[i]->getValues(subset, labels);
                                        sharedABD = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                         outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup()+ "-" + lookup[3]->getGroup()<< '\t'<< labels.size() << '\t';
                         for (int k = 0; k < labels.size()-1; k++) {
                             outShared << labels[k] << ",";
                         }
-                        if (labels.size() != 0) { outShared << labels[labels.size()-1]; }
+                        outShared << labels[labels.size()-1]; 
                         outShared << endl;
                     }
 //cout << "num abd = " << sharedABD << endl;
                                        //get estimate for all four
                                        data = vCalcs[i]->getValues(lookup, labels);
                                        sharedABCD = data[0];
-                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
+                    if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs") &&  (labels.size() != 0)) {
                         outShared << lookup[0]->getGroup() + "-" + lookup[1]->getGroup() + "-" + lookup[2]->getGroup()+ "-" + lookup[3]->getGroup()<< '\t'<< labels.size() << '\t';
                         for (int k = 0; k < labels.size()-1; k++) {
                             outShared << labels[k] << ",";