git.donarmstrong.com Git - mothur.git/commitdiff
Merge remote-tracking branch 'mothur/master'
author Pat Schloss <pschloss@umich.edu>
Wed, 8 Jan 2014 22:17:38 +0000 (17:17 -0500)
committer Pat Schloss <pschloss@umich.edu>
Wed, 8 Jan 2014 22:17:38 +0000 (17:17 -0500)
112 files changed:
Mothur.xcodeproj/project.pbxproj
binsequencecommand.cpp
chimerauchimecommand.cpp
classifyotucommand.cpp
classifyseqscommand.h
clustercommand.cpp
clusterdoturcommand.cpp
clustersplitcommand.cpp
collectsharedcommand.cpp
commandfactory.cpp
commandfactory.hpp
commandoptionparser.cpp
consensusseqscommand.cpp
cooccurrencecommand.h
corraxescommand.cpp
countseqscommand.cpp
createdatabasecommand.cpp
datavector.hpp
deuniqueseqscommand.cpp
deuniqueseqscommand.h
engine.cpp
filtersharedcommand.cpp
getcoremicrobiomecommand.cpp
getgroupscommand.cpp
getlineagecommand.cpp
getlistcountcommand.cpp
getmetacommunitycommand.cpp
getmetacommunitycommand.h
getotulabelscommand.cpp
getoturepcommand.cpp
getoturepcommand.h
getotuscommand.cpp
getrelabundcommand.cpp
getseqscommand.cpp
getseqscommand.h
getsharedotucommand.cpp
hclustercommand.cpp
heatmap.cpp
indicatorcommand.cpp
kruskalwalliscommand.cpp
lefsecommand.cpp
listotulabelscommand.cpp
listseqscommand.cpp
listseqscommand.h
listvector.cpp
listvector.hpp
makebiomcommand.cpp
makebiomcommand.h
makecontigscommand.cpp
makefile
makelefsecommand.cpp
mergegroupscommand.cpp
metastatscommand.cpp
mgclustercommand.cpp
mothurmetastats.cpp
mothurout.cpp
mothurout.h
nmdscommand.cpp
normalizesharedcommand.cpp
otuassociationcommand.cpp
otuhierarchycommand.cpp
parsefastaqcommand.cpp
parsefastaqcommand.h
parselistscommand.cpp
parselistscommand.h
pcrseqscommand.h
phylotree.cpp
phylotree.h
phylotypecommand.cpp
prcseqscommand.cpp
preclustercommand.cpp
primerdesigncommand.cpp
primerdesigncommand.h
qualityscores.cpp
randomforest.cpp
removegroupscommand.cpp
removelineagecommand.cpp
removeotulabelscommand.cpp
removeotuscommand.cpp
removerarecommand.cpp
removeseqscommand.cpp
removeseqscommand.h
screenseqscommand.cpp
sensspeccommand.cpp
seqnoise.cpp
sequence.cpp
sequence.hpp
setcurrentcommand.cpp
sffinfocommand.cpp [changed mode: 0644->0755]
sffinfocommand.h
sffmultiplecommand.cpp
sffmultiplecommand.h
sharedcommand.cpp
sharedcommand.h
sharedlistvector.cpp
sharedlistvector.h
sharedordervector.cpp
sharedrabundfloatvector.cpp
sharedrabundvector.cpp
sharedsobscollectsummary.cpp
sparcccommand.cpp
splitabundcommand.cpp
sracommand.cpp [new file with mode: 0644]
sracommand.h [new file with mode: 0644]
subsample.cpp
subsamplecommand.cpp
subsamplecommand.h
summarysharedcommand.cpp
unifracweightedcommand.cpp
venn.cpp
venncommand.cpp
venncommand.h

index 4fb82f5e8acacfb590ac2ac4b44620b70566ad87..f3b6a09ea7e690a55a31fcbe6b9fc457fa59ca2a 100644 (file)
@@ -38,6 +38,7 @@
                A73DDC3813C4BF64006AAE38 /* mothurmetastats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */; };
                A741744C175CD9B1007DF49B /* makelefsecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741744A175CD9B1007DF49B /* makelefsecommand.cpp */; };
                A741FAD215D1688E0067BCC5 /* sequencecountparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */; };
+               A747EC71181EA0F900345732 /* sracommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A747EC70181EA0F900345732 /* sracommand.cpp */; };
                A7496D2E167B531B00CC7D7C /* kruskalwalliscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */; };
                A74A9A9F148E881E00AB5E3E /* spline.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74A9A9E148E881E00AB5E3E /* spline.cpp */; };
                A74C06E916A9C0A9008390A3 /* primerdesigncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74C06E816A9C0A8008390A3 /* primerdesigncommand.cpp */; };
                A741744B175CD9B1007DF49B /* makelefsecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = makelefsecommand.h; sourceTree = "<group>"; };
                A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sequencecountparser.cpp; sourceTree = "<group>"; };
                A741FAD415D168A00067BCC5 /* sequencecountparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sequencecountparser.h; sourceTree = "<group>"; };
+               A747EC6F181EA0E500345732 /* sracommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sracommand.h; sourceTree = "<group>"; };
+               A747EC70181EA0F900345732 /* sracommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sracommand.cpp; sourceTree = "<group>"; };
                A7496D2C167B531B00CC7D7C /* kruskalwalliscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = kruskalwalliscommand.cpp; sourceTree = "<group>"; };
                A7496D2D167B531B00CC7D7C /* kruskalwalliscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = kruskalwalliscommand.h; sourceTree = "<group>"; };
                A74A9A9D148E881E00AB5E3E /* spline.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = spline.h; sourceTree = "<group>"; };
                                A7E9B83F12D37EC400DA6239 /* splitabundcommand.cpp */,
                                A7E9B84212D37EC400DA6239 /* splitgroupscommand.h */,
                                A7E9B84112D37EC400DA6239 /* splitgroupscommand.cpp */,
+                               A747EC6F181EA0E500345732 /* sracommand.h */,
+                               A747EC70181EA0F900345732 /* sracommand.cpp */,
                                A7E9B85012D37EC400DA6239 /* subsamplecommand.h */,
                                A7E9B84F12D37EC400DA6239 /* subsamplecommand.cpp */,
                                A7E9B85812D37EC400DA6239 /* summarycommand.h */,
                                A77916E8176F7F7600EEFE18 /* designmap.cpp in Sources */,
                                A7D9378A17B146B5001E90B0 /* wilcox.cpp in Sources */,
                                A7F24FC317EA36600021DC9A /* classifyrfsharedcommand.cpp in Sources */,
+                               A747EC71181EA0F900345732 /* sracommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
                                "DSTROOT[sdk=*]" = TARGET_BUILD_DIR;
                                GCC_DYNAMIC_NO_PIC = NO;
                                GCC_MODEL_TUNING = G5;
-                               GCC_OPTIMIZATION_LEVEL = 3;
+                               GCC_OPTIMIZATION_LEVEL = 0;
                                "INSTALL_PATH[sdk=*]" = TARGET_BUILD_DIR;
                                PRODUCT_NAME = mothur;
                                SDKROOT = macosx;
                                DEPLOYMENT_LOCATION = YES;
                                DSTROOT = TARGET_BUILD_DIR;
                                GCC_MODEL_TUNING = G5;
-                               GCC_OPTIMIZATION_LEVEL = 3;
+                               GCC_OPTIMIZATION_LEVEL = 0;
                                GCC_WARN_UNUSED_VALUE = YES;
                                PRODUCT_NAME = mothur;
                                SDKROOT = macosx;
                                GCC_ENABLE_SSE3_EXTENSIONS = NO;
                                GCC_ENABLE_SSE41_EXTENSIONS = NO;
                                GCC_ENABLE_SSE42_EXTENSIONS = NO;
-                               GCC_OPTIMIZATION_LEVEL = s;
+                               GCC_OPTIMIZATION_LEVEL = 3;
                                GCC_PREPROCESSOR_DEFINITIONS = (
                                        "MOTHUR_FILES=\"\\\"../../release\\\"\"",
-                                       "VERSION=\"\\\"1.31.0\\\"\"",
-                                       "RELEASE_DATE=\"\\\"5/24/2013\\\"\"",
+                                       "VERSION=\"\\\"1.32.0\\\"\"",
+                                       "RELEASE_DATE=\"\\\"10/31/2013\\\"\"",
                                );
                                GCC_VERSION = "";
                                "GCC_VERSION[arch=*]" = "";
                                GCC_C_LANGUAGE_STANDARD = gnu99;
                                GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
                                GCC_MODEL_TUNING = "";
-                               GCC_OPTIMIZATION_LEVEL = s;
+                               GCC_OPTIMIZATION_LEVEL = 3;
                                GCC_PREPROCESSOR_DEFINITIONS = (
-                                       "VERSION=\"\\\"1.32.0\\\"\"",
-                                       "RELEASE_DATE=\"\\\"10/01/2013\\\"\"",
+                                       "VERSION=\"\\\"1.32.1\\\"\"",
+                                       "RELEASE_DATE=\"\\\"10/16/2013\\\"\"",
                                );
                                GCC_VERSION = "";
                                GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
index f9784b3ea91e09d1c0d86232b67b70bc5e789b99..738c3241732c814d01b87052fa3cafd899c74182 100644 (file)
@@ -398,6 +398,7 @@ int BinSeqCommand::process(ListVector* list) {
         m->mothurOut(list->getLabel()); m->mothurOutEndLine();
         
         //for each bin in the list vector
+        vector<string> binLabels = list->getLabels();
         for (int i = 0; i < list->size(); i++) {
             
             if (m->control_pressed) {  return 1; }
@@ -421,11 +422,11 @@ int BinSeqCommand::process(ListVector* list) {
                             }
                             if (groups.size() != 0) { groupInfo += groups[groups.size()-1]; }
                             else { groupInfo = "not found";  }
-                            name = name + "\t" + groupInfo + "\t" + toString(i+1)+ "\tNumRep=" + toString(ct.getNumSeqs(name));
+                            name = name + "\t" + groupInfo + "\t" + binLabels[i] + "\tNumRep=" + toString(ct.getNumSeqs(name));
                             out << ">" << name << endl;
                             out << sequence << endl;
                         }else {
-                            name = name + "\t" + toString(i+1) + "\tNumRep=" + toString(ct.getNumSeqs(name));
+                            name = name + "\t" + binLabels[i] + "\tNumRep=" + toString(ct.getNumSeqs(name));
                             out << ">" << name << endl;
                             out << sequence << endl;
                         }
@@ -435,7 +436,7 @@ int BinSeqCommand::process(ListVector* list) {
                     if (sequence != "not found") {
                         //if you don't have groups
                         if (groupfile == "") {
-                            name = name + "\t" + toString(i+1);
+                            name = name + "\t" + binLabels[i];
                             out << ">" << name << endl;
                             out << sequence << endl;
                         }else {//if you do have groups
@@ -444,7 +445,7 @@ int BinSeqCommand::process(ListVector* list) {
                                 m->mothurOut(name + " is missing from your group file. Please correct. ");  m->mothurOutEndLine();
                                 return 1;
                             }else{
-                                name = name + "\t" + group + "\t" + toString(i+1);
+                                name = name + "\t" + group + "\t" + binLabels[i];
                                 out << ">" << name << endl;
                                 out << sequence << endl;
                             }
index 346afe213b12fab2baab1e4d23102e526f6ecc8c..3a9f42b22e0715ccf1f68313439342269da81bf8 100644 (file)
@@ -569,6 +569,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
 
                        
                        if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
+            if (hasCount && (templatefile != "self")) { m->mothurOut("You have provided a countfile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
                        if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
                        
                        //look for uchime exe
index 217bc868eb8984c1111ff50fa84f83abeefa2059..160928f3aa46906900ac5262f198112fb70fda36 100644 (file)
@@ -586,6 +586,7 @@ int ClassifyOtuCommand::process(ListVector* processList) {
         
                //for each bin in the list vector
         string snumBins = toString(processList->getNumBins());
+        vector<string> binLabels = processList->getLabels();
                for (int i = 0; i < processList->getNumBins(); i++) {
                        
                        if (m->control_pressed) { break; }
@@ -598,17 +599,8 @@ int ClassifyOtuCommand::process(ListVector* processList) {
                        names = findConsensusTaxonomy(thisNames, size, conTax);
                
                        if (m->control_pressed) { break; }
-                       
-                       //output to new names file
-            string binLabel = "Otu";
-            string sbinNumber = toString(i+1);
-            if (sbinNumber.length() < snumBins.length()) { 
-                int diff = snumBins.length() - sbinNumber.length();
-                for (int h = 0; h < diff; h++) { binLabel += "0"; }
-            }
-            binLabel += sbinNumber;
 
-                       out << binLabel << '\t' << size << '\t' << conTax << endl;
+                       out << binLabels[i] << '\t' << size << '\t' << conTax << endl;
                        
                        string noConfidenceConTax = conTax;
                        m->removeConfidences(noConfidenceConTax);
@@ -683,16 +675,8 @@ int ClassifyOtuCommand::process(ListVector* processList) {
                     
                     if (m->control_pressed) { break; }
                     
-                    //output to new names file
-                    string binLabel = "Otu";
-                    string sbinNumber = toString(i+1);
-                    if (sbinNumber.length() < snumBins.length()) { 
-                        int diff = snumBins.length() - sbinNumber.length();
-                        for (int h = 0; h < diff; h++) { binLabel += "0"; }
-                    }
-                    binLabel += sbinNumber;
                     
-                    (*outs[groupIndex[itParsed->first]]) << binLabel << '\t' << size << '\t' << conTax << endl;
+                    (*outs[groupIndex[itParsed->first]]) << binLabels[i] << '\t' << size << '\t' << conTax << endl;
                     
                     string noConfidenceConTax = conTax;
                     m->removeConfidences(noConfidenceConTax);
index 59d9ee275800195255961639d77fb6c57d97f417..55546f2032af82f9934fb1af4b73891684904cd6 100644 (file)
@@ -166,7 +166,7 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
                //make classify
                Classify* myclassify;
         string outputMethodTag = pDataArray->method + ".";
-               if(pDataArray->method == "bayesian"){   myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip, pDataArray->writeShortcuts);             }
+               if(pDataArray->method == "wang"){       myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip, pDataArray->writeShortcuts);             }
                else if(pDataArray->method == "knn"){   myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID);                           }
         else if(pDataArray->method == "zap"){  
             outputMethodTag = pDataArray->search + "_" + outputMethodTag;
@@ -174,7 +174,7 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
             else {  myclassify = new AlignTree(pDataArray->templateFileName, pDataArray->taxonomyFileName, pDataArray->cutoff);  }
         }
                else {
-                       pDataArray->m->mothurOut(pDataArray->search + " is not a valid method option. I will run the command using bayesian.");
+                       pDataArray->m->mothurOut(pDataArray->method + " is not a valid method option. I will run the command using wang.");
                        pDataArray->m->mothurOutEndLine();
                        myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip, pDataArray->writeShortcuts);     
                }
index c1ac5bbddd5be05c41e03853c25af4340eab133e..5ac4166c7afa54d2047814bc4b4a18c018786057 100644 (file)
@@ -361,7 +361,7 @@ int ClusterCommand::execute(){
         }
                m->openOutputFile(listFileName, listFile);
         outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
-               
+        list->printHeaders(listFile);
                
                time_t estart = time(NULL);
                float previousDist = 0.00000;
index 96bfc5c523be69bcc75dae33e35a209846aa2302..dadc9cc485dedeec55a1057474f2eab19bf0d4ab 100644 (file)
@@ -259,6 +259,7 @@ int ClusterDoturCommand::execute(){
         }
                m->openOutputFile(listFileName, listFile);
         outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
+        list->printHeaders(listFile);
                
                float previousDist = 0.00000;
                float rndPreviousDist = 0.00000;
index b5dc9692f0f7afefabae27676b9aa61491c048f1..874eb6d7b535e9e400cca8f22593280ecd959b42 100644 (file)
@@ -833,8 +833,10 @@ int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> us
                m->openOutputFile(listFileName, outList);
         outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
                
-               
                map<float, int>::iterator itLabel;
+        
+        //clears out junk for autocompleting of list files above.  Perhaps there is a beter way to handle this from within the data structure?
+        m->printedListHeaders = false;
 
                //for each label needed
                for(itLabel = userLabels.begin(); itLabel != userLabels.end(); itLabel++) {
@@ -890,6 +892,8 @@ int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> us
                 rabund->print(outRabund);
             }
                        //outList << endl;
+            if (!m->printedListHeaders) { 
+                m->listBinLabelsInFile.clear(); completeList.printHeaders(outList); }
             completeList.print(outList);
                        
                        if (rabund != NULL) { delete rabund; }
index dd1ec270334e09f1d316ce060c2ab8c185e0d313..40ee3d255cc03c3a005bc79d77eeee78fb25d510 100644 (file)
@@ -89,7 +89,7 @@ string CollectSharedCommand::getHelpString(){
                helpString += "The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
                helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
                helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
-               helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n";
+               helpString += "Note: No spaces between parameter labels (i.e. shared), '=' and parameters (i.e.yourSharedfile).\n";
                return helpString;
        }
        catch(exception& e) {
index 8ab0a9529ba697aece318c7e11301f9e465b19c3..5a32ac1a526450b8d9408a88e0e57cc31e946720 100644 (file)
 #include "makelefsecommand.h"
 #include "lefsecommand.h"
 #include "kruskalwalliscommand.h"
+#include "sracommand.h"
 
 /*******************************************************/
 
@@ -319,6 +320,7 @@ CommandFactory::CommandFactory(){
     commands["make.lefse"]          = "make.lefse";
     commands["lefse"]               = "lefse";
     commands["kruskal.wallis"]      = "kruskal.wallis";
+    commands["sra"]                 = "sra";
     
 
 }
@@ -408,7 +410,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                        else { optionString += "inputdir=" + inputDir; }
                }
                
-               if(commandName == "cluster")                            {       command = new ClusterCommand(optionString);                                     }
+               if(commandName == "cluster")                    {       command = new ClusterCommand(optionString);                                     }
                else if(commandName == "unique.seqs")                   {       command = new DeconvoluteCommand(optionString);                         }
                else if(commandName == "parsimony")                             {       command = new ParsimonyCommand(optionString);                           }
                else if(commandName == "help")                                  {       command = new HelpCommand(optionString);                                        }
@@ -533,7 +535,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
         else if(commandName == "make.contigs")          {      command = new MakeContigsCommand(optionString);             }
         else if(commandName == "load.logfile")          {      command = new LoadLogfileCommand(optionString);             }
         else if(commandName == "sff.multiple")          {      command = new SffMultipleCommand(optionString);             }
-        else if(commandName == "classify.rf")           {      command = new ClassifyRFSharedCommand(optionString);          }
+        else if(commandName == "classify.rf")           {      command = new ClassifyRFSharedCommand(optionString);        }
         else if(commandName == "filter.shared")         {      command = new FilterSharedCommand(optionString);            }
         else if(commandName == "primer.design")         {      command = new PrimerDesignCommand(optionString);            }
         else if(commandName == "get.dists")             {      command = new GetDistsCommand(optionString);                }
@@ -546,6 +548,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
         else if(commandName == "make.lefse")                   {       command = new MakeLefseCommand(optionString);                           }
         else if(commandName == "lefse")                 {      command = new LefseCommand(optionString);                   }
         else if(commandName == "kruskal.wallis")        {      command = new KruskalWallisCommand(optionString);           }
+        else if(commandName == "sra")                   {      command = new SRACommand(optionString);                     }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -714,6 +717,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
         else if(commandName == "make.lefse")                   {       pipecommand = new MakeLefseCommand(optionString);                               }
         else if(commandName == "lefse")                 {      pipecommand = new LefseCommand(optionString);                   }
         else if(commandName == "kruskal.wallis")        {      pipecommand = new KruskalWallisCommand(optionString);           }
+        else if(commandName == "sra")                   {      pipecommand = new SRACommand(optionString);                     }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -868,6 +872,7 @@ Command* CommandFactory::getCommand(string commandName){
         else if(commandName == "make.lefse")                   {       shellcommand = new MakeLefseCommand();                          }
         else if(commandName == "lefse")                 {      shellcommand = new LefseCommand();                  }
         else if(commandName == "kruskal.wallis")        {      shellcommand = new KruskalWallisCommand();          }
+        else if(commandName == "sra")                   {      shellcommand = new SRACommand();                    }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
index 36e8f462b146fc7846ee6cf9ead1eb42e9757a36..eb85c68513bc43ece5cf9802917ebe336bc03d5a 100644 (file)
@@ -27,8 +27,8 @@ public:
        bool isValidCommand(string, string);\r
        void printCommands(ostream&);\r
     void printCommandsCategories(ostream&);\r
-       void setOutputDirectory(string o)               {       outputDir = o;  m->setOutputDir(o);     }\r
-       void setInputDirectory(string i)                {       inputDir = i;           }\r
+       void setOutputDirectory(string o)               {       if(m->dirCheck(o) || (o == "")) {  outputDir = o; m->setOutputDir(o); } }\r
+       void setInputDirectory(string i)                {       if(m->dirCheck(i) || (i == "")) {  inputDir = i;        }       }\r
        void setLogfileName(string n, bool a)   {       logFileName = n;  append = a;           }\r
        string getLogfileName()                                 {       return logFileName;     }\r
        bool getAppend()                                                {       return append;                  }\r
index dfad533c8898ff3d79067d5f16c6869803e21e12..0038a714f4ae356e11d7dea57b4840a81eb946f4 100644 (file)
@@ -31,7 +31,7 @@ CommandOptionParser::CommandOptionParser(string input){
                        optionString = input.substr((openParen+1), (closeParen-openParen-1)); //optionString contains everything between "(" and ")".
                }
                else if (openParen == -1) { m->mothurOut("[ERROR]: You are missing ("); m->mothurOutEndLine(); }
-               else if (closeParen == -1) { m->mothurOut("[ERROR]:You are missing )"); m->mothurOutEndLine(); }
+               else if (closeParen == -1) { m->mothurOut("[ERROR]: You are missing )"); m->mothurOutEndLine(); }
     }
        catch(exception& e) {
                m->errorOut(e, "CommandOptionParser", "CommandOptionParser");
index fbec746804bcd7257383e99f26e058dfe4df6f25..d9a5e294c08d1cd3a68c7890b7c0a6ab7fbde04a 100644 (file)
@@ -436,23 +436,16 @@ int ConsensusSeqsCommand::processList(ListVector*& list){
                outSummary << "OTU#\tPositioninAlignment\tA\tT\tG\tC\tGap\tNumberofSeqs\tConsensusBase" << endl;
                
         string snumBins = toString(list->getNumBins());
+        vector<string> binLabels = list->getLabels();
                for (int i = 0; i < list->getNumBins(); i++) {
                        
                        if (m->control_pressed) { outSummary.close(); outName.close(); outFasta.close(); return 0; }
                        
                        string bin = list->get(i);
                        string consSeq = getConsSeq(bin, outSummary, i);
-            
-            string seqName = "Otu";
-            string sbinNumber = toString(i+1);
-            if (sbinNumber.length() < snumBins.length()) {
-                int diff = snumBins.length() - sbinNumber.length();
-                for (int h = 0; h < diff; h++) { seqName += "0"; }
-            }
-            seqName += sbinNumber;
                        
-                       outFasta << ">" << seqName << endl << consSeq << endl;
-                       outName << seqName << '\t' << seqName << "," << bin << endl;
+                       outFasta << ">" << binLabels[i] << endl << consSeq << endl;
+                       outName << binLabels[i] << '\t' << binLabels[i] << "," << bin << endl;
                }
                
                outSummary.close(); outName.close(); outFasta.close();
index 758a9fff0c155261c1efbe60e7a0992d9b215681..94930ac2db71fff68a9fb208385a6cdc2c94ade4 100644 (file)
@@ -26,7 +26,7 @@ public:
        ~CooccurrenceCommand(){}
        
        vector<string> setParameters();
-       string getCommandName()                 { return "Cooccurrence";                        }
+       string getCommandName()                 { return "cooccurrence";                        }
        string getCommandCategory()             { return "Hypothesis Testing";  }
        
        string getHelpString(); 
index 853c174ac63012c2ebf8d21800c4886138f8ab16..72fa03b75edc196aafcaaa4b6ade6f95d1106d48 100644 (file)
@@ -339,7 +339,7 @@ int CorrAxesCommand::calcPearson(map<string, vector<float> >& axes, ofstream& ou
           //for each otu
           for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                   
-                  if (metadatafile == "") {  out << m->currentBinLabels[i];    }
+                  if (metadatafile == "") {  out << m->currentSharedBinLabels[i];      }
                   else {  out << metadataLabels[i];            }
                                   
                   //find the averages this otu - Y
@@ -474,7 +474,7 @@ int CorrAxesCommand::calcSpearman(map<string, vector<float> >& axes, ofstream& o
                //for each otu
                for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                        
-                       if (metadatafile == "") {  out << m->currentBinLabels[i];       }
+                       if (metadatafile == "") {  out << m->currentSharedBinLabels[i]; }
                        else {  out << metadataLabels[i];               }
                        
                        //find the ranks of this otu - Y
@@ -627,7 +627,7 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                //for each otu
                for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
                
-                       if (metadatafile == "") {  out << m->currentBinLabels[i];       }
+                       if (metadatafile == "") {  out << m->currentSharedBinLabels[i]; }
                        else {  out << metadataLabels[i];               }
                        
                        //find the ranks of this otu - Y
@@ -834,7 +834,7 @@ int CorrAxesCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thisloo
                                        for (int h = 0; h < diff; h++) { binLabel += "0"; }
                                }
                                binLabel += sbinNumber; 
-                               if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
+                               if (i < m->currentSharedBinLabels.size()) {  binLabel = m->currentSharedBinLabels[i]; }
                                
                                newBinLabels.push_back(binLabel);
                        }
@@ -843,7 +843,7 @@ int CorrAxesCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>& thisloo
                for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
                
                thislookup = newLookup;
-               m->currentBinLabels = newBinLabels;
+               m->currentSharedBinLabels = newBinLabels;
                
                return 0;
                
index 411a814d73671ebb69b43a783e36e31d6263fe45..575ffe844ed0a1134d52ecd0d6aa30531e2fc584 100644 (file)
@@ -628,7 +628,7 @@ map<int, string> CountSeqsCommand::processNameFile(string name) {
             }
                }
                in.close();
-        out.close();
+       
                
         if (rest != "") {
             vector<string> pieces = m->splitWhiteSpace(rest);
@@ -651,6 +651,7 @@ map<int, string> CountSeqsCommand::processNameFile(string name) {
             }
 
         }
+        out.close();
         
         return indexToNames;
     }
@@ -704,7 +705,7 @@ map<int, string> CountSeqsCommand::getGroupNames(string filename, set<string>& n
             }
                }
                in.close();
-        out.close();
+        
         
         if (rest != "") {
             vector<string> pieces = m->splitWhiteSpace(rest);
@@ -726,6 +727,7 @@ map<int, string> CountSeqsCommand::getGroupNames(string filename, set<string>& n
                 }
             }
         }
+        out.close();
                
         for (it = groupIndex.begin(); it != groupIndex.end(); it++) {  indexToGroups[it->second] = it->first;  }
         
index 235682bfec8fdfc36c43ed25fb78ffd06eef1b4f..a3531661d47c5cc640b822d5ba176a0f50a61844 100644 (file)
@@ -364,11 +364,15 @@ int CreateDatabaseCommand::execute(){
             header += "repSeqName\trepSeq\tOTUConTaxonomy";
             out << header << endl;
             
+            vector<string> binLabels = list->getLabels();
             for (int i = 0; i < list->getNumBins(); i++) {
                 
+                int index = findIndex(otuLabels, binLabels[i]);
+                if (index == -1) {  m->mothurOut("[ERROR]: " + binLabels[i] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; }
+                
                 if (m->control_pressed) { break; }
                 
-                out << otuLabels[i] << '\t';
+                out << otuLabels[index] << '\t';
                 
                 vector<string> binNames;
                 string bin = list->get(i);
@@ -387,12 +391,12 @@ int CreateDatabaseCommand::execute(){
                     map<string, string>::iterator it = repNames.find(bin);
                     
                     if (it == repNames.end()) {
-                        m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+                        m->mothurOut("[ERROR: OTU " + otuLabels[index] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
                     }else { seqRepName = it->second;  numSeqsRep = binNames.size(); }
                     
                     //sanity check
-                    if (binNames.size() != classifyOtuSizes[i]) {
-                        m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+                    if (binNames.size() != classifyOtuSizes[index]) {
+                        m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
                     }
                 }else {
                     //find rep sequence in bin
@@ -406,11 +410,11 @@ int CreateDatabaseCommand::execute(){
                     }
                     
                     if (seqRepName == "") {
-                        m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the count file. Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+                        m->mothurOut("[ERROR: OTU " + otuLabels[index] + " is not in the count file. Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
                     }
                     
                     if (numSeqsRep != classifyOtuSizes[i]) {
-                        m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(numSeqsRep) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+                        m->mothurOut("[ERROR: OTU " + otuLabels[index] + " contains " + toString(numSeqsRep) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
                     }
                 }
                 
@@ -443,7 +447,7 @@ int CreateDatabaseCommand::execute(){
                 }else { out << numSeqsRep << '\t'; }
                 
                 //output repSeq
-                out << seqRepName << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl;
+                out << seqRepName << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl;
             }
             
             
@@ -462,8 +466,8 @@ int CreateDatabaseCommand::execute(){
                 
                 if (m->control_pressed) { break; }
                 
-                int index = findIndex(otuLabels, m->currentBinLabels[h]);
-                if (index == -1) {  m->mothurOut("[ERROR]: " + m->currentBinLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; }
+                int index = findIndex(otuLabels, m->currentSharedBinLabels[h]);
+                if (index == -1) {  m->mothurOut("[ERROR]: " + m->currentSharedBinLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; }
                 
                 if (m->control_pressed) { break; }
                 
@@ -478,7 +482,7 @@ int CreateDatabaseCommand::execute(){
                 
                 //sanity check
                 if (totalAbund != classifyOtuSizes[index]) {
-                    m->mothurOut("[WARNING]: OTU " + m->currentBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); //m->control_pressed = true;   break;
+                    m->mothurOut("[WARNING]: OTU " + m->currentSharedBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); //m->control_pressed = true;   break;
                 }
                 
                 //output repSeq
index e6c839d4772d0586457a9913df2d9e889db60e00..dfcd684eaaec9e725ddf6e860b9d22ec4c4a90c5 100644 (file)
@@ -23,10 +23,10 @@ class DataVector {
        
 public:
        DataVector(){ m = MothurOut::getInstance(); }// : maxRank(0), numBins(0), numSeqs(0){};
-       DataVector(string l) : label(l) {};
-       DataVector(const DataVector& dv) : label(dv.label){};//, maxRank(dv.maxRank), numBins(dv.numBins), numSeqs(dv.numSeqs) {};
-       DataVector(ifstream&);
-       DataVector(ifstream&, GroupMap*);
+       DataVector(string l) : label(l) { m = MothurOut::getInstance();};
+       DataVector(const DataVector& dv) : label(dv.label){ m = MothurOut::getInstance();};//, maxRank(dv.maxRank), numBins(dv.numBins), numSeqs(dv.numSeqs) {};
+       DataVector(ifstream&) {m = MothurOut::getInstance();}
+       DataVector(ifstream&, GroupMap*){m = MothurOut::getInstance();}
        virtual ~DataVector(){};
        
 //     virtual int getNumBins()        {       return numBins;         }
index f72a7c9c7e1264265d4d28e1072e087cf501c288..a6c151c38318f4a98917e5b46f81f5b8cd7cfc27 100644 (file)
@@ -9,12 +9,14 @@
 
 #include "deuniqueseqscommand.h"
 #include "sequence.hpp"
+#include "counttable.h"
 
 //**********************************************************************************************************************
 vector<string> DeUniqueSeqsCommand::setParameters(){   
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pname);
+               CommandParameter pname("name", "InputTypes", "", "", "namecount", "namecount", "none","name",false,false,true); parameters.push_back(pname);
+        CommandParameter pcount("count", "InputTypes", "", "", "namecount", "namecount", "none","group",false,false,true); parameters.push_back(pcount);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
@@ -31,8 +33,8 @@ vector<string> DeUniqueSeqsCommand::setParameters(){
 string DeUniqueSeqsCommand::getHelpString(){   
        try {
                string helpString = "";
-               helpString += "The deunique.seqs command reads a fastafile and namefile, and creates a fastafile containing all the sequences.\n";
-               helpString += "The deunique.seqs command parameters are fasta and name, both are required, unless you have valid current name and fasta files.\n";
+               helpString += "The deunique.seqs command reads a fastafile and namefile or countfile, and creates a fastafile containing all the sequences. It you provide a count file with group information a group file is also created.\n";
+               helpString += "The deunique.seqs command parameters are fasta, name and count. Fasta is required and you must provide either a name or count file.\n";
                helpString += "The deunique.seqs command should be in the following format: \n";
                helpString += "deunique.seqs(fasta=yourFastaFile, name=yourNameFile) \n";       
                helpString += "Example deunique.seqs(fasta=abrecovery.unique.fasta, name=abrecovery.names).\n";
@@ -49,7 +51,8 @@ string DeUniqueSeqsCommand::getOutputPattern(string type) {
     try {
         string pattern = "";
         
-        if (type == "fasta") {  pattern = "[filename],redundant.fasta"; } 
+        if (type == "fasta") {  pattern = "[filename],redundant.fasta"; }
+        else if (type == "group") {  pattern = "[filename],redundant.groups"; }
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
         return pattern;
@@ -66,6 +69,7 @@ DeUniqueSeqsCommand::DeUniqueSeqsCommand(){
                setParameters();
                vector<string> tempOutNames;
                outputTypes["fasta"] = tempOutNames;
+        outputTypes["group"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "DeUniqueSeqsCommand", "DeconvoluteCommand");
@@ -98,6 +102,7 @@ DeUniqueSeqsCommand::DeUniqueSeqsCommand(string option)  {
                        //initialize outputTypes
                        vector<string> tempOutNames;
                        outputTypes["fasta"] = tempOutNames;
+            outputTypes["group"] = tempOutNames;
                
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -119,6 +124,14 @@ DeUniqueSeqsCommand::DeUniqueSeqsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        
@@ -134,16 +147,31 @@ DeUniqueSeqsCommand::DeUniqueSeqsCommand(string option)  {
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
                                outputDir = ""; 
-                               outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
                        }
                        
                        nameFile = validParameter.validFile(parameters, "name", true);
                        if (nameFile == "not open") { abort = true; }
-                       else if (nameFile == "not found"){                                      
-                               nameFile = m->getNameFile(); 
+                       else if (nameFile == "not found"){      nameFile = ""; }
+                       else { m->setNameFile(nameFile); }
+            
+            countfile = validParameter.validFile(parameters, "count", true);
+                       if (countfile == "not open") { abort = true;  }
+                       else if (countfile == "not found") { countfile = ""; }
+                       else { m->setCountTableFile(countfile); }
+                       
+            if ((countfile != "") && (nameFile != "")) { m->mothurOut("When executing a deunique.seqs command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+                       
+            
+                       if ((countfile == "") && (nameFile == "")) { //look for currents
+                nameFile = m->getNameFile();
                                if (nameFile != "") { m->mothurOut("Using " + nameFile + " as input file for the name parameter."); m->mothurOutEndLine(); }
-                               else {  m->mothurOut("You have no current namefile and the name parameter is required."); m->mothurOutEndLine(); abort = true; }
-                       }else { m->setNameFile(nameFile); }
+                               else {
+                    countfile = m->getCountTableFile();
+                    if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+                    else {  m->mothurOut("[ERROR]: You have no current name or count files one is required."); m->mothurOutEndLine(); abort = true; }
+                }
+            }
+
                }
 
        }
@@ -160,7 +188,9 @@ int DeUniqueSeqsCommand::execute() {
 
                //prepare filenames and open files
                ofstream out;
-               string outFastaFile = m->getRootName(m->getSimpleName(fastaFile));
+        string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(fastaFile);  }
+               string outFastaFile = thisOutputDir + m->getRootName(m->getSimpleName(fastaFile));
                int pos = outFastaFile.find("unique");
                if (pos != string::npos) { outFastaFile = outputDir + outFastaFile.substr(0, pos); }
         else { outFastaFile = outputDir + outFastaFile; }
@@ -169,55 +199,92 @@ int DeUniqueSeqsCommand::execute() {
         outFastaFile = getOutputFileName("fasta", variables);
                m->openOutputFile(outFastaFile, out);
                
-               readNamesFile();
-               if (m->control_pressed) {  out.close(); outputTypes.clear(); m->mothurRemove(outFastaFile); return 0; }
+               map<string, string> nameMap;
+        CountTable ct;
+        ofstream outGroup;
+        string outGroupFile;
+        vector<string> groups;
+        if (nameFile != "") { m->readNames(nameFile, nameMap); }
+        else {
+            ct.readTable(countfile, true, false);
+            
+            if (ct.hasGroupInfo()) {
+                thisOutputDir = outputDir;
+                if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
+                outGroupFile = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
+                variables["[filename]"] = outGroupFile;
+                outGroupFile = getOutputFileName("group", variables);
+                m->openOutputFile(outGroupFile, outGroup);
+                groups = ct.getNamesOfGroups();
+            }
+        }
+        
+               if (m->control_pressed) {  out.close(); outputTypes.clear(); m->mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); m->mothurRemove(outGroupFile); } } return 0; }
                
                ifstream in;
                m->openInputFile(fastaFile, in);
                
                while (!in.eof()) {
                
-                       if (m->control_pressed) { in.close(); out.close(); outputTypes.clear(); m->mothurRemove(outFastaFile); return 0; }
+                       if (m->control_pressed) { in.close(); out.close(); outputTypes.clear(); m->mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); m->mothurRemove(outGroupFile); } } return 0; }
                        
                        Sequence seq(in); m->gobble(in);
                        
                        if (seq.getName() != "") {
                                
-                               //look for sequence name in nameMap
-                               map<string, string>::iterator it = nameMap.find(seq.getName());
-                               
-                               if (it == nameMap.end()) {      m->mothurOut("[ERROR]: Your namefile does not contain " + seq.getName() + ", aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
-                               else {
-                                       vector<string> names;
-                                       m->splitAtComma(it->second, names);
-                                       
-                                       //output sequences
-                                       for (int i = 0; i < names.size(); i++) {
-                                               out << ">" << names[i] << endl;
-                                               out << seq.getAligned() << endl;
-                                       }
-                                       
-                                       //remove seq from name map so we can check for seqs in namefile not in fastafile later
-                                       nameMap.erase(it);
-                               }
+                if (nameFile != "") {
+                    //look for sequence name in nameMap
+                    map<string, string>::iterator it = nameMap.find(seq.getName());
+                    
+                    if (it == nameMap.end()) { m->mothurOut("[ERROR]: Your namefile does not contain " + seq.getName() + ", aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
+                    else {
+                        vector<string> names;
+                        m->splitAtComma(it->second, names);
+                        
+                        //output sequences
+                        for (int i = 0; i < names.size(); i++) {
+                            out << ">" << names[i] << endl;
+                            out << seq.getAligned() << endl;
+                        }
+                        
+                        //remove seq from name map so we can check for seqs in namefile not in fastafile later
+                        nameMap.erase(it);
+                    }
+                }else {
+                    if (ct.hasGroupInfo()) {
+                        vector<int> groupCounts = ct.getGroupCounts(seq.getName());
+                        int count = 1;
+                        for (int i = 0; i < groups.size(); i++) {
+                            for (int j = 0; j < groupCounts[i]; j++) {
+                                outGroup << seq.getName()+"_"+toString(count) << '\t' << groups[i] << endl; count++;
+                            }
+                        }
+                        
+                    }
+                    
+                    int numReps = ct.getNumSeqs(seq.getName()); //will report error and set m->control_pressed if not found
+                    for (int i = 0; i < numReps; i++) {
+                        out << ">" << seq.getName()+"_"+toString(i+1) << endl;
+                        out << seq.getAligned() << endl;
+                    }
+                }
                        }
                }
                in.close();
-               out.close(); 
+               out.close();
+        if (countfile != "") { if (ct.hasGroupInfo()) { outGroup.close(); } }
                
-               if (nameMap.size() != 0) { //then there are names in the namefile not in the fastafile
-                       for (map<string, string>::iterator it = nameMap.begin(); it != nameMap.end(); it++) {  
-                               m->mothurOut(it->first + " is not in your fasta file, but is in your name file. Please correct."); m->mothurOutEndLine();
-                       }
-               }
-                               
-               if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); return 0; }
+                                               
+               if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); if (countfile != "") { if (ct.hasGroupInfo()) {  m->mothurRemove(outGroupFile); } }return 0; }
                
+        outputNames.push_back(outFastaFile);  outputTypes["fasta"].push_back(outFastaFile);
+        if (countfile != "") { if (ct.hasGroupInfo()) { outputNames.push_back(outGroupFile);  outputTypes["group"].push_back(outGroupFile); } }
+        
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               m->mothurOut(outFastaFile); m->mothurOutEndLine();      
-               outputNames.push_back(outFastaFile);  outputTypes["fasta"].push_back(outFastaFile);  
-               m->mothurOutEndLine();
+               for(int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();      }
+        m->mothurOutEndLine();
+               
                
                //set fasta file as new current fastafile
                string current = "";
@@ -225,47 +292,18 @@ int DeUniqueSeqsCommand::execute() {
                if (itTypes != outputTypes.end()) {
                        if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
                }
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "DeUniqueSeqsCommand", "execute");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-int DeUniqueSeqsCommand::readNamesFile() {
-       try {
-               
-               ifstream inNames;
-               m->openInputFile(nameFile, inNames);
-               
-               string name, names;
-               map<string, string>::iterator it;
-       
-               while(inNames){
-                       
-                       if(m->control_pressed) { break; }
-                       
-                       inNames >> name;        m->gobble(inNames);             
-                       inNames >> names;               
-                       
-                       it = nameMap.find(name);
-                       
-                       if (it == nameMap.end()) {      nameMap[name] = names; }
-                       else { m->mothurOut("[ERROR]: Your namefile already contains " + name + ", aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
-                                       
-                       m->gobble(inNames);
+        
+        itTypes = outputTypes.find("group");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
                }
-               inNames.close();
+
                
                return 0;
-
        }
        catch(exception& e) {
-               m->errorOut(e, "DeUniqueSeqsCommand", "readNamesFile");
+               m->errorOut(e, "DeUniqueSeqsCommand", "execute");
                exit(1);
        }
 }
-
 /**************************************************************************************/
index a6d467a16a96c56ca3f1282d7c7fbbdfbdf02ca7..042ff498fce19a60cd51d7b1393ec2cd4074123c 100644 (file)
@@ -36,13 +36,10 @@ public:
        
 private:
 
-       string fastaFile, nameFile, outputDir;
+       string fastaFile, nameFile, outputDir, countfile;
        vector<string> outputNames;
        bool abort;
-       
-       map<string, string> nameMap;
-       
-       int readNamesFile();
+
        
 };
 
index e4e107168c5723c82b90a7f103ccb8d9ee83cb60..d6be4e4f1a16e00af29cfe36052c98e3a6e98efb 100644 (file)
@@ -123,10 +123,12 @@ bool InteractEngine::getInput(){
                                        mout->clearAllGroups();
                                        mout->Treenames.clear();
                                        mout->saveNextLabel = "";
-                                       mout->printedHeaders = false;
-                                       mout->commandInputsConvertError = false;
-                                       mout->currentBinLabels.clear();
-                                       mout->binLabelsInFile.clear();
+                    mout->commandInputsConvertError = false;
+                                       mout->printedSharedHeaders = false;
+                                       mout->currentSharedBinLabels.clear();
+                                       mout->sharedBinLabelsInFile.clear();
+                    mout->printedListHeaders = false;
+                    mout->listBinLabelsInFile.clear();
                                                        
                                        Command* command = cFactory->getCommand(commandName, options);
                                        if (mout->commandInputsConvertError) { quitCommandCalled = 2; }
@@ -309,10 +311,12 @@ bool BatchEngine::getInput(){
                                        mout->clearAllGroups();
                                        mout->Treenames.clear();
                                        mout->saveNextLabel = "";
-                                       mout->printedHeaders = false;
                                        mout->commandInputsConvertError = false;
-                                       mout->currentBinLabels.clear();
-                                       mout->binLabelsInFile.clear();
+                    mout->printedSharedHeaders = false;
+                    mout->currentSharedBinLabels.clear();
+                    mout->sharedBinLabelsInFile.clear();
+                    mout->printedListHeaders = false;
+                    mout->listBinLabelsInFile.clear();
 
                                                        
                                        Command* command = cFactory->getCommand(commandName, options);
@@ -480,10 +484,12 @@ bool ScriptEngine::getInput(){
                                        mout->clearAllGroups();
                                        mout->Treenames.clear();
                                        mout->saveNextLabel = "";
-                                       mout->printedHeaders = false;
-                                       mout->commandInputsConvertError = false;
-                                       mout->currentBinLabels.clear();
-                                       mout->binLabelsInFile.clear();
+                    mout->commandInputsConvertError = false;
+                    mout->printedSharedHeaders = false;
+                    mout->currentSharedBinLabels.clear();
+                    mout->sharedBinLabelsInFile.clear();
+                    mout->printedListHeaders = false;
+                    mout->listBinLabelsInFile.clear();
 
                                        Command* command = cFactory->getCommand(commandName, options);
                                        if (mout->commandInputsConvertError) { quitCommandCalled = 2; }
index 5b5fcb1a5ed36578ebb70b3e8f3ac1048ec1ad4f..a2510a697737c5596a47cf5a0f36a950959c2b04 100644 (file)
@@ -325,7 +325,7 @@ int FilterSharedCommand::processShared(vector<SharedRAbundVector*>& thislookup)
        try {
                
                //save mothurOut's binLabels to restore for next label
-               vector<string> saveBinLabels = m->currentBinLabels;
+               vector<string> saveBinLabels = m->currentSharedBinLabels;
                
         map<string, string> variables; 
         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
@@ -474,7 +474,7 @@ int FilterSharedCommand::processShared(vector<SharedRAbundVector*>& thislookup)
                m->openOutputFile(outputFileName, out);
                outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
                
-        m->currentBinLabels = filteredLabels;
+        m->currentSharedBinLabels = filteredLabels;
         
                filteredLookup[0]->printHeaders(out);
                
@@ -486,7 +486,7 @@ int FilterSharedCommand::processShared(vector<SharedRAbundVector*>& thislookup)
         
         
         //save mothurOut's binLabels to restore for next label
-               m->currentBinLabels = saveBinLabels;
+               m->currentSharedBinLabels = saveBinLabels;
         
         for (int j = 0; j < filteredLookup.size(); j++) { delete filteredLookup[j]; }
                
index 5fbb78131ed3ceda425516bdcd2766f3c1eb196f..2a81aceaec825d29de3a2373d461466fb28aa93c 100644 (file)
@@ -352,14 +352,14 @@ int GetCoreMicroBiomeCommand::createTable(vector<SharedRAbundFloatVector*>& look
                 for (int k = 0; k < counts[j]; k++) { table[j][k]++; }
                 
                 if ((abund == -1) && (samples != -1)) { //we want all OTUs with this number of samples
-                    if (counts[j] >= samples) { otuNames[j].push_back(m->currentBinLabels[i]); }
+                    if (counts[j] >= samples) { otuNames[j].push_back(m->currentSharedBinLabels[i]); }
                 }else if ((abund != -1) && (samples == -1)) { //we want all OTUs with this relabund
                     if (j == abund) {  
-                        for (int k = 0; k < counts[j]; k++) {  otuNames[k+1].push_back(m->currentBinLabels[i]); }
+                        for (int k = 0; k < counts[j]; k++) {  otuNames[k+1].push_back(m->currentSharedBinLabels[i]); }
                     }
                 }else if ((abund != -1) && (samples != -1)) { //we want only OTUs with this relabund for this number of samples
                     if ((j == abund) && (counts[j] >= samples)) {  
-                        otuNames[j].push_back(m->currentBinLabels[i]); 
+                        otuNames[j].push_back(m->currentSharedBinLabels[i]); 
                     }
                 }
             }
index 15dcbba802d5e850fccbb7bb71b05dba6af38b74..bb15a3f5a21e3c7785283cfc5d6eade2c1bda6ed 100644 (file)
@@ -68,7 +68,7 @@ string GetGroupsCommand::getOutputPattern(string type) {
         else if (type == "name")        {   pattern = "[filename],pick,[extension]";    }
         else if (type == "group")       {   pattern = "[filename],pick,[extension]";    }
         else if (type == "count")       {   pattern = "[filename],pick,[extension]";    }
-        else if (type == "list")        {   pattern = "[filename],pick,[extension]";    }
+        else if (type == "list")        {   pattern = "[filename],[tag],pick,[extension]";    }
         else if (type == "shared")      {   pattern = "[filename],[tag],pick,[extension]";    }
         else if (type == "design")      {   pattern = "[filename],pick,[extension]";    }
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
@@ -585,10 +585,6 @@ int GetGroupsCommand::readList(){
         map<string, string> variables; 
         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
         variables["[extension]"] = m->getExtension(listfile);
-               string outputFileName = getOutputFileName("list", variables);
-               
-               ofstream out;
-               m->openOutputFile(outputFileName, out);
                
                ifstream in;
                m->openInputFile(listfile, in);
@@ -599,9 +595,19 @@ int GetGroupsCommand::readList(){
                while(!in.eof()){
                        
                        selectedCount = 0;
-                       
+
                        //read in list vector
                        ListVector list(in);
+            
+            variables["[tag]"] = list.getLabel();
+            string outputFileName = getOutputFileName("list", variables);
+                       
+                       ofstream out;
+                       m->openOutputFile(outputFileName, out);
+                       outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+            
+            vector<string> binLabels = list.getLabels();
+            vector<string> newBinLabels;
                        
                        //make a new list vector
                        ListVector newList;
@@ -613,13 +619,14 @@ int GetGroupsCommand::readList(){
                                
                                //parse out names that are in accnos file
                                string binnames = list.get(i);
+                vector<string> thisBinNames;
+                m->splitAtComma(binnames, thisBinNames);
                                
                                string newNames = "";
-                               while (binnames.find_first_of(',') != -1) { 
-                                       string name = binnames.substr(0,binnames.find_first_of(','));
-                                       binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
-                                       
-                                       //if that name is in the .accnos file, add it
+                for (int j = 0; j < thisBinNames.size(); j++) {
+                    string name = thisBinNames[j];
+                    
+                    //if that name is in the .accnos file, add it
                                        if (names.count(name) != 0) {  newNames += name + ",";  selectedCount++;  }
                                        else{
                                                //if you are not in the accnos file check if you are a name that needs to be changed
@@ -629,39 +636,30 @@ int GetGroupsCommand::readList(){
                                                        selectedCount++;
                                                }
                                        }
-                               }
-                               
-                               //get last name
-                               if (names.count(binnames) != 0) {  newNames += binnames + ",";  selectedCount++;  }
-                               else{
-                                       //if you are not in the accnos file check if you are a name that needs to be changed
-                                       map<string, string>::iterator it = uniqueToRedundant.find(binnames);
-                                       if (it != uniqueToRedundant.end()) {
-                                               newNames += it->second + ",";
-                                               selectedCount++;
-                                       }
-                               }
-                               
+                }
+                                                               
                                //if there are names in this bin add to new list
                                if (newNames != "") {  
                                        newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
-                                       newList.push_back(newNames);    
+                                       newList.push_back(newNames);
+                    newBinLabels.push_back(binLabels[i]);
                                }
                        }
                        
                        //print new listvector
                        if (newList.getNumBins() != 0) {
                                wroteSomething = true;
+                newList.setLabels(newBinLabels);
+                newList.printHeaders(out);
                                newList.print(out);
                        }
                        
                        m->gobble(in);
+            out.close();
                }
-               in.close();     
-               out.close();
+               in.close();
                
                if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine();  }
-               outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
                
                m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
                
index 862aef7a0da6dde200bc387209bdd73d730015f5..ec5ef61233d7cfa24ef1b9b9d7debf355a5820ba 100644 (file)
@@ -75,7 +75,7 @@ string GetLineageCommand::getOutputPattern(string type) {
         else if (type == "name")            {   pattern = "[filename],pick,[extension]";    }
         else if (type == "group")           {   pattern = "[filename],pick,[extension]";    }
         else if (type == "count")           {   pattern = "[filename],pick,[extension]";    }
-        else if (type == "list")            {   pattern = "[filename],pick,[extension]-[filename],[distance],pick,[extension]";    }
+        else if (type == "list")            {   pattern = "[filename],[distance],pick,[extension]";    }
         else if (type == "shared")          {   pattern = "[filename],[distance],pick,[extension]";    }
         else if (type == "alignreport")     {   pattern = "[filename],pick.align.report";    }
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
@@ -537,18 +537,13 @@ int GetLineageCommand::readList(){
                map<string, string> variables; 
         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
         variables["[extension]"] = m->getExtension(listfile);
-               string outputFileName = getOutputFileName("list", variables);
-               ofstream out;
-               m->openOutputFile(outputFileName, out);
-               
+                               
                ifstream in;
                m->openInputFile(listfile, in);
                
                bool wroteSomething = false;
                
                while(!in.eof()){
-                       
-                       if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
 
                        //read in list vector
                        ListVector list(in);
@@ -556,6 +551,18 @@ int GetLineageCommand::readList(){
                        //make a new list vector
                        ListVector newList;
                        newList.setLabel(list.getLabel());
+            
+            variables["[distance]"] = list.getLabel();
+            string outputFileName = getOutputFileName("list", variables);
+                       
+                       ofstream out;
+                       m->openOutputFile(outputFileName, out);
+                       outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+            
+            if (m->control_pressed) { in.close(); out.close(); return 0; }
+            
+            vector<string> binLabels = list.getLabels();
+            vector<string> newBinLabels;
                        
                        //for each bin
                        for (int i = 0; i < list.getNumBins(); i++) {
@@ -576,23 +583,26 @@ int GetLineageCommand::readList(){
                                //if there are names in this bin add to new list
                                if (newNames != "") { 
                                        newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
-                                       newList.push_back(newNames);    
+                                       newList.push_back(newNames);
+                    newBinLabels.push_back(binLabels[i]);
                                }
                        }
                                
                        //print new listvector
                        if (newList.getNumBins() != 0) {
                                wroteSomething = true;
+                               newList.setLabels(newBinLabels);
+                newList.printHeaders(out);
                                newList.print(out);
                        }
                        
                        m->gobble(in);
+            out.close();
                }
                in.close();     
-               out.close();
+               
                
                if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
-               outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
                
                return 0;
 
@@ -615,6 +625,8 @@ int GetLineageCommand::readConsList(){
         bool wroteSomething = false;
         string snumBins = toString(list->getNumBins());
         
+        vector<string> binLabels = list->getLabels();
+        vector<string> newBinLabels;
         for (int i = 0; i < list->getNumBins(); i++) {
             
             if (m->control_pressed) { delete list; return 0;}
@@ -631,6 +643,7 @@ int GetLineageCommand::readConsList(){
             if (names.count(m->getSimpleLabel(otuLabel)) != 0) {
                                selectedCount++;
                 newList.push_back(list->get(i));
+                newBinLabels.push_back(binLabels[i]);
             }
         }
         
@@ -648,6 +661,8 @@ int GetLineageCommand::readConsList(){
         //print new listvector
         if (newList.getNumBins() != 0) {
             wroteSomething = true;
+            newList.setLabels(newBinLabels);
+            newList.printHeaders(out);
             newList.print(out);
         }
                out.close();
@@ -767,9 +782,9 @@ int GetLineageCommand::readShared(){
             if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; }
             
             //is this otu on the list
-            if (names.count(m->getSimpleLabel(m->currentBinLabels[i])) != 0) {
+            if (names.count(m->getSimpleLabel(m->currentSharedBinLabels[i])) != 0) {
                 numSelected++; wroteSomething = true;
-                newLabels.push_back(m->currentBinLabels[i]);
+                newLabels.push_back(m->currentSharedBinLabels[i]);
                 for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup
                     newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup());
                 }
@@ -789,7 +804,7 @@ int GetLineageCommand::readShared(){
         
                for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
         
-        m->currentBinLabels = newLabels;
+        m->currentSharedBinLabels = newLabels;
         
                newLookup[0]->printHeaders(out);
                
index a4d7b83397f83bf6ae3dd21627566252035b328f..2f194a8068555fb9f962e725bd9eec977d2fa174 100644 (file)
@@ -260,19 +260,20 @@ void GetListCountCommand::process(ListVector* list) {
                m->mothurOut(list->getLabel()); m->mothurOutEndLine();
                
                //for each bin in the list vector
+        vector<string> binLabels = list->getLabels();
                for (int i = 0; i < list->getNumBins(); i++) {
                        if (m->control_pressed) { break; }
                        
                        binnames = list->get(i);
                        
                        if (sort == "otu") {
-                               out << i+1 << '\t' << binnames << endl;
+                               out << binLabels[i] << '\t' << binnames << endl;
                        }else{ //sort = name
                                vector<string> names;
                                m->splitAtComma(binnames, names);
                                
                                for (int j = 0; j < names.size(); j++) {
-                                       out << names[j] << '\t' << i+1 << endl;
+                                       out << names[j] << '\t' << binLabels[i] << endl;
                                }
                        }
                }
index 8f78ca2a45fce56f6ef8996c880b48677ae32357..08cd35a9470406ba0d4827b7e3b10a94f7de94ad 100644 (file)
@@ -577,7 +577,7 @@ int GetMetaCommunityCommand::processDriver(vector<SharedRAbundVector*>& thislook
             outputNames.push_back(matrixName); outputTypes["matrix"].push_back(matrixName);
             
             findQ.printZMatrix(matrixName, thisGroups);
-            findQ.printRelAbund(relabund, m->currentBinLabels);
+            findQ.printRelAbund(relabund, m->currentSharedBinLabels);
             
             if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){
                 string tempDoneFile = m->getRootName(m->getSimpleName(sharedfile)) + toString(processID) + ".done.temp";
index d7b163e93f1003fdb0dcd73bdb5984de2eee208b..7697430cca6164b695ed92526dd190926ec1eb5e 100644 (file)
@@ -146,7 +146,7 @@ static DWORD WINAPI MyMetaCommunityThreadFunction(LPVOID lpParam){
             pDataArray->outputNames.push_back(pDataArray->matrix[i]);
             
             findQ->printZMatrix(pDataArray->matrix[i], pDataArray->m->getGroups());
-            findQ->printRelAbund(pDataArray->relabunds[i], pDataArray->m->currentBinLabels);
+            findQ->printRelAbund(pDataArray->relabunds[i], pDataArray->m->currentSharedBinLabels);
             
             if(pDataArray->optimizegap != -1 && (numPartitions - pDataArray->minPartition) >= pDataArray->optimizegap && numPartitions >= pDataArray->minpartitions){ break; }
             
index 00297e2d8049be362a991589446544b6c1cf99fc..a1413d244171ea6fa6d68c58969cce4afa86d1b6 100644 (file)
@@ -465,9 +465,9 @@ int GetOtuLabelsCommand::readShared(){
             if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; }
             
             //is this otu on the list
-            if (labels.count(m->getSimpleLabel(m->currentBinLabels[i])) != 0) {
+            if (labels.count(m->getSimpleLabel(m->currentSharedBinLabels[i])) != 0) {
                 numSelected++; wroteSomething = true;
-                newLabels.push_back(m->currentBinLabels[i]);
+                newLabels.push_back(m->currentSharedBinLabels[i]);
                 for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup
                     newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup());
                 }
@@ -487,7 +487,7 @@ int GetOtuLabelsCommand::readShared(){
         
                for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
         
-        m->currentBinLabels = newLabels;
+        m->currentSharedBinLabels = newLabels;
         
                newLookup[0]->printHeaders(out);
                
@@ -521,24 +521,17 @@ int GetOtuLabelsCommand::readList(){
         newList.setLabel(list->getLabel());
         int selectedCount = 0;
         bool wroteSomething = false;
-        string snumBins = toString(list->getNumBins());
         
+        vector<string> binLabels = list->getLabels();
+        vector<string> newLabels;
         for (int i = 0; i < list->getNumBins(); i++) {
             
             if (m->control_pressed) { delete list; return 0;}
             
-            //create a label for this otu
-            string otuLabel = "Otu";
-            string sbinNumber = toString(i+1);
-            if (sbinNumber.length() < snumBins.length()) { 
-                int diff = snumBins.length() - sbinNumber.length();
-                for (int h = 0; h < diff; h++) { otuLabel += "0"; }
-            }
-            otuLabel += sbinNumber; 
-            
-            if (labels.count(m->getSimpleLabel(otuLabel)) != 0) {
+            if (labels.count(m->getSimpleLabel(binLabels[i])) != 0) {
                                selectedCount++;
                 newList.push_back(list->get(i));
+                newLabels.push_back(binLabels[i]);
             }
         }
         
@@ -556,6 +549,8 @@ int GetOtuLabelsCommand::readList(){
         //print new listvector
         if (newList.getNumBins() != 0) {
             wroteSomething = true;
+            newList.setLabels(newLabels);
+            newList.printHeaders(out);
             newList.print(out);
         }
                out.close();
index 7e71a1a836b2e6306b40d67f5cdce0d7f35dbcdc..61092372b18ae196a652d0b0f66c6b04d22d0266 100644 (file)
@@ -23,7 +23,7 @@ inline bool compareName(repStruct left, repStruct right){
 //********************************************************************************************************************
 //sorts lowest to highest
 inline bool compareBin(repStruct left, repStruct right){
-       return (left.bin < right.bin);  
+       return (left.simpleBin < right.simpleBin);
 }
 //********************************************************************************************************************
 //sorts lowest to highest
@@ -979,6 +979,7 @@ int GetOTURepCommand::process(ListVector* processList) {
                }
                
                //for each bin in the list vector
+        vector<string> binLabels = processList->getLabels();
                for (int i = 0; i < processList->size(); i++) {
                        if (m->control_pressed) { 
                                out.close();  
@@ -999,7 +1000,7 @@ int GetOTURepCommand::process(ListVector* processList) {
                        
                        if (Groups.size() == 0) {
                                nameRep = findRep(namesInBin, "");
-                               newNamesOutput << i << '\t' << nameRep << '\t';
+                               newNamesOutput << binLabels[i] << '\t' << nameRep << '\t';
                 
                 //put rep at first position in names line
                 string outputString = nameRep + ",";
@@ -1042,7 +1043,7 @@ int GetOTURepCommand::process(ListVector* processList) {
                                                nameRep = findRep(NamesInGroup[Groups[j]], Groups[j]);
                                                
                                                //output group rep and other members of this group
-                                               (*(filehandles[Groups[j]])) << i << '\t' << nameRep << '\t';
+                                               (*(filehandles[Groups[j]])) << binLabels[i] << '\t' << nameRep << '\t';
                                                
                         //put rep at first position in names line
                         string outputString = nameRep + ",";
@@ -1100,7 +1101,6 @@ int GetOTURepCommand::processFastaNames(string filename, string label, FastaMap*
                ifstream in;
                m->openInputFile(filename, in);
                
-               int i = 0;
         string tempGroup = "";
         in >> tempGroup; m->gobble(in);
         
@@ -1112,8 +1112,8 @@ int GetOTURepCommand::processFastaNames(string filename, string label, FastaMap*
     
         int thistotal = 0;
                while (!in.eof()) {
-                       string rep, binnames;
-                       in >> i >> rep >> binnames; m->gobble(in);
+                       string rep, binnames, binLabel;
+                       in >> binLabel >> rep >> binnames; m->gobble(in);
                        
                        vector<string> names;
                        m->splitAtComma(binnames, names);
@@ -1178,7 +1178,7 @@ int GetOTURepCommand::processFastaNames(string filename, string label, FastaMap*
 
                        if (sequence != "not found") {
                                if (sorted == "") { //print them out
-                                       rep = rep + "\t" + toString(i+1);
+                                       rep = rep + "\t" + binLabel;
                                        rep = rep + "|" + toString(binsize);
                                        if (group != "") {
                                                rep = rep + "|" + group;
@@ -1186,7 +1186,9 @@ int GetOTURepCommand::processFastaNames(string filename, string label, FastaMap*
                                        out << ">" << rep << endl;
                                        out << sequence << endl;
                                }else { //save them
-                                       repStruct newRep(rep, i+1, binsize, group);
+                    int simpleLabel;
+                    m->mothurConvert(m->getSimpleLabel(binLabel), simpleLabel);
+                                       repStruct newRep(rep, binLabel, simpleLabel, binsize, group);
                                        reps.push_back(newRep);
                                }
                        }else { 
@@ -1204,7 +1206,7 @@ int GetOTURepCommand::processFastaNames(string filename, string label, FastaMap*
                        //print them
                        for (int i = 0; i < reps.size(); i++) {
                                string sequence = fasta->getSequence(reps[i].name);
-                               string outputName = reps[i].name + "\t" + toString(reps[i].bin);
+                               string outputName = reps[i].name + "\t" + reps[i].bin;
                                outputName = outputName + "|" + toString(reps[i].size);
                                if (reps[i].group != "") {
                                        outputName = outputName + "|" + reps[i].group;
@@ -1245,7 +1247,6 @@ int GetOTURepCommand::processNames(string filename, string label) {
                ifstream in;
                m->openInputFile(filename, in);
                
-               int i = 0;
                string rep, binnames;
         
         string tempGroup = "";
@@ -1259,7 +1260,8 @@ int GetOTURepCommand::processNames(string filename, string label) {
         
                while (!in.eof()) {
                        if (m->control_pressed) { break; }
-                       in >> i >> rep >> binnames; m->gobble(in);
+            string binLabel;
+                       in >> binLabel >> rep >> binnames; m->gobble(in);
             
                        if (countfile == "") { out2 << rep << '\t' << binnames << endl; }
             else {
index ca3439d809f96f6eff93dcf3948abd1a8274a7f5..daf5bc16cbd3640093502d315af1f35008300ef0 100644 (file)
@@ -24,12 +24,13 @@ typedef map<int, float> SeqMap;
 
 struct repStruct {
                string name;
-               int     bin;
+               string bin;
+        int simpleBin;
                int size;
                string group;
                
                repStruct(){}
-               repStruct(string n, int b, int s, string g) : name(n), bin(b), size(s), group(g) {}
+               repStruct(string n, string b, int sb, int s, string g) : name(n), bin(b), size(s), group(g), simpleBin(sb) { }
                ~repStruct() {}
 };
 
index 335ebc4506ca99ac3a0e3dc9a2d068b0f1c41cd7..3ca7f5d575db0a7e3a065ebcf371128698d5056b 100644 (file)
@@ -244,9 +244,16 @@ int GetOtusCommand::execute(){
 //**********************************************************************************************************************
 int GetOtusCommand::readListGroup(){
        try {
-               string thisOutputDir = outputDir;
+               InputData* input = new InputData(listfile, "list");
+               ListVector* list = input->getListVector();
+               string lastLabel = list->getLabel();
+               
+               //using first label seen if none is provided
+               if (label == "") { label = lastLabel; }
+        
+        string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
-        map<string, string> variables; 
+        map<string, string> variables;
         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
         variables["[tag]"] = label;
         variables["[extension]"] = m->getExtension(listfile);
@@ -254,8 +261,8 @@ int GetOtusCommand::readListGroup(){
                
                ofstream out;
                m->openOutputFile(outputFileName, out);
-               
-               string GroupOutputDir = outputDir;
+        
+        string GroupOutputDir = outputDir;
                if (outputDir == "") {  GroupOutputDir += m->hasPath(groupfile);  }
         variables["[filename]"] = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile));
         variables["[extension]"] = m->getExtension(groupfile);
@@ -263,13 +270,7 @@ int GetOtusCommand::readListGroup(){
                
                ofstream outGroup;
                m->openOutputFile(outputGroupFileName, outGroup);
-                       
-               InputData* input = new InputData(listfile, "list");
-               ListVector* list = input->getListVector();
-               string lastLabel = list->getLabel();
-               
-               //using first label seen if none is provided
-               if (label == "") { label = lastLabel; }
+
                
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> labels; labels.insert(label);
@@ -366,6 +367,8 @@ int GetOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream
                
                int numOtus = 0;
                //for each bin
+        vector<string> binLabels = list->getLabels();
+        vector<string> newBinLabels;
                for (int i = 0; i < list->getNumBins(); i++) {
                        if (m->control_pressed) { return 0; }
                        
@@ -399,7 +402,8 @@ int GetOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream
                        
                        //if there are sequences from the groups we want in this bin add to new list, output to groupfile
                        if (keepBin) {  
-                               newList.push_back(binnames);    
+                               newList.push_back(binnames);
+                newBinLabels.push_back(binLabels[i]);
                                outGroup << groupFileOutput;
                                numOtus++;
                        }
@@ -408,7 +412,9 @@ int GetOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream
                //print new listvector
                if (newList.getNumBins() != 0) {
                        wroteSomething = true;
-                       newList.print(out);
+                       newList.setLabels(newBinLabels);
+            newList.printHeaders(out);
+            newList.print(out);
                }
                
                m->mothurOut(newList.getLabel() + " - selected " + toString(numOtus) + " of the " + toString(list->getNumBins()) + " OTUs."); m->mothurOutEndLine();
index f36f1ba23a6af6c9110dc0aac103054ceefedaff..791f5631a680fe17968eeec63762388c288ba162 100644 (file)
@@ -192,7 +192,7 @@ int GetRelAbundCommand::execute(){
                        if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
 
                                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                               if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
+                               if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
                                getRelAbundance(lookup, out);
                                
                                processedLabels.insert(lookup[0]->getLabel());
@@ -205,7 +205,7 @@ int GetRelAbundCommand::execute(){
                                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
                                lookup = input->getSharedRAbundVectors(lastLabel);
                                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                               if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
+                               if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
                                getRelAbundance(lookup, out);
                                
                                processedLabels.insert(lookup[0]->getLabel());
@@ -246,7 +246,7 @@ int GetRelAbundCommand::execute(){
                        lookup = input->getSharedRAbundVectors(lastLabel);
                        
                        m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                       if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
+                       if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
                        getRelAbundance(lookup, out);
                        
                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
index 82f9710e6e40cb84b8fee9f285fca71b3fe0099c..a1cf493f9e515f91ed962726b392426db9efd978 100644 (file)
@@ -16,6 +16,7 @@
 vector<string> GetSeqsCommand::setParameters(){        
        try {
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
+        CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "FNGLT", "none","fastq",false,false,true); parameters.push_back(pfastq);
         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
                CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup);
@@ -42,9 +43,9 @@ vector<string> GetSeqsCommand::setParameters(){
 string GetSeqsCommand::getHelpString(){        
        try {
                string helpString = "";
-               helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n";
+               helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality, fastq or alignreport file.\n";
                helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
-               helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
+               helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport, fastq and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
                helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=true. \n";
                helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
                helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
@@ -64,6 +65,7 @@ GetSeqsCommand::GetSeqsCommand(){
                setParameters();
                vector<string> tempOutNames;
                outputTypes["fasta"] = tempOutNames;
+        outputTypes["fastq"] = tempOutNames;
                outputTypes["taxonomy"] = tempOutNames;
                outputTypes["name"] = tempOutNames;
                outputTypes["group"] = tempOutNames;
@@ -84,11 +86,12 @@ string GetSeqsCommand::getOutputPattern(string type) {
         string pattern = "";
         
         if (type == "fasta")            {   pattern = "[filename],pick,[extension]";    }
+        else if (type == "fastq")       {   pattern = "[filename],pick,[extension]";    }
         else if (type == "taxonomy")    {   pattern = "[filename],pick,[extension]";    }
         else if (type == "name")        {   pattern = "[filename],pick,[extension]";    }
         else if (type == "group")       {   pattern = "[filename],pick,[extension]";    }
         else if (type == "count")       {   pattern = "[filename],pick,[extension]";    }
-        else if (type == "list")        {   pattern = "[filename],pick,[extension]";    }
+        else if (type == "list")        {   pattern = "[filename],[distance],pick,[extension]";    }
         else if (type == "qfile")       {   pattern = "[filename],pick,[extension]";    }
         else if (type == "accnosreport")      {   pattern = "[filename],pick.accnos.report";    }
         else if (type == "alignreport")      {   pattern = "[filename],pick.align.report";    }
@@ -127,6 +130,7 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                        //initialize outputTypes
                        vector<string> tempOutNames;
                        outputTypes["fasta"] = tempOutNames;
+            outputTypes["fastq"] = tempOutNames;
                        outputTypes["taxonomy"] = tempOutNames;
                        outputTypes["name"] = tempOutNames;
                        outputTypes["group"] = tempOutNames;
@@ -223,6 +227,14 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["count"] = inputDir + it->second;            }
                                }
+                
+                it = parameters.find("fastq");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
+                               }
                        }
 
                        
@@ -273,6 +285,10 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                        if (qualfile == "not open") { abort = true; }
                        else if (qualfile == "not found") {  qualfile = "";  }
                        else { m->setQualFile(qualfile); }
+            
+            fastqfile = validParameter.validFile(parameters, "fastq", true);
+                       if (fastqfile == "not open") { abort = true; }
+                       else if (fastqfile == "not found") {  fastqfile = "";  }
                        
                        accnosfile2 = validParameter.validFile(parameters, "accnos2", true);
                        if (accnosfile2 == "not open") { abort = true; }
@@ -296,7 +312,7 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                        string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "true"; usedDups = ""; }
                        dups = m->isTrue(temp);
                        
-                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
+                       if ((fastqfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, quality, fastq or listfile."); m->mothurOutEndLine(); abort = true; }
             
             if (countfile == "") {
                 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
@@ -333,6 +349,7 @@ int GetSeqsCommand::execute(){
                //read through the correct file and output lines you want to keep
                if (namefile != "")                     {               readName();                     }
                if (fastafile != "")            {               readFasta();            }
+        if (fastqfile != "")           {               readFastq();            }
                if (groupfile != "")            {               readGroup();            }
         if (countfile != "")           {               readCount();            }
                if (alignfile != "")            {               readAlign();            }
@@ -398,6 +415,71 @@ int GetSeqsCommand::execute(){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+int GetSeqsCommand::readFastq(){ // copies the fastq records whose name is in 'names' into the "fastq" output file; returns 0 on every non-exception path
+       try {
+               bool wroteSomething = false; // becomes true once at least one record has been written
+               int selectedCount = 0; // number of records written, reported at the end
+        
+               ifstream in;
+               m->openInputFile(fastqfile, in);
+               
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(fastqfile);  } // no outputdir given: prefix with whatever hasPath returns for the input file
+               map<string, string> variables;
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastqfile));
+        variables["[extension]"] = m->getExtension(fastqfile);
+               string outputFileName = getOutputFileName("fastq", variables); // name is built from the [filename] and [extension] variables set above
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+
+               
+               while(!in.eof()){
+                       
+                       if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; } // interrupted: close both streams, hand the partial output to mothurRemove and stop
+                       
+                       //read the next line; only a line starting with '@' is treated as a record header
+                       string input = m->getline(in); m->gobble(in);
+                       
+            string outputString = input + "\n"; // accumulates the text of the record as it will be written
+            
+                       if (input[0] == '@') { // any other line is dropped and the loop moves on
+                //get the next three lines, which are appended to the header to form the record
+                outputString += m->getline(in) + "\n"; m->gobble(in);
+                outputString += m->getline(in) + "\n"; m->gobble(in);
+                outputString += m->getline(in) + "\n"; m->gobble(in);
+                
+                vector<string> splits = m->splitWhiteSpace(input); // NOTE(review): assumes at least one token comes back for a line starting with '@' - confirm
+                string name = splits[0]; // the name is the first whitespace-delimited token of the header line
+                name = name.substr(1); // drop the leading '@'
+                m->checkName(name); // NOTE(review): presumably normalizes the name in place before the lookup - confirm
+                
+                if (names.count(name) != 0) { // keep the record only if its name is in 'names'
+                                       wroteSomething = true;
+                    selectedCount++;
+                    out << outputString;
+                }
+            }
+            
+                       m->gobble(in);
+               }
+               in.close();
+               out.close();
+               
+               
+               if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
+               outputNames.push_back(outputFileName);  outputTypes["fastq"].push_back(outputFileName); // the output file is registered even when nothing was written to it
+               
+               m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fastq file."); m->mothurOutEndLine();
+               
+               return 0;
+        
+       }
+       catch(exception& e) { // any std::exception is reported through errorOut and then the process exits with status 1
+               m->errorOut(e, "GetSeqsCommand", "readFastq");
+               exit(1);
+       }
+}
 
 //**********************************************************************************************************************
 int GetSeqsCommand::readFasta(){
@@ -614,9 +696,6 @@ int GetSeqsCommand::readList(){
         map<string, string> variables; 
                variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
         variables["[extension]"] = m->getExtension(listfile);
-               string outputFileName = getOutputFileName("list", variables);
-               ofstream out;
-               m->openOutputFile(outputFileName, out);
                
                ifstream in;
                m->openInputFile(listfile, in);
@@ -629,8 +708,6 @@ int GetSeqsCommand::readList(){
                while(!in.eof()){
                        
                        selectedCount = 0;
-                       
-                       if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
 
                        //read in list vector
                        ListVector list(in);
@@ -638,6 +715,18 @@ int GetSeqsCommand::readList(){
                        //make a new list vector
                        ListVector newList;
                        newList.setLabel(list.getLabel());
+            
+            variables["[distance]"] = list.getLabel();
+            string outputFileName = getOutputFileName("list", variables);
+                       
+                       ofstream out;
+                       m->openOutputFile(outputFileName, out);
+                       outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+            
+            vector<string> binLabels = list.getLabels();
+            vector<string> newBinLabels;
+            
+            if (m->control_pressed) { in.close(); out.close();  return 0; }
                        
                        //for each bin
                        for (int i = 0; i < list.getNumBins(); i++) {
@@ -648,8 +737,8 @@ int GetSeqsCommand::readList(){
                 m->splitAtComma(binnames, bnames);
                                
                                string newNames = "";
-                for (int i = 0; i < bnames.size(); i++) {
-                                       string name = bnames[i];
+                for (int j = 0; j < bnames.size(); j++) {
+                                       string name = bnames[j];
                                        //if that name is in the .accnos file, add it
                                        if (names.count(name) != 0) {  newNames += name + ",";  selectedCount++; if (m->debug) { sanity["list"].insert(name); } }
                                }
@@ -657,23 +746,26 @@ int GetSeqsCommand::readList(){
                                //if there are names in this bin add to new list
                                if (newNames != "") { 
                                        newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
-                                       newList.push_back(newNames);    
+                                       newList.push_back(newNames);
+                    newBinLabels.push_back(binLabels[i]);
                                }
                        }
                                
                        //print new listvector
                        if (newList.getNumBins() != 0) {
                                wroteSomething = true;
+                               newList.setLabels(newBinLabels);
+                newList.printHeaders(out);
                                newList.print(out);
                        }
                        
                        m->gobble(in);
+            out.close();
                }
                in.close();     
-               out.close();
+               
                
                if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
-               outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
                
                m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
                
index c5b6ca4141afc00c12b646ea34604b898b1c4131..9895432b7c25d6016bbf6b38ced773487aeb0201 100644 (file)
@@ -36,13 +36,14 @@ class GetSeqsCommand : public Command {
        private:
                set<string> names;
                vector<string> outputNames;
-               string accnosfile, accnosfile2, fastafile, namefile, countfile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
+               string accnosfile, accnosfile2, fastafile, fastqfile, namefile, countfile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
                bool abort, dups;
         map<string, string> uniqueMap;
         //for debug
         map<string, set<string> > sanity; //maps file type to names chosen for file. something like "fasta" -> vector<string>. If running in debug mode this is filled and we check to make sure all the files have the same names. If they don't we output the differences for the user.
                
                int readFasta();
+        int readFastq();
                int readName();
                int readGroup();
         int readCount();
index 1074d323e9aaeae08b69a2d546fdb8cae544d330..9302d1d21dfc52dbe295754e38aa86de55e0a4cc 100644 (file)
@@ -431,6 +431,7 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                int num = 0;
                                
                //go through each bin, find out if shared
+        vector<string> binLabels = shared->getLabels();
                for (int i = 0; i < shared->getNumBins(); i++) {
                        if (m->control_pressed) { outNames.close(); m->mothurRemove(outputFileNames); return 0; }
                        
@@ -452,7 +453,7 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                                //find group
                                string seqGroup = groupMap->getGroup(name);
                                if (output != "accnos") {
-                                       namesOfSeqsInThisBin.push_back((name + "|" + seqGroup + "|" + toString(i+1)));
+                                       namesOfSeqsInThisBin.push_back((name + "|" + seqGroup + "|" + binLabels[i]));
                                }else {  namesOfSeqsInThisBin.push_back(name);  }
                                
                                if (seqGroup == "not found") { m->mothurOut(name + " is not in your groupfile. Please correct."); m->mothurOutEndLine(); exit(1);  }
@@ -691,7 +692,7 @@ int GetSharedOTUCommand::process(vector<SharedRAbundVector*>& lookup) {
                        
                        for(int j = 0; j < lookup.size(); j++) {
                                string seqGroup = lookup[j]->getGroup();
-                string name = m->currentBinLabels[i];
+                string name = m->currentSharedBinLabels[i];
                                
                 if (lookup[j]->getAbundance(i) != 0) {
                     if (output != "accnos") {
index b991ccd6289b9afd44628c12bf451eb86529fa44..06d7260462878fc134268dba18ff0692515c80f8 100644 (file)
@@ -315,7 +315,9 @@ int HClusterCommand::execute(){
                }else{
                        m->mothurOut("Error: no list vector!"); m->mothurOutEndLine(); return 0;
                }
-               
+        
+        list->printHeaders(listFile);
+        
                float previousDist = 0.00000;
                float rndPreviousDist = 0.00000;
                oldRAbund = *rabund;
index 367342c7167451402c5879a1f6efd1f83e7a48a4..514c7af15312b60d4787bb0aa659dbd9f1ccaf08 100644 (file)
@@ -119,7 +119,7 @@ string HeatMap::getPic(vector<SharedRAbundVector*> lookup) {
                }
                
                //sort lookup so shared bins are on top
-        vector<string> sortedLabels = m->currentBinLabels;
+        vector<string> sortedLabels = m->currentSharedBinLabels;
                if (sorted != "none") {  sortedLabels = sortSharedVectors(lookup);  }
                
                vector<vector<string> > scaleRelAbund;
@@ -221,7 +221,7 @@ vector<string> HeatMap::sortSharedVectors(vector<SharedRAbundVector*>& lookup){
                map<int, int> place; //spot in lookup where you insert shared by, ie, 3 -> 2 if they are shared by 3 inset into location 2.
                map<int, int>::iterator it;
         
-        vector<string> sortedLabels = m->currentBinLabels;
+        vector<string> sortedLabels = m->currentSharedBinLabels;
                
                /****************** find order of otus **********************/
                if (sorted == "shared") {
@@ -254,7 +254,7 @@ vector<string> HeatMap::sortSharedVectors(vector<SharedRAbundVector*>& lookup){
                                int newAbund = looktemp[j]->getAbundance(i);                                                                                            // 1 -> 3
                                lookup[j]->set(place[i], newAbund, looktemp[j]->getGroup()); //binNumber, abundance, group
                        }
-            sortedLabels[place[i]] = m->currentBinLabels[i];
+            sortedLabels[place[i]] = m->currentSharedBinLabels[i];
                }
                
                //delete looktemp -- Sarah look at - this is causing segmentation faults
@@ -430,7 +430,7 @@ string HeatMap::getPic(vector<SharedRAbundFloatVector*> lookup) {
                }
                
                //sort lookup so shared bins are on top
-               vector<string> sortedLabels = m->currentBinLabels;
+               vector<string> sortedLabels = m->currentSharedBinLabels;
                if (sorted != "none") {  sortedLabels = sortSharedVectors(lookup);  }
                
                vector<vector<string> > scaleRelAbund;
@@ -532,7 +532,7 @@ vector<string> HeatMap::sortSharedVectors(vector<SharedRAbundFloatVector*>& look
                map<int, int> place; //spot in lookup where you insert shared by, ie, 3 -> 2 if they are shared by 3 inset into location 2.
                map<int, int>::iterator it;
         
-        vector<string> sortedLabels = m->currentBinLabels;
+        vector<string> sortedLabels = m->currentSharedBinLabels;
                
                /****************** find order of otus **********************/
                if (sorted == "shared") {
@@ -564,7 +564,7 @@ vector<string> HeatMap::sortSharedVectors(vector<SharedRAbundFloatVector*>& look
                        for (int j = 0; j < looktemp.size(); j++) {                                                                                                             // 3 -> 2
                                float newAbund = looktemp[j]->getAbundance(i);                                                                                          // 1 -> 3
                                lookup[j]->set(place[i], newAbund, looktemp[j]->getGroup()); //binNumber, abundance, group
-                sortedLabels[place[i]] = m->currentBinLabels[i];
+                sortedLabels[place[i]] = m->currentSharedBinLabels[i];
                        }
                }
                
index fd818acb44c4c5280c6a54971d3d39413307861d..ab6e67027b1fe85ff97e2e7d40c1b6f9ff3ff1bc 100644 (file)
@@ -490,17 +490,17 @@ int IndicatorCommand::GetIndicatorSpecies(){
                                
                        if (m->control_pressed) { out.close(); return 0; }
                        
-                       out << m->currentBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t'; 
+                       out << m->currentSharedBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t';
                        
                        if (pValues[j] > (1/(float)iters)) { out << pValues[j] << endl; } 
                        else { out << "<" << (1/(float)iters) << endl; }
                        
                        if (pValues[j] <= 0.05) {
-                               cout << m->currentBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j]  << '\t';
+                               cout << m->currentSharedBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j]  << '\t';
                                string pValueString = "<" + toString((1/(float)iters)); 
                                if (pValues[j] > (1/(float)iters)) { pValueString = toString(pValues[j]); cout << pValues[j];} 
                                else { cout << "<" << (1/(float)iters); }
-                               m->mothurOutJustToLog(m->currentBinLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString); 
+                               m->mothurOutJustToLog(m->currentSharedBinLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString);
                                m->mothurOutEndLine(); 
                        }
                }
@@ -538,7 +538,7 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
                
                //print headings
                out << "TreeNode\t";
-               for (int i = 0; i < numBins; i++) { out << m->currentBinLabels[i] << "_IndGroups" << '\t' << m->currentBinLabels[i] << "_IndValue" << '\t' << "pValue" << '\t'; }
+               for (int i = 0; i < numBins; i++) { out << m->currentSharedBinLabels[i] << "_IndGroups" << '\t' << m->currentSharedBinLabels[i] << "_IndValue" << '\t' << "pValue" << '\t'; }
                out << endl;
                
                m->mothurOutEndLine(); m->mothurOut("Node\tSpecies\tIndicator_Groups\tIndicatorValue\tpValue\n");
@@ -697,11 +697,11 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
                                }
                                
                                if (pValues[j] <= 0.05) {
-                                       cout << i+1 << '\t' << m->currentBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j]  << '\t';
+                                       cout << i+1 << '\t' << m->currentSharedBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j]  << '\t';
                                        string pValueString = "<" + toString((1/(float)iters)); 
                                        if (pValues[j] > (1/(float)iters)) { pValueString = toString(pValues[j]); cout << pValues[j];} 
                                        else { cout << "<" << (1/(float)iters); }
-                                       m->mothurOutJustToLog(toString(i) + "\t" + m->currentBinLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString); 
+                                       m->mothurOutJustToLog(toString(i) + "\t" + m->currentSharedBinLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString);
                                        m->mothurOutEndLine(); 
                                }
                        }
index 777444bc09749c6aaa7240448e8d777da8c14317..dc297e11d3ed61202a8e4bcb001686ea9475e52c 100644 (file)
@@ -307,7 +307,7 @@ int KruskalWallisCommand::process(vector<SharedRAbundVector*>& lookup, DesignMap
             double H = linear.calcKruskalWallis(values, pValue);
             
             //output H and signifigance
-            out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl;
+            out << m->currentSharedBinLabels[i] << '\t' << H << '\t' << pValue << endl;
         }
         out.close();
                 
index 8d1768c2b1d87bc840de743cc5d1dbdc0801b7da..0403c910705313432fa7320150326927d03c783b 100644 (file)
@@ -1009,8 +1009,8 @@ int LefseCommand::printResults(vector< vector<double> > means, map<int, double>
             if (maxMean > logMaxMean) { logMaxMean = maxMean; }
             logMaxMean = log10(logMaxMean);
             
-            out << m->currentBinLabels[i] << '\t' << logMaxMean << '\t';
-            if (m->debug) { temp = m->currentBinLabels[i] + '\t' + toString(logMaxMean) + '\t'; }
+            out << m->currentSharedBinLabels[i] << '\t' << logMaxMean << '\t';
+            if (m->debug) { temp = m->currentSharedBinLabels[i] + '\t' + toString(logMaxMean) + '\t'; }
             
             map<int, double>::iterator it = sigLDA.find(i);
             if (it != sigLDA.end()) {
@@ -1040,7 +1040,7 @@ bool LefseCommand::printToCoutForRTesting(vector< vector<double> >& adjustedLook
         for (map<int, double>::iterator it = bins.begin(); it != bins.end(); it++) {
             if (m->control_pressed) { break; }
             
-            cout << m->currentBinLabels[it->first] << " <- c(";
+            cout << m->currentSharedBinLabels[it->first] << " <- c(";
             for (int h = 0; h < rand_s.size()-1; h++) {  cout << (adjustedLookup[count][rand_s[h]]) << ", "; }
             cout << (adjustedLookup[count][rand_s[rand_s.size()-1]]) << ")\n";
             count++;
@@ -1096,7 +1096,7 @@ bool LefseCommand::printToCoutForRTesting(vector< vector<double> >& adjustedLook
         for (map<int, double>::iterator it = bins.begin(); it != bins.end(); it++) {
             if (m->control_pressed) { break; }
             
-            tempOutput += "\"" + m->currentBinLabels[it->first] + "\"=" + m->currentBinLabels[it->first] + ",";
+            tempOutput += "\"" + m->currentSharedBinLabels[it->first] + "\"=" + m->currentSharedBinLabels[it->first] + ",";
         }
         //tempOutput = tempOutput.substr(0, tempOutput.length()-1);
         tempOutput += " class=treatments";
@@ -1109,7 +1109,7 @@ bool LefseCommand::printToCoutForRTesting(vector< vector<double> >& adjustedLook
         for (map<int, double>::iterator it = bins.begin(); it != bins.end(); it++) {
             if (m->control_pressed) { break; }
             
-            tempOutput +=  m->currentBinLabels[it->first] + "+";
+            tempOutput +=  m->currentSharedBinLabels[it->first] + "+";
         }
         tempOutput = tempOutput.substr(0, tempOutput.length()-1); //rip off extra plus sign
         tempOutput += "), data = dat, tol = 1e-10))";
@@ -1168,7 +1168,7 @@ int LefseCommand::makeShared(int numDesignLines) {
             lookup.push_back(temp);
         }
         
-        m->currentBinLabels.clear();
+        m->currentSharedBinLabels.clear();
         int count = 0;
         while (!in.eof()) {
             if (m->control_pressed) { return 0; }
@@ -1189,7 +1189,7 @@ int LefseCommand::makeShared(int numDesignLines) {
                     lookup[i-1]->push_back(value, toString(i-1));
                     //cout << pieces[i] << '\t';
                 }
-                m->currentBinLabels.push_back(toString(count));
+                m->currentSharedBinLabels.push_back(toString(count));
                 //m->currentBinLabels.push_back(pieces[0]);
                 //cout << line<< endl;
                 //cout << endl;
index 037c8225cfbc96f4b3ea3986ef16bf961a9afec1..512dd6da763241d47d727dd10efcce3a68eab2b8 100644 (file)
@@ -461,7 +461,7 @@ int ListOtuLabelsCommand::createList(vector<SharedRAbundVector*>& lookup){
                ofstream out;
                m->openOutputFile(outputFileName, out);
         
-        for (int i = 0; i < m->currentBinLabels.size(); i++) {  out << m->currentBinLabels[i] << endl;  }
+        for (int i = 0; i < m->currentSharedBinLabels.size(); i++) {  out << m->currentSharedBinLabels[i] << endl;  }
         
         out.close();
         
@@ -485,7 +485,7 @@ int ListOtuLabelsCommand::createList(vector<SharedRAbundFloatVector*>& lookup){
                ofstream out;
                m->openOutputFile(outputFileName, out);
         
-        for (int i = 0; i < m->currentBinLabels.size(); i++) {  out << m->currentBinLabels[i] << endl;  }
+        for (int i = 0; i < m->currentSharedBinLabels.size(); i++) {  out << m->currentSharedBinLabels[i] << endl;  }
         
         out.close();
         
@@ -507,20 +507,8 @@ int ListOtuLabelsCommand::createList(ListVector*& list){
                ofstream out;
                m->openOutputFile(outputFileName, out);
         
-        string snumBins = toString(list->getNumBins());
-        for (int i = 0; i < list->getNumBins(); i++) {
-            if (m->control_pressed) { break; }
-            
-            string otuLabel = "Otu";
-            string sbinNumber = toString(i+1);
-            if (sbinNumber.length() < snumBins.length()) { 
-                int diff = snumBins.length() - sbinNumber.length();
-                for (int h = 0; h < diff; h++) { otuLabel += "0"; }
-            }
-            otuLabel += sbinNumber; 
-            
-            out << otuLabel << endl;
-        }
+        vector<string> binLabels = list->getLabels();
+        for (int i = 0; i < binLabels.size(); i++) {  out << binLabels[i] << endl;  }
 
         out.close();
         
index 343d4fdc6399f8ca9a91c6cac9b31313d7b76bbb..0ead54a90cf2ef44205947baddecb39b21e6b8f7 100644 (file)
@@ -16,6 +16,7 @@
 //**********************************************************************************************************************
 vector<string> ListSeqsCommand::setParameters(){       
        try {
+        CommandParameter pfastq("fastq", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfastq);
                CommandParameter pfasta("fasta", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pfasta);
                CommandParameter pname("name", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pname);
         CommandParameter pcount("count", "InputTypes", "", "", "FNGLT", "FNGLT", "none","accnos",false,false,true); parameters.push_back(pcount);
@@ -39,8 +40,8 @@ vector<string> ListSeqsCommand::setParameters(){
 string ListSeqsCommand::getHelpString(){       
        try {
                string helpString = "";
-               helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy or alignreport file and outputs a .accnos file containing sequence names.\n";
-               helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy and alignreport.  You must provide one of these parameters.\n";
+               helpString += "The list.seqs command reads a fasta, name, group, count, list, taxonomy, fastq or alignreport file and outputs a .accnos file containing sequence names.\n";
+               helpString += "The list.seqs command parameters are fasta, name, group, count, list, taxonomy, fastq and alignreport.  You must provide one of these parameters.\n";
                helpString += "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n";
                helpString += "Example list.seqs(fasta=amazon.fasta).\n";
                helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
@@ -169,6 +170,14 @@ ListSeqsCommand::ListSeqsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["count"] = inputDir + it->second;            }
                                }
+                
+                it = parameters.find("fastq");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
+                               }
                        }
 
                        //check for required parameters
@@ -205,8 +214,12 @@ ListSeqsCommand::ListSeqsCommand(string option)  {
                        if (countfile == "not open") { abort = true; }
                        else if (countfile == "not found") {  countfile = "";  }
                        else { m->setCountTableFile(countfile); }
+            
+            fastqfile = validParameter.validFile(parameters, "fastq", true);
+                       if (fastqfile == "not open") { abort = true; }
+                       else if (fastqfile == "not found") {  fastqfile = "";  }
                        
-                       if ((countfile == "") && (fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == ""))  { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }
+                       if ((fastqfile == "") && (countfile == "") && (fastafile == "") && (namefile == "") && (listfile == "") && (groupfile == "") && (alignfile == "") && (taxfile == ""))  { m->mothurOut("You must provide a file."); m->mothurOutEndLine(); abort = true; }
                        
                        int okay = 1;
                        if (outputDir != "") { okay++; }
@@ -230,6 +243,7 @@ int ListSeqsCommand::execute(){
                
                //read functions fill names vector
                if (fastafile != "")            {       inputFileName = fastafile;      readFasta();    }
+        else if (fastqfile != "")      {       inputFileName = fastqfile;      readFastq();    }
                else if (namefile != "")        {       inputFileName = namefile;       readName();             }
                else if (groupfile != "")       {       inputFileName = groupfile;      readGroup();    }
                else if (alignfile != "")       {       inputFileName = alignfile;      readAlign();    }
@@ -285,6 +299,53 @@ int ListSeqsCommand::execute(){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+int ListSeqsCommand::readFastq(){
+       try {
+               
+               ifstream in;
+               m->openInputFile(fastqfile, in);
+               string name;
+               
+               //ofstream out;
+               //string newFastaName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "numsAdded.fasta";
+               //m->openOutputFile(newFastaName, out);
+               int count = 1;
+               //string lastName = "";
+               
+               while(!in.eof()){
+                       
+                       if (m->control_pressed) { in.close(); return 0; }
+                       
+                       //read sequence name
+                       string name = m->getline(in); m->gobble(in);
+                       
+                       if (name[0] == '@') {
+                vector<string> splits = m->splitWhiteSpace(name);
+                name = splits[0];
+                name = name.substr(1);
+                m->checkName(name);
+                names.push_back(name);
+                //get rest of lines
+                name = m->getline(in); m->gobble(in);
+                name = m->getline(in); m->gobble(in);
+                name = m->getline(in); m->gobble(in);
+            }
+                       
+                       m->gobble(in);
+                       if (m->debug) { count++; cout << "[DEBUG]: count = " + toString(count) + ", name = " + name + "\n"; }
+               }
+               in.close();
+               //out.close();
+               
+               return 0;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ListSeqsCommand", "readFastq");
+               exit(1);
+       }
+}
 
 //**********************************************************************************************************************
 int ListSeqsCommand::readFasta(){
index b8a79c06035825fd92050a1e0c610ac6b77bd122..9d320d9e9a94ad031826bc547d42a03b30e8ec30 100644 (file)
@@ -35,7 +35,7 @@ class ListSeqsCommand : public Command {
        
        private:
                vector<string> names, outputNames;
-               string fastafile, namefile, groupfile, countfile, alignfile, inputFileName, outputDir, listfile, taxfile;
+               string fastafile, namefile, groupfile, countfile, alignfile, inputFileName, outputDir, listfile, taxfile, fastqfile;
                bool abort;
                
                int readFasta();
@@ -45,7 +45,7 @@ class ListSeqsCommand : public Command {
                int readList();
                int readTax();
         int readCount();
-               
+        int readFastq();
 };
 
 #endif
index 2758c94264a1ba527e8d7c3970e0bac53254579b..90df802cfea64f2894733bcd377936b4476ac05b 100644 (file)
@@ -67,8 +67,59 @@ ListVector::ListVector(string id, vector<string> lv) : DataVector(id), data(lv){
 ListVector::ListVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) {
        try {
                int hold;
-               f >> label >> hold;
+        
+        //are we at the beginning of the file??
+               if (m->saveNextLabel == "") {
+                       f >> label;
+            
+                       //is this a list file that has headers
+                       if (label == "label") {
+                               
+                               //gets "numOtus"
+                               f >> label; m->gobble(f);
+                               
+                               //eat rest of line
+                               label = m->getline(f); m->gobble(f);
+                               
+                               //parse labels to save
+                               istringstream iStringStream(label);
+                               m->listBinLabelsInFile.clear();
+                               while(!iStringStream.eof()){
+                                       if (m->control_pressed) { break; }
+                                       string temp;
+                                       iStringStream >> temp;  m->gobble(iStringStream);
+                    
+                                       m->listBinLabelsInFile.push_back(temp);
+                               }
+                               
+                               f >> label >> hold;
+                       }else {
+                //read in first row
+                f >> hold;
+                
+                //make binlabels because we don't have any
+                string snumBins = toString(hold);
+                m->listBinLabelsInFile.clear();
+                for (int i = 0; i < hold; i++) {
+                    //if there is a bin label use it otherwise make one
+                    string binLabel = "Otu";
+                    string sbinNumber = toString(i+1);
+                    if (sbinNumber.length() < snumBins.length()) {
+                        int diff = snumBins.length() - sbinNumber.length();
+                        for (int h = 0; h < diff; h++) { binLabel += "0"; }
+                    }
+                    binLabel += sbinNumber;
+                    m->listBinLabelsInFile.push_back(binLabel);
+                }
+            }
+            m->saveNextLabel = label;
+               }else {
+            f >> label >> hold;
+            m->saveNextLabel = label;
+        }
        
+        binLabels.assign(m->listBinLabelsInFile.begin(), m->listBinLabelsInFile.begin()+hold);
+               
                data.assign(hold, "");
                string inputData = "";
        
@@ -77,6 +128,8 @@ ListVector::ListVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numS
                        set(i, inputData);
                }
                m->gobble(f);
+        
+        if (f.eof()) { m->saveNextLabel = ""; }
        }
        catch(exception& e) {
                m->errorOut(e, "ListVector", "ListVector");
@@ -109,6 +162,60 @@ void ListVector::set(int binNumber, string seqNames){
 string ListVector::get(int index){
        return data[index];
 }
+/***********************************************************************/
+
+//Replaces the stored bin labels with the supplied list.
+//labels is received by value, so exchanging it with binLabels leaves binLabels holding the
+//caller's values exactly as an assignment would.
+void ListVector::setLabels(vector<string> labels){
+       try {
+               binLabels.swap(labels);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ListVector", "setLabels");
+               exit(1);
+       }
+}
+
+/***********************************************************************/
+//could potentially end up with duplicate binlabel names with code below.
+//we don't currently use them in a way that would do that.
+//if you had a listfile that had been subsampled and then added to it, dup names would be possible.
+vector<string> ListVector::getLabels(){
+    try {
+        
+        string tagHeader = "Otu";
+        if (m->sharedHeaderMode == "tax") { tagHeader = "PhyloType"; }
+        
+        if (binLabels.size() < data.size()) {
+            string snumBins = toString(numBins);
+            
+            for (int i = 0; i < numBins; i++) {
+                string binLabel = tagHeader;
+                
+                if (i < binLabels.size()) { //label exists, check leading zeros length
+                    string sbinNumber = m->getSimpleLabel(binLabels[i]);
+                    if (sbinNumber.length() < snumBins.length()) {
+                        int diff = snumBins.length() - sbinNumber.length();
+                        for (int h = 0; h < diff; h++) { binLabel += "0"; }
+                    }
+                    binLabel += sbinNumber;
+                    binLabels[i] = binLabel;
+                }else{
+                    string sbinNumber = toString(i+1);
+                    if (sbinNumber.length() < snumBins.length()) {
+                        int diff = snumBins.length() - sbinNumber.length();
+                        for (int h = 0; h < diff; h++) { binLabel += "0"; }
+                    }
+                    binLabel += sbinNumber;
+                    binLabels.push_back(binLabel);
+                }
+            }
+        }
+        return binLabels;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ListVector", "getLabels");
+               exit(1);
+       }
+}
 
 /***********************************************************************/
 
@@ -150,6 +257,52 @@ void ListVector::clear(){
        
 }
 
+/***********************************************************************/
+void ListVector::printHeaders(ostream& output){
+       try {
+               string snumBins = toString(numBins);
+               output << "label\tnumOtus\t";
+               if (m->sharedHeaderMode == "tax") {
+                       for (int i = 0; i < numBins; i++) {
+                               
+                               //if there is a bin label use it otherwise make one
+                               string binLabel = "PhyloType";
+                               string sbinNumber = toString(i+1);
+                               if (sbinNumber.length() < snumBins.length()) {
+                                       int diff = snumBins.length() - sbinNumber.length();
+                                       for (int h = 0; h < diff; h++) { binLabel += "0"; }
+                               }
+                               binLabel += sbinNumber;
+                               if (i < binLabels.size()) {  binLabel = binLabels[i]; }
+                               
+                               output << binLabel << '\t';
+                       }
+                       output << endl;
+               }else {
+                       for (int i = 0; i < numBins; i++) {
+                               //if there is a bin label use it otherwise make one
+                               string binLabel = "Otu";
+                               string sbinNumber = toString(i+1);
+                               if (sbinNumber.length() < snumBins.length()) {
+                                       int diff = snumBins.length() - sbinNumber.length();
+                                       for (int h = 0; h < diff; h++) { binLabel += "0"; }
+                               }
+                               binLabel += sbinNumber;
+                               if (i < binLabels.size()) {  binLabel = binLabels[i]; }
+                               
+                               output << binLabel << '\t';
+                       }
+                       
+                       output << endl;
+               }
+               m->printedListHeaders = true;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ListVector", "printHeaders");
+               exit(1);
+       }
+}
+
 /***********************************************************************/
 
 void ListVector::print(ostream& output){
index dcf01a0ad4b356f152a3cf8ca168a6ba528b63bb..b2bc511cfd258da992f9f1b8a44971f4dd29e3e2 100644 (file)
@@ -20,7 +20,7 @@ public:
        ListVector(int);
 //     ListVector(const ListVector&);
        ListVector(string, vector<string>);
-       ListVector(const ListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs){};
+       ListVector(const ListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs), binLabels(lv.binLabels) {};
        ListVector(ifstream&);
        ~ListVector(){};
        
@@ -30,11 +30,14 @@ public:
 
        void set(int, string);  
        string get(int);
+    vector<string> getLabels();
+    void setLabels(vector<string>);
        void push_back(string);
        void resize(int);
        void clear();
        int size();
        void print(ostream&);
+    void printHeaders(ostream&);
        
        RAbundVector getRAbundVector();
        SAbundVector getSAbundVector();
@@ -45,6 +48,7 @@ private:
        int maxRank;
        int numBins;
        int numSeqs;
+    vector<string> binLabels;
 
 };
 
index 248b3c8194d27e05e9e8b15359cf6de5b9c2395a..4c350922608489b89156c01244c170d8688d930d 100644 (file)
@@ -10,6 +10,7 @@
 #include "sharedrabundvector.h"
 #include "inputdata.h"
 #include "sharedutilities.h"
+#include "phylotree.h"
 
 //taken from http://biom-format.org/documentation/biom_format.html
 /* Minimal Sparse 
@@ -94,11 +95,11 @@ vector<string> MakeBiomCommand::setParameters(){
        try {
                CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","biom",false,true,true); parameters.push_back(pshared);
         CommandParameter pcontaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pcontaxonomy);
-        //CommandParameter preference("referencetax", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(preference);
+        CommandParameter preference("reftaxonomy", "InputTypes", "", "", "none", "none", "refPi","",false,false); parameters.push_back(preference);
         CommandParameter pmetadata("metadata", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pmetadata);
                CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
-        //CommandParameter ppicrust("picrust", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppicrust);
+        CommandParameter ppicrust("picrust", "InputTypes", "", "", "none", "none", "refPi","shared",false,false); parameters.push_back(ppicrust);
         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
         CommandParameter pmatrixtype("matrixtype", "Multiple", "sparse-dense", "sparse", "", "", "","",false,false); parameters.push_back(pmatrixtype);
@@ -116,14 +117,14 @@ vector<string> MakeBiomCommand::setParameters(){
 string MakeBiomCommand::getHelpString(){       
        try {
                string helpString = "";
-               helpString += "The make.biom command parameters are shared, contaxonomy, metadata, groups, matrixtype and label.  shared is required, unless you have a valid current file.\n"; //, picrust and referencetax
+               helpString += "The make.biom command parameters are shared, contaxonomy, metadata, groups, matrixtype, picrust, reftaxonomy and label.  shared is required, unless you have a valid current file.\n"; //
                helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
                helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
                helpString += "The matrixtype parameter allows you to select what type you would like to make. Choices are sparse and dense, default is sparse.\n";
         helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance.  ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. It is used to assign taxonomy information to the metadata of rows.\n";
         helpString += "The metadata parameter is used to provide experimental parameters to the columns.  Things like 'sample1 gut human_gut'. \n";
-        //helpString += "The picrust parameter is used to indicate the biom file is for input to picrust.  NOTE: Picrust requires a greengenes taxonomy. \n";
-        //helpString += "The referencetax parameter is used with the picrust parameter.  Picrust requires the name of the reference taxonomy sequence to be in the biom file. \n";
+        helpString += "The picrust parameter is used to provide the greengenes OTU IDs map table.  NOTE: Picrust requires a greengenes taxonomy. \n";
+        helpString += "The referencetax parameter is used with the picrust parameter.  Picrust requires the greengenes OTU IDs to be in the biom file. \n";
                helpString += "The make.biom command should be in the following format: make.biom(shared=yourShared, groups=yourGroups, label=yourLabels).\n";
                helpString += "Example make.biom(shared=abrecovery.an.shared, groups=A-B-C).\n";
                helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
@@ -141,7 +142,8 @@ string MakeBiomCommand::getOutputPattern(string type) {
     try {
         string pattern = "";
         
-        if (type == "biom") {  pattern = "[filename],[distance],biom"; } 
+        if (type == "biom") {  pattern = "[filename],[distance],biom"; }
+        else if (type == "shared") {  pattern = "[filename],[distance],biom_shared"; }
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
         return pattern;
@@ -159,6 +161,7 @@ MakeBiomCommand::MakeBiomCommand(){
                setParameters();
                vector<string> tempOutNames;
                outputTypes["biom"] = tempOutNames;
+        outputTypes["shared"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "MakeBiomCommand", "MakeBiomCommand");
@@ -192,6 +195,7 @@ MakeBiomCommand::MakeBiomCommand(string option) {
                        //initialize outputTypes
                        vector<string> tempOutNames;
                        outputTypes["biom"] = tempOutNames;
+            outputTypes["shared"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -214,12 +218,20 @@ MakeBiomCommand::MakeBiomCommand(string option) {
                                        if (path == "") {       parameters["constaxonomy"] = inputDir + it->second;             }
                                }
                 
-                it = parameters.find("referencetax");
+                it = parameters.find("reftaxonomy");
                                //user has given a template file
                                if(it != parameters.end()){
                                        path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["referencetax"] = inputDir + it->second;             }
+                                       if (path == "") {       parameters["reftaxonomy"] = inputDir + it->second;              }
+                               }
+                
+                it = parameters.find("picrust");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["picrust"] = inputDir + it->second;          }
                                }
                 
                 it = parameters.find("metadata");
@@ -249,9 +261,13 @@ MakeBiomCommand::MakeBiomCommand(string option) {
                        if (contaxonomyfile == "not found") {  contaxonomyfile = "";  }
                        else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; }
             
-            //referenceTax = validParameter.validFile(parameters, "referencetax", true);
-                       //if (referenceTax == "not found") {  referenceTax = "";  }
-                       //else if (referenceTax == "not open") { referenceTax = ""; abort = true; }
+            referenceTax = validParameter.validFile(parameters, "reftaxonomy", true);
+                       if (referenceTax == "not found") {  referenceTax = "";  }
+                       else if (referenceTax == "not open") { referenceTax = ""; abort = true; }
+            
+            picrustOtuFile = validParameter.validFile(parameters, "picrust", true);
+                       if (picrustOtuFile == "not found") {  picrustOtuFile = "";  }
+                       else if (picrustOtuFile == "not open") { picrustOtuFile = ""; abort = true; }
 
             metadatafile = validParameter.validFile(parameters, "metadata", true);
                        if (metadatafile == "not found") {  metadatafile = "";  }
@@ -265,13 +281,6 @@ MakeBiomCommand::MakeBiomCommand(string option) {
                                if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
                                else { allLines = 1;  }
                        }
-            
-            //string temp = validParameter.validFile(parameters, "picrust", false);                    if (temp == "not found"){       temp = "f";                             }
-                       //picrust = m->isTrue(temp);
-            //if (picrust && ((contaxonomyfile == "") || (referenceTax == ""))) {
-                //m->mothurOut("[ERROR]: the picrust parameter requires a consensus taxonomy with greengenes taxonomy the reference."); m->mothurOutEndLine(); abort = true;
-           //}
-            picrust=false;
                        
                        groups = validParameter.validFile(parameters, "groups", false);                 
                        if (groups == "not found") { groups = ""; }
@@ -280,6 +289,12 @@ MakeBiomCommand::MakeBiomCommand(string option) {
                                m->setGroups(Groups);
                        }
                        
+            if (picrustOtuFile != "") {
+                picrust=true;
+                if (contaxonomyfile == "") {  m->mothurOut("[ERROR]: the constaxonomy parameter is required with the picrust parameter, aborting."); m->mothurOutEndLine(); abort = true;  }
+                if (referenceTax == "") {  m->mothurOut("[ERROR]: the reftaxonomy parameter is required with the picrust parameter, aborting."); m->mothurOutEndLine(); abort = true;  }
+            }else { picrust=false; }
+            
             if ((contaxonomyfile != "") && (labels.size() > 1)) { m->mothurOut("[ERROR]: the contaxonomy parameter cannot be used with multiple labels."); m->mothurOutEndLine(); abort = true; }
             
                        format = validParameter.validFile(parameters, "matrixtype", false);                             if (format == "not found") { format = "sparse"; }
@@ -428,9 +443,9 @@ int MakeBiomCommand::getBiom(vector<SharedRAbundVector*>& lookup){
         out << "{\n" + spaces + "\"id\":\"" + sharedfile + "-" + lookup[0]->getLabel() + "\",\n" + spaces + "\"format\": \"Biological Observation Matrix 0.9.1\",\n" + spaces + "\"format_url\": \"http://biom-format.org\",\n";
         out << spaces + "\"type\": \"OTU table\",\n" + spaces + "\"generated_by\": \"" << mothurString << "\",\n" + spaces + "\"date\": \"" << dateString << "\",\n";
         
+        
+        vector<string> metadata = getMetaData(lookup);
         int numBins = lookup[0]->getNumBins();
-        vector<string> picrustLabels;
-        vector<string> metadata = getMetaData(lookup, picrustLabels);
         
         if (m->control_pressed) {  out.close(); return 0; }
         
@@ -447,11 +462,10 @@ int MakeBiomCommand::getBiom(vector<SharedRAbundVector*>& lookup){
         string rowBack = "\", \"metadata\":";
         for (int i = 0; i < numBins-1; i++) {
             if (m->control_pressed) {  out.close(); return 0; }
-            if (!picrust) { out << rowFront << m->currentBinLabels[i] << rowBack << metadata[i] << "},\n"; }
-            else {  out << rowFront << picrustLabels[i] << rowBack << metadata[i] << "},\n"; }
+            out << rowFront << m->currentSharedBinLabels[i] << rowBack << metadata[i] << "},\n"; 
         }
-        if (!picrust) {  out << rowFront << m->currentBinLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n"; }
-        else {  out << rowFront << picrustLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n"; }
+        out << rowFront << m->currentSharedBinLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n"; 
+       
         //get column info
         /*"columns": [
                     {"id":"Sample1", "metadata":null},
@@ -543,7 +557,7 @@ int MakeBiomCommand::getBiom(vector<SharedRAbundVector*>& lookup){
        }
 }
 //**********************************************************************************************************************
-vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup, vector<string>& picrustLabels){
+vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup){
        try {
         vector<string> metadata;
         
@@ -574,7 +588,7 @@ vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup,
             in.close();
             
             //should the labels be Otu001 or PhyloType001
-            string firstBin = m->currentBinLabels[0];
+            string firstBin = m->currentSharedBinLabels[0];
             string binTag = "Otu";
             if ((firstBin.find("Otu")) == string::npos) { binTag = "PhyloType";  }
             
@@ -601,6 +615,8 @@ vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup,
                 }else {  labelTaxMap[m->getSimpleLabel(otuLabels[i])] = taxs[i]; }
             }
             
+            //merges OTUs classified to same gg otuid, sets otulabels to gg otuids, averages confidence scores of merged otus.  overwritting of otulabels is fine because constaxonomy only allows for one label to be processed.  If this assumption changes, could cause bug.
+            if (picrust) {  getGreenGenesOTUIDs(lookup, labelTaxMap);  }
             
             //{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}
             
@@ -611,14 +627,10 @@ vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup,
                 
                 if (m->control_pressed) { return metadata; }
                 
-                it = labelTaxMap.find(m->getSimpleLabel(m->currentBinLabels[i]));
+                it = labelTaxMap.find(m->getSimpleLabel(m->currentSharedBinLabels[i]));
                 
-                if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + m->currentBinLabels[i] + ".\n"); m->control_pressed = true; }
+                if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + m->currentSharedBinLabels[i] + ".\n"); m->control_pressed = true; }
                 else {
-                    if (picrust) {
-                        string temp = it->second; m->removeConfidences(temp);
-                        picrustLabels.push_back(temp);
-                    }
                     vector<string> bootstrapValues;
                     string data = "{\"taxonomy\":[";
             
@@ -651,6 +663,180 @@ vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup,
                exit(1);
        }
 
+}
+//**********************************************************************************************************************
+// Collapses the mothur OTUs in lookup onto GreenGenes OTU ids (used when picrust is set).
+// For every entry of labelTaxMap (mothur OTU label -> consensus taxonomy) the reference sequences
+// that carry that taxonomy are fetched from the reference taxonomy tree and searched for in the
+// GG otu map. mothur OTUs that resolve to the same GG OTU id are merged: their abundances are
+// summed per group and their confidence scores are averaged.
+// On success lookup, m->currentSharedBinLabels and labelTaxMap are replaced by the merged
+// versions (the old lookup vectors are deleted) and the merged table is written to a new shared file.
+// Always returns 0. Failures are reported through m->mothurOut and by setting m->control_pressed;
+// in that case lookup, m->currentSharedBinLabels and labelTaxMap are left as they were.
+int MakeBiomCommand::getGreenGenesOTUIDs(vector<SharedRAbundVector*>& lookup, map<string, string>& labelTaxMap){
+       try {
+        //nothing to merge, and lookup[0] is needed below to name and head the new shared file
+        if (lookup.size() == 0) { return 0; }
+        
+        //read reftaxonomy
+        PhyloTree phylo(referenceTax);
+        
+        //read otu map file
+        map<string, string> otuMap = readGGOtuMap(); //maps reference ID -> OTU ID
+        
+        if (m->control_pressed) { return 0; }
+        
+        map<string, vector<string> > ggOTUIDs;
+        //loop through otu taxonomies
+        for (map<string, string>::iterator it = labelTaxMap.begin(); it != labelTaxMap.end(); it++) { //maps label -> consensus taxonomy
+            if (m->control_pressed) { break; }
+            
+            //get list of reference ids that map to this taxonomy
+            vector<string> referenceIds = phylo.getSeqs(it->second);
+            
+            if (m->control_pressed) { break; }
+            
+            //look for each one in otu map to find match
+            bool found = false;
+            string otuID = "";
+            string referenceString = "";
+            for (int i = 0; i < referenceIds.size(); i++) {
+                referenceString += referenceIds[i] + " ";
+                map<string, string>::iterator itMap = otuMap.find(referenceIds[i]);
+                if (itMap != otuMap.end()) { otuID = itMap->second; found = true; break; } //found it, stop looking
+            }
+            
+            //if found, add mothur OTU label to the list of this gg otu id (operator[] creates the list on first use)
+            if (found) { ggOTUIDs[otuID].push_back(it->first); }
+            else {  m->mothurOut("[ERROR]: could not find OTUId for " + it->second + ". Its reference sequences are " + referenceString + ".\n"); m->control_pressed = true; }
+            
+        }
+        
+        //a failed lookup or a user interrupt must not leave the caller with a partly merged table
+        if (m->control_pressed) { return 0; }
+        
+        vector<SharedRAbundVector*> newLookup;
+        for (int i = 0; i < lookup.size(); i++) {
+            SharedRAbundVector* temp = new SharedRAbundVector();
+            temp->setLabel(lookup[i]->getLabel());
+            temp->setGroup(lookup[i]->getGroup());
+            newLookup.push_back(temp);
+        }
+        
+        //simple bin label -> bin index in lookup
+        map<string, int> labelIndex;
+        for (int i = 0; i < m->currentSharedBinLabels.size(); i++) {  labelIndex[m->getSimpleLabel(m->currentSharedBinLabels[i])] = i; }
+        
+        vector<string> newBinLabels;
+        map<string, string> newLabelTaxMap;
+        //loop through ggOTUID list combining mothur otus and adjusting labels
+        //ggOTUIDs = 16097 -> <OTU01, OTU10, OTU22>
+        for (map<string, vector<string> >::iterator itMap = ggOTUIDs.begin(); itMap != ggOTUIDs.end(); itMap++) {
+            if (m->control_pressed) { break; }
+            
+            vector<string>& members = itMap->second; //mothur OTU labels merged into this gg otu id
+            
+            //set new gg otu id to taxonomy. OTU01 -> k__Bacteria becomes 16097 -> k__Bacteria
+            //the first member supplies the taxon names of the merged otu
+            map<string, string>::iterator itFirst = labelTaxMap.find(m->getSimpleLabel(members[0]));
+            if (itFirst == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + members[0] + ".\n"); m->control_pressed = true; break; }
+            vector<string> scores;
+            vector<string> taxonomies = parseTax(itFirst->second, scores);
+            
+            //merge/set OTU abundances
+            vector<int> abunds; abunds.resize(lookup.size(), 0);
+            string mergeString = "";
+            vector<float> boots; boots.resize(scores.size(), 0);
+            for (int j = 0; j < members.size(); j++) { //<OTU01, OTU10, OTU22>
+                string simpleLabel = m->getSimpleLabel(members[j]);
+                
+                //merge bootstrap scores of THIS member. Re-parsing the first member's taxonomy for every j, as was done before, made the average equal to the first member's scores.
+                map<string, string>::iterator itTax = labelTaxMap.find(simpleLabel);
+                if (itTax != labelTaxMap.end()) {
+                    vector<string> memberScores;
+                    parseTax(itTax->second, memberScores);
+                    //a member classified to fewer levels adds nothing to the deeper levels
+                    for (int i = 0; i < boots.size() && i < memberScores.size(); i++) {
+                        float tempScore = 0; m->mothurConvert(memberScores[i], tempScore);
+                        boots[i] += tempScore;
+                    }
+                }
+                
+                //merge abunds. A label without a bin in the shared file adds no abundance (operator[] used to hand back bin 0 for it).
+                mergeString += members[j] + " ";
+                map<string, int>::iterator itIndex = labelIndex.find(simpleLabel);
+                if (itIndex != labelIndex.end()) {
+                    for (int i = 0; i < lookup.size(); i++) { abunds[i] += lookup[i]->getAbundance(itIndex->second); }
+                }
+            }
+            
+            if (m->debug) { m->mothurOut("[DEBUG]: merging " + mergeString + " for ggOTUid = " + itMap->first + ".\n");  }
+            
+            //average scores
+            for (int j = 0; j < boots.size(); j++) { boots[j] /= (float) members.size(); }
+            
+            //assemble new taxonomy
+            string newTaxString = "";
+            for (int j = 0; j < boots.size(); j++) {
+                newTaxString += taxonomies[j] + "(" + toString(boots[j]) + ");";
+            }
+
+            //gg otu id -> merged taxonomy
+            newLabelTaxMap[itMap->first] = newTaxString;
+            
+            //add merged otu to new lookup
+            for (int j = 0; j < abunds.size(); j++) { newLookup[j]->push_back(abunds[j], newLookup[j]->getGroup()); }
+            
+            //saved otu label
+            newBinLabels.push_back(itMap->first);
+        }
+        
+        //interrupted or failed while merging: drop the partial result and keep the caller's data
+        if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+        
+        for (int j = 0; j < lookup.size(); j++) {  delete lookup[j];  }
+        
+        lookup = newLookup;
+        m->currentSharedBinLabels = newBinLabels;
+        labelTaxMap = newLabelTaxMap;
+        
+        //write the merged table as a shared file
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+        variables["[distance]"] = lookup[0]->getLabel();
+        string outputFileName = getOutputFileName("shared",variables);
+        ofstream out;
+        m->openOutputFile(outputFileName, out);
+        outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
+        
+        lookup[0]->printHeaders(out);
+        
+        for (int i = 0; i < lookup.size(); i++) {
+            out << lookup[i]->getLabel() << '\t' << lookup[i]->getGroup() << '\t';
+            lookup[i]->print(out);
+        }
+        out.close();
+
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeBiomCommand", "getGreenGenesOTUIDs");
+               exit(1);
+       }
+    
+}
+//**********************************************************************************************************************
+// Reads the GreenGenes otu map named by picrustOtuFile and returns a lookup from reference
+// sequence id to GreenGenes OTU id. On each line the second whitespace separated column is taken
+// as the OTU id, and that column plus every column after it is mapped to it (so the OTU id also
+// maps to itself). The first column is not used.
+// NOTE(review): presumably the first column is a running cluster number - confirm against the otu map in use.
+// Lines with fewer than two columns are skipped. Reading stops early, returning what was read so
+// far, when m->control_pressed is set.
+map<string, string> MakeBiomCommand::readGGOtuMap(){
+       try {
+        map<string, string> otuMap;
+        
+        ifstream in;
+        m->openInputFile(picrustOtuFile, in);
+        
+        //map referenceIDs -> otuIDs
+        //lines look like:
+        //16097        671376  616121  533566  683683  4332909 4434717 772666  611808  695209
+        while(!in.eof()) {
+            if (m->control_pressed) { break; }
+            
+            string line = m->getline(in); m->gobble(in);
+            vector<string> pieces = m->splitWhiteSpace(line);
+            
+            //pieces[1] is read below, so a line needs at least two columns (a one column line used to index past the end)
+            if (pieces.size() > 1) {
+                string otuID = pieces[1];
+                for (int i = 1; i < pieces.size(); i++) {  otuMap[pieces[i]] = otuID; }
+            }
+        }
+        in.close();
+        
+        return otuMap;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeBiomCommand", "readGGOtuMap");
+               exit(1);
+       }
+    
 }
 //**********************************************************************************************************************
 int MakeBiomCommand::getSampleMetaData(vector<SharedRAbundVector*>& lookup){
index 1ff398518c850c0447ff58c904b1ceddf9b6c3d3..8f07c40d439529355af3fb3ee78e11b6b23aab7a 100644 (file)
@@ -36,16 +36,19 @@ public:
        
 private:
     
-       string sharedfile, contaxonomyfile, metadatafile, groups, outputDir, format, label, referenceTax;
+       string sharedfile, contaxonomyfile, metadatafile, groups, outputDir, format, label, referenceTax, picrustOtuFile;
        vector<string> outputNames, Groups, sampleMetadata;
        set<string> labels;
     
        bool abort, allLines, picrust;
     
     int getBiom(vector<SharedRAbundVector*>&);
-    vector<string> getMetaData(vector<SharedRAbundVector*>&, vector<string>&);
+    vector<string> getMetaData(vector<SharedRAbundVector*>&);
     vector<string> parseTax(string tax, vector<string>& scores);
     int getSampleMetaData(vector<SharedRAbundVector*>&);
+    //for picrust
+    int getGreenGenesOTUIDs(vector<SharedRAbundVector*>&, map<string, string>&);
+    map<string, string> readGGOtuMap();
 };
 
 
index 87027f4df5e0166f25852fec44cd6cfa120c90a7..8796ab2113f455b12c8eace90df64d4bbe2c318e 100644 (file)
@@ -1298,9 +1298,11 @@ vector< vector<string> > MakeContigsCommand::readFastqFiles(unsigned long int& c
         
         if (uniques.size() != 0) {
             for (itUniques = uniques.begin(); itUniques != uniques.end(); itUniques++) {
+                if (m->control_pressed) { break; }
                 m->mothurOut("[WARNING]: did not find paired read for " + itUniques->first + ", ignoring.\n");
             }
             for (map<string, pairFastqRead>:: iterator it = pairUniques.begin(); it != pairUniques.end(); it++) {
+                if (m->control_pressed) { break; }
                 m->mothurOut("[WARNING]: did not find paired read for " + (it->first).substr(1) + ", ignoring.\n");
             }
             m->mothurOutEndLine();
@@ -1660,6 +1662,7 @@ fastqRead MakeContigsCommand::readFastq(ifstream& in, bool& ignore){
         
         vector<int> qualScores = convertQual(quality);
         
+        m->checkName(name);
         read.name = name;
         read.sequence = sequence;
         read.scores = qualScores;
@@ -1974,7 +1977,6 @@ bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, stri
                     else { uniquePrimers.insert(tempPair); }
                                        
                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
-                    
                                        primers[indexPrimer]=newPrimer; indexPrimer++;          
                                        primerNameVector.push_back(group);
                                }else if(type == "BARCODE"){
index 7fc8608be4176f7e8360ea58ee36412e074bdae5..42e71a0e42bb0341c3cff63a8c616d6572537f4b 100644 (file)
--- a/makefile
+++ b/makefile
@@ -15,8 +15,8 @@ USEREADLINE ?= yes
 CYGWIN_BUILD ?= no
 USECOMPRESSION ?= no
 MOTHUR_FILES="\"Enter_your_default_path_here\""
-RELEASE_DATE = "\"10/01/2013\""
-VERSION = "\"1.32.0\""
+RELEASE_DATE = "\"10/16/2013\""
+VERSION = "\"1.32.1\""
 FORTAN_COMPILER = gfortran
 FORTRAN_FLAGS = 
 
index 73f9db202a0171bd49623ab685f88f9b38593dc5..c859bfbc230201e4be1ef73ab19ed56f28b3412d 100644 (file)
@@ -289,7 +289,7 @@ int MakeLefseCommand::runRelabund(map<string, consTax2>& consTax, vector<SharedR
         
         for (int i = 0; i < lookup[0]->getNumBins(); i++) { //process each otu
             if (m->control_pressed) { break; }
-            string nameOfOtu = m->currentBinLabels[i];
+            string nameOfOtu = m->currentSharedBinLabels[i];
             if (constaxonomyfile != "") { //try to find the otuName in consTax to replace with consensus taxonomy
                 map<string, consTax2>::iterator it = consTax.find(nameOfOtu);
                 if (it != consTax.end()) {
@@ -299,7 +299,7 @@ int MakeLefseCommand::runRelabund(map<string, consTax2>& consTax, vector<SharedR
                     //remove confidences and change ; to |
                     m->removeConfidences(nameOfOtu);
                     for (int j = 0; j < nameOfOtu.length()-1; j++) {
-                        if (nameOfOtu[j] == ';') { fixedName += "_" + m->currentBinLabels[i] + '|'; }
+                        if (nameOfOtu[j] == ';') { fixedName += "_" + m->currentSharedBinLabels[i] + '|'; }
                         else { fixedName += nameOfOtu[j]; }
                     }
                     nameOfOtu = fixedName;
index bfd1b8484eee963c897000a20024aae831c531f5..0ddc3d477336d34340497b82121cb0c032c9067b 100644 (file)
@@ -327,7 +327,7 @@ int MergeGroupsCommand::processSharedFile(GroupMap*& designMap){
                                
                                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
                                
-                               if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
+                               if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
                                process(lookup, out);
                                
                                processedLabels.insert(lookup[0]->getLabel());
@@ -341,7 +341,7 @@ int MergeGroupsCommand::processSharedFile(GroupMap*& designMap){
                                lookup = input.getSharedRAbundVectors(lastLabel);
                                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
                                
-                               if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
+                               if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
                                process(lookup, out);
                                
                                processedLabels.insert(lookup[0]->getLabel());
@@ -383,7 +383,7 @@ int MergeGroupsCommand::processSharedFile(GroupMap*& designMap){
                        
                        m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
                        
-                       if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
+                       if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
                        process(lookup, out);
                        
                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
index 204d83e10df302ba17d3303c0bdb0a81f54536be..33b559fc2dc358017c90fd44fe77ec0be61acb65 100644 (file)
@@ -646,7 +646,7 @@ int MetaStatsCommand::convertToInput(vector<SharedRAbundVector*>& subset, string
         out << subset[subset.size()-1]->getGroup() << endl;
         
         for (int i = 0; i < subset[0]->getNumBins(); i++) {
-            out << m->currentBinLabels[i] << '\t';
+            out << m->currentSharedBinLabels[i] << '\t';
             for (int j = 0; j < subset.size()-1; j++) {
                 out << subset[j]->getAbundance(i) << '\t';
             }
index 85db5e0e8ffc384960f7954d6db1266bac9ea3c1..a2079251c4af101750b317e3926eba75d5302c9c 100644 (file)
@@ -295,6 +295,7 @@ int MGClusterCommand::execute(){
             m->openOutputFile(rabundFileName,  rabundFile);
         }
                m->openOutputFile(listFileName, listFile);
+        list->printHeaders(listFile);
                
                if (m->control_pressed) { 
                        delete nameMap; delete read; delete list; delete rabund; 
index 4715de9b7ac6d8c9f14e4fd4575906c433ae2808..f6a7800c3697833eeb9160330671804f13633bfb 100644 (file)
@@ -229,7 +229,7 @@ int MothurMetastats::runMetastats(string outputFileName, vector< vector<double>
                        if (m->control_pressed) { out.close(); return 0; }
                        
             //if there are binlabels use them otherwise count.
-                       if (m->binLabelsInFile.size() == row) { out << m->binLabelsInFile[i] << '\t'; }
+                       if (i < m->currentSharedBinLabels.size()) { out << m->currentSharedBinLabels[i] << '\t'; }
             else { out << (i+1) << '\t'; }
             
             out << C1[i][0] << '\t' << C1[i][1] << '\t' << C1[i][2] << '\t' << C2[i][0] << '\t' << C2[i][1] << '\t' << C2[i][2] << '\t' << pvalues[i] << '\t' << qvalues[i] << endl;
index 3d4504daf761d625ffedd1e11749aa567ca9327b..0f24cb2dc362c679dd16ed0eb7c50444f07e7a6e 100644 (file)
@@ -779,6 +779,8 @@ string MothurOut::getPathName(string longName){
 bool MothurOut::dirCheck(string& dirName){
        try {
         
+        if (dirName == "") { return false; }
+        
         string tag = "";
         #ifdef USE_MPI
             int pid; 
@@ -797,7 +799,7 @@ bool MothurOut::dirCheck(string& dirName){
 
         //test to make sure directory exists
         dirName = getFullPathName(dirName);
-        string outTemp = dirName + tag + "temp";
+        string outTemp = dirName + tag + "temp"+ toString(time(NULL));
         ofstream out;
         out.open(outTemp.c_str(), ios::trunc);
         if(!out) {
@@ -1140,6 +1142,105 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle){
                exit(1);
        }       
 }
+/***********************************************************************/
+// Opens fileName on fileHandle in binary mode (ios::binary), after expanding it with getFullPathName.
+// Returns 1 and prints an error when the stream cannot be opened; returns 0 otherwise. A file
+// found to be at eof after gobble is reported as blank, but 0 is still returned for it.
+// When built with USE_COMPRESSION on the unix-like platforms listed below, a name ending in .gz or
+// .bz2 is read through a named pipe: a forked child runs zcat/bzcat into the pipe and the parent
+// opens the pipe in place of the file.
+int MothurOut::openInputFileBinary(string fileName, ifstream& fileHandle){
+       try {
+        
+               //get full path name
+               string completeFileName = getFullPathName(fileName);
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#ifdef USE_COMPRESSION
+        // check for gzipped or bzipped file
+        if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
+            // NOTE(review): tmpnam only hands back a name; another process could create that path before mkfifo does - confirm this is acceptable here
+            string tempName = string(tmpnam(0));
+            mkfifo(tempName.c_str(), 0666);
+            int fork_result = fork();
+            if (fork_result < 0) {
+                cerr << "Error forking.\n";
+                exit(1);
+            } else if (fork_result == 0) {
+                // child: decompress into the pipe, remove the pipe, then end the child process
+                // NOTE(review): completeFileName is pasted into the shell command unquoted - names with spaces or shell metacharacters will not behave as intended
+                string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
+                cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
+                system(command.c_str());
+                cerr << "Done decompressing " << completeFileName << "\n";
+                mothurRemove(tempName);
+                exit(EXIT_SUCCESS);
+            } else {
+                // parent: read from the pipe instead of the compressed file
+                cerr << "waiting on child process " << fork_result << "\n";
+                completeFileName = tempName;
+            }
+        }
+#endif
+#endif
+        
+               fileHandle.open(completeFileName.c_str(), ios::binary);
+               if(!fileHandle) {
+                       mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
+                       return 1;
+               }
+               else {
+                       //check for blank file
+                       // NOTE(review): gobble is defined elsewhere; if it consumes leading whitespace, binary data that starts with whitespace bytes would lose them - confirm
+                       gobble(fileHandle);
+                       if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine();  }
+                       
+                       return 0;
+               }
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "openInputFileBinary");
+               exit(1);
+       }       
+}
+/***********************************************************************/
+// Quiet overload of openInputFileBinary: opens fileName on fileHandle in binary mode and returns
+// 1 when the stream cannot be opened, 0 otherwise, without printing the "Could not open" or
+// "is blank" messages (both are commented out below). The noerror argument is never read in the
+// body; it only selects this overload.
+// When built with USE_COMPRESSION on the unix-like platforms listed below, a name ending in .gz or
+// .bz2 is read through a named pipe fed by a forked zcat/bzcat child. That path still writes
+// progress messages to cerr.
+int MothurOut::openInputFileBinary(string fileName, ifstream& fileHandle, string noerror){
+       try {
+        
+               //get full path name
+               string completeFileName = getFullPathName(fileName);
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#ifdef USE_COMPRESSION
+        // check for gzipped or bzipped file
+        if (endsWith(completeFileName, ".gz") || endsWith(completeFileName, ".bz2")) {
+            // NOTE(review): tmpnam only hands back a name; another process could create that path before mkfifo does - confirm this is acceptable here
+            string tempName = string(tmpnam(0));
+            mkfifo(tempName.c_str(), 0666);
+            int fork_result = fork();
+            if (fork_result < 0) {
+                cerr << "Error forking.\n";
+                exit(1);
+            } else if (fork_result == 0) {
+                // child: decompress into the pipe, remove the pipe, then end the child process
+                // NOTE(review): completeFileName is pasted into the shell command unquoted - names with spaces or shell metacharacters will not behave as intended
+                string command = (endsWith(completeFileName, ".gz") ? "zcat " : "bzcat ") + completeFileName + string(" > ") + tempName;
+                cerr << "Decompressing " << completeFileName << " via temporary named pipe " << tempName << "\n";
+                system(command.c_str());
+                cerr << "Done decompressing " << completeFileName << "\n";
+                mothurRemove(tempName);
+                exit(EXIT_SUCCESS);
+            } else {
+                // parent: read from the pipe instead of the compressed file
+                cerr << "waiting on child process " << fork_result << "\n";
+                completeFileName = tempName;
+            }
+        }
+#endif
+#endif
+        
+               fileHandle.open(completeFileName.c_str(), ios::binary);
+               if(!fileHandle) {
+                       //mothurOut("[ERROR]: Could not open " + completeFileName); mothurOutEndLine();
+                       return 1;
+               }
+               else {
+                       //check for blank file
+                       // NOTE(review): gobble is defined elsewhere; if it consumes leading whitespace, binary data that starts with whitespace bytes would lose them - confirm
+                       gobble(fileHandle);
+                       //if (fileHandle.eof()) { mothurOut("[ERROR]: " + completeFileName + " is blank. Please correct."); mothurOutEndLine();  }
+                       
+                       return 0;
+               }
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "openInputFileBinary - no error");
+               exit(1);
+       }
+}
+
 /***********************************************************************/
 
 int MothurOut::renameFile(string oldName, string newName){
@@ -1289,6 +1390,36 @@ int MothurOut::appendFiles(string temp, string filename) {
                exit(1);
        }       
 }
+/**************************************************************************************************/
+// Appends the bytes of file temp to the end of file filename, copying in 4096 byte chunks.
+// filename is opened in binary append mode first; temp is opened with the quiet
+// openInputFileBinary overload. Returns the result of opening temp: 0 when it was opened and
+// copied, 1 when it could not be opened (nothing is appended in that case).
+int MothurOut::appendBinaryFiles(string temp, string filename) {
+       try{
+               ofstream output;
+               ifstream input;
+        
+               //open output file in append mode
+               openOutputFileBinaryAppend(filename, output);
+               int ableToOpen = openInputFileBinary(temp, input, "no error");
+               
+               if (ableToOpen == 0) { //you opened it
+            
+            char buffer[4096];
+            //test the whole stream state, not eof() alone, so a read error that never reaches eof cannot loop forever.
+            //the last, short read sets eof and fail together; its bytes are still written through gcount() before the loop ends.
+            while (input) {
+                input.read(buffer, sizeof(buffer));
+                output.write(buffer, input.gcount());
+            }
+                       input.close();
+               }
+               
+               output.close();
+               
+               return ableToOpen;
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "appendBinaryFiles");
+               exit(1);
+       }       
+}
+
 /**************************************************************************************************/
 int MothurOut::appendFilesWithoutHeaders(string temp, string filename) {
        try{
@@ -3013,6 +3144,64 @@ void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){
        }       
 }
 /***********************************************************************/
+// Looks for filename in up to four places, stopping at the first that opens: the name as given
+// (expanded with getFullPathName), inputDir, mothur's default path, and the location derived from
+// argv (everything before the last 'm', compared case-insensitively). Empty inputDir / default
+// path are skipped. filename is overwritten with each location that is tried, so on success it
+// holds the path that opened. Returns true when some location opened, false otherwise.
+bool MothurOut::checkLocations(string& filename, string inputDir){
+       try {
+        filename = getFullPathName(filename);
+        
+        //first try: the name exactly as supplied
+        ifstream firstTry;
+        int status = openInputFile(filename, firstTry, "noerror");
+        firstTry.close();
+        
+        //second try: the input directory, when one is set
+        if ((status == 1) && (inputDir != "")) {
+            string candidate = inputDir + getSimpleName(filename);
+            mothurOut("Unable to open " + filename + ". Trying input directory " + candidate); mothurOutEndLine();
+            ifstream secondTry;
+            status = openInputFile(candidate, secondTry, "noerror");
+            secondTry.close();
+            filename = candidate;
+        }
+        
+        //third try: the default path, when one is set
+        if ((status == 1) && (getDefaultPath() != "")) {
+            string candidate = getDefaultPath() + getSimpleName(filename);
+            mothurOut("Unable to open " + filename + ". Trying default " + candidate); mothurOutEndLine();
+            ifstream thirdTry;
+            status = openInputFile(candidate, thirdTry, "noerror");
+            thirdTry.close();
+            filename = candidate;
+        }
+        
+        //last try: the location of the mothur executable
+        if (status == 1) {
+            string exeLocation = argv;
+            string lowered = exeLocation;
+            for (int pos = 0; pos < exeLocation.length(); pos++) { lowered[pos] = tolower(exeLocation[pos]); }
+            //keep everything in front of the last 'm'
+            exeLocation = exeLocation.substr(0, (lowered.find_last_of('m')));
+            
+            string candidate = getFullPathName(exeLocation) + getSimpleName(filename);
+            mothurOut("Unable to open " + filename + ". Trying mothur's executable location " + candidate); mothurOutEndLine();
+            ifstream lastTry;
+            status = openInputFile(candidate, lastTry, "noerror");
+            lastTry.close();
+            filename = candidate;
+        }
+        
+        if (status != 1) { return true; }
+        
+        //every location failed
+        mothurOut("Unable to open " + filename + "."); mothurOutEndLine();
+        return false;
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "checkLocations");
+               exit(1);
+       }
+}
+/***********************************************************************/
 
 //This function parses the estimator options and puts them in a vector
 void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbol) {
index f58f4e874a7f72961489d7e2651bda651e3d2bb4..1747b1480c732ae42f361940ff57dcbe151bbc7b 100644 (file)
@@ -66,11 +66,11 @@ class MothurOut {
        
                vector<string> getAllGroups() { sort(namesOfGroups.begin(), namesOfGroups.end()); return namesOfGroups; }
                vector<string> Treenames;
-               //map<string, string> names;
-               vector<string> binLabelsInFile;
-               vector<string> currentBinLabels;
+               vector<string> sharedBinLabelsInFile;
+               vector<string> currentSharedBinLabels;
+        vector<string> listBinLabelsInFile;
                string saveNextLabel, argv, sharedHeaderMode, groupMode;
-               bool printedHeaders, commandInputsConvertError, changedSeqNames, modifyNames;
+               bool printedSharedHeaders, printedListHeaders, commandInputsConvertError, changedSeqNames, modifyNames;
                
                //functions from mothur.h
                //file operations
@@ -82,6 +82,7 @@ class MothurOut {
                vector<unsigned long long> setFilePosFasta(string, int&);
                string sortFile(string, string);
                int appendFiles(string, string);
+        int appendBinaryFiles(string, string);
         int appendFilesWithoutHeaders(string, string);
                int renameFile(string, string); //oldname, newname
                string getFullPathName(string);
@@ -97,7 +98,11 @@ class MothurOut {
                int openOutputFileAppend(string, ofstream&);
         int openOutputFileBinaryAppend(string, ofstream&);
                int openInputFile(string, ifstream&);
-               int openInputFile(string, ifstream&, string); //no error given 
+        int openInputFileBinary(string, ifstream&);
+        int openInputFileBinary(string, ifstream&, string);
+               int openInputFile(string, ifstream&, string); //no error given
+    
+        bool checkLocations(string&, string);  //filename, inputDir. checks for file in ./, inputdir, default and mothur's exe location.  Returns false if cant be found. If found completes name with location
                string getline(ifstream&);
                string getline(istringstream&);
                void gobble(istream&);
@@ -270,7 +275,8 @@ class MothurOut {
             counttablefile = "";
             summaryfile = "";
                        gui = false;
-                       printedHeaders = false;
+                       printedSharedHeaders = false;
+            printedListHeaders = false;
                        commandInputsConvertError = false;
             mothurCalling = false;
             debug = false;
index a90ed2965888928f4a90049fe04fe888d773c450..82d2d7f2c2c4335f76b3b8909031359e014e8963 100644 (file)
@@ -288,7 +288,7 @@ int NMDSCommand::execute(){
                outBest.setf(ios::fixed, ios::floatfield);
                outBest.setf(ios::showpoint);
                
-               outBest << '\t';
+               outBest << "group" << '\t';
                for (int k = 0; k < bestConfig.size(); k++) { outBest << "axis" << (k+1) << '\t'; }
                outBest << endl;
                
index acd8208ea7ebb20a3d3810d254d0b24f5386d4d3..1000e4243e0c6fbf15f5e384871131f74d2a4ea5 100644 (file)
@@ -458,7 +458,7 @@ int NormalizeSharedCommand::execute(){
 int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp){
        try {
                //save mothurOut's binLabels to restore for next label
-               vector<string> saveBinLabels = m->currentBinLabels;
+               vector<string> saveBinLabels = m->currentSharedBinLabels;
                
                if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
                
@@ -540,7 +540,7 @@ int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp){
                
                out.close();
                
-               m->currentBinLabels = saveBinLabels;
+               m->currentSharedBinLabels = saveBinLabels;
                
                return 0;
        }
@@ -555,7 +555,7 @@ int NormalizeSharedCommand::normalize(vector<SharedRAbundFloatVector*>& thisLook
        try {
                
                //save mothurOut's binLabels to restore for next label
-               vector<string> saveBinLabels = m->currentBinLabels;
+               vector<string> saveBinLabels = m->currentSharedBinLabels;
                
         map<string, string> variables; 
         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
@@ -629,7 +629,7 @@ int NormalizeSharedCommand::normalize(vector<SharedRAbundFloatVector*>& thisLook
                
                out.close();
                
-               m->currentBinLabels = saveBinLabels;
+               m->currentSharedBinLabels = saveBinLabels;
                
                return 0;
        }
@@ -675,7 +675,7 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thisl
                                        for (int h = 0; h < diff; h++) { binLabel += "0"; }
                                }
                                binLabel += sbinNumber; 
-                               if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
+                               if (i < m->currentSharedBinLabels.size()) {  binLabel = m->currentSharedBinLabels[i]; }
                                
                                newBinLabels.push_back(binLabel);
                        }
@@ -684,7 +684,7 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thisl
                for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
 
                thislookup = newLookup;
-               m->currentBinLabels = newBinLabels;
+               m->currentSharedBinLabels = newBinLabels;
                
                return 0;
  
@@ -731,7 +731,7 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>&
                                        for (int h = 0; h < diff; h++) { binLabel += "0"; }
                                }
                                binLabel += sbinNumber; 
-                               if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
+                               if (i < m->currentSharedBinLabels.size()) {  binLabel = m->currentSharedBinLabels[i]; }
                                
                                newBinLabels.push_back(binLabel);
                        }
@@ -740,7 +740,7 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>&
                for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
                
                thislookup = newLookup;
-               m->currentBinLabels = newBinLabels;
+               m->currentSharedBinLabels = newBinLabels;
                
                return 0;
                
index 9ccdeeaba28b54a899fd90dada402030e0d87c3f..705540c3a62564a14da7a7ed00262c4adb5f220b 100644 (file)
@@ -360,7 +360,7 @@ int OTUAssociationCommand::process(vector<SharedRAbundVector*>& lookup){
                     else if (method == "kendall")      {       coef = linear.calcKendall(xy[i], xy[k], sig);   }                   
                     else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
                     
-                    if (sig < cutoff) { out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl; }
+                    if (sig < cutoff) { out << m->currentSharedBinLabels[i] << '\t' << m->currentSharedBinLabels[k] << '\t' << coef << '\t' << sig << endl; }
                 }
             }
                }else { //compare otus to metadata
@@ -377,7 +377,7 @@ int OTUAssociationCommand::process(vector<SharedRAbundVector*>& lookup){
                     else if (method == "kendall")      {       coef = linear.calcKendall(xy[i], metadata[k], sig);     }                   
                     else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
                     
-                    if (sig < cutoff) { out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl; }
+                    if (sig < cutoff) { out << m->currentSharedBinLabels[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl; }
                 }
             }
 
@@ -523,7 +523,7 @@ int OTUAssociationCommand::process(vector<SharedRAbundFloatVector*>& lookup){
                     else if (method == "kendall")      {       coef = linear.calcKendall(xy[i], xy[k], sig);   }                   
                     else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
                     
-                    if (sig < cutoff) { out << m->binLabelsInFile[i] << '\t' << m->binLabelsInFile[k] << '\t' << coef << '\t' << sig << endl; }
+                    if (sig < cutoff) { out << m->currentSharedBinLabels[i] << '\t' << m->currentSharedBinLabels[k] << '\t' << coef << '\t' << sig << endl; }
                 }
             }
                }else { //compare otus to metadata
@@ -540,7 +540,7 @@ int OTUAssociationCommand::process(vector<SharedRAbundFloatVector*>& lookup){
                     else if (method == "kendall")      {       coef = linear.calcKendall(xy[i], metadata[k], sig);     }                   
                     else { m->mothurOut("[ERROR]: invalid method, choices are spearman, pearson or kendall."); m->mothurOutEndLine(); m->control_pressed = true; }
                     
-                    if (sig < cutoff) { out << m->binLabelsInFile[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl; }
+                    if (sig < cutoff) { out << m->currentSharedBinLabels[i] << '\t' << metadataLabels[k] << '\t' << coef << '\t' << sig << endl; }
                 }
             }
             
index a294a77c69a8cd5582a38cf7394c510a1e5ccccb..07e3c1d0146ef521563697fe07deb38d7837b67d 100644 (file)
@@ -175,11 +175,11 @@ int OtuHierarchyCommand::execute(){
                }
                
                //map sequences to bin number in the "little" otu
-               map<string, int> littleBins; 
+               map<string, int> littleBins;
+        vector<string> binLabels0 = lists[0].getLabels();
                for (int i = 0; i < lists[0].getNumBins(); i++) {
                
                        if (m->control_pressed) {  return 0; }
-                       
                        string bin = lists[0].get(i);
             vector<string> names; m->splitAtComma(bin, names);
                        for (int j = 0; j < names.size(); j++) { littleBins[names[j]] = i; }
@@ -195,17 +195,17 @@ int OtuHierarchyCommand::execute(){
                m->openOutputFile(outputFileName, out);
                
                //go through each bin in "big" otu and output the bins in "little" otu which created it
+        vector<string> binLabels1 = lists[1].getLabels();
                for (int i = 0; i < lists[1].getNumBins(); i++) {
                
                        if (m->control_pressed) { outputTypes.clear(); out.close(); m->mothurRemove(outputFileName); return 0; }
                        
                        string binnames = lists[1].get(i);
             vector<string> names; m->splitAtComma(binnames, names);
-            
                        
                        //output column 1
                        if (output == "name")   {   out << binnames << '\t';    }
-                       else                                    {       out << (i+1) << '\t';           }
+                       else                                    {       out << binLabels1[i] << '\t';           }
                        
                        map<int, int> bins; //bin numbers in little that are in this bin in big
                        map<int, int>::iterator it;
@@ -216,7 +216,7 @@ int OtuHierarchyCommand::execute(){
                        string col2 = "";
                        for (it = bins.begin(); it != bins.end(); it++) {
                                if (output == "name")   {   col2 += lists[0].get(it->first) + "\t";     }
-                               else                                    {       col2 += toString(it->first) + "\t";             }
+                               else                                    {       col2 += binLabels0[it->first] + "\t";           }
                        }
                        
                        //output column 2
index 051c1dfb057c0148c3f0053f94707b7e6d034dc0..c3bd3933836c3f67149a96b9dba1661978b31bc4 100644 (file)
 vector<string> ParseFastaQCommand::setParameters(){    
        try {
                CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq);
+        CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);
+        CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);
+        CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
+               CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
+        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
+               CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
+        CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
                CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
                CommandParameter pqual("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqual);
         CommandParameter ppacbio("pacbio", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppacbio);
@@ -35,8 +42,15 @@ string ParseFastaQCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The fastq.info command reads a fastq file and creates a fasta and quality file.\n";
-               helpString += "The fastq.info command parameters are fastq, fasta, qfile and format; fastq is required.\n";
+               helpString += "The fastq.info command parameters are fastq, fasta, qfile, oligos, group and format; fastq is required.\n";
         helpString += "The fastq.info command should be in the following format: fastq.info(fastaq=yourFastaQFile).\n";
+        helpString += "The oligos parameter allows you to provide an oligos file to split your fastq file into separate fastq files by barcode and primers. \n";
+        helpString += "The group parameter allows you to provide a group file to split your fastq file into separate fastq files by group. \n";
+        helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the reads. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
+               helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
+               helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+        helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
+               helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
                helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n";
         helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
         helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
@@ -56,7 +70,8 @@ string ParseFastaQCommand::getOutputPattern(string type) {
         string pattern = "";
         
         if (type == "fasta") {  pattern = "[filename],fasta"; } 
-        else if (type == "qfile") {  pattern = "[filename],qual"; } 
+        else if (type == "qfile") {  pattern = "[filename],qual"; }
+        else if (type == "fastq") {  pattern = "[filename],[group],fastq"; } 
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
         return pattern;
@@ -74,6 +89,7 @@ ParseFastaQCommand::ParseFastaQCommand(){
                vector<string> tempOutNames;
                outputTypes["fasta"] = tempOutNames;
                outputTypes["qfile"] = tempOutNames;
+        outputTypes["fastq"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
@@ -83,7 +99,8 @@ ParseFastaQCommand::ParseFastaQCommand(){
 //**********************************************************************************************************************
 ParseFastaQCommand::ParseFastaQCommand(string option){
        try {
-               abort = false; calledHelp = false;   
+               abort = false; calledHelp = false;
+        split = 1;
                
                if(option == "help") {  help(); abort = true; calledHelp = true; }
                else if(option == "citation") { citation(); abort = true; calledHelp = true;}
@@ -106,6 +123,7 @@ ParseFastaQCommand::ParseFastaQCommand(string option){
                        vector<string> tempOutNames;
                        outputTypes["fasta"] = tempOutNames;
                        outputTypes["qfile"] = tempOutNames;
+            outputTypes["fastq"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -119,12 +137,40 @@ ParseFastaQCommand::ParseFastaQCommand(string option){
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
                                }
+                
+                it = parameters.find("oligos");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
+                               }
+                
+                it = parameters.find("group");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
                        }
                        
                        //check for required parameters
                        fastaQFile = validParameter.validFile(parameters, "fastq", true);
                        if (fastaQFile == "not found") {        m->mothurOut("fastq is a required parameter for the fastq.info command.");      m->mothurOutEndLine();  abort = true;   }
-                       else if (fastaQFile == "not open")      {       fastaQFile = ""; abort = true;  }       
+                       else if (fastaQFile == "not open")      {       fastaQFile = ""; abort = true;  }
+            
+            oligosfile = validParameter.validFile(parameters, "oligos", true);
+                       if (oligosfile == "not found") {        oligosfile = "";        }
+                       else if (oligosfile == "not open")      {       oligosfile = ""; abort = true;  }
+            else { m->setOligosFile(oligosfile); split = 2; }
+            
+            groupfile = validParameter.validFile(parameters, "group", true);
+                       if (groupfile == "not found") { groupfile = ""; }
+                       else if (groupfile == "not open")       {       groupfile = ""; abort = true;   }
+            else { m->setGroupFile(groupfile); split = 2; }
+            
+            if ((groupfile != "") && (oligosfile != "")) { m->mothurOut("You must enter ONLY ONE of the following: oligos or group."); m->mothurOutEndLine(); abort = true;  }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);   if (outputDir == "not found"){  outputDir = m->hasPath(fastaQFile);     }
@@ -139,6 +185,23 @@ ParseFastaQCommand::ParseFastaQCommand(string option){
             temp = validParameter.validFile(parameters, "pacbio", false);      if(temp == "not found"){        temp = "F";     }
                        pacbio = m->isTrue(temp);
 
+            temp = validParameter.validFile(parameters, "bdiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, bdiffs);
+                       
+                       temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, pdiffs);
+            
+            temp = validParameter.validFile(parameters, "ldiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, ldiffs);
+            
+            temp = validParameter.validFile(parameters, "sdiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, sdiffs);
+                       
+                       temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
+                       m->mothurConvert(temp, tdiffs);
+                       
+                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
+
                        
             format = validParameter.validFile(parameters, "format", false);            if (format == "not found"){     format = "sanger";      }
             
@@ -171,6 +234,18 @@ int ParseFastaQCommand::execute(){
                
                if (fasta) { m->openOutputFile(fastaFile, outFasta);  outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile);      }
                if (qual) { m->openOutputFile(qualFile, outQual);       outputNames.push_back(qualFile);  outputTypes["qfile"].push_back(qualFile);             }
+        
+        TrimOligos* trimOligos = NULL;
+        int numBarcodes, numPrimers; numBarcodes = 0; numPrimers = 0;
+        if (oligosfile != "")       {
+            readOligos(oligosfile);
+            numPrimers = primers.size(); numBarcodes = barcodes.size();
+            //find group read belongs to
+            if (pairedOligos)   {   trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); numPrimers = pairedPrimers.size(); }
+            else                {   trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);  }
+
+        }
+        else if (groupfile != "")   { readGroup(groupfile);     }
                
                ifstream in;
                m->openInputFile(fastaQFile, in);
@@ -181,65 +256,100 @@ int ParseFastaQCommand::execute(){
             convertTable.push_back(temp);
         }
                
+        
+        int count = 0;
                while (!in.eof()) {
                        
                        if (m->control_pressed) { break; }
-               
-                       //read sequence name
-                       string name = m->getline(in); m->gobble(in);
-                       if (name == "") {  m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       else { 
-                name = name.substr(1); 
-                m->checkName(name);
-            }
-                       
-                       //read sequence
-                       string sequence = m->getline(in); m->gobble(in);
-                       if (sequence == "") {  m->mothurOut("[ERROR]: missing sequence for " + name); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       
-                       //read sequence name
-                       string name2 = m->getline(in); m->gobble(in);
-                       if (name2 == "") {  m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       else { 
-                name2 = name2.substr(1);  
-                m->checkName(name2);
-            }
-                       
-                       //read quality scores
-                       string quality = m->getline(in); m->gobble(in);
-                       if (quality == "") {  m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       
-                       //sanity check sequence length and number of quality scores match
-                       if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; break; } }
-                       if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       
-            vector<int> qualScores;
-            if (qual) {
-                               qualScores = convertQual(quality);
-                               outQual << ">" << name << endl;
-                               for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
-                               outQual << endl;
-                       }
             
-            if (m->control_pressed) { break; }
+            bool ignore;
+            fastqRead2 thisRead = readFastq(in, ignore);
             
-            if (pacbio) {
-                if (!qual) { qualScores = convertQual(quality); } //get scores if we didn't already
-                for (int i = 0; i < qualScores.size(); i++) {
-                    if (qualScores[i] == 0){ sequence[i] = 'N'; }
+            if (!ignore) {
+                vector<int> qualScores;
+                if (qual) {
+                    qualScores = convertQual(thisRead.quality);
+                    outQual << ">" << thisRead.seq.getName() << endl;
+                    for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
+                    outQual << endl;
                 }
-            }
-            
-                       //print sequence info to files
-                       if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
-                       
+                
+                if (m->control_pressed) { break; }
+                
+                if (pacbio) {
+                    if (!qual) { qualScores = convertQual(thisRead.quality); } //convert if not done
+                    string sequence = thisRead.seq.getAligned();
+                    for (int i = 0; i < qualScores.size(); i++) {
+                        if (qualScores[i] == 0){ sequence[i] = 'N'; }
+                    }
+                    thisRead.seq.setAligned(sequence);
+                }
+                
+                //print sequence info to files
+                if (fasta) { thisRead.seq.printSequence(outFasta); }
+                
+                if (split > 1) {
+                    int barcodeIndex, primerIndex, trashCodeLength;
+                    if (oligosfile != "")      {  trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, trimOligos, numBarcodes, numPrimers);    }
+                    else if (groupfile != "")  {  trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, "groupMode");   }
+                    else {  m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }
+                    
+                    if(trashCodeLength == 0){
+                        ofstream out;
+                        m->openOutputFileAppend(fastqFileNames[barcodeIndex][primerIndex], out);
+                        out << thisRead.wholeRead;
+                        out.close();
+                    }else{
+                        ofstream out;
+                        m->openOutputFileAppend(noMatchFile, out);
+                        out << thisRead.wholeRead;
+                        out.close();
+                    }
+                }
+                //report progress
+                if((count+1) % 10000 == 0){    m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }
+                if(count > 100000){    break;  }
+                count++;
+                       }
                }
                
                in.close();
                if (fasta)      { outFasta.close();     }
                if (qual)       { outQual.close();      }
+        
+        //report progress
+               if (!m->control_pressed) {   if((count) % 10000 != 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }
+        
+        if (split > 1) {
+            
+            if (groupfile != "")        { delete groupMap;      }
+            else if (oligosfile != "")  { delete trimOligos;    }
+           
+                       map<string, string>::iterator it;
+                       set<string> namesToRemove;
+                       for(int i=0;i<fastqFileNames.size();i++){
+                               for(int j=0;j<fastqFileNames[0].size();j++){
+                                       if (fastqFileNames[i][j] != "") {
+                                               if (namesToRemove.count(fastqFileNames[i][j]) == 0) {
+                                                       if(m->isBlank(fastqFileNames[i][j])){
+                                                               m->mothurRemove(fastqFileNames[i][j]);
+                                                               namesToRemove.insert(fastqFileNames[i][j]);
+                            }
+                                               }
+                                       }
+                               }
+                       }
+            
+                       //remove names for outputFileNames, just cleans up the output
+                       for(int i = 0; i < outputNames.size(); i++) {
+                if (namesToRemove.count(outputNames[i]) != 0) {
+                    outputNames.erase(outputNames.begin()+i);
+                    i--;
+                }
+            }
+            if(m->isBlank(noMatchFile)){  m->mothurRemove(noMatchFile); }
+            else { outputNames.push_back(noMatchFile); outputTypes["fastq"].push_back(noMatchFile); }
+        }
                
                if (m->control_pressed) { outputTypes.clear(); outputNames.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
                
@@ -267,6 +377,55 @@ int ParseFastaQCommand::execute(){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+fastqRead2 ParseFastaQCommand::readFastq(ifstream& in, bool& ignore){ //reads one 4-line fastq record from in; ignore comes back true if the record failed any check below
+    try {
+        ignore = false; //reset for every record; the checks below only ever switch it to true
+        string wholeRead = ""; //raw text of the record, only accumulated when split > 1
+        //there is no early return: all four lines are consumed even after a problem has been found
+        //line 1: '@' followed by the sequence name; only the first piece returned by splitWhiteSpace is kept
+        string line = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += line + "\n"; }
+        vector<string> pieces = m->splitWhiteSpace(line);
+        string name = "";  if (pieces.size() != 0) { name = pieces[0]; }
+        if (name == "") {  m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true;  }
+        else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
+        else { name = name.substr(1); } //drop the leading '@'
+        //NOTE(review): the warnings from here on are not followed by m->mothurOutEndLine(), unlike the two name warnings above
+        //line 2: the sequence itself
+        string sequence = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += sequence + "\n"; }
+        if (sequence == "") {  m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
+        
+        //line 3: '+', optionally followed by a repeat of the sequence name
+        line = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += line + "\n"; }
+        pieces = m->splitWhiteSpace(line);
+        string name2 = "";  if (pieces.size() != 0) { name2 = pieces[0]; }
+        if (name2 == "") {  m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
+        else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
+        else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } } //a bare '+' inherits the name read from line 1
+        
+                
+        //line 4: quality string, stored unconverted in the returned read
+        string quality = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += quality + "\n"; }
+        if (quality == "") {  m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
+        
+        //sanity checks: the names from lines 1 and 3 must agree, and sequence and quality must have the same length
+        if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
+        if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
+        //NOTE(review): checkName presumably normalizes the name in place - confirm against MothurOut
+        m->checkName(name);
+        Sequence seq(name, sequence);
+        fastqRead2 read(seq, quality, wholeRead);
+            
+        if (m->debug) { m->mothurOut("[DEBUG]: " + read.seq.getName() + " " + read.seq.getAligned() + " " + quality + "\n"); }
+        //the read is built and returned even when ignore is true, so callers must test ignore before using it
+        return read;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ParseFastaQCommand", "readFastq");
+        exit(1);
+    }
+}
+
 //**********************************************************************************************************************
 vector<int> ParseFastaQCommand::convertQual(string qual) {
        try {
@@ -302,6 +461,446 @@ vector<int> ParseFastaQCommand::convertQual(string qual) {
        }
 }
 //**********************************************************************************************************************
+int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, TrimOligos*& trimOligos, int numBarcodes, int numPrimers) {
+       try {
+        int success = 1;
+        string trashCode = "";
+        int currentSeqsDiffs = 0;
+        
+        Sequence currSeq(thisRead.seq.getName(), thisRead.seq.getAligned());
+        QualityScores currQual; currQual.setScores(convertQual(thisRead.quality));
+        
+        if(linker.size() != 0){
+            success = trimOligos->stripLinker(currSeq, currQual);
+            if(success > ldiffs)               {       trashCode += 'k';       }
+            else{ currentSeqsDiffs += success;  }
+            
+        }
+        
+        if(numBarcodes != 0){
+            success = trimOligos->stripBarcode(currSeq, currQual, barcode);
+            if(success > bdiffs)               {       trashCode += 'b';       }
+            else{ currentSeqsDiffs += success;  }
+        }
+        
+        if(spacer.size() != 0){
+            success = trimOligos->stripSpacer(currSeq, currQual);
+            if(success > sdiffs)               {       trashCode += 's';       }
+            else{ currentSeqsDiffs += success;  }
+            
+        }
+        
+        if(numPrimers != 0){
+            success = trimOligos->stripForward(currSeq, currQual, primer, true);
+            if(success > pdiffs)               {       trashCode += 'f';       }
+            else{ currentSeqsDiffs += success;  }
+        }
+        
+        if (currentSeqsDiffs > tdiffs) {       trashCode += 't';   }
+        
+        if(revPrimer.size() != 0){
+            success = trimOligos->stripReverse(currSeq, currQual);
+            if(!success)                               {       trashCode += 'r';       }
+        }
+        
+        
+        return trashCode.length();
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ParseFastaQCommand", "findGroup");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, string groupMode) {
+       try {
+        string trashCode = "";
+        primer = 0;
+        
+        string group = groupMap->getGroup(thisRead.seq.getName());
+        if (group == "not found") {     trashCode += "g";   } //scrap for group
+        else { //find file group
+            map<string, int>::iterator it = barcodes.find(group);
+            if (it != barcodes.end()) {
+                barcode = it->second;
+            }else { trashCode += "g"; }
+        }
+        
+        return trashCode.length();
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ParseFastaQCommand", "findGroup");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+
+bool ParseFastaQCommand::readOligos(string oligoFile){
+       try {
+               ifstream inOligos;
+               m->openInputFile(oligoFile, inOligos);
+               
+               string type, oligo, roligo, group;
+        bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false;
+        
+               int indexPrimer = 0;
+               int indexBarcode = 0;
+        int indexPairedPrimer = 0;
+               int indexPairedBarcode = 0;
+        set<string> uniquePrimers;
+        set<string> uniqueBarcodes;
+               
+               while(!inOligos.eof()){
+            
+                       inOligos >> type;
+            
+                       if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
+            
+                       if(type[0] == '#'){
+                               while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
+                               m->gobble(inOligos);
+                       }
+                       else{
+                               m->gobble(inOligos);
+                               //make type case insensitive
+                               for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
+                               
+                               inOligos >> oligo;
+                
+                if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
+                               
+                               for(int i=0;i<oligo.length();i++){
+                                       oligo[i] = toupper(oligo[i]);
+                                       if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
+                               }
+                               
+                               if(type == "FORWARD"){
+                                       group = "";
+                                       
+                                       // get rest of line in case there is a primer name
+                                       while (!inOligos.eof()) {
+                                               char c = inOligos.get();
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
+                                               else if (c == 32 || c == 9){;} //space or tab
+                                               else {  group += c;  }
+                                       }
+                                       
+                                       //check for repeat barcodes
+                                       map<string, int>::iterator itPrime = primers.find(oligo);
+                                       if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
+                                       
+                    if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
+                    
+                                       primers[oligo]=indexPrimer; indexPrimer++;
+                                       primerNameVector.push_back(group);
+                               }
+                else if (type == "PRIMER"){
+                    m->gobble(inOligos);
+                                       
+                    inOligos >> roligo;
+                    
+                    for(int i=0;i<roligo.length();i++){
+                        roligo[i] = toupper(roligo[i]);
+                        if(roligo[i] == 'U')   {       roligo[i] = 'T';        }
+                    }
+                    roligo = reverseOligo(roligo);
+                    
+                    group = "";
+                    
+                                       // get rest of line in case there is a primer name
+                                       while (!inOligos.eof()) {
+                                               char c = inOligos.get();
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
+                                               else if (c == 32 || c == 9){;} //space or tab
+                                               else {  group += c;  }
+                                       }
+                    
+                    oligosPair newPrimer(oligo, roligo);
+                    
+                    if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
+                                       
+                                       //check for repeat barcodes
+                    string tempPair = oligo+roligo;
+                    if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
+                    else { uniquePrimers.insert(tempPair); }
+                                       
+                    if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
+                    
+                                       pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
+                                       primerNameVector.push_back(group);
+                    hasPrimer = true;
+                }
+                               else if(type == "REVERSE"){
+                                       //Sequence oligoRC("reverse", oligo);
+                                       //oligoRC.reverseComplement();
+                    string oligoRC = reverseOligo(oligo);
+                                       revPrimer.push_back(oligoRC);
+                               }
+                               else if(type == "BARCODE"){
+                                       inOligos >> group;
+                    
+                    //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
+                    //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
+                    
+                    string temp = "";
+                    while (!inOligos.eof())    {
+                                               char c = inOligos.get();
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
+                                               else if (c == 32 || c == 9){;} //space or tab
+                                               else {  temp += c;  }
+                                       }
+                                       
+                    //then this is illumina data with 4 columns
+                    if (temp != "") {
+                        hasPairedBarcodes = true;
+                        string reverseBarcode = group; //reverseOligo(group); //reverse barcode
+                        group = temp;
+                        
+                        for(int i=0;i<reverseBarcode.length();i++){
+                            reverseBarcode[i] = toupper(reverseBarcode[i]);
+                            if(reverseBarcode[i] == 'U')       {       reverseBarcode[i] = 'T';        }
+                        }
+                        
+                        reverseBarcode = reverseOligo(reverseBarcode);
+                        oligosPair newPair(oligo, reverseBarcode);
+                        
+                        if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
+                        //check for repeat barcodes
+                        string tempPair = oligo+reverseBarcode;
+                        if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
+                        else { uniqueBarcodes.insert(tempPair); }
+                        
+                        pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
+                        barcodeNameVector.push_back(group);
+                    }else {
+                        //check for repeat barcodes
+                        map<string, int>::iterator itBar = barcodes.find(oligo);
+                        if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
+                        
+                        barcodes[oligo]=indexBarcode; indexBarcode++;
+                        barcodeNameVector.push_back(group);
+                    }
+                               }else if(type == "LINKER"){
+                                       linker.push_back(oligo);
+                               }else if(type == "SPACER"){
+                                       spacer.push_back(oligo);
+                               }
+                               else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
+                       }
+                       m->gobble(inOligos);
+               }
+               inOligos.close();
+               
+        if (hasPairedBarcodes || hasPrimer) {
+            pairedOligos = true;
+            if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true;  m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine();  return 0; }
+        }
+        
+               //add in potential combos
+               if(barcodeNameVector.size() == 0){
+                       barcodes[""] = 0;
+                       barcodeNameVector.push_back("");
+               }
+               
+               if(primerNameVector.size() == 0){
+                       primers[""] = 0;
+                       primerNameVector.push_back("");
+               }
+               
+               fastqFileNames.resize(barcodeNameVector.size());
+               for(int i=0;i<fastqFileNames.size();i++){
+                       fastqFileNames[i].assign(primerNameVector.size(), "");
+               }
+               
+               
+                       set<string> uniqueNames; //used to cleanup outputFileNames
+            if (pairedOligos) {
+                for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
+                    for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
+                        
+                        string primerName = primerNameVector[itPrimer->first];
+                        string barcodeName = barcodeNameVector[itBar->first];
+                        
+                        if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                        else {
+                            string comboGroupName = "";
+                            string fastqFileName = "";
+                            
+                            if(primerName == ""){
+                                comboGroupName = barcodeNameVector[itBar->first];
+                            }
+                            else{
+                                if(barcodeName == ""){
+                                    comboGroupName = primerNameVector[itPrimer->first];
+                                }
+                                else{
+                                    comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
+                                }
+                            }
+                            
+                            
+                            ofstream temp;
+                            map<string, string> variables;
+                            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+                            variables["[group]"] = comboGroupName;
+                            fastqFileName = getOutputFileName("fastq", variables);
+                            if (uniqueNames.count(fastqFileName) == 0) {
+                                outputNames.push_back(fastqFileName);
+                                outputTypes["fastq"].push_back(fastqFileName);
+                                uniqueNames.insert(fastqFileName);
+                            }
+                            
+                            fastqFileNames[itBar->first][itPrimer->first] = fastqFileName;
+                            m->openOutputFile(fastqFileName, temp);            temp.close();
+                            
+                        }
+                    }
+                }
+            }else {
+                for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+                    for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+                        
+                        string primerName = primerNameVector[itPrimer->second];
+                        string barcodeName = barcodeNameVector[itBar->second];
+                        
+                        if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                        else {
+                            string comboGroupName = "";
+                            string fastqFileName = "";
+                            
+                            if(primerName == ""){
+                                comboGroupName = barcodeNameVector[itBar->second];
+                            }
+                            else{
+                                if(barcodeName == ""){
+                                    comboGroupName = primerNameVector[itPrimer->second];
+                                }
+                                else{
+                                    comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+                                }
+                            }
+                            
+                            
+                            ofstream temp;
+                            map<string, string> variables;
+                            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+                            variables["[group]"] = comboGroupName;
+                            fastqFileName = getOutputFileName("fastq", variables);
+                            if (uniqueNames.count(fastqFileName) == 0) {
+                                outputNames.push_back(fastqFileName);
+                                outputTypes["fastq"].push_back(fastqFileName);
+                                uniqueNames.insert(fastqFileName);
+                            }
+                            
+                            fastqFileNames[itBar->second][itPrimer->second] = fastqFileName;
+                            m->openOutputFile(fastqFileName, temp);            temp.close();
+                            
+                        }
+                    }
+                }
+            }
+               
+        ofstream temp;
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+        variables["[group]"] = "scrap";
+        noMatchFile = getOutputFileName("fastq", variables);
+        m->openOutputFile(noMatchFile, temp);          temp.close();
+
+               return true;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ParseFastaQCommand", "getOligos");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+bool ParseFastaQCommand::readGroup(string groupfile){
+       try {
+        fastqFileNames.clear();
+        
+        groupMap = new GroupMap();
+        groupMap->readMap(groupfile);
+        
+        //like barcodeNameVector - no primer names
+        vector<string> groups = groupMap->getNamesOfGroups();
+               
+               fastqFileNames.resize(groups.size());
+        for (int i = 0; i < fastqFileNames.size(); i++) {
+            for (int j = 0; j < 1; j++) {
+                
+                map<string, string> variables;
+                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+                variables["[group]"] = groups[i];
+                string thisFilename = getOutputFileName("fastq",variables);
+                outputNames.push_back(thisFilename);
+                outputTypes["fastq"].push_back(thisFilename);
+                
+                ofstream temp;
+                m->openOutputFileBinary(thisFilename, temp); temp.close();
+                fastqFileNames[i].push_back(thisFilename);
+                barcodes[groups[i]] = i;
+            }
+        }
+        
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+        variables["[group]"] = "scrap";
+               noMatchFile = getOutputFileName("fastq",variables);
+        m->mothurRemove(noMatchFile);
+               
+               return true;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ParseFastaQCommand", "readGroup");
+               exit(1);
+       }
+}
+//********************************************************************/
+string ParseFastaQCommand::reverseOligo(string oligo){
+       try {
+        string reverse = "";
+        
+        for(int i=oligo.length()-1;i>=0;i--){
+            
+            if(oligo[i] == 'A')                {       reverse += 'T'; }
+            else if(oligo[i] == 'T'){  reverse += 'A'; }
+            else if(oligo[i] == 'U'){  reverse += 'A'; }
+            
+            else if(oligo[i] == 'G'){  reverse += 'C'; }
+            else if(oligo[i] == 'C'){  reverse += 'G'; }
+            
+            else if(oligo[i] == 'R'){  reverse += 'Y'; }
+            else if(oligo[i] == 'Y'){  reverse += 'R'; }
+            
+            else if(oligo[i] == 'M'){  reverse += 'K'; }
+            else if(oligo[i] == 'K'){  reverse += 'M'; }
+            
+            else if(oligo[i] == 'W'){  reverse += 'W'; }
+            else if(oligo[i] == 'S'){  reverse += 'S'; }
+            
+            else if(oligo[i] == 'B'){  reverse += 'V'; }
+            else if(oligo[i] == 'V'){  reverse += 'B'; }
+            
+            else if(oligo[i] == 'D'){  reverse += 'H'; }
+            else if(oligo[i] == 'H'){  reverse += 'D'; }
+            
+            else                                               {       reverse += 'N'; }
+        }
+        
+        
+        return reverse;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ParseFastaQCommand", "reverseOligo");
+               exit(1);
+       }
+}
+
+
+//**********************************************************************************************************************
 
 
 
index 5ae6b9c482fe6b4022bb4d4c5d4ae2e79ea3aad8..2ac141626b4b528d352d2860dc64e65826e22357 100644 (file)
 
 
 #include "command.hpp"
+#include "trimoligos.h"
+#include "sequence.hpp"
+#include "groupmap.h"
+
+// One fastq record as handed between the ParseFastaQCommand helpers.
+struct fastqRead2 {
+    string quality;     // quality string of the record
+    Sequence seq;       // name and bases of the record
+    string wholeRead;   // extra text supplied by the caller (presumably the record's raw lines - confirm in readFastq)
+
+    fastqRead2() {}
+    // Initializers are written in declaration order: members are always initialized in the order
+    // they are declared, whatever order the list uses, so matching it avoids a misleading
+    // reading (and -Wreorder warnings).
+    fastqRead2(const Sequence& s, const string& q, const string& w) : quality(q), seq(s), wholeRead(w) {}
+};
+
 
 class ParseFastaQCommand : public Command {
 
@@ -35,11 +49,29 @@ public:
 private:
 
        vector<string> outputNames;     
-       string outputDir, fastaQFile, format;
-       bool abort, fasta, qual, pacbio;
+       string outputDir, fastaQFile, format, oligosfile, groupfile;
+       bool abort, fasta, qual, pacbio, pairedOligos;
+    int pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, split;
+    GroupMap* groupMap;
+    
+    //oligos file data structures
+    vector<string> linker, spacer, primerNameVector, barcodeNameVector, revPrimer;
+       map<string, int> barcodes;
+       map<string, int> primers;
+    map<int, oligosPair> pairedBarcodes;
+    map<int, oligosPair> pairedPrimers;
+    vector<vector<string> > fastqFileNames;
+    string noMatchFile;
        
        vector<int> convertQual(string);
     vector<char> convertTable;
+    bool readOligos(string oligosFile);
+    bool readGroup(string oligosFile);
+    string reverseOligo(string oligo);
+    fastqRead2 readFastq(ifstream&, bool&);
+    int findGroup(fastqRead2, int&, int&, TrimOligos*&, int, int);
+    int findGroup(fastqRead2, int&, int&, string);
+    
 };
 
 #endif
index 32a7e0828ff143d8f414bacf66c52a205bbf27c7..cfd1b6b9705ccb131e050ce132547192892d12ea 100644 (file)
@@ -53,7 +53,7 @@ string ParseListCommand::getOutputPattern(string type) {
     try {
         string pattern = "";
         
-        if (type == "list") {  pattern = "[filename],[group],list"; } 
+        if (type == "list") {  pattern = "[filename],[group],[distance],list"; } 
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
         return pattern;
@@ -205,27 +205,6 @@ int ParseListCommand::execute(){
        
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
-               //set fileroot
-               map<string, string> variables; 
-        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
-               
-               //fill filehandles with neccessary ofstreams
-               int i;
-               ofstream* temp;
-               vector<string> gGroups;
-        if (groupfile != "") { gGroups = groupMap->getNamesOfGroups(); }
-        else { gGroups = ct.getNamesOfGroups(); }
-        
-               for (i=0; i<gGroups.size(); i++) {
-                       temp = new ofstream;
-                       filehandles[gGroups[i]] = temp;
-                       
-            variables["[group]"] = gGroups[i];
-                       string filename = getOutputFileName("list",variables);
-                       outputNames.push_back(filename); outputTypes["list"].push_back(filename);
-                       m->openOutputFile(filename, *temp);
-               }
-               
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> processedLabels;
                set<string> userLabels = labels;        
@@ -236,16 +215,13 @@ int ParseListCommand::execute(){
                
                if (m->control_pressed) { 
                        delete list; if (groupfile != "") { delete groupMap; }
-                       for (i=0; i<gGroups.size(); i++) {  (*(filehandles[gGroups[i]])).close();  delete filehandles[gGroups[i]]; } 
-                       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
-                       return 0;
+                       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
                }
                
                while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                
                        if (m->control_pressed) { 
                                delete list; if (groupfile != "") { delete groupMap; }
-                               for (i=0; i<gGroups.size(); i++) {  (*(filehandles[gGroups[i]])).close();  delete filehandles[gGroups[i]]; } 
                                for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
                                return 0;
                        }
@@ -283,7 +259,6 @@ int ParseListCommand::execute(){
                
                if (m->control_pressed) { 
                        if (groupfile != "") { delete groupMap; }
-                       for (i=0; i<gGroups.size(); i++) {  (*(filehandles[gGroups[i]])).close();  delete filehandles[gGroups[i]]; } 
                        for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
                        return 0;
                }
@@ -304,7 +279,6 @@ int ParseListCommand::execute(){
                
                if (m->control_pressed) { 
                        if (groupfile != "") { delete groupMap; }
-                       for (i=0; i<gGroups.size(); i++) {  (*(filehandles[gGroups[i]])).close();  delete filehandles[gGroups[i]]; } 
                        for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } outputTypes.clear();
                        return 0;
                }
@@ -320,11 +294,6 @@ int ParseListCommand::execute(){
                        delete list;
                }
                
-               for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
-                       (*(filehandles[it3->first])).close();
-                       delete it3->second;
-               }
-               
                if (groupfile != "") { delete groupMap; }
                
                if (m->control_pressed) { 
@@ -354,8 +323,33 @@ int ParseListCommand::execute(){
 /**********************************************************************************************************************/
 int ParseListCommand::parse(ListVector* thisList) {
        try {
+        map<string, ofstream*> filehandles;
+        map<string, ofstream*>::iterator it3;
+        
+        //set fileroot
+               map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
+        variables["[distance]"] = thisList->getLabel();
+               
+               //fill filehandles with neccessary ofstreams
+               ofstream* temp;
+               vector<string> gGroups;
+        if (groupfile != "") { gGroups = groupMap->getNamesOfGroups(); }
+        else { gGroups = ct.getNamesOfGroups(); }
+        
+               for (int i=0; i<gGroups.size(); i++) {
+                       temp = new ofstream;
+                       filehandles[gGroups[i]] = temp;
+                       
+            variables["[group]"] = gGroups[i];
+                       string filename = getOutputFileName("list",variables);
+                       m->openOutputFile(filename, *temp);
+            outputNames.push_back(filename); outputTypes["list"].push_back(filename);
+               }
+
        
                map<string, string> groupVector;
+        map<string, string> groupLabels;
                map<string, string>::iterator itGroup;
                map<string, int> groupNumBins;
                
@@ -363,11 +357,12 @@ int ParseListCommand::parse(ListVector* thisList) {
                for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
                        groupNumBins[it3->first] = 0;
                        groupVector[it3->first] = "";
+            groupLabels[it3->first] = "label\tnumOtus\t";
                }
 
-               
+               vector<string> binLabels = thisList->getLabels();
                for (int i = 0; i < thisList->getNumBins(); i++) {
-                       if (m->control_pressed) { return 0; }
+                       if (m->control_pressed) { break; }
                        
                        map<string, string> groupBins;
                        string bin = list->get(i); 
@@ -408,14 +403,26 @@ int ParseListCommand::parse(ListVector* thisList) {
                        
                        //print parsed bin info to files
                        for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
-                               groupVector[itGroup->first] +=  itGroup->second + '\t'; 
+                               groupVector[itGroup->first] +=  itGroup->second + '\t';
+                groupLabels[itGroup->first] +=  binLabels[i] + '\t';
                        }
                
                }
                
+        if (m->control_pressed) {
+            for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
+                (*(filehandles[it3->first])).close();
+                delete it3->second;
+            }
+            return 0;
+        }
+        
                //end list vector
                for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
+            (*(filehandles[it3->first])) << groupLabels[it3->first] << endl;
                        (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
+            (*(filehandles[it3->first])).close();
+            delete it3->second;
                }
                
                return 0;
index 84abc85d46e0bc3fddd916a0095c4b45aa44dbd1..48b53a69d8141efbedc7373de2d770d53bace282 100644 (file)
@@ -47,9 +47,6 @@ private:
        set<string> labels;
        bool abort, allLines;
        vector<string> outputNames;
-       
-       map<string, ofstream*> filehandles;
-       map<string, ofstream*>::iterator it3;
 
 };
 
index f4481ab9402f5a5362b6d864980bbc9b6e1e101f..ba9062437d78df8aa337bc5948b603b1acf8e351 100644 (file)
@@ -60,7 +60,7 @@ private:
     int readCount(set<string>);
     bool readOligos();
     bool readEcoli();
-       int driverPcr(string, string, string, string, set<string>&, linePair, int&, int&, bool&);
+       int driverPcr(string, string, string, string, set<string>&, linePair, int&, bool&);
        int createProcesses(string, string, string, set<string>&);
     bool isAligned(string, map<int, int>&);
     string reverseOligo(string);
@@ -98,8 +98,8 @@ struct pcrData {
         nomatch = nm;
         keepprimer = kp;
         keepdots = kd;
+        end = en;
                start = st;
-               end = en;
         length = l;
                fstart = fst;
         fend = fen;
@@ -142,8 +142,8 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
         set<int> lengths;
         //pdiffs, bdiffs, primers, barcodes, revPrimers
         map<string, int> faked;
-        vector< set<int> > locations; //locations[0] = beginning locations, locations[1] = ending locations
-        locations.resize(2);
+        set<int> locations; //locations = beginning locations
+        
         TrimOligos trim(pDataArray->pdiffs, 0, pDataArray->primers, faked, pDataArray->revPrimer);
                
                for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
@@ -192,7 +192,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                                     else            {
                                         currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd-1]+1));
                                         if (pDataArray->fileAligned) {
-                                            thisPStart = mapAligned[primerEnd-1]+1; //locations[0].insert(mapAligned[primerEnd-1]+1);
+                                            thisPStart = mapAligned[primerEnd-1]+1; //locations.insert(mapAligned[primerEnd-1]+1);
                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
                                         }
 }
@@ -202,7 +202,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                                     else            {
                                         currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));
                                         if (pDataArray->fileAligned) {
-                                            thisPStart = mapAligned[primerStart]; //locations[0].insert(mapAligned[primerStart]);
+                                            thisPStart = mapAligned[primerStart]; //locations.insert(mapAligned[primerStart]);
                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
                                         }
                                     }
@@ -237,7 +237,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                                     else            {
                                         currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));
                                         if (pDataArray->fileAligned) {
-                                            thisPEnd = mapAligned[primerStart]; //locations[1].insert(mapAligned[primerStart]);
+                                            thisPEnd = mapAligned[primerStart]; //locations.insert(mapAligned[primerStart]);
                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
                                         }
 
@@ -248,7 +248,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                                     else            {
                                         currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd-1]+1));
                                         if (pDataArray->fileAligned) {
-                                            thisPEnd = mapAligned[primerEnd-1]+1; //locations[1].insert(mapAligned[primerEnd-1]+1);
+                                            thisPEnd = mapAligned[primerEnd-1]+1; //locations.insert(mapAligned[primerEnd-1]+1);
                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
                                         }
 
@@ -303,8 +303,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                                if(goodSeq == 1)    {
                     currSeq.printSequence(goodFile);
                     if (locationsString != "") { locationsFile << locationsString; }
-                    if (thisPStart != -1)   { locations[0].insert(thisPStart);  }
-                    if (thisPEnd != -1)     { locations[1].insert(thisPEnd);    }
+                    if (thisPStart != -1)   { locations.insert(thisPStart);  }
                 }
                                else {  
                     pDataArray->badSeqNames.insert(currSeq.getName()); 
@@ -327,9 +326,8 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
         if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: fileAligned = " + toString(pDataArray->fileAligned) +'\n'); }
         
         if (pDataArray->fileAligned && !pDataArray->keepdots) { //print out smallest start value and largest end value
-            if ((locations[0].size() > 1) || (locations[1].size() > 1)) { pDataArray->adjustNeeded = true; }
-            if (pDataArray->primers.size() != 0)    {   set<int>::iterator it = locations[0].begin();  pDataArray->pstart = *it;  }
-            if (pDataArray->revPrimer.size() != 0)  {   set<int>::reverse_iterator it2 = locations[1].rbegin();  pDataArray->pend = *it2; }
+            if (locations.size() > 1) { pDataArray->adjustNeeded = true; }
+            if (pDataArray->primers.size() != 0)    {   set<int>::iterator it = locations.begin();  pDataArray->pstart = *it;  }
         }
         
         return 0;
index b9bab4ec2934a4121305f87111cf2d238f5ec4d5..2e3136e1f42e1a93d15fcab921b2e612f55f3997 100644 (file)
@@ -233,6 +233,48 @@ string PhyloTree::getNextTaxon(string& heirarchy, string seqname){
                exit(1);
        }
 }
+/**************************************************************************************************/
+
+vector<string> PhyloTree::getSeqs(string seqTaxonomy){ //walks the tree along seqTaxonomy and returns the accessions stored at the final node; returns an empty vector if the walk is interrupted or fails
+       try {
+        string taxCopy = seqTaxonomy; //unmodified copy, only used in the messages below
+        vector<string> names; //result; stays empty unless the whole taxonomy string is consumed
+        map<string, int>::iterator childPointer;
+               
+               int currentNode = 0; //the walk starts at tree[0] (presumably the root - TODO confirm)
+
+        m->removeConfidences(seqTaxonomy); //presumably strips confidence scores from the taxonomy string - verify against MothurOut
+        
+        string taxon;
+        while(seqTaxonomy != ""){ //relies on getNextTaxon (takes the string by reference) shortening seqTaxonomy on each pass
+                       
+                       if (m->control_pressed) { return names; }
+                       
+                       taxon = getNextTaxon(seqTaxonomy, "");
+            
+            if (m->debug) { m->mothurOut(taxon +'\n'); }
+                       
+                       if (taxon == "") {  m->mothurOut(taxCopy + " has an error in the taxonomy.  This may be due to a ;;"); m->mothurOutEndLine(); break;  } //empty level: report it and stop, names is returned as it stands
+                       
+                       childPointer = tree[currentNode].children.find(taxon); //children maps a taxon name to an index into tree
+                       
+                       if(childPointer != tree[currentNode].children.end()){   //if the node already exists, move on
+                               currentNode = childPointer->second;
+                       }
+                       else{                                                                                   //otherwise, error this taxonomy is not in tree
+                               m->mothurOut("[ERROR]: " + taxCopy + " is not in taxonomy tree, please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+                       }
+            
+                       if (seqTaxonomy == "") {   names = tree[currentNode].accessions;        } //last level consumed: copy the accessions of the node we ended on
+               }
+        
+        return names;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloTree", "getSeqs");
+               exit(1);
+       }
+}
 
 /**************************************************************************************************/
 
index e0002209e4d83aaaf9468573173858d9b0602b61..4b7a35a24547b155c857f140496b6cc03654c66d 100644 (file)
@@ -46,6 +46,7 @@ public:
        string getName(int i);                  
        int getGenusIndex(string seqName);      
        string getFullTaxonomy(string);  //pass a sequence name return taxonomy
+    vector<string> getSeqs(string);     //returns names of sequences in given taxonomy
        
        int getMaxLevel()               {       return maxLevel;        }
        int getNumSeqs()                {       return numSeqs;         }
index 16117053fc7c0a8147c7f00fcdd935324ff66edb..b21865abb54472b29473a76e2a9898939f7904c5 100644 (file)
@@ -303,7 +303,9 @@ int PhylotypeCommand::execute(){
                                }       
                                
                                //print listvector
-                               list.print(outList);
+                               if (!m->printedListHeaders) { list.printHeaders(outList); }
+                list.print(outList);
+                
                 if (countfile == "") {
                     //print rabund
                     list.getRAbundVector().print(outRabund);
index cb8dd490e3a53a40edce7d1d52fffada445fac13..a1eae4687c4d49b2ce8e0f90895a36a994aec689 100644 (file)
@@ -443,13 +443,13 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string
                                process++;
                        }else if (pid == 0){
                 string locationsFile = toString(getpid()) + ".temp";
-                               num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", locationsFile, badSeqNames, lines[process], pstart, pend, adjustNeeded);
+                               num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", locationsFile, badSeqNames, lines[process], pstart, adjustNeeded);
                                
                                //pass numSeqs to parent
                                ofstream out;
                                string tempFile = filename + toString(getpid()) + ".num.temp";
                                m->openOutputFile(tempFile, out);
-                out << pstart << '\t' << pend << '\t' << adjustNeeded << endl;
+                out << pstart << '\t' << adjustNeeded << endl;
                                out << num << '\t' << badSeqNames.size() << endl;
                 for (set<string>::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
                     out << (*it) << endl;
@@ -465,7 +465,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string
                }
                
         string locationsFile = toString(getpid()) + ".temp";
-        num = driverPcr(filename, goodFileName, badFileName, locationsFile, badSeqNames, lines[0], pstart, pend, adjustNeeded);
+        num = driverPcr(filename, goodFileName, badFileName, locationsFile, badSeqNames, lines[0], pstart, adjustNeeded);
         
                //force parent to wait until all the processes are done
                for (int i=0;i<processIDS.size();i++) { 
@@ -478,20 +478,16 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string
                        string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
                        m->openInputFile(tempFile, in);
             int numBadNames = 0; string name = "";
-            int tpstart = -1; int tpend = -1; bool tempAdjust = false;
+            int tpstart = -1; bool tempAdjust = false;
             
                        if (!in.eof()) {
-                in >> tpstart >> tpend >> tempAdjust; m->gobble(in);
+                in >> tpstart >> tempAdjust; m->gobble(in);
                 
                 if (tempAdjust) { adjustNeeded = true; }
                 if (tpstart != -1)   {
                     if (tpstart != pstart) { adjustNeeded = true; }
                     if (tpstart < pstart) { pstart = tpstart; } //smallest start
                 } 
-                if (tpend != -1)     {
-                    if (tpend != pend) { adjustNeeded = true; }
-                    if (tpend > pend) { pend = tpend; } //largest end
-                } 
                 int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in);
             }
             for (int j = 0; j < numBadNames; j++) {
@@ -541,7 +537,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string
                }
                
         //do your part
-        num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"), (locationsFile+toString(processors-1)+".temp"), badSeqNames, lines[processors-1], pstart, pend, adjustNeeded);
+        num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"), (locationsFile+toString(processors-1)+".temp"), badSeqNames, lines[processors-1], pstart, adjustNeeded);
         processIDS.push_back(processors-1);
         
                //Wait until all threads have terminated.
@@ -558,11 +554,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string
                 if (pDataArray[i]->pstart != pstart) { adjustNeeded = true; }
                 if (pDataArray[i]->pstart < pstart) { pstart = pDataArray[i]->pstart; }
             } //smallest start
-            if (pDataArray[i]->pend != -1)     {
-                if (pDataArray[i]->pend != pend) { adjustNeeded = true; }
-                if (pDataArray[i]->pend > pend) { pend = pDataArray[i]->pend; }
-            } //largest end
-
+            
             for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) {        badSeqNames.insert(*it);       }
                        CloseHandle(hThreadArray[i]);
                        delete pDataArray[i];
@@ -580,7 +572,43 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string
                }
         
 #endif 
-        if (fileAligned && adjustNeeded) { adjustDots(goodFileName, locationsFile, pstart, pend); }
+        
+        
+
+        if (fileAligned) {
+            //find pend - pend is the biggest ending value, but we must account for when we adjust the start.  That adjustment may make the "new" end larger than the largest end. So let's find out what that "new" end will be.
+            ifstream inLocations;
+            m->openInputFile(locationsFile, inLocations);
+            
+            while(!inLocations.eof()) { //one pass per record of the locations file
+                
+                if (m->control_pressed) { break; }
+                
+                string name = "";
+                int thisStart = -1; int thisEnd = -1; //-1 means "not read for this record"
+                if (primers.size() != 0)    { inLocations >> name >> thisStart; m->gobble(inLocations); } //assumes a start entry precedes the end entry when forward primers are in use - TODO confirm against the writer in driverPcr
+                if (revPrimer.size() != 0)  { inLocations >> name >> thisEnd;   m->gobble(inLocations); }
+                else { pend = -1; break; } //no reverse primers: there is no end to compute, so stop scanning
+                
+                int myDiff = 0; //how far this record's start is from the common start pstart
+                if (pstart != -1) {
+                    if (thisStart != -1) {
+                        if (thisStart != pstart) { myDiff += (thisStart - pstart); }
+                    }
+                }
+                
+                int myEnd = thisEnd + myDiff; //end position after shifting by the start difference
+                //cout << name << '\t' << thisStart << '\t' << thisEnd << " diff = " << myDiff << '\t' << myEnd << endl;
+                
+                if (thisEnd != -1) {
+                    if (myEnd > pend) { pend = myEnd; } //keep the largest shifted end
+                }
+                
+            }
+            inLocations.close();
+            
+            adjustDots(goodFileName, locationsFile, pstart, pend); //NOTE(review): runs for every aligned file; adjustNeeded and keepdots are not consulted here - confirm that is intended
+        }
         
         return num;
         
@@ -592,7 +620,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string
 }
 
 //**********************************************************************************************************************
-int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, string locationsName, set<string>& badSeqNames, linePair filePos, int& pstart, int& pend, bool& adjustNeeded){
+int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, string locationsName, set<string>& badSeqNames, linePair filePos, int& pstart, bool& adjustNeeded){
        try {
                ofstream goodFile;
                m->openOutputFile(goodFasta, goodFile);
@@ -611,8 +639,7 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
                bool done = false;
                int count = 0;
         set<int> lengths;
-        vector< set<int> > locations; //locations[0] = beginning locations, locations[1] = ending locations
-        locations.resize(2);
+        set<int> locations; //locations = beginning locations (start positions recorded for sequences that passed)
         
         //pdiffs, bdiffs, primers, barcodes, revPrimers
         map<string, int> faked;
@@ -759,9 +786,8 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
                                if(goodSeq == 1)    {
                     currSeq.printSequence(goodFile);
                     if (m->debug) { m->mothurOut("[DEBUG]: " + locationsString + "\n"); }
+                    if (thisPStart != -1)   { locations.insert(thisPStart);  }
                     if (locationsString != "") { locationsFile << locationsString; }
-                    if (thisPStart != -1)   { locations[0].insert(thisPStart);  }
-                    if (thisPEnd != -1)     { locations[1].insert(thisPEnd);    }
                 }
                                else {  
                     badSeqNames.insert(currSeq.getName()); 
@@ -792,9 +818,8 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
         if (m->debug) { m->mothurOut("[DEBUG]: fileAligned = " + toString(fileAligned) +'\n'); }
     
         if (fileAligned && !keepdots) { //print out smallest start value and largest end value
-            if ((locations[0].size() > 1) || (locations[1].size() > 1)) { adjustNeeded = true; }
-            if (primers.size() != 0)    {   set<int>::iterator it = locations[0].begin();  pstart = *it;  }
-            if (revPrimer.size() != 0)  {   set<int>::reverse_iterator it2 = locations[1].rbegin();  pend = *it2; }
+            if (locations.size() > 1) { adjustNeeded = true; }
+            if (primers.size() != 0)    {   set<int>::iterator it = locations.begin();  pstart = *it;  }
         }
 
                return count;
@@ -837,6 +862,7 @@ int PcrSeqsCommand::adjustDots(string goodFasta, string locations, int pstart, i
         
         set<int> lengths;
         //cout << pstart << '\t' << pend << endl;
+        //if (pstart > pend) { //swap them
         
         while(!inFasta.eof()) {
             if(m->control_pressed) { break; }
@@ -847,6 +873,8 @@ int PcrSeqsCommand::adjustDots(string goodFasta, string locations, int pstart, i
             int thisStart = -1; int thisEnd = -1;
             if (primers.size() != 0)    { inLocations >> name >> thisStart; m->gobble(inLocations); }
             if (revPrimer.size() != 0)  { inLocations >> name >> thisEnd;   m->gobble(inLocations); }
+            
+            
             //cout << seq.getName() << '\t' << thisStart << '\t' << thisEnd << '\t' << seq.getAligned().length() << endl;
             //cout << seq.getName() << '\t' << pstart << '\t' << pend << endl;
             
@@ -887,7 +915,8 @@ int PcrSeqsCommand::adjustDots(string goodFasta, string locations, int pstart, i
         
         //cout << "final lengths = \n";
         //for (set<int>::iterator it = lengths.begin(); it != lengths.end(); it++) {
-           // cout << *it << endl;
+           //cout << *it << endl;
+           // cout << lengths.count(*it) << endl;
        // }
         
         return 0;
index fbbe4bb3a2fc5c99f77875292849e0a0e95f1043..fe722a669f2c0ca983c94ef21cc73dc95f5719bc 100644 (file)
@@ -300,6 +300,7 @@ int PreClusterCommand::execute(){
                        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run pre.cluster."); m->mothurOutEndLine(); 
                                
                }else {
+            if (processors != 1) { m->mothurOut("When running without group information mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; } //warn the user, then fall back to a single processor
                        if (namefile != "") { readNameFile(); }
                
                        //reads fasta file and return number of seqs
index 960da85e8ce7ab4922f6afc38c728f2be5288ca2..bd68e2c97721b4451f7fe51a559a22a50710885f 100644 (file)
@@ -20,7 +20,7 @@ vector<string> PrimerDesignCommand::setParameters(){
         CommandParameter pmintm("mintm", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmintm);
         CommandParameter pmaxtm("maxtm", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxtm);
         CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pprocessors);
-        CommandParameter potunumber("otunumber", "Number", "", "-1", "", "", "","",false,true,true); parameters.push_back(potunumber);
+        CommandParameter potunumber("otulabel", "String", "", "", "", "", "","",false,true,true); parameters.push_back(potunumber);
         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
         CommandParameter pcutoff("cutoff", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pcutoff);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
@@ -40,13 +40,13 @@ string PrimerDesignCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The primer.design allows you to identify sequence fragments that are specific to particular OTUs.\n";
-               helpString += "The primer.design command parameters are: list, fasta, name, count, otunumber, cutoff, length, pdiffs, mintm, maxtm, processors and label.\n";
+               helpString += "The primer.design command parameters are: list, fasta, name, count, otulabel, cutoff, length, pdiffs, mintm, maxtm, processors and label.\n";
                helpString += "The list parameter allows you to provide a list file and is required.\n";
         helpString += "The fasta parameter allows you to provide a fasta file and is required.\n";
         helpString += "The name parameter allows you to provide a name file associated with your fasta file.\n";
         helpString += "The count parameter allows you to provide a count file associated with your fasta file.\n";
         helpString += "The label parameter is used to indicate the label you want to use from your list file.\n";
-        helpString += "The otunumber parameter is used to indicate the otu you want to use from your list file. It is required.\n";
+        helpString += "The otulabel parameter is used to indicate the otu you want to use from your list file. It is required.\n";
         helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
         helpString += "The length parameter is used to indicate the length of the primer. The default is 18.\n";
         helpString += "The mintm parameter is used to indicate minimum melting temperature.\n";
@@ -216,9 +216,8 @@ PrimerDesignCommand::PrimerDesignCommand(string option)  {
             temp = validParameter.validFile(parameters, "maxtm", false);  if (temp == "not found") { temp = "-1"; }
                        m->mothurConvert(temp, maxTM); 
             
-            temp = validParameter.validFile(parameters, "otunumber", false);  if (temp == "not found") { temp = "-1"; }
-                       m->mothurConvert(temp, otunumber); 
-            if (otunumber < 1) {  m->mothurOut("[ERROR]: You must provide an OTU number, aborting.\n"); abort = true; }
+            otulabel = validParameter.validFile(parameters, "otulabel", false);  if (otulabel == "not found") { temp = ""; }
+            if (otulabel == "") {  m->mothurOut("[ERROR]: You must provide an OTU label, aborting.\n"); abort = true; }
             
             temp = validParameter.validFile(parameters, "processors", false);  if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
@@ -253,7 +252,9 @@ int PrimerDesignCommand::execute(){
         
         //reads list file and selects the label the users specified or the first label
         getListVector();
-        if (otunumber > list->getNumBins()) { m->mothurOut("[ERROR]: You selected an OTU number larger than the number of OTUs you have in your list file, quitting.\n"); return 0; }
+        vector<string> binLabels = list->getLabels();
+        int binIndex = findIndex(otulabel, binLabels);
+        if (binIndex == -1) { m->mothurOut("[ERROR]: You selected an OTU label that is not in your in your list file, quitting.\n"); return 0; }
         
         map<string, int> nameMap;
         unsigned long int numSeqs;  //used to sanity check the files. numSeqs = total seqs for namefile and uniques for count.
@@ -293,7 +294,7 @@ int PrimerDesignCommand::execute(){
         
         m->mothurOut("Done.\n\n");
         
-        set<string> primers = getPrimer(conSeqs[otunumber-1]);  
+        set<string> primers = getPrimer(conSeqs[binIndex]);
         
         if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0; }
         
@@ -302,7 +303,7 @@ int PrimerDesignCommand::execute(){
         ofstream outSum;
         m->openOutputFile(consSummaryFile, outSum);
         
-        outSum << "PrimerOtu: " << otunumber << " Members: " << list->get(otunumber-1) << endl << "Primers\tminTm\tmaxTm" << endl;
+        outSum << "PrimerOtu: " << otulabel << " Members: " << list->get(binIndex) << endl << "Primers\tminTm\tmaxTm" << endl;
         
         //find min and max melting points
         vector<double> minTms;
@@ -339,7 +340,7 @@ int PrimerDesignCommand::execute(){
         outSum.close();
         
         //check each otu's conseq for each primer in otunumber
-        set<int> otuToRemove = createProcesses(consSummaryFile, minTms, maxTms, primers, conSeqs);
+        set<int> otuToRemove = createProcesses(consSummaryFile, minTms, maxTms, primers, conSeqs, binIndex);
         
         if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0; }
         
@@ -348,21 +349,29 @@ int PrimerDesignCommand::execute(){
         mvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
         mvariables["[extension]"] = m->getExtension(listfile);
         string newListFile = getOutputFileName("list", mvariables);
-        outputNames.push_back(newListFile); outputTypes["list"].push_back(newListFile);
-        ofstream outList;
-        m->openOutputFile(newListFile, outList);
+        ofstream outListTemp;
+        m->openOutputFile(newListFile+".temp", outListTemp);
         
-        outList << list->getLabel() << '\t' << (list->getNumBins()-otuToRemove.size()) << '\t';
+        outListTemp << list->getLabel() << '\t' << (list->getNumBins()-otuToRemove.size()) << '\t';
+        string headers = "label\tnumOtus\t";
         for (int j = 0; j < list->getNumBins(); j++) {
             if (m->control_pressed) { break; }
             //good otus
             if (otuToRemove.count(j) == 0) {  
                 string bin = list->get(j);
-                if (bin != "") {  outList << bin << '\t';  } 
+                if (bin != "") {  outListTemp << bin << '\t';  headers += binLabels[j] + '\t'; }
             }
         }
-        outList << endl;
+        outListTemp << endl;
+        outListTemp.close();
+        
+        ofstream outList;
+        m->openOutputFile(newListFile, outList);
+        outList << headers << endl;
         outList.close();
+        m->appendFiles(newListFile+".temp", newListFile);
+        m->mothurRemove(newListFile+".temp");
+        outputNames.push_back(newListFile); outputTypes["list"].push_back(newListFile);
         
         if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0; }
         
@@ -525,7 +534,7 @@ set<string> PrimerDesignCommand::getPrimer(Sequence primerSeq){
        }
 }
 /**************************************************************************************************/
-set<int> PrimerDesignCommand::createProcesses(string newSummaryFile, vector<double>& minTms, vector<double>& maxTms, set<string>& primers, vector<Sequence>& conSeqs) {
+set<int> PrimerDesignCommand::createProcesses(string newSummaryFile, vector<double>& minTms, vector<double>& maxTms, set<string>& primers, vector<Sequence>& conSeqs, int binIndex) {
        try {
                
                vector<int> processIDS;
@@ -560,7 +569,7 @@ set<int> PrimerDesignCommand::createProcesses(string newSummaryFile, vector<doub
                 //clear old file because we append in driver
                 m->mothurRemove(newSummaryFile + toString(getpid()) + ".temp");
                 
-                               otusToRemove = driver(newSummaryFile + toString(getpid()) + ".temp", minTms, maxTms, primers, conSeqs, lines[process].start, lines[process].end, numBinsProcessed);
+                               otusToRemove = driver(newSummaryFile + toString(getpid()) + ".temp", minTms, maxTms, primers, conSeqs, lines[process].start, lines[process].end, numBinsProcessed, binIndex);
                 
                 string tempFile = toString(getpid()) + ".otus2Remove.temp";
                 ofstream outTemp;
@@ -580,7 +589,7 @@ set<int> PrimerDesignCommand::createProcesses(string newSummaryFile, vector<doub
                }
                
                //do my part
-               otusToRemove = driver(newSummaryFile, minTms, maxTms, primers, conSeqs, lines[0].start, lines[0].end, numBinsProcessed);
+               otusToRemove = driver(newSummaryFile, minTms, maxTms, primers, conSeqs, lines[0].start, lines[0].end, numBinsProcessed, binIndex);
                
                //force parent to wait until all the processes are done
                for (int i=0;i<processIDS.size();i++) { 
@@ -624,7 +633,7 @@ set<int> PrimerDesignCommand::createProcesses(string newSummaryFile, vector<doub
                        string extension = toString(i) + ".temp";
                        m->mothurRemove(newSummaryFile+extension);
             
-                       primerDesignData* tempPrimer = new primerDesignData((newSummaryFile+extension), m, lines[i].start, lines[i].end, minTms, maxTms, primers, conSeqs, pdiffs, otunumber, length, i);
+                       primerDesignData* tempPrimer = new primerDesignData((newSummaryFile+extension), m, lines[i].start, lines[i].end, minTms, maxTms, primers, conSeqs, pdiffs, binIndex, length, i);
                        pDataArray.push_back(tempPrimer);
                        processIDS.push_back(i);
                        
@@ -635,7 +644,7 @@ set<int> PrimerDesignCommand::createProcesses(string newSummaryFile, vector<doub
                
         
                //using the main process as a worker saves time and memory
-               otusToRemove = driver(newSummaryFile, minTms, maxTms, primers, conSeqs, lines[0].start, lines[0].end, numBinsProcessed);
+               otusToRemove = driver(newSummaryFile, minTms, maxTms, primers, conSeqs, lines[0].start, lines[0].end, numBinsProcessed, binIndex);
                
                //Wait until all threads have terminated.
                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
@@ -668,7 +677,7 @@ set<int> PrimerDesignCommand::createProcesses(string newSummaryFile, vector<doub
        }
 }
 //**********************************************************************************************************************
-set<int> PrimerDesignCommand::driver(string summaryFileName, vector<double>& minTms, vector<double>& maxTms, set<string>& primers, vector<Sequence>& conSeqs, int start, int end, int& numBinsProcessed){
+set<int> PrimerDesignCommand::driver(string summaryFileName, vector<double>& minTms, vector<double>& maxTms, set<string>& primers, vector<Sequence>& conSeqs, int start, int end, int& numBinsProcessed, int binIndex){
        try {
         set<int> otuToRemove;
         
@@ -679,7 +688,7 @@ set<int> PrimerDesignCommand::driver(string summaryFileName, vector<double>& min
         
             if (m->control_pressed) { break; }
             
-            if (i != (otunumber-1)) {
+            if (i != (binIndex)) {
                 int primerIndex = 0;
                 for (set<string>::iterator it = primers.begin(); it != primers.end(); it++) {
                     vector<int> primerStarts;
@@ -1236,6 +1245,21 @@ int PrimerDesignCommand::countDiffs(string oligo, string seq){
        }
 }
 //**********************************************************************************************************************
+int PrimerDesignCommand::findIndex(string binLabel, vector<string> binLabels){
+       try {
+        int index = -1;
+        for (int i = 0; i < binLabels.size(); i++){
+            if (m->control_pressed) { return index; }
+            if (m->isLabelEquivalent(binLabel, binLabels[i])) { index = i; break; }
+        }
+        return index;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "PrimerDesignCommand", "findIndex");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 
 
 
index 2879d2b47003b2c166c7d7b3d5887d2e62a0843d..ff1ddf23ca8edf0438e240c25c69baaa4dcd96b1 100644 (file)
@@ -50,8 +50,8 @@ private:
        };
     
     bool abort, allLines, large;
-    int cutoff, pdiffs, length, otunumber, processors, alignedLength;
-    string outputDir, listfile, namefile, countfile, fastafile, label;
+    int cutoff, pdiffs, length, processors, alignedLength;
+    string outputDir, listfile, otulabel, namefile, countfile, fastafile, label;
     double minTM, maxTM;
     ListVector* list;
     vector<string> outputNames;
@@ -65,10 +65,11 @@ private:
     bool findPrimer(string, string, vector<int>&, vector<int>&, vector<int>&);
     int findMeltingPoint(string primer, double&, double&);
     
-    set<int> createProcesses(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&);
-    set<int> driver(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&, int, int, int&);
+    set<int> createProcesses(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&, int);
+    set<int> driver(string, vector<double>&, vector<double>&, set<string>&, vector<Sequence>&, int, int, int&, int);
     vector< vector< vector<unsigned int> > > driverGetCounts(map<string, int>&, unsigned long int&, vector<unsigned int>&, unsigned long long&, unsigned long long&);
     vector<Sequence> createProcessesConSeqs(map<string, int>&, unsigned long int&);
+    int findIndex(string binLabel, vector<string> binLabels);
     
 };
 
@@ -81,7 +82,7 @@ struct primerDesignData {
        MothurOut* m;
        int start;
        int end;
-       int pdiffs, threadID, otunumber, length;
+       int pdiffs, threadID,  length, binIndex;
        set<string> primers;
        vector<double> minTms, maxTms;
     set<int> otusToRemove;
@@ -99,7 +100,7 @@ struct primerDesignData {
         maxTms = max;
         primers = pri;
         consSeqs = seqs;
-        otunumber = otun;
+        binIndex = otun;
         length = l;
                threadID = tid;
         numBinsProcessed = 0;
@@ -121,7 +122,7 @@ static DWORD WINAPI MyPrimerThreadFunction(LPVOID lpParam){
             
             if (pDataArray->m->control_pressed) { break; }
             
-            if (i != (pDataArray->otunumber-1)) {
+            if (i != (pDataArray->binIndex)) {
                 int primerIndex = 0;
                 for (set<string>::iterator it = pDataArray->primers.begin(); it != pDataArray->primers.end(); it++) {
                     vector<int> primerStarts;
index 1f86efc51304cc81fa2f359c8b2c996da52e8c30..0c55650b33149148c80374a0b8c03d41a7a4ea53 100644 (file)
@@ -200,7 +200,7 @@ bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
                
                if(seqName != sequence.getName()){
                        m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
-                       m->mothurOutEndLine();  
+                       m->mothurOutEndLine();  m->control_pressed = true;
                }
                
                int end;
index d998de63ee8f31df493cd8546ecb5fb00b83311c..acf87dfebcd022d37cf6974f8331c7ad940a017a 100644 (file)
@@ -180,7 +180,7 @@ int RandomForest::calcForrestVariableImportance(string filename) {
         m->openOutputFile(filename, out);
         out <<"OTU\tMean decrease accuracy\n";
         for (int i = 0; i < globalVariableRanks.size(); i++) {
-            out << m->currentBinLabels[(int)globalVariableRanks[i].first] << '\t' << globalVariableImportanceList[globalVariableRanks[i].first] << endl;
+            out << m->currentSharedBinLabels[(int)globalVariableRanks[i].first] << '\t' << globalVariableImportanceList[globalVariableRanks[i].first] << endl;
         }
         out.close();
         return 0;
index 9eefa43c9f303249c812d27a1ccbc481dc6b7fcf..42f088b953d2f3657dd98dcec4f184f87eb8a70b 100644 (file)
@@ -70,7 +70,7 @@ string RemoveGroupsCommand::getOutputPattern(string type) {
         else if (type == "count")       {   pattern = "[filename],pick,[extension]";    }
         else if (type == "list")        {   pattern = "[filename],pick,[extension]";    }
         else if (type == "shared")      {   pattern = "[filename],[tag],pick,[extension]";    }
-        else if (type == "design")      {   pattern = "[filename],pick,[extension]";    }
+        else if (type == "design")      {   pattern = "[filename],[tag],pick,[extension]";    }
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
         return pattern;
@@ -560,9 +560,9 @@ int RemoveGroupsCommand::readShared(){
                m->setGroups(groupsToKeep);
                m->clearAllGroups();
                m->saveNextLabel = "";
-               m->printedHeaders = false;
-               m->currentBinLabels.clear();
-               m->binLabelsInFile.clear();
+               m->printedSharedHeaders = false;
+               m->currentSharedBinLabels.clear();
+               m->sharedBinLabelsInFile.clear();
                
                InputData input(sharedfile, "sharedfile");
                lookup = input.getSharedRAbundVectors();
@@ -623,12 +623,7 @@ int RemoveGroupsCommand::readList(){
                map<string, string> variables; 
         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
         variables["[extension]"] = m->getExtension(listfile);
-               string outputFileName = getOutputFileName("list", variables);
-
-               
-               ofstream out;
-               m->openOutputFile(outputFileName, out);
-               
+                               
                ifstream in;
                m->openInputFile(listfile, in);
                
@@ -641,6 +636,16 @@ int RemoveGroupsCommand::readList(){
                        
                        //read in list vector
                        ListVector list(in);
+            
+            variables["[tag]"] = list.getLabel();
+            string outputFileName = getOutputFileName("list", variables);
+                       
+                       ofstream out;
+                       m->openOutputFile(outputFileName, out);
+                       outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+            
+            vector<string> binLabels = list.getLabels();
+            vector<string> newBinLabels;
                        
                        //make a new list vector
                        ListVector newList;
@@ -681,24 +686,26 @@ int RemoveGroupsCommand::readList(){
                                //if there are names in this bin add to new list
                                if (newNames != "") {  
                                        newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
-                                       newList.push_back(newNames);    
+                                       newList.push_back(newNames);
+                    newBinLabels.push_back(binLabels[i]);
                                }
                        }
                        
                        //print new listvector
                        if (newList.getNumBins() != 0) {
                                wroteSomething = true;
+                               newList.setLabels(newBinLabels);
+                newList.printHeaders(out);
                                newList.print(out);
                        }
                        
                        m->gobble(in);
+            out.close();
                }
                in.close();     
-               out.close();
                
-               if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
-               outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
                
+               if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
                m->mothurOut("Removed " + toString(removedCount) + " sequences from your list file."); m->mothurOutEndLine();
                
                return 0;
index 01f45ca97329fbbe383c2157f12880ca05671a10..7349203007d90b4228502680b95b93b7ab9c3240 100644 (file)
@@ -75,7 +75,7 @@ string RemoveLineageCommand::getOutputPattern(string type) {
         else if (type == "name")            {   pattern = "[filename],pick,[extension]";    }
         else if (type == "group")           {   pattern = "[filename],pick,[extension]";    }
         else if (type == "count")           {   pattern = "[filename],pick,[extension]";    }
-        else if (type == "list")            {   pattern = "[filename],pick,[extension]-[filename],[distance],pick,[extension]";    }
+        else if (type == "list")            {   pattern = "[filename],[distance],pick,[extension]";    }
         else if (type == "shared")          {   pattern = "[filename],[distance],pick,[extension]";    }
         else if (type == "alignreport")     {   pattern = "[filename],pick.align.report";    }
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
@@ -481,9 +481,6 @@ int RemoveLineageCommand::readList(){
                map<string, string> variables; 
         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
         variables["[extension]"] = m->getExtension(listfile);
-               string outputFileName = getOutputFileName("list", variables);   
-               ofstream out;
-               m->openOutputFile(outputFileName, out);
                
                ifstream in;
                m->openInputFile(listfile, in);
@@ -491,12 +488,25 @@ int RemoveLineageCommand::readList(){
                bool wroteSomething = false;
                
                while(!in.eof()){
+            
                        //read in list vector
                        ListVector list(in);
                        
                        //make a new list vector
                        ListVector newList;
                        newList.setLabel(list.getLabel());
+            
+            variables["[distance]"] = list.getLabel();
+            string outputFileName = getOutputFileName("list", variables);
+                       
+                       ofstream out;
+                       m->openOutputFile(outputFileName, out);
+                       outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+            
+            if (m->control_pressed) { in.close(); out.close(); return 0; }
+            
+            vector<string> binLabels = list.getLabels();
+            vector<string> newBinLabels;
                        
                        //for each bin
                        for (int i = 0; i < list.getNumBins(); i++) {
@@ -517,23 +527,26 @@ int RemoveLineageCommand::readList(){
                                //if there are names in this bin add to new list
                                if (newNames != "") {  
                                        newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
-                                       newList.push_back(newNames);    
+                                       newList.push_back(newNames);
+                    newBinLabels.push_back(binLabels[i]);
                                }
                        }
                                
                        //print new listvector
                        if (newList.getNumBins() != 0) {
                                wroteSomething = true;
+                               newList.setLabels(newBinLabels);
+                newList.printHeaders(out);
                                newList.print(out);
                        }
                        
                        m->gobble(in);
+            out.close();
                }
                in.close();     
-               out.close();
+               
                
                if (wroteSomething == false) {  m->mothurOut("Your list file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
-               outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName); 
                                
                return 0;
 
@@ -692,6 +705,8 @@ int RemoveLineageCommand::readConsList(){
         bool wroteSomething = false;
         string snumBins = toString(list->getNumBins());
         
+        vector<string> binLabels = list->getLabels();
+        vector<string> newBinLabels;
         for (int i = 0; i < list->getNumBins(); i++) {
             
             if (m->control_pressed) { delete list; return 0;}
@@ -707,6 +722,7 @@ int RemoveLineageCommand::readConsList(){
             
             if (names.count(m->getSimpleLabel(otuLabel)) == 0) {
                 newList.push_back(list->get(i));
+                newBinLabels.push_back(binLabels[i]);
             }else { removedCount++; }
         }
         
@@ -724,6 +740,8 @@ int RemoveLineageCommand::readConsList(){
         //print new listvector
         if (newList.getNumBins() != 0) {
             wroteSomething = true;
+            newList.setLabels(newBinLabels);
+            newList.printHeaders(out);
             newList.print(out);
         }
                out.close();
@@ -843,9 +861,9 @@ int RemoveLineageCommand::readShared(){
             if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; }
             
             //is this otu on the list
-            if (names.count(m->getSimpleLabel(m->currentBinLabels[i])) == 0) {
+            if (names.count(m->getSimpleLabel(m->currentSharedBinLabels[i])) == 0) {
                 wroteSomething = true;
-                newLabels.push_back(m->currentBinLabels[i]);
+                newLabels.push_back(m->currentSharedBinLabels[i]);
                 for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup
                     newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup());
                 }
@@ -865,7 +883,7 @@ int RemoveLineageCommand::readShared(){
         
                for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
         
-        m->currentBinLabels = newLabels;
+        m->currentSharedBinLabels = newLabels;
         
                newLookup[0]->printHeaders(out);
                
index 161c4bfb3553d52ceac6fd89b033152cf9fa8e43..18322f1bcb46d0eeeb698b3e759c885e8b923437 100644 (file)
@@ -462,9 +462,9 @@ int RemoveOtuLabelsCommand::readShared(){
             if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; }
             
             //is this otu on the list
-            if (labels.count(m->getSimpleLabel(m->currentBinLabels[i])) == 0) {
+            if (labels.count(m->getSimpleLabel(m->currentSharedBinLabels[i])) == 0) {
                 wroteSomething = true;
-                newLabels.push_back(m->currentBinLabels[i]);
+                newLabels.push_back(m->currentSharedBinLabels[i]);
                 for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup
                     newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup());
                 }
@@ -484,7 +484,7 @@ int RemoveOtuLabelsCommand::readShared(){
         
                for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
         
-        m->currentBinLabels = newLabels;
+        m->currentSharedBinLabels = newLabels;
         
                newLookup[0]->printHeaders(out);
                
@@ -518,23 +518,16 @@ int RemoveOtuLabelsCommand::readList(){
         newList.setLabel(list->getLabel());
         int removedCount = 0;
         bool wroteSomething = false;
-        string snumBins = toString(list->getNumBins());
         
+        vector<string> binLabels = list->getLabels();
+        vector<string> newLabels;
         for (int i = 0; i < list->getNumBins(); i++) {
             
             if (m->control_pressed) { delete list; return 0;}
             
-            //create a label for this otu
-            string otuLabel = "Otu";
-            string sbinNumber = toString(i+1);
-            if (sbinNumber.length() < snumBins.length()) { 
-                int diff = snumBins.length() - sbinNumber.length();
-                for (int h = 0; h < diff; h++) { otuLabel += "0"; }
-            }
-            otuLabel += sbinNumber; 
-            
-            if (labels.count(m->getSimpleLabel(otuLabel)) == 0) {
+            if (labels.count(m->getSimpleLabel(binLabels[i])) == 0) {
                 newList.push_back(list->get(i));
+                newLabels.push_back(binLabels[i]);
             }else { removedCount++; }
         }
         
@@ -552,6 +545,8 @@ int RemoveOtuLabelsCommand::readList(){
         //print new listvector
         if (newList.getNumBins() != 0) {
             wroteSomething = true;
+            newList.setLabels(newLabels);
+            newList.printHeaders(out);
             newList.print(out);
         }
                out.close();
index 16fd8dce8bab96b030a57effd81d218e122628a5..a840bba6d8d91c94cc34350e944a02aa01613ee8 100644 (file)
@@ -243,6 +243,13 @@ int RemoveOtusCommand::execute(){
 //**********************************************************************************************************************
 int RemoveOtusCommand::readListGroup(){
        try {
+        InputData* input = new InputData(listfile, "list");
+               ListVector* list = input->getListVector();
+               string lastLabel = list->getLabel();
+               
+               //using first label seen if none is provided
+               if (label == "") { label = lastLabel; }
+        
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
                map<string, string> variables; 
@@ -263,13 +270,6 @@ int RemoveOtusCommand::readListGroup(){
                ofstream outGroup;
                m->openOutputFile(outputGroupFileName, outGroup);
                
-               InputData* input = new InputData(listfile, "list");
-               ListVector* list = input->getListVector();
-               string lastLabel = list->getLabel();
-               
-               //using first label seen if none is provided
-               if (label == "") { label = lastLabel; }
-               
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> labels; labels.insert(label);
                set<string> processedLabels;
@@ -365,6 +365,8 @@ int RemoveOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstr
                
                int numOtus = 0;
                //for each bin
+        vector<string> binLabels = list->getLabels();
+        vector<string> newBinLabels;
                for (int i = 0; i < list->getNumBins(); i++) {
                        if (m->control_pressed) { return 0; }
                        
@@ -400,7 +402,8 @@ int RemoveOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstr
                                
                                if (!removeBin) {
                                        //if there are no sequences from the groups we want to remove in this bin add to new list, output to groupfile
-                                       newList.push_back(binnames);    
+                                       newList.push_back(binnames);
+                    newBinLabels.push_back(binLabels[i]);
                                        outGroup << groupFileOutput;
                                }else {
                                        numOtus++;
@@ -414,7 +417,9 @@ int RemoveOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstr
                //print new listvector
                if (newList.getNumBins() != 0) {
                        wroteSomething = true;
-                       newList.print(out);
+                       newList.setLabels(newBinLabels);
+            newList.printHeaders(out);
+            newList.print(out);
                }
                
                m->mothurOut(newList.getLabel() + " - removed " + toString(numOtus) + " of the " + toString(list->getNumBins()) + " OTUs."); m->mothurOutEndLine();
index 2eb5f63da369828e4e67e0f0b262b1e786b75a88..24d5bef750e32e06493f7e91351bbf8af0b3099b 100644 (file)
@@ -68,7 +68,7 @@ string RemoveRareCommand::getOutputPattern(string type) {
         else if (type == "sabund")    {   pattern = "[filename],pick,[extension]";    }
         else if (type == "group")       {   pattern = "[filename],pick,[extension]";    }
         else if (type == "count")       {   pattern = "[filename],pick,[extension]";    }
-        else if (type == "list")        {   pattern = "[filename],pick,[extension]";    }
+        else if (type == "list")        {   pattern = "[filename],[tag],pick,[extension]";    }
         else if (type == "shared")      {   pattern = "[filename],[tag],pick,[extension]";    }
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
@@ -344,24 +344,7 @@ int RemoveRareCommand::execute(){
 //**********************************************************************************************************************
 int RemoveRareCommand::processList(){
        try {
-               string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
-        map<string, string> variables; 
-        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
-        variables["[extension]"] = m->getExtension(listfile);
-               string outputFileName = getOutputFileName("list", variables);
-        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
-        variables["[extension]"] = m->getExtension(groupfile);
-               string outputGroupFileName = getOutputFileName("group", variables);
-        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
-        variables["[extension]"] = m->getExtension(countfile);
-        string outputCountFileName = getOutputFileName("count", variables);
-        
-               ofstream out, outGroup;
-               m->openOutputFile(outputFileName, out);
-               
-               bool wroteSomething = false;
-               
+                               
                //you must provide a label because the names in the listfile need to be consistent
                string thisLabel = "";
                if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); }
@@ -400,6 +383,26 @@ int RemoveRareCommand::processList(){
                                list = input.getListVector(lastLabel); 
                        }
                }
+        
+        string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
+        map<string, string> variables;
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
+        variables["[extension]"] = m->getExtension(listfile);
+        variables["[tag]"] = list->getLabel();
+               string outputFileName = getOutputFileName("list", variables);
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
+        variables["[extension]"] = m->getExtension(groupfile);
+               string outputGroupFileName = getOutputFileName("group", variables);
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
+        variables["[extension]"] = m->getExtension(countfile);
+        string outputCountFileName = getOutputFileName("count", variables);
+        
+               ofstream out, outGroup;
+               m->openOutputFile(outputFileName, out);
+               
+               bool wroteSomething = false;
+
                
                //if groupfile is given then use it
                GroupMap* groupMap;
@@ -420,7 +423,11 @@ int RemoveRareCommand::processList(){
         }
                
                
-               if (list != NULL) {     
+               if (list != NULL) {
+            
+            vector<string> binLabels = list->getLabels();
+            vector<string> newLabels;
+            
                        //make a new list vector
                        ListVector newList;
                        newList.setLabel(list->getLabel());
@@ -479,6 +486,7 @@ int RemoveRareCommand::processList(){
 
                                if (binsize > nseqs) { //keep bin
                                        newList.push_back(saveBinNames);
+                    newLabels.push_back(binLabels[i]);
                                        if (groupfile != "") {  for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; }  }
                     else if (countfile != "") { for(int k = 0; k < newGroupFile.size(); k++) {  ct.remove(newGroupFile[k]); } }  
                                }else {  if (countfile != "") {  for(int k = 0; k < names.size(); k++) {  ct.remove(names[k]); } }  }
@@ -487,7 +495,9 @@ int RemoveRareCommand::processList(){
                        //print new listvector
                        if (newList.getNumBins() != 0) {
                                wroteSomething = true;
-                               newList.print(out);
+                               newList.setLabels(newLabels);
+                newList.printHeaders(out);
+                newList.print(out);
                        }
                }       
                
@@ -850,7 +860,7 @@ int RemoveRareCommand::processLookup(vector<SharedRAbundVector*>& lookup){
                                
                                //eliminates zero otus
                                if (allZero) { for (int j = 0; j < newRabunds.size(); j++) {  newRabunds[j].pop_back(); } }
-                else { headers.push_back(m->currentBinLabels[i]); }
+                else { headers.push_back(m->currentSharedBinLabels[i]); }
                        }
                }else {
                        //for each otu
@@ -867,7 +877,7 @@ int RemoveRareCommand::processLookup(vector<SharedRAbundVector*>& lookup){
                                
                                //eliminates otus below rare cutoff
                                if (totalAbund <= nseqs) { for (int j = 0; j < newRabunds.size(); j++) {  newRabunds[j].pop_back(); } }
-                else { headers.push_back(m->currentBinLabels[i]); }
+                else { headers.push_back(m->currentSharedBinLabels[i]); }
                        }
                }
                
index fa7e6dda2a853d6a0234c1dc92be3bad258118d1..25d45a7c578c47167fac24164cac3322f3462772 100644 (file)
@@ -15,6 +15,7 @@
 //**********************************************************************************************************************
 vector<string> RemoveSeqsCommand::setParameters(){     
        try {
+        CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "FNGLT", "none","fastq",false,false,true); parameters.push_back(pfastq);
                CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
@@ -41,9 +42,9 @@ vector<string> RemoveSeqsCommand::setParameters(){
 string RemoveSeqsCommand::getHelpString(){     
        try {
                string helpString = "";
-               helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n";
+               helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, count, list, taxonomy, quality, fastq or alignreport file.\n";
                helpString += "It outputs a file containing the sequences NOT in the .accnos file.\n";
-               helpString += "The remove.seqs command parameters are accnos, fasta, name, group, count, list, taxonomy, qfile, alignreport and dups.  You must provide accnos and at least one of the file parameters.\n";
+               helpString += "The remove.seqs command parameters are accnos, fasta, name, group, count, list, taxonomy, qfile, alignreport, fastq and dups.  You must provide accnos and at least one of the file parameters.\n";
                helpString += "The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n";
                helpString += "The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
                helpString += "Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
@@ -61,11 +62,12 @@ string RemoveSeqsCommand::getOutputPattern(string type) {
         string pattern = "";
         
         if (type == "fasta")            {   pattern = "[filename],pick,[extension]";    }
+        else if (type == "fastq")       {   pattern = "[filename],pick,[extension]";    }
         else if (type == "taxonomy")    {   pattern = "[filename],pick,[extension]";    }
         else if (type == "name")        {   pattern = "[filename],pick,[extension]";    }
         else if (type == "group")       {   pattern = "[filename],pick,[extension]";    }
         else if (type == "count")       {   pattern = "[filename],pick,[extension]";    }
-        else if (type == "list")        {   pattern = "[filename],pick,[extension]";    }
+        else if (type == "list")        {   pattern = "[filename],[distance],pick,[extension]";    }
         else if (type == "qfile")       {   pattern = "[filename],pick,[extension]";    }
         else if (type == "alignreport")      {   pattern = "[filename],pick.align.report";    }
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
@@ -84,6 +86,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(){
                setParameters();
                vector<string> tempOutNames;
                outputTypes["fasta"] = tempOutNames;
+        outputTypes["fastq"] = tempOutNames;
                outputTypes["taxonomy"] = tempOutNames;
                outputTypes["name"] = tempOutNames;
                outputTypes["group"] = tempOutNames;
@@ -123,6 +126,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                        //initialize outputTypes
                        vector<string> tempOutNames;
                        outputTypes["fasta"] = tempOutNames;
+            outputTypes["fastq"] = tempOutNames;
                        outputTypes["taxonomy"] = tempOutNames;
                        outputTypes["name"] = tempOutNames;
                        outputTypes["group"] = tempOutNames;
@@ -210,6 +214,14 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["count"] = inputDir + it->second;            }
                                }
+                
+                it = parameters.find("fastq");
+                               //user has given a fastq file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
+                               }
                        }
 
                        
@@ -258,6 +270,10 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                        if (qualfile == "not open") { abort = true; }
                        else if (qualfile == "not found") {  qualfile = "";  }                  
                        else { m->setQualFile(qualfile); }
+            
+            fastqfile = validParameter.validFile(parameters, "fastq", true);
+                       if (fastqfile == "not open") { abort = true; }
+                       else if (fastqfile == "not found") {  fastqfile = "";  }
                        
                        string usedDups = "true";
                        string temp = validParameter.validFile(parameters, "dups", false);      
@@ -280,7 +296,7 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
             }
                        
-                       if ((countfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
+                       if ((fastqfile == "") && (countfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport, fastq or list."); m->mothurOutEndLine(); abort = true; }
                        
             if (countfile == "") {
                 if ((fastafile != "") && (namefile == "")) {
@@ -317,6 +333,7 @@ int RemoveSeqsCommand::execute(){
                //read through the correct file and output lines you want to keep
                if (namefile != "")                     {               readName();             }
                if (fastafile != "")            {               readFasta();    }
+        if (fastqfile != "")           {               readFastq();            }
                if (groupfile != "")            {               readGroup();    }
                if (alignfile != "")            {               readAlign();    }
                if (listfile != "")                     {               readList();             }
@@ -438,6 +455,71 @@ int RemoveSeqsCommand::readFasta(){
        }
 }
 //**********************************************************************************************************************
+// Copies fastqfile to a new "pick" file, leaving out every record whose name
+// appears in the `names` set (the sequences listed in the .accnos file).
+//
+// A record is taken to be a line starting with '@' followed by the next three
+// lines, which are read unconditionally. The record name is the first
+// whitespace-delimited token of the '@' line with the '@' stripped, passed
+// through m->checkName() before the lookup. A line read at a record position
+// that does not start with '@' is not copied to the output.
+//
+// The output file is registered under the "fastq" output type.
+// Returns 0, including when the run is interrupted via m->control_pressed
+// (in which case the partial output file is removed). Any exception is
+// reported through m->errorOut() and the process exits.
+int RemoveSeqsCommand::readFastq(){
+       try {
+               bool wroteSomething = false;
+               int removedCount = 0;
+        
+               ifstream in;
+               m->openInputFile(fastqfile, in);
+               
+               //write next to the input file unless the user supplied an output directory
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(fastqfile);  }
+               map<string, string> variables;
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastqfile));
+        variables["[extension]"] = m->getExtension(fastqfile);
+               string outputFileName = getOutputFileName("fastq", variables);
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+        
+               
+               while(!in.eof()){
+                       
+                       if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
+                       
+                       //read sequence name
+                       string input = m->getline(in); m->gobble(in);
+                       
+            string outputString = input + "\n";
+            
+                       //check for an empty line before looking at the first character
+                       if (!input.empty() && (input[0] == '@')) {
+                //get rest of lines
+                outputString += m->getline(in) + "\n"; m->gobble(in);
+                outputString += m->getline(in) + "\n"; m->gobble(in);
+                outputString += m->getline(in) + "\n"; m->gobble(in);
+                
+                //name is the first token of the header line, without the leading '@'
+                vector<string> splits = m->splitWhiteSpace(input);
+                string name = splits[0];
+                name = name.substr(1);
+                m->checkName(name);
+                
+                //keep the record only if its name is NOT in the accnos set
+                if (names.count(name) == 0) {
+                                       wroteSomething = true;
+                    out << outputString;
+                }else { removedCount++; }
+            }
+            
+                       m->gobble(in);
+               }
+               in.close();
+               out.close();
+               
+               
+               if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
+               //register under "fastq" (not "fasta"): this is the type declared for this output in the constructors and getOutputPattern
+               outputTypes["fastq"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+               
+               m->mothurOut("Removed " + toString(removedCount) + " sequences from your fastq file."); m->mothurOutEndLine();
+
+               
+               return 0;
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveSeqsCommand", "readFastq");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 int RemoveSeqsCommand::readQual(){
        try {
                string thisOutputDir = outputDir;
@@ -582,10 +664,7 @@ int RemoveSeqsCommand::readList(){
                map<string, string> variables; 
                variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
         variables["[extension]"] = m->getExtension(listfile);
-               string outputFileName = getOutputFileName("list", variables);   
-               ofstream out;
-               m->openOutputFile(outputFileName, out);
-               
+                               
                ifstream in;
                m->openInputFile(listfile, in);
                
@@ -602,48 +681,60 @@ int RemoveSeqsCommand::readList(){
                        //make a new list vector
                        ListVector newList;
                        newList.setLabel(list.getLabel());
+            
+                       variables["[distance]"] = list.getLabel();
+            string outputFileName = getOutputFileName("list", variables);
                        
+                       ofstream out;
+                       m->openOutputFile(outputFileName, out);
+                       outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+            
+            vector<string> binLabels = list.getLabels();
+            vector<string> newBinLabels;
+            
+            if (m->control_pressed) { in.close(); out.close();  return 0; }
+
                        //for each bin
                        for (int i = 0; i < list.getNumBins(); i++) {
                                if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
                        
                                //parse out names that are in accnos file
-                               string binnames = list.get(i);
+                               string bin = list.get(i);
+                vector<string> bnames;
+                m->splitAtComma(bin, bnames);
                                
                                string newNames = "";
-                               while (binnames.find_first_of(',') != -1) { 
-                                       string name = binnames.substr(0,binnames.find_first_of(','));
-                                       binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
-                                       
-                                       //if that name is in the .accnos file, add it
+                for (int j = 0; j < bnames.size(); j++) {
+                                       string name = bnames[j];
+                    //if that name is NOT in the .accnos file, keep it
                                        if (names.count(name) == 0) {  newNames += name + ",";  }
                                        else {  removedCount++;  }
-                               }
-                       
-                               //get last name
-                               if (names.count(binnames) == 0) {  newNames += binnames + ",";  }
-                               else {  removedCount++;  }
+                }
 
                                //if there are names in this bin add to new list
                                if (newNames != "") {  
                                        newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
-                                       newList.push_back(newNames);    
+                                       newList.push_back(newNames);
+                    newBinLabels.push_back(binLabels[i]);
                                }
                        }
                                
                        //print new listvector
                        if (newList.getNumBins() != 0) {
                                wroteSomething = true;
+                               newList.setLabels(newBinLabels);
+                newList.printHeaders(out);
                                newList.print(out);
+
                        }
                        
                        m->gobble(in);
+            out.close();
                }
                in.close();     
-               out.close();
+               
                
                if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
-               outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
                
                m->mothurOut("Removed " + toString(removedCount) + " sequences from your list file."); m->mothurOutEndLine();
                
index 1a6365dac35b9e18669f8827c4054268b9123d9e..7aaff172800c43b160fe62b2b49c2035e08502f8 100644 (file)
@@ -35,12 +35,13 @@ class RemoveSeqsCommand : public Command {
        
        private:
                set<string> names;
-               string accnosfile, fastafile, namefile, groupfile, countfile, alignfile, listfile, taxfile, qualfile, outputDir;
+               string accnosfile, fastafile, fastqfile, namefile, groupfile, countfile, alignfile, listfile, taxfile, qualfile, outputDir;
                bool abort, dups;
                vector<string> outputNames;
         map<string, string> uniqueMap;
                
                int readFasta();
+        int readFastq();
                int readName();
                int readGroup();
         int readCount();
index a56b0ebb73d1f6188ecd09430e70ed0128abaf31..bf702e13534f507576bee7c24bcf6462aacb7ca3 100644 (file)
@@ -2119,7 +2119,7 @@ int ScreenSeqsCommand::screenGroupFile(map<string, string> badSeqNames){
                while(!inputGroups.eof()){
                        if (m->control_pressed) { goodGroupOut.close(); inputGroups.close(); m->mothurRemove(goodGroupFile); return 0; }
 
-                       inputGroups >> seqName; m->gobble(inputGroups); inputGroups >> group;
+                       inputGroups >> seqName; m->gobble(inputGroups); inputGroups >> group; m->gobble(inputGroups);
                        it = badSeqNames.find(seqName);
                        
                        if(it != badSeqNames.end()){
@@ -2128,7 +2128,6 @@ int ScreenSeqsCommand::screenGroupFile(map<string, string> badSeqNames){
                        else{
                                goodGroupOut << seqName << '\t' << group << endl;
                        }
-                       m->gobble(inputGroups);
                }
                
                if (m->control_pressed) { goodGroupOut.close();  inputGroups.close(); m->mothurRemove(goodGroupFile);  return 0; }
index 12786ca41c848d967ffda72e2d06f382881d8eed..5746cdabc2161d094dcde6ab24b864d69db9ee8e 100644 (file)
@@ -793,6 +793,8 @@ string SensSpecCommand::preProcessList(){
                        //make a new list vector
                        ListVector newList;
                        newList.setLabel(list.getLabel());
+            vector<string> binLabels = list.getLabels();
+            vector<string> newLabels;
                        
                        //for each bin
                        for (int i = 0; i < list.getNumBins(); i++) {
@@ -812,13 +814,16 @@ string SensSpecCommand::preProcessList(){
                                //if there are names in this bin add to new list
                                if (newNames != "") { 
                                        newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
-                                       newList.push_back(newNames);    
+                                       newList.push_back(newNames);
+                    newLabels.push_back(binLabels[i]);
                                }
                        }
             
                        //print new listvector
                        if (newList.getNumBins() != 0) {
                                wroteSomething = true;
+                newList.setLabels(newLabels);
+                if (!m->printedListHeaders) { newList.printHeaders(out); }
                                newList.print(out);
                        }
                        
index 8e7a4395fbf20009d135502fde3774b2592a9440..8c712c3ab8bb08c8b8e9282e59e97b03c77d29fc 100644 (file)
@@ -9,6 +9,8 @@
 
 #include "seqnoise.h"
 #include "sequence.hpp"
+#include "listvector.hpp"
+#include "inputdata.h"
 
 #define MIN_DELTA 1.0e-6
 #define MIN_ITER 20
@@ -138,91 +140,93 @@ int seqNoise::getListData(string listFileName, double cutOff, vector<int>& otuDa
                
                ifstream listFile;
                m->openInputFile(listFileName, listFile);
-               double threshold;
-               int numOTUs;
-               string line = "";
-               bool adjustCutoff = true;
-               
-               if(listFile.peek() == 'u'){     m->getline(listFile);   }
                
-               while(listFile){
-                       listFile >> threshold;
-                       
-                       if(threshold < cutOff){
-                               line = m->getline(listFile); m->gobble(listFile);
-                       }
-                       else{
-                               adjustCutoff = false;
-                               listFile >> numOTUs;
-                               otuFreq.resize(numOTUs, 0);
-                               
-                               for(int i=0;i<numOTUs;i++){
-                                       
-                                       if (m->control_pressed) { return 0; }
-                                       
-                                       string otu;
-                                       listFile >> otu;
-                                       
-                                       int count = 0;
-                                       
-                                       string number = "";
-                                       
-                                       for(int j=0;j<otu.size();j++){
-                                               if(otu[j] != ','){
-                                                       number += otu[j];
-                                               }
-                                               else{
-                                                       int index = atoi(number.c_str());
-                                                       otuData[index] = i;
-                                                       count++;
-                                                       number = "";
-                                               }
-                                       }
-                                       
-                                       int index = atoi(number.c_str());
-                                       otuData[index] = i;
-                                       count++;
-                                       
-                                       otuFreq[i] = count;
-                               }
-                               
-                               otuBySeqLookUp.resize(numOTUs);
-                               
-                               int numSeqs = otuData.size();
-                               
-                               for(int i=0;i<numSeqs;i++){
-                                       if (m->control_pressed) { return 0; }
-                                       otuBySeqLookUp[otuData[i]].push_back(i);
-                               }
-                               for(int i=0;i<numOTUs;i++){
-                                       if (m->control_pressed) { return 0; }
-                                       for(int j=otuBySeqLookUp[i].size();j<numSeqs;j++){
-                                               otuBySeqLookUp[i].push_back(0);
-                                       }
-                               }
-                               
-                               break;
-                       }
+               bool adjustCutoff = true;
+        string lastLabel = "";
+        
+               while(!listFile.eof()){
+            
+            ListVector list(listFile); m->gobble(listFile); //10/18/13 - change to reading with listvector to accomodate changes to the listfiel format. ie. adding header labels.
+            
+            string thisLabel = list.getLabel();
+            lastLabel = thisLabel;
+            
+            if (thisLabel == "unique") {} //skip to next label in listfile
+            else {
+                double threshold;
+                m->mothurConvert(thisLabel, threshold);
+                
+                if(threshold < cutOff){} //skip to next label in listfile
+                else{
+                    adjustCutoff = false;
+                    int numOTUs = list.getNumBins();
+                    otuFreq.resize(numOTUs, 0);
+                    
+                    for(int i=0;i<numOTUs;i++){
+                        
+                        if (m->control_pressed) { return 0; }
+                        
+                        string otu = list.get(i);
+                        int count = 0;
+                        string number = "";
+                        
+                        for(int j=0;j<otu.size();j++){
+                            if(otu[j] != ','){
+                                number += otu[j];
+                            }
+                            else{
+                                int index = atoi(number.c_str());
+                                otuData[index] = i;
+                                count++;
+                                number = "";
+                            }
+                        }
+                        
+                        int index = atoi(number.c_str());
+                        otuData[index] = i;
+                        count++;
+                        
+                        otuFreq[i] = count;
+                    }
+                    
+                    otuBySeqLookUp.resize(numOTUs);
+                    
+                    int numSeqs = otuData.size();
+                    
+                    for(int i=0;i<numSeqs;i++){
+                        if (m->control_pressed) { return 0; }
+                        otuBySeqLookUp[otuData[i]].push_back(i);
+                    }
+                    for(int i=0;i<numOTUs;i++){
+                        if (m->control_pressed) { return 0; }
+                        for(int j=otuBySeqLookUp[i].size();j<numSeqs;j++){
+                            otuBySeqLookUp[i].push_back(0);
+                        }
+                    }
+                    
+                    break;
+                }
+            }
                }
                
                listFile.close();
                
                //the listfile does not contain a threshold greater than the cutoff so use highest value
                if (adjustCutoff) {
-                       istringstream iss (line,istringstream::in);
-                       
-                       iss >> numOTUs;
-                       otuFreq.resize(numOTUs, 0);
+            
+            InputData input(listFileName, "list");
+            ListVector* list = input.getListVector(lastLabel);
+            
+            int numOTUs = list->getNumBins();
+            otuFreq.resize(numOTUs, 0);
                        
                        for(int i=0;i<numOTUs;i++){
                                
                                if (m->control_pressed) { return 0; }
                                
-                               string otu;
-                               iss >> otu;
+                               string otu = list->get(i);
                                
                                int count = 0;
-                               
                                string number = "";
                                
                                for(int j=0;j<otu.size();j++){
@@ -259,6 +263,7 @@ int seqNoise::getListData(string listFileName, double cutOff, vector<int>& otuDa
                                }
                        }
                        
+            delete list;
                }
                
                return 0;
index d6073d75da3ed630b27b626bbba8615d79b58858..efeb88ce65183135c2a0aaef36d8d3a1c8f432cc 100644 (file)
@@ -76,7 +76,8 @@ Sequence::Sequence(istringstream& fastaString){
                                }
                        }
                        
-                       while (!fastaString.eof())      {       char c = fastaString.get();  if (c == 10 || c == 13){ break;    }       } // get rest of line if there's any crap there
+                       //while (!fastaString.eof())    {       char c = fastaString.get();  if (c == 10 || c == 13){ break;    }       } // get rest of line if there's any crap there
+            comment = getCommentString(fastaString);
                        
                        int numAmbig = 0;
                        sequence = getSequenceString(fastaString, numAmbig);
@@ -120,7 +121,8 @@ Sequence::Sequence(istringstream& fastaString, string JustUnaligned){
                                }
                        }
                        
-                       while (!fastaString.eof())      {       char c = fastaString.get();  if (c == 10 || c == 13){ break;    }       } // get rest of line if there's any crap there
+                       //while (!fastaString.eof())    {       char c = fastaString.get();  if (c == 10 || c == 13){ break;    }       } // get rest of line if there's any crap there
+            comment = getCommentString(fastaString);
                        
                        int numAmbig = 0;
                        sequence = getSequenceString(fastaString, numAmbig);
@@ -166,8 +168,8 @@ Sequence::Sequence(ifstream& fastaFile){
                                }
                        }
                        
-                       //read real sequence
-                       while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){  break;      }       } // get rest of line if there's any crap there
+                       //while (!fastaFile.eof())      {       char c = fastaFile.get(); if (c == 10 || c == 13){  break;      }       } // get rest of line if there's any crap there
+            comment = getCommentString(fastaFile);
                        
                        int numAmbig = 0;
                        sequence = getSequenceString(fastaFile, numAmbig);
@@ -216,9 +218,11 @@ Sequence::Sequence(ifstream& fastaFile, string& extraInfo, bool getInfo){
                        //read info after sequence name
                        while (!fastaFile.eof())        {       
                 char c = fastaFile.get(); 
-                if (c == 10 || c == 13 || c == -1){  break;    }
+                if (c == 10 || c == 13 || c == -1){   break;   }
                 extraInfo += c;
-            } 
+            }
+            
+            comment = extraInfo;
                        
                        int numAmbig = 0;
                        sequence = getSequenceString(fastaFile, numAmbig);
@@ -261,8 +265,8 @@ Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
                                }
                        }
                        
-                       //read real sequence
-                       while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){       break; }       } // get rest of line if there's any crap there
+                       //while (!fastaFile.eof())      {       char c = fastaFile.get(); if (c == 10 || c == 13){       break; }       } // get rest of line if there's any crap there
+            comment = getCommentString(fastaFile);
                        
                        int numAmbig = 0;
                        sequence = getSequenceString(fastaFile, numAmbig);
@@ -360,17 +364,19 @@ string Sequence::getSequenceString(ifstream& fastaFile, int& numAmbig) {
 string Sequence::getCommentString(ifstream& fastaFile) {
        try {
                char letter;
-               string sequence = "";
+               string temp = "";
                
                while(fastaFile){
                        letter=fastaFile.get();
-                       if((letter == '\r') || (letter == '\n')){  
+                       if((letter == '\r') || (letter == '\n') || letter == -1){
                                m->gobble(fastaFile);  //in case its a \r\n situation
                                break;
-                       }
+                       }else {
+                temp += letter;
+            }
                }
                
-               return sequence;
+               return temp;
        }
        catch(exception& e) {
                m->errorOut(e, "Sequence", "getCommentString");
@@ -414,17 +420,19 @@ string Sequence::getSequenceString(istringstream& fastaFile, int& numAmbig) {
 string Sequence::getCommentString(istringstream& fastaFile) {
        try {
                char letter;
-               string sequence = "";
+               string temp = "";
                
                while(fastaFile){
                        letter=fastaFile.get();
-                       if((letter == '\r') || (letter == '\n')){  
+                       if((letter == '\r') || (letter == '\n') || letter == -1){  
                                m->gobble(fastaFile);  //in case its a \r\n situation
                                break;
-                       }
+                       }else {
+                temp += letter;
+            }
                }
                
-               return sequence;
+               return temp;
        }
        catch(exception& e) {
                m->errorOut(e, "Sequence", "getCommentString");
@@ -439,6 +447,7 @@ void Sequence::initialize(){
        unaligned = "";
        aligned = "";
        pairwise = "";
+    comment = "";
        
        numBases = 0;
        alignmentLength = 0;
@@ -581,7 +590,7 @@ int Sequence::getNumNs(){
 
 void Sequence::printSequence(ostream& out){
 
-       out << ">" << name << endl;
+       out << ">" << name << comment << endl;
        if(isAligned){
                out << aligned << endl;
        }
index ad3a4b40c238c4260d36f7afea027b903539c8a0..c49899411f2dd7919d96ce9b9840c30b2b0dfcac 100644 (file)
@@ -76,6 +76,7 @@ private:
        string unaligned;
        string aligned;
        string pairwise;
+    string comment;
        int numBases;
        int alignmentLength;
        bool isAligned;
index 9d59696ba2f47157dd059c066af108d6bb76e731..98ae223818bd67b290dfbdf0450fa2a33b9b49b2 100644 (file)
@@ -417,10 +417,9 @@ SetCurrentCommand::SetCurrentCommand(string option)  {
                        else if (summaryfile == "not found") {  summaryfile = "";  }
                        if (summaryfile != "") { m->setSummaryFile(summaryfile); }
 
-                       
-                       processors = validParameter.validFile(parameters, "processors", false);
-                       if (processors == "not found") {  processors = "1";  }  
-                       if (processors != "") { m->setProcessors(processors); }
+                       string temp = validParameter.validFile(parameters, "processors", false);
+                       if (temp == "not found"){       temp = m->getProcessors();      }
+                       m->setProcessors(temp);
                        
                        clearTypes = validParameter.validFile(parameters, "clear", false);                      
                        if (clearTypes == "not found") { clearTypes = ""; }
old mode 100644 (file)
new mode 100755 (executable)
index 399f253..03bcebb
-/*
- *  sffinfocommand.cpp
- *  Mothur
- *
- *  Created by westcott on 7/7/10.
- *  Copyright 2010 Schloss Lab. All rights reserved.
- *
- */
-
-#include "sffinfocommand.h"
-#include "endiannessmacros.h"
-#include "trimoligos.h"
-#include "sequence.hpp"
-#include "qualityscores.h"
-
-//**********************************************************************************************************************
-vector<string> SffInfoCommand::setParameters(){        
-       try {           
-               CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff);
-        CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(poligos);
-               CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);
-               CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt);
-               CommandParameter pflow("flow", "Boolean", "", "T", "", "", "","flow",false,false); parameters.push_back(pflow);
-               CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptrim);
-               CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
-               CommandParameter pqfile("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqfile);
-        CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
-               CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
-        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
-               CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
-        CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
-               
-               vector<string> myArray;
-               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
-               return myArray;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "setParameters");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-string SffInfoCommand::getHelpString(){        
-       try {
-               string helpString = "";
-               helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";
-               helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n";
-               helpString += "The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n";
-               helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n";
-               helpString += "The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n";
-        helpString += "The oligos parameter allows you to provide an oligos file to split your sff file into separate sff files by barcode. \n";
-        helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
-               helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
-               helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
-        helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
-               helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
-               helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=True. \n";
-               helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n";
-               helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";
-               helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n";
-               helpString += "The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n";
-               helpString += "Example sffinfo(sff=mySffFile.sff, trim=F).\n";
-               helpString += "Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n";
-               return helpString;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "getHelpString");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
-string SffInfoCommand::getOutputPattern(string type) {
-    try {
-        string pattern = "";
-        
-        if (type == "fasta")            {   pattern =  "[filename],fasta-[filename],[tag],fasta";   }
-        else if (type == "flow")    {   pattern =  "[filename],flow";   }
-        else if (type == "sfftxt")        {   pattern =  "[filename],sff.txt";   }
-        else if (type == "sff")        {   pattern =  "[filename],[group],sff";   }
-        else if (type == "qfile")       {   pattern =  "[filename],qual-[filename],[tag],qual";   }
-        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
-        
-        return pattern;
-    }
-    catch(exception& e) {
-        m->errorOut(e, "SffInfoCommand", "getOutputPattern");
-        exit(1);
-    }
-}
-//**********************************************************************************************************************
-SffInfoCommand::SffInfoCommand(){      
-       try {
-               abort = true; calledHelp = true; 
-               setParameters();
-               vector<string> tempOutNames;
-               outputTypes["fasta"] = tempOutNames;
-               outputTypes["flow"] = tempOutNames;
-               outputTypes["sfftxt"] = tempOutNames;
-               outputTypes["qfile"] = tempOutNames;
-        outputTypes["sff"] = tempOutNames;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-
-SffInfoCommand::SffInfoCommand(string option)  {
-       try {
-               abort = false; calledHelp = false;   
-               hasAccnos = false; hasOligos = false;
-        split = 1;
-               
-               //allow user to run help
-               if(option == "help") { help(); abort = true; calledHelp = true; }
-               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
-               
-               else {
-                       //valid paramters for this command
-                       vector<string> myArray = setParameters();
-                       
-                       OptionParser parser(option);
-                       map<string, string> parameters = parser.getParameters();
-                       
-                       ValidParameters validParameter;
-                       //check to make sure all parameters are valid for command
-                       for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
-                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
-                       }
-                       
-                       //initialize outputTypes
-                       vector<string> tempOutNames;
-                       outputTypes["fasta"] = tempOutNames;
-                       outputTypes["flow"] = tempOutNames;
-                       outputTypes["sfftxt"] = tempOutNames;
-                       outputTypes["qfile"] = tempOutNames;
-            outputTypes["sff"] = tempOutNames;
-                       
-                       //if the user changes the output directory command factory will send this info to us in the output parameter 
-                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
-                       
-                       //if the user changes the input directory command factory will send this info to us in the output parameter 
-                       string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }
-
-                       sffFilename = validParameter.validFile(parameters, "sff", false);
-                       if (sffFilename == "not found") { sffFilename = "";  }
-                       else { 
-                               m->splitAtDash(sffFilename, filenames);
-                               
-                               //go through files and make sure they are good, if not, then disregard them
-                               for (int i = 0; i < filenames.size(); i++) {
-                                       bool ignore = false;
-                                       if (filenames[i] == "current") { 
-                                               filenames[i] = m->getSFFFile(); 
-                                               if (filenames[i] != "") {  m->mothurOut("Using " + filenames[i] + " as input file for the sff parameter where you had given current."); m->mothurOutEndLine(); }
-                                               else {  
-                                                       m->mothurOut("You have no current sfffile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
-                                                       //erase from file list
-                                                       filenames.erase(filenames.begin()+i);
-                                                       i--;
-                                               }
-                                       }
-                                       
-                                       if (!ignore) {
-                                               if (inputDir != "") {
-                                                       string path = m->hasPath(filenames[i]);
-                                                       //if the user has not given a path then, add inputdir. else leave path alone.
-                                                       if (path == "") {       filenames[i] = inputDir + filenames[i];         }
-                                               }
-               
-                                               ifstream in;
-                                               int ableToOpen = m->openInputFile(filenames[i], in, "noerror");
-                                       
-                                               //if you can't open it, try default location
-                                               if (ableToOpen == 1) {
-                                                       if (m->getDefaultPath() != "") { //default path is set
-                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]);
-                                                               m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
-                                                               ifstream in2;
-                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
-                                                               in2.close();
-                                                               filenames[i] = tryPath;
-                                                       }
-                                               }
-                                               
-                                               //if you can't open it, try default location
-                                               if (ableToOpen == 1) {
-                                                       if (m->getOutputDir() != "") { //default path is set
-                                                               string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]);
-                                                               m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
-                                                               ifstream in2;
-                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
-                                                               in2.close();
-                                                               filenames[i] = tryPath;
-                                                       }
-                                               }
-                                               
-                                               in.close();
-                                               
-                                               if (ableToOpen == 1) { 
-                                                       m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine();
-                                                       //erase from file list
-                                                       filenames.erase(filenames.begin()+i);
-                                                       i--;
-                                               }else { m->setSFFFile(filenames[i]); }
-                                       }
-                               }
-                               
-                               //make sure there is at least one valid file left
-                               if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
-                       }
-                       
-                       accnosName = validParameter.validFile(parameters, "accnos", false);
-                       if (accnosName == "not found") { accnosName = "";  }
-                       else { 
-                               hasAccnos = true;
-                               m->splitAtDash(accnosName, accnosFileNames);
-                               
-                               //go through files and make sure they are good, if not, then disregard them
-                               for (int i = 0; i < accnosFileNames.size(); i++) {
-                                       bool ignore = false;
-                                       if (accnosFileNames[i] == "current") { 
-                                               accnosFileNames[i] = m->getAccnosFile(); 
-                                               if (accnosFileNames[i] != "") {  m->mothurOut("Using " + accnosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }
-                                               else {  
-                                                       m->mothurOut("You have no current accnosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
-                                                       //erase from file list
-                                                       accnosFileNames.erase(accnosFileNames.begin()+i);
-                                                       i--;
-                                               }
-                                       }
-                                       
-                                       if (!ignore) {
-                                       
-                                               if (inputDir != "") {
-                                                       string path = m->hasPath(accnosFileNames[i]);
-                                                       //if the user has not given a path then, add inputdir. else leave path alone.
-                                                       if (path == "") {       accnosFileNames[i] = inputDir + accnosFileNames[i];             }
-                                               }
-               
-                                               ifstream in;
-                                               int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror");
-                                       
-                                               //if you can't open it, try default location
-                                               if (ableToOpen == 1) {
-                                                       if (m->getDefaultPath() != "") { //default path is set
-                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]);
-                                                               m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
-                                                               ifstream in2;
-                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
-                                                               in2.close();
-                                                               accnosFileNames[i] = tryPath;
-                                                       }
-                                               }
-                                               //if you can't open it, try default location
-                                               if (ableToOpen == 1) {
-                                                       if (m->getOutputDir() != "") { //default path is set
-                                                               string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]);
-                                                               m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
-                                                               ifstream in2;
-                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
-                                                               in2.close();
-                                                               accnosFileNames[i] = tryPath;
-                                                       }
-                                               }
-                                               in.close();
-                                               
-                                               if (ableToOpen == 1) { 
-                                                       m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
-                                                       //erase from file list
-                                                       accnosFileNames.erase(accnosFileNames.begin()+i);
-                                                       i--;
-                                               }
-                                       }
-                               }
-                               
-                               //make sure there is at least one valid file left
-                               if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
-                       }
-            
-            oligosfile = validParameter.validFile(parameters, "oligos", false);
-                       if (oligosfile == "not found") { oligosfile = "";  }
-                       else { 
-                               hasOligos = true;
-                               m->splitAtDash(oligosfile, oligosFileNames);
-                               
-                               //go through files and make sure they are good, if not, then disregard them
-                               for (int i = 0; i < oligosFileNames.size(); i++) {
-                                       bool ignore = false;
-                                       if (oligosFileNames[i] == "current") { 
-                                               oligosFileNames[i] = m->getOligosFile(); 
-                                               if (oligosFileNames[i] != "") {  m->mothurOut("Using " + oligosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }
-                                               else {  
-                                                       m->mothurOut("You have no current oligosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
-                                                       //erase from file list
-                                                       oligosFileNames.erase(oligosFileNames.begin()+i);
-                                                       i--;
-                                               }
-                                       }
-                                       
-                                       if (!ignore) {
-                        
-                                               if (inputDir != "") {
-                                                       string path = m->hasPath(oligosFileNames[i]);
-                                                       //if the user has not given a path then, add inputdir. else leave path alone.
-                                                       if (path == "") {       oligosFileNames[i] = inputDir + oligosFileNames[i];             }
-                                               }
-                        
-                                               ifstream in;
-                                               int ableToOpen = m->openInputFile(oligosFileNames[i], in, "noerror");
-                        
-                                               //if you can't open it, try default location
-                                               if (ableToOpen == 1) {
-                                                       if (m->getDefaultPath() != "") { //default path is set
-                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(oligosFileNames[i]);
-                                                               m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
-                                                               ifstream in2;
-                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
-                                                               in2.close();
-                                                               oligosFileNames[i] = tryPath;
-                                                       }
-                                               }
-                                               //if you can't open it, try default location
-                                               if (ableToOpen == 1) {
-                                                       if (m->getOutputDir() != "") { //default path is set
-                                                               string tryPath = m->getOutputDir() + m->getSimpleName(oligosFileNames[i]);
-                                                               m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
-                                                               ifstream in2;
-                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
-                                                               in2.close();
-                                                               oligosFileNames[i] = tryPath;
-                                                       }
-                                               }
-                                               in.close();
-                                               
-                                               if (ableToOpen == 1) { 
-                                                       m->mothurOut("Unable to open " + oligosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
-                                                       //erase from file list
-                                                       oligosFileNames.erase(oligosFileNames.begin()+i);
-                                                       i--;
-                                               }
-                                       }
-                               }
-                               
-                               //make sure there is at least one valid file left
-                               if (oligosFileNames.size() == 0) { m->mothurOut("no valid oligos files."); m->mothurOutEndLine(); abort = true; }
-                       }
-
-                       if (hasOligos) {
-                split = 2;
-                               if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a oligos file, you must have one for each sff file."); m->mothurOutEndLine(); }
-                       }
-            
-                       if (hasAccnos) {
-                               if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }
-                       }
-                       
-                       string temp = validParameter.validFile(parameters, "qfile", false);                     if (temp == "not found"){       temp = "T";                             }
-                       qual = m->isTrue(temp); 
-                       
-                       temp = validParameter.validFile(parameters, "fasta", false);                            if (temp == "not found"){       temp = "T";                             }
-                       fasta = m->isTrue(temp); 
-                       
-                       temp = validParameter.validFile(parameters, "flow", false);                                     if (temp == "not found"){       temp = "T";                             }
-                       flow = m->isTrue(temp); 
-                       
-                       temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }
-                       trim = m->isTrue(temp); 
-            
-            temp = validParameter.validFile(parameters, "bdiffs", false);              if (temp == "not found") { temp = "0"; }
-                       m->mothurConvert(temp, bdiffs);
-                       
-                       temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
-                       m->mothurConvert(temp, pdiffs);
-            
-            temp = validParameter.validFile(parameters, "ldiffs", false);              if (temp == "not found") { temp = "0"; }
-                       m->mothurConvert(temp, ldiffs);
-            
-            temp = validParameter.validFile(parameters, "sdiffs", false);              if (temp == "not found") { temp = "0"; }
-                       m->mothurConvert(temp, sdiffs);
-                       
-                       temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
-                       m->mothurConvert(temp, tdiffs);
-                       
-                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
-            
-                       temp = validParameter.validFile(parameters, "sfftxt", false);                           
-                       if (temp == "not found")        {       temp = "F";      sfftxt = false; sfftxtFilename = "";           }
-                       else if (m->isTrue(temp))       {       sfftxt = true;          sfftxtFilename = "";                            }
-                       else {
-                               //you are a filename
-                               if (inputDir != "") {
-                                       map<string,string>::iterator it = parameters.find("sfftxt");
-                                       //user has given a template file
-                                       if(it != parameters.end()){ 
-                                               string path = m->hasPath(it->second);
-                                               //if the user has not given a path then, add inputdir. else leave path alone.
-                                               if (path == "") {       parameters["sfftxt"] = inputDir + it->second;           }
-                                       }
-                               }
-                               
-                               sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true);
-                               if (sfftxtFilename == "not found") { sfftxtFilename = "";  }
-                               else if (sfftxtFilename == "not open") { sfftxtFilename = "";  }
-                       }
-                       
-                       if ((sfftxtFilename == "") && (filenames.size() == 0)) {  
-                               //if there is a current sff file, use it
-                               string filename = m->getSFFFile(); 
-                               if (filename != "") { filenames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the sff parameter."); m->mothurOutEndLine(); }
-                               else {  m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true;  }
-                       }
-            
-            
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::execute(){
-       try {
-               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
-               
-               for (int s = 0; s < filenames.size(); s++) {
-                       
-                       if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }
-                       
-                       int start = time(NULL);
-                       
-            filenames[s] = m->getFullPathName(filenames[s]);
-                       m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
-                       
-                       string accnos = "";
-                       if (hasAccnos) { accnos = accnosFileNames[s]; }
-            
-            string oligos = "";
-            if (hasOligos) { oligos = oligosFileNames[s]; }
-                       
-                       int numReads = extractSffInfo(filenames[s], accnos, oligos);
-
-                       m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");
-               }
-               
-               if (sfftxtFilename != "") {  parseSffTxt(); }
-               
-               if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }
-               
-               //set fasta file as new current fastafile
-               string current = "";
-               itTypes = outputTypes.find("fasta");
-               if (itTypes != outputTypes.end()) {
-                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
-               }
-               
-               itTypes = outputTypes.find("qfile");
-               if (itTypes != outputTypes.end()) {
-                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
-               }
-               
-               itTypes = outputTypes.find("flow");
-               if (itTypes != outputTypes.end()) {
-                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFlowFile(current); }
-               }
-               
-               //report output filenames
-               m->mothurOutEndLine();
-               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
-               m->mothurOutEndLine();
-
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "execute");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){
-       try {
-               currentFileName = input;
-               if (outputDir == "") {  outputDir += m->hasPath(input); }
-               
-               if (accnos != "")       {  readAccnosFile(accnos);  }
-               else                            {       seqNames.clear();               }
-         
-        if (oligos != "")   {   readOligos(oligos);  split = 2;   }
-
-               ofstream outSfftxt, outFasta, outQual, outFlow;
-               string outFastaFileName, outQualFileName;
-        string rootName = outputDir + m->getRootName(m->getSimpleName(input));
-        if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; }
-        
-        map<string, string> variables; 
-               variables["[filename]"] = rootName;
-               string sfftxtFileName = getOutputFileName("sfftxt",variables);
-               string outFlowFileName = getOutputFileName("flow",variables);
-               if (!trim) { variables["[tag]"] = "raw"; }
-               outFastaFileName = getOutputFileName("fasta",variables);
-        outQualFileName = getOutputFileName("qfile",variables);
-        
-               if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName);  outputTypes["sfftxt"].push_back(sfftxtFileName); }
-               if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
-               if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }
-               if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
-               
-               ifstream in;
-               in.open(input.c_str(), ios::binary);
-               
-               CommonHeader header; 
-               readCommonHeader(in, header);
-       
-               int count = 0;
-               mycount = 0;
-               
-               //check magic number and version
-               if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }
-               if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }
-       
-               //print common header
-               if (sfftxt) {   printCommonHeader(outSfftxt, header);           }
-               if (flow)       {       outFlow << header.numFlowsPerRead << endl;      }
-                       
-               //read through the sff file
-               while (!in.eof()) {
-                       
-                       bool print = true;
-                                               
-                       //read data
-                       seqRead read;  Header readheader;
-                       readSeqData(in, read, header.numFlowsPerRead, readheader);
-            bool okay = sanityCheck(readheader, read);
-            if (!okay) { break; }
-            
-                       //if you have provided an accosfile and this seq is not in it, then dont print
-                       if (seqNames.size() != 0) {   if (seqNames.count(readheader.name) == 0) { print = false; }  }
-                       
-                       //print 
-                       if (print) {
-                               if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); }
-                               if (fasta)      {       printFastaSeqData(outFasta, read, readheader);  }
-                               if (qual)       {       printQualSeqData(outQual, read, readheader);    }
-                               if (flow)       {       printFlowSeqData(outFlow, read, readheader);    }
-                       }
-                       
-                       count++;
-                       mycount++;
-        
-                       //report progress
-                       if((count+1) % 10000 == 0){     m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }
-               
-                       if (m->control_pressed) { count = 0; break;   }
-                       
-                       if (count >= header.numReads) { break; }
-            //if (count >= 100) { break; }
-               }
-               
-               //report progress
-               if (!m->control_pressed) {   if((count) % 10000 != 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }
-               
-               in.close();
-               
-               if (sfftxt) {  outSfftxt.close();       }
-               if (fasta)      {  outFasta.close();    }
-               if (qual)       {  outQual.close();             }
-               if (flow)       {  outFlow.close();             }
-               
-        if (split > 1) {
-            //create new common headers for each file with the correct number of reads
-            adjustCommonHeader(header);
-            
-                       map<string, string>::iterator it;
-                       set<string> namesToRemove;
-                       for(int i=0;i<filehandles.size();i++){
-                               for(int j=0;j<filehandles[0].size();j++){
-                                       if (filehandles[i][j] != "") {
-                                               if (namesToRemove.count(filehandles[i][j]) == 0) {
-                                                       if(m->isBlank(filehandles[i][j])){
-                                                               m->mothurRemove(filehandles[i][j]);
-                                m->mothurRemove(filehandlesHeaders[i][j]);
-                                                               namesToRemove.insert(filehandles[i][j]);
-                            }
-                                               }
-                                       }
-                               }
-                       }
-            
-            //append new header to reads
-            for (int i = 0; i < filehandles.size(); i++) {
-                for (int j = 0; j < filehandles[i].size(); j++) {
-                    m->appendFiles(filehandles[i][j], filehandlesHeaders[i][j]);
-                    m->renameFile(filehandlesHeaders[i][j], filehandles[i][j]);
-                    m->mothurRemove(filehandlesHeaders[i][j]);
-                    if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j]); }
-                }
-            }
-                       
-                       //remove names for outputFileNames, just cleans up the output
-                       for(int i = 0; i < outputNames.size(); i++) { 
-                if (namesToRemove.count(outputNames[i]) != 0) { 
-                    outputNames.erase(outputNames.begin()+i);
-                    i--;
-                } 
-            }
-            
-            if(m->isBlank(noMatchFile)){  m->mothurRemove(noMatchFile); }
-            else { outputNames.push_back(noMatchFile); outputTypes["sff"].push_back(noMatchFile); }
-        }
-        
-               return count;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "extractSffInfo");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){
-       try {
-        
-               if (!in.eof()) {
-
-                       //read magic number
-                       char buffer[4];
-                       in.read(buffer, 4);
-                       header.magicNumber = be_int4(*(unsigned int *)(&buffer));
-            
-                       //read version
-                       char buffer9[4];
-                       in.read(buffer9, 4);
-                       header.version = "";
-                       for (int i = 0; i < 4; i++) {  header.version += toString((int)(buffer9[i]));  }
-    
-                       //read offset
-                       char buffer2 [8];
-                       in.read(buffer2, 8);
-                       header.indexOffset =  be_int8(*(unsigned long long *)(&buffer2));
-                       
-                       //read index length
-                       char buffer3 [4];
-                       in.read(buffer3, 4);
-                       header.indexLength =  be_int4(*(unsigned int *)(&buffer3));
-            
-                       //read num reads
-                       char buffer4 [4];
-                       in.read(buffer4, 4);
-                       header.numReads =  be_int4(*(unsigned int *)(&buffer4));
-            
-            if (m->debug) { m->mothurOut("[DEBUG]: numReads = " + toString(header.numReads) + "\n"); }
-                               
-                       //read header length
-                       char buffer5 [2];
-                       in.read(buffer5, 2);
-                       header.headerLength =  be_int2(*(unsigned short *)(&buffer5));
-                                       
-                       //read key length
-                       char buffer6 [2];
-                       in.read(buffer6, 2);
-                       header.keyLength = be_int2(*(unsigned short *)(&buffer6));
-                       
-                       //read number of flow reads
-                       char buffer7 [2];
-                       in.read(buffer7, 2);
-                       header.numFlowsPerRead =  be_int2(*(unsigned short *)(&buffer7));
-                               
-                       //read format code
-                       char buffer8 [1];
-                       in.read(buffer8, 1);
-                       header.flogramFormatCode = (int)(buffer8[0]);
-                       
-                       //read flow chars
-                       char* tempBuffer = new char[header.numFlowsPerRead];
-                       in.read(&(*tempBuffer), header.numFlowsPerRead); 
-                       header.flowChars = tempBuffer;
-                       if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead);  }
-                       delete[] tempBuffer;
-                       
-                       //read key
-                       char* tempBuffer2 = new char[header.keyLength];
-                       in.read(&(*tempBuffer2), header.keyLength);
-                       header.keySequence = tempBuffer2;
-                       if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength);  }
-                       delete[] tempBuffer2;
-                       
-                       /* Pad to 8 chars */
-                       unsigned long long spotInFile = in.tellg();
-                       unsigned long long spot = (spotInFile + 7)& ~7;  // ~ inverts
-                       in.seekg(spot);
-            
-        }else{
-                       m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
-               }
-        
-               return 0;
-        
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "readCommonHeader");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::adjustCommonHeader(CommonHeader header){
-       try {
-
-        char* mybuffer = new char[4];
-        ifstream in;
-        in.open(currentFileName.c_str(), ios::binary);
-        
-        //magic number
-        in.read(mybuffer,4);
-        for (int i = 0; i < filehandlesHeaders.size(); i++) {  
-            for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
-                ofstream out;
-                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
-                out.write(mybuffer, in.gcount()); 
-                out.close();
-            }
-        }
-        delete[] mybuffer;
-        
-        //version
-        mybuffer = new char[4];
-        in.read(mybuffer,4);
-        for (int i = 0; i < filehandlesHeaders.size(); i++) {  
-            for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
-                ofstream out;
-                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
-                out.write(mybuffer, in.gcount()); 
-                out.close();
-            }
-        }
-        delete[] mybuffer;
-        
-        //offset
-        mybuffer = new char[8];
-        in.read(mybuffer,8);
-        for (int i = 0; i < filehandlesHeaders.size(); i++) {  
-            for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
-                unsigned long long offset = 0;
-                char* thisbuffer = new char[8];
-                thisbuffer[0] = (offset >> 56) & 0xFF;
-                thisbuffer[1] = (offset >> 48) & 0xFF;
-                thisbuffer[2] = (offset >> 40) & 0xFF;
-                thisbuffer[3] = (offset >> 32) & 0xFF;
-                thisbuffer[4] = (offset >> 24) & 0xFF;
-                thisbuffer[5] = (offset >> 16) & 0xFF;
-                thisbuffer[6] = (offset >> 8) & 0xFF;
-                thisbuffer[7] = offset & 0xFF;
-                ofstream out;
-                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
-                out.write(thisbuffer, 8);
-                out.close();
-            }
-        }
-        delete[] mybuffer;
-            
-                       
-        //read index length
-               mybuffer = new char[4];
-        in.read(mybuffer,4);
-        for (int i = 0; i < filehandlesHeaders.size(); i++) {  
-            for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
-                ofstream out;
-                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
-                unsigned int offset = 0;
-                char* thisbuffer = new char[4];
-                thisbuffer[0] = (offset >> 24) & 0xFF;
-                thisbuffer[1] = (offset >> 16) & 0xFF;
-                thisbuffer[2] = (offset >> 8) & 0xFF;
-                thisbuffer[3] = offset & 0xFF;
-                out.write(thisbuffer, 4);
-                out.close();
-            }
-        }
-        delete[] mybuffer;
-               
-        //change num reads
-        mybuffer = new char[4];
-        in.read(mybuffer,4);
-        delete[] mybuffer;
-        for (int i = 0; i < filehandlesHeaders.size(); i++) {  
-            for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
-                ofstream out;
-                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
-                //convert number of reads to 4 byte char*
-                char* thisbuffer = new char[4];
-                if ((m->findEdianness()) == "BIG_ENDIAN") {
-                    thisbuffer[0] = (numSplitReads[i][j] >> 24) & 0xFF;
-                    thisbuffer[1] = (numSplitReads[i][j] >> 16) & 0xFF;
-                    thisbuffer[2] = (numSplitReads[i][j] >> 8) & 0xFF;
-                    thisbuffer[3] = numSplitReads[i][j] & 0xFF;
-                }else {
-                    thisbuffer[0] = numSplitReads[i][j] & 0xFF;
-                    thisbuffer[1] = (numSplitReads[i][j] >> 8) & 0xFF;
-                    thisbuffer[2] = (numSplitReads[i][j] >> 16) & 0xFF;
-                    thisbuffer[3] = (numSplitReads[i][j] >> 24) & 0xFF;
-                 }
-                out.write(thisbuffer, 4);
-                out.close();
-                delete[] thisbuffer;
-            }
-        }
-            
-        //read header length
-        mybuffer = new char[2];
-        in.read(mybuffer,2);
-        for (int i = 0; i < filehandlesHeaders.size(); i++) {  
-            for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
-                ofstream out;
-                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
-                out.write(mybuffer, in.gcount()); 
-                out.close();
-            }
-        }
-        delete[] mybuffer;
-            
-        //read key length
-        mybuffer = new char[2];
-        in.read(mybuffer,2);
-        for (int i = 0; i < filehandlesHeaders.size(); i++) {  
-            for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
-                ofstream out;
-                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
-                out.write(mybuffer, in.gcount()); 
-                out.close();
-            }
-        }
-        delete[] mybuffer;
-                       
-        //read number of flow reads
-        mybuffer = new char[2];
-        in.read(mybuffer,2);
-        for (int i = 0; i < filehandlesHeaders.size(); i++) {  
-            for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
-                ofstream out;
-                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
-                out.write(mybuffer, in.gcount()); 
-                out.close();
-            }
-        }
-        delete[] mybuffer;
-            
-        //read format code
-        mybuffer = new char[1];
-        in.read(mybuffer,1);
-        for (int i = 0; i < filehandlesHeaders.size(); i++) {  
-            for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
-                ofstream out;
-                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
-                out.write(mybuffer, in.gcount()); 
-                out.close();
-            }
-        }
-        delete[] mybuffer;
-                       
-        //read flow chars
-        mybuffer = new char[header.numFlowsPerRead];
-        in.read(mybuffer,header.numFlowsPerRead);
-        for (int i = 0; i < filehandlesHeaders.size(); i++) {  
-            for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
-                ofstream out;
-                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
-                out.write(mybuffer, in.gcount()); 
-                out.close();
-            }
-        }
-        delete[] mybuffer;
-                       
-        //read key
-        mybuffer = new char[header.keyLength];
-        in.read(mybuffer,header.keyLength);
-        for (int i = 0; i < filehandlesHeaders.size(); i++) {  
-            for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
-                ofstream out;
-                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
-                out.write(mybuffer, in.gcount()); 
-                out.close();
-            }
-        }
-        delete[] mybuffer;
-        
-                       
-        /* Pad to 8 chars */
-        unsigned long long spotInFile = in.tellg();
-        unsigned long long spot = (spotInFile + 7)& ~7;  // ~ inverts
-        in.seekg(spot);
-        
-        mybuffer = new char[spot-spotInFile];
-        for (int i = 0; i < filehandlesHeaders.size(); i++) { 
-            for (int j = 0; j < filehandlesHeaders[i].size(); j++) {
-                ofstream out;
-                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);
-                out.write(mybuffer, spot-spotInFile); 
-                out.close();
-            }
-        }
-        delete[] mybuffer;
-        in.close();
-               return 0;
-        
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "adjustCommonHeader");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header){
-       try {
-        unsigned long long startSpotInFile = in.tellg();
-               if (!in.eof()) {
-            
-            /*****************************************/
-            //read header
-            
-            //read header length
-                       char buffer [2];
-                       in.read(buffer, 2); 
-                       header.headerLength = be_int2(*(unsigned short *)(&buffer));
-            
-                       //read name length
-                       char buffer2 [2];
-                       in.read(buffer2, 2);
-                       header.nameLength = be_int2(*(unsigned short *)(&buffer2));
-            
-                       //read num bases
-                       char buffer3 [4];
-                       in.read(buffer3, 4);
-                       header.numBases =  be_int4(*(unsigned int *)(&buffer3));
-            
-                       
-                       //read clip qual left
-                       char buffer4 [2];
-                       in.read(buffer4, 2);
-                       header.clipQualLeft =  be_int2(*(unsigned short *)(&buffer4));
-                       header.clipQualLeft = 5;
-            
-                       
-                       //read clip qual right
-                       char buffer5 [2];
-                       in.read(buffer5, 2);
-                       header.clipQualRight =  be_int2(*(unsigned short *)(&buffer5));
-           
-            
-                       //read clipAdapterLeft
-                       char buffer6 [2];
-                       in.read(buffer6, 2);
-                       header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));
-            
-            
-                       //read clipAdapterRight
-                       char buffer7 [2];
-                       in.read(buffer7, 2);
-                       header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));
-            
-            
-                       //read name
-                       char* tempBuffer = new char[header.nameLength];
-                       in.read(&(*tempBuffer), header.nameLength);
-                       header.name = tempBuffer;
-                       if (header.name.length() > header.nameLength) { header.name = header.name.substr(0, header.nameLength);  }
-            
-                       delete[] tempBuffer;
-                       
-                       //extract info from name
-                       decodeName(header.timestamp, header.region, header.xy, header.name);
-                       
-                       /* Pad to 8 chars */
-                       unsigned long long spotInFile = in.tellg();
-                       unsigned long long spot = (spotInFile + 7)& ~7;
-                       in.seekg(spot);
-
-            /*****************************************/
-            //sequence read 
-            
-                       //read flowgram
-                       read.flowgram.resize(numFlowReads);
-                       for (int i = 0; i < numFlowReads; i++) {  
-                               char buffer [2];
-                               in.read(buffer, 2);
-                               read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));
-                       }
-            
-                       //read flowIndex
-                       read.flowIndex.resize(header.numBases);
-                       for (int i = 0; i < header.numBases; i++) {  
-                               char temp[1];
-                               in.read(temp, 1);
-                               read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));
-                       }
-       
-                       //read bases
-                       char* tempBuffer6 = new char[header.numBases];
-                       in.read(&(*tempBuffer6), header.numBases);
-                       read.bases = tempBuffer6;
-                       if (read.bases.length() > header.numBases) { read.bases = read.bases.substr(0, header.numBases);  }
-                       delete[] tempBuffer6;
-
-                       //read qual scores
-                       read.qualScores.resize(header.numBases);
-                       for (int i = 0; i < header.numBases; i++) {  
-                               char temp[1];
-                               in.read(temp, 1);
-                               read.qualScores[i] = be_int1(*(unsigned char *)(&temp));
-                       }
-       
-                       /* Pad to 8 chars */
-                       spotInFile = in.tellg();
-                       spot = (spotInFile + 7)& ~7;
-                       in.seekg(spot);
-            
-            if (split > 1) {
-                char * mybuffer;
-                mybuffer = new char [spot-startSpotInFile];
-                ifstream in2;
-                in2.open(currentFileName.c_str(), ios::binary);
-                in2.seekg(startSpotInFile);
-                in2.read(mybuffer,spot-startSpotInFile);
-                in2.close();
-                
-                int barcodeIndex, primerIndex;
-                int trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex);
-                                
-                if(trashCodeLength == 0){
-                    //cout << header.name << " length = " << spot << '\t' << startSpotInFile << '\t' << in2.gcount() << endl;
-                    
-                    ofstream out;
-                    m->openOutputFileBinaryAppend(filehandles[barcodeIndex][primerIndex], out);
-                    out.write(mybuffer, in2.gcount()); 
-                    out.close();
-                    numSplitReads[barcodeIndex][primerIndex]++;
-                               }
-                               else{
-                                       ofstream out;
-                    m->openOutputFileBinaryAppend(noMatchFile, out);
-                    out.write(mybuffer, in2.gcount()); 
-                    out.close();
-                               }
-                               delete[] mybuffer;
-                       }
-               }else{
-                       m->mothurOut("Error reading."); m->mothurOutEndLine();
-               }
-
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "readSeqData");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer) {
-       try {
-        //find group read belongs to
-        TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
-        
-        int success = 1;
-        string trashCode = "";
-        int currentSeqsDiffs = 0;
-        
-        string seq = read.bases;
-        
-        if (trim) {
-            if(header.clipQualRight < header.clipQualLeft){
-                if (header.clipQualRight == 0) { //don't trim right
-                    seq = seq.substr(header.clipQualLeft-1);
-                }else {
-                    seq = "NNNN";
-                }
-            }
-            else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
-                seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
-            }
-            else {
-                seq = seq.substr(header.clipQualLeft-1);
-            }
-        }else{
-            //if you wanted the sfftxt then you already converted the bases to the right case
-            if (!sfftxt) {
-                int endValue = header.clipQualRight;
-                //make the bases you want to clip lowercase and the bases you want to keep upper case
-                if(endValue == 0){     endValue = seq.length();        }
-                for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }
-                for (int i = (header.clipQualLeft-1); i < (endValue-1); i++)  {   seq[i] = toupper(seq[i]);  }
-                for (int i = (endValue-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }
-            }
-        }
-        
-        Sequence currSeq(header.name, seq);
-        QualityScores currQual;
-        
-        if(numLinkers != 0){
-            success = trimOligos.stripLinker(currSeq, currQual);
-            if(success > ldiffs)               {       trashCode += 'k';       }
-            else{ currentSeqsDiffs += success;  }
-            
-        }
-        
-        if(barcodes.size() != 0){
-            success = trimOligos.stripBarcode(currSeq, currQual, barcode);
-            if(success > bdiffs)               {       trashCode += 'b';       }
-            else{ currentSeqsDiffs += success;  }
-        }
-        
-        if(numSpacers != 0){
-            success = trimOligos.stripSpacer(currSeq, currQual);
-            if(success > sdiffs)               {       trashCode += 's';       }
-            else{ currentSeqsDiffs += success;  }
-            
-        }
-        
-        if(numFPrimers != 0){
-            success = trimOligos.stripForward(currSeq, currQual, primer, true);
-            if(success > pdiffs)               {       trashCode += 'f';       }
-            else{ currentSeqsDiffs += success;  }
-        }
-        
-        if (currentSeqsDiffs > tdiffs) {       trashCode += 't';   }
-        
-        if(revPrimer.size() != 0){
-            success = trimOligos.stripReverse(currSeq, currQual);
-            if(!success)                               {       trashCode += 'r';       }
-        }
-
-        
-        return trashCode.length();
-    }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "findGroup");
-               exit(1);
-       }
-}     
-//**********************************************************************************************************************
-int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {
-       try {
-               
-               if (name.length() >= 6) {
-                       string time = name.substr(0, 6);
-                       unsigned int timeNum = m->fromBase36(time);
-                       
-                       int q1 = timeNum / 60;
-                       int sec = timeNum - 60 * q1;
-                       int q2 = q1 / 60;
-                       int minute = q1 - 60 * q2;
-                       int q3 = q2 / 24;
-                       int hr = q2 - 24 * q3;
-                       int q4 = q3 / 32;
-                       int day = q3 - 32 * q4;
-                       int q5 = q4 / 13;
-                       int mon = q4 - 13 * q5;
-                       int year = 2000 + q5;
-               
-                       timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);
-               }
-               
-               if (name.length() >= 9) {
-                       region = name.substr(7, 2);
-               
-                       string xyNum = name.substr(9);
-                       unsigned int myXy = m->fromBase36(xyNum);
-                       int x = myXy >> 12;
-                       int y = myXy & 4095;
-               
-                       xy = toString(x) + "_" + toString(y);
-               }
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "decodeName");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {
-       try {
-       
-               out << "Common Header:\nMagic Number: " << header.magicNumber << endl;
-               out << "Version: " << header.version << endl;
-               out << "Index Offset: " << header.indexOffset << endl;
-               out << "Index Length: " << header.indexLength << endl;
-               out << "Number of Reads: " << header.numReads << endl;
-               out << "Header Length: " << header.headerLength << endl;
-               out << "Key Length: " << header.keyLength << endl;
-               out << "Number of Flows: " << header.numFlowsPerRead << endl;
-               out << "Format Code: " << header.flogramFormatCode << endl;
-               out << "Flow Chars: " << header.flowChars << endl;
-               out << "Key Sequence: " << header.keySequence << endl << endl;
-                       
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "printCommonHeader");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::printHeader(ofstream& out, Header& header) {
-       try {
-               
-               out << ">" << header.name << endl;
-               out << "Run Prefix: " << header.timestamp << endl;
-               out << "Region #:  " << header.region << endl;
-               out << "XY Location: " << header.xy << endl << endl;
-               
-               out << "Run Name:  " << endl;
-               out << "Analysis Name:  " << endl;
-               out << "Full Path: " << endl << endl;
-               
-               out << "Read Header Len: " << header.headerLength << endl;
-               out << "Name Length: " << header.nameLength << endl;
-               out << "# of Bases: " << header.numBases << endl;
-               out << "Clip Qual Left: " << header.clipQualLeft << endl;
-               out << "Clip Qual Right: " << header.clipQualRight << endl;
-               out << "Clip Adap Left: " << header.clipAdapterLeft << endl;
-               out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "printHeader");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-bool SffInfoCommand::sanityCheck(Header& header, seqRead& read) {
-       try {
-        bool okay = true;
-        string message = "[WARNING]: Your sff file may be corrupted! Sequence: " + header.name + "\n";
-        
-        if (header.clipQualLeft > read.bases.length()) {
-            okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.bases.length()) + " bases.\n";
-        }
-        if (header.clipQualRight > read.bases.length()) {
-            okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.bases.length()) + " bases.\n";
-        }
-        if (header.clipQualLeft > read.qualScores.size()) {
-            okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";
-        }
-        if (header.clipQualRight > read.qualScores.size()) {
-            okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";
-        }
-        
-        if (okay == false) {
-            m->mothurOut(message); m->mothurOutEndLine();
-        }
-        
-               return okay;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "sanityCheck");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {
-       try {
-               out << "Flowgram: ";
-               for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t';  }
-               
-               out << endl <<  "Flow Indexes: ";
-               int sum = 0;
-               for (int i = 0; i < read.flowIndex.size(); i++) {  sum +=  read.flowIndex[i];  out << sum << '\t'; }
-               
-               //make the bases you want to clip lowercase and the bases you want to keep upper case
-        int endValue = header.clipQualRight;
-               if(endValue == 0){      endValue = read.bases.length(); }
-               for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); }
-               for (int i = (header.clipQualLeft-1); i < (endValue-1); i++) {   read.bases[i] = toupper(read.bases[i]);  }
-               for (int i = (endValue-1); i < read.bases.length(); i++) {   read.bases[i] = tolower(read.bases[i]);  }
-               
-               out << endl <<  "Bases: " << read.bases << endl << "Quality Scores: ";
-               for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
-       
-               
-               out << endl << endl;
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {
-       try {
-               string seq = read.bases;
-               
-        if (trim) {
-                       if(header.clipQualRight < header.clipQualLeft){
-                               if (header.clipQualRight == 0) { //don't trim right
-                    seq = seq.substr(header.clipQualLeft-1);
-                }else {
-                    seq = "NNNN";
-                }
-                       }
-                       else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
-                               seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));
-                       }
-                       else {
-                               seq = seq.substr(header.clipQualLeft-1);
-                       }
-               }else{
-                       //if you wanted the sfftxt then you already converted the bases to the right case
-                       if (!sfftxt) {
-                int endValue = header.clipQualRight;
-                               //make the bases you want to clip lowercase and the bases you want to keep upper case
-                               if(endValue == 0){      endValue = seq.length();        }
-                               for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }
-                               for (int i = (header.clipQualLeft-1); i < (endValue-1); i++)  {   seq[i] = toupper(seq[i]);  }
-                               for (int i = (endValue-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }
-                       }
-               }
-               
-               out << ">" << header.name  << " xy=" << header.xy << endl;
-               out << seq << endl;
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "printFastaSeqData");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
-int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) {
-       try {
-               
-               if (trim) {
-                       if(header.clipQualRight < header.clipQualLeft){
-                if (header.clipQualRight == 0) { //don't trim right
-                    out << ">" << header.name << " xy=" << header.xy << " length=" << (read.qualScores.size()-header.clipQualLeft) << endl;
-                    for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';      }       
-                }else {
-                    out << ">" << header.name << " xy=" << header.xy << endl;
-                    out << "0\t0\t0\t0";
-                }
-                       }
-                       else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){
-                               out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
-                               for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   out << read.qualScores[i] << '\t'; }
-                       }
-                       else{
-                               out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;
-                               for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';   }                       
-                       }
-               }else{
-                       out << ">" << header.name << " xy=" << header.xy << " length=" << read.qualScores.size() << endl;
-                       for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }
-               }
-               
-               out << endl;
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "printQualSeqData");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
-int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {
-       try {
-        
-        int endValue = header.clipQualRight;
-        if (header.clipQualRight == 0) {
-            endValue = read.flowIndex.size();
-            if (m->debug) { m->mothurOut("[DEBUG]: " + header.name + " has clipQualRight=0.\n"); }
-        }
-        if(endValue > header.clipQualLeft){
-            
-            int rightIndex = 0;
-            for (int i = 0; i < endValue; i++) {  rightIndex +=  read.flowIndex[i];     }
-            
-            out << header.name << ' ' << rightIndex;
-            for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100);  }
-            out << endl;
-        }
-               
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "printFlowSeqData");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::readAccnosFile(string filename) {
-       try {
-               //remove old names
-               seqNames.clear();
-               
-               ifstream in;
-               m->openInputFile(filename, in);
-               string name;
-               
-               while(!in.eof()){
-                       in >> name; m->gobble(in);
-                                               
-                       seqNames.insert(name);
-                       
-                       if (m->control_pressed) { seqNames.clear(); break; }
-               }
-               in.close();             
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "readAccnosFile");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-int SffInfoCommand::parseSffTxt() {
-       try {
-               
-               ifstream inSFF;
-               m->openInputFile(sfftxtFilename, inSFF);
-               
-               if (outputDir == "") {  outputDir += m->hasPath(sfftxtFilename); }
-               
-               //output file names
-               ofstream outFasta, outQual, outFlow;
-               string outFastaFileName, outQualFileName;
-               string fileRoot = m->getRootName(m->getSimpleName(sfftxtFilename));
-               if (fileRoot.length() > 0) {
-                       //rip off last .
-                       fileRoot = fileRoot.substr(0, fileRoot.length()-1);
-                       fileRoot = m->getRootName(fileRoot);
-               }
-               
-        map<string, string> variables; 
-               variables["[filename]"] = fileRoot;
-               string sfftxtFileName = getOutputFileName("sfftxt",variables);
-               string outFlowFileName = getOutputFileName("flow",variables);
-               if (!trim) { variables["[tag]"] = "raw"; }
-               outFastaFileName = getOutputFileName("fasta",variables);
-        outQualFileName = getOutputFileName("qfile",variables);
-               
-               if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
-               if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }
-               if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
-               
-               //read common header
-               string commonHeader = m->getline(inSFF);
-               string magicNumber = m->getline(inSFF); 
-               string version = m->getline(inSFF);
-               string indexOffset = m->getline(inSFF);
-               string indexLength = m->getline(inSFF);
-               int numReads = parseHeaderLineToInt(inSFF);
-               string headerLength = m->getline(inSFF);
-               string keyLength = m->getline(inSFF);
-               int numFlows = parseHeaderLineToInt(inSFF);
-               string flowgramCode = m->getline(inSFF);
-               string flowChars = m->getline(inSFF);
-               string keySequence = m->getline(inSFF);
-               m->gobble(inSFF);
-               
-               string seqName;
-               
-               if (flow)       {       outFlow << numFlows << endl;    }
-               
-               for(int i=0;i<numReads;i++){
-                       
-                       //sanity check
-                       if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }
-                       
-                       Header header;
-                       
-                       //parse read header
-                       inSFF >> seqName;
-                       seqName = seqName.substr(1);
-                       m->gobble(inSFF);
-                       header.name = seqName;
-                       
-                       string runPrefix = parseHeaderLineToString(inSFF);              header.timestamp = runPrefix;
-                       string regionNumber = parseHeaderLineToString(inSFF);   header.region = regionNumber;
-                       string xyLocation = parseHeaderLineToString(inSFF);             header.xy = xyLocation;
-                       m->gobble(inSFF);
-                               
-                       string runName = parseHeaderLineToString(inSFF);
-                       string analysisName = parseHeaderLineToString(inSFF);
-                       string fullPath = parseHeaderLineToString(inSFF);
-                       m->gobble(inSFF);
-                       
-                       string readHeaderLen = parseHeaderLineToString(inSFF);  convert(readHeaderLen, header.headerLength);
-                       string nameLength = parseHeaderLineToString(inSFF);             convert(nameLength, header.nameLength);
-                       int numBases = parseHeaderLineToInt(inSFF);                             header.numBases = numBases;
-                       string clipQualLeft = parseHeaderLineToString(inSFF);   convert(clipQualLeft, header.clipQualLeft);
-                       int clipQualRight = parseHeaderLineToInt(inSFF);                header.clipQualRight = clipQualRight;
-                       string clipAdapLeft = parseHeaderLineToString(inSFF);   convert(clipAdapLeft, header.clipAdapterLeft);
-                       string clipAdapRight = parseHeaderLineToString(inSFF);  convert(clipAdapRight, header.clipAdapterRight);
-                       m->gobble(inSFF);
-                               
-                       seqRead read;
-                       
-                       //parse read
-                       vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows);      read.flowgram = flowVector;
-                       vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases); 
-                       
-                       //adjust for print
-                       vector<unsigned int> flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]);
-                       for (int j = 1; j < flowIndices.size(); j++) {   flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]);   }
-                       read.flowIndex = flowIndicesAdjusted;
-                       
-                       string bases = parseHeaderLineToString(inSFF);                                                                          read.bases = bases;
-                       vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases);       read.qualScores = qualityScores;
-                       m->gobble(inSFF);
-                                       
-                       //if you have provided an accosfile and this seq is not in it, then dont print
-                       bool print = true;
-                       if (seqNames.size() != 0) {   if (seqNames.count(header.name) == 0) { print = false; }  }
-                       
-                       //print 
-                       if (print) {
-                               if (fasta)      {       printFastaSeqData(outFasta, read, header);      }
-                               if (qual)       {       printQualSeqData(outQual, read, header);        }
-                               if (flow)       {       printFlowSeqData(outFlow, read, header);        }
-                       }
-                       
-                       //report progress
-                       if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
-                       
-                       if (m->control_pressed) {  break;  }
-               }
-               
-               //report progress
-               if (!m->control_pressed) {   if((numReads) % 10000 != 0){       m->mothurOut(toString(numReads)); m->mothurOutEndLine();                }  }
-               
-               inSFF.close();
-               
-               if (fasta)      {  outFasta.close();    }
-               if (qual)       {  outQual.close();             }
-               if (flow)       {  outFlow.close();             }
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "parseSffTxt");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-
-int SffInfoCommand::parseHeaderLineToInt(ifstream& file){
-       try {
-               int number;
-               
-               while (!file.eof())     {
-                       
-                       char c = file.get(); 
-                       if (c == ':'){
-                               file >> number;
-                               break;
-                       }
-                       
-               }
-               m->gobble(file);
-               return number;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");
-               exit(1);
-       }
-       
-}
-
-//**********************************************************************************************************************
-
-string SffInfoCommand::parseHeaderLineToString(ifstream& file){
-       try {
-               string text;
-               
-               while (!file.eof())     {
-                       char c = file.get(); 
-                       
-                       if (c == ':'){
-                               //m->gobble(file);
-                               //text = m->getline(file);      
-                               file >> text;
-                               break;
-                       }
-               }
-               m->gobble(file);
-               
-               return text;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
-
-vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){
-       try {
-               vector<unsigned short> floatVector(length);
-               
-               while (!file.eof())     {
-                       char c = file.get(); 
-                       if (c == ':'){
-                               float temp;
-                               for(int i=0;i<length;i++){
-                                       file >> temp;
-                                       floatVector[i] = temp * 100;
-                               }
-                               break;
-                       }
-               }
-               m->gobble(file);        
-               return floatVector;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
-
-vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){
-       try {
-               vector<unsigned int> intVector(length);
-               
-               while (!file.eof())     {
-                       char c = file.get(); 
-                       if (c == ':'){
-                               for(int i=0;i<length;i++){
-                                       file >> intVector[i];
-                               }
-                               break;
-                       }
-               }
-               m->gobble(file);        
-               return intVector;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");
-               exit(1);
-       }
-}
-//***************************************************************************************************************
-
-bool SffInfoCommand::readOligos(string oligoFile){
-       try {
-        filehandles.clear();
-        numSplitReads.clear();
-        filehandlesHeaders.clear();
-        
-               ifstream inOligos;
-               m->openInputFile(oligoFile, inOligos);
-               
-               string type, oligo, group;
-        
-               int indexPrimer = 0;
-               int indexBarcode = 0;
-               
-               while(!inOligos.eof()){
-            
-                       inOligos >> type; 
-            
-                       if(type[0] == '#'){
-                               while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
-                               m->gobble(inOligos);
-                       }
-                       else{
-                               m->gobble(inOligos);
-                               //make type case insensitive
-                               for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
-                               
-                               inOligos >> oligo;
-                               
-                               for(int i=0;i<oligo.length();i++){
-                                       oligo[i] = toupper(oligo[i]);
-                                       if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
-                               }
-                               
-                               if(type == "FORWARD"){
-                                       group = "";
-                                       
-                                       // get rest of line in case there is a primer name
-                                       while (!inOligos.eof()) {       
-                                               char c = inOligos.get(); 
-                                               if (c == 10 || c == 13 || c == -1){     break;  }
-                                               else if (c == 32 || c == 9){;} //space or tab
-                                               else {  group += c;  }
-                                       } 
-                                       
-                                       //check for repeat barcodes
-                                       map<string, int>::iterator itPrime = primers.find(oligo);
-                                       if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
-                                       
-                                       primers[oligo]=indexPrimer; indexPrimer++;              
-                                       primerNameVector.push_back(group);
-                               }else if(type == "REVERSE"){
-                                       //Sequence oligoRC("reverse", oligo);
-                                       //oligoRC.reverseComplement();
-                    string oligoRC = reverseOligo(oligo);
-                                       revPrimer.push_back(oligoRC);
-                               }
-                               else if(type == "BARCODE"){
-                                       inOligos >> group;
-                                       
-                                       //check for repeat barcodes
-                                       map<string, int>::iterator itBar = barcodes.find(oligo);
-                                       if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
-                    
-                                       barcodes[oligo]=indexBarcode; indexBarcode++;
-                                       barcodeNameVector.push_back(group);
-                               }else if(type == "LINKER"){
-                                       linker.push_back(oligo);
-                               }else if(type == "SPACER"){
-                                       spacer.push_back(oligo);
-                               }
-                               else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
-                       }
-                       m->gobble(inOligos);
-               }       
-               inOligos.close();
-               
-               if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ split = 1;      }
-               
-               //add in potential combos
-               if(barcodeNameVector.size() == 0){
-                       barcodes[""] = 0;
-                       barcodeNameVector.push_back("");                        
-               }
-               
-               if(primerNameVector.size() == 0){
-                       primers[""] = 0;
-                       primerNameVector.push_back("");                 
-               }
-               
-               filehandles.resize(barcodeNameVector.size());
-               for(int i=0;i<filehandles.size();i++){
-                       filehandles[i].assign(primerNameVector.size(), "");
-               }
-                       
-               if(split > 1){
-                       set<string> uniqueNames; //used to cleanup outputFileNames
-                       for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
-                               for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
-                                       
-                                       string primerName = primerNameVector[itPrimer->second];
-                                       string barcodeName = barcodeNameVector[itBar->second];
-                                       
-                                       string comboGroupName = "";
-                                       string fastaFileName = "";
-                                       string qualFileName = "";
-                                       string nameFileName = "";
-                                       
-                                       if(primerName == ""){
-                                               comboGroupName = barcodeNameVector[itBar->second];
-                                       }
-                                       else{
-                                               if(barcodeName == ""){
-                                                       comboGroupName = primerNameVector[itPrimer->second];
-                                               }
-                                               else{
-                                                       comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
-                                               }
-                                       }
-                                       
-                                       ofstream temp;
-                    map<string, string> variables; 
-                    variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));
-                    variables["[group]"] = comboGroupName;
-                                       string thisFilename = getOutputFileName("sff",variables);
-                                       if (uniqueNames.count(thisFilename) == 0) {
-                                               outputNames.push_back(thisFilename);
-                                               outputTypes["sff"].push_back(thisFilename);
-                                               uniqueNames.insert(thisFilename);
-                                       }
-                                       
-                                       filehandles[itBar->second][itPrimer->second] = thisFilename;
-                                       temp.open(thisFilename.c_str(), ios::binary);           temp.close();
-                               }
-                       }
-               }
-               numFPrimers = primers.size();
-        numLinkers = linker.size();
-        numSpacers = spacer.size();
-        map<string, string> variables; 
-        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));
-        variables["[group]"] = "scrap";
-               noMatchFile = getOutputFileName("sff",variables);
-        m->mothurRemove(noMatchFile);
-        
-               bool allBlank = true;
-               for (int i = 0; i < barcodeNameVector.size(); i++) {
-                       if (barcodeNameVector[i] != "") {
-                               allBlank = false;
-                               break;
-                       }
-               }
-               for (int i = 0; i < primerNameVector.size(); i++) {
-                       if (primerNameVector[i] != "") {
-                               allBlank = false;
-                               break;
-                       }
-               }
-               
-        filehandlesHeaders.resize(filehandles.size());
-        numSplitReads.resize(filehandles.size());
-        for (int i = 0; i < filehandles.size(); i++) { 
-            numSplitReads[i].resize(filehandles[i].size(), 0); 
-            for (int j = 0; j < filehandles[i].size(); j++) {
-                filehandlesHeaders[i].push_back(filehandles[i][j]+"headers");
-            }
-        }
-                             
-               if (allBlank) {
-                       m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a split the sff file."); m->mothurOutEndLine();
-                       split = 1;
-                       return false;
-               }
-               
-               return true;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "readOligos");
-               exit(1);
-       }
-}
-//********************************************************************/
-string SffInfoCommand::reverseOligo(string oligo){ // Returns the reverse complement of oligo: reads it last character to first and appends the complement of each base.
-       try {
-        string reverse = "";
-        
-        for(int i=oligo.length()-1;i>=0;i--){ // walk backwards so the output comes out already reversed
-            // plain bases: A<->T, G<->C; U is complemented exactly like T
-            if(oligo[i] == 'A')                {       reverse += 'T'; }
-            else if(oligo[i] == 'T'){  reverse += 'A'; }
-            else if(oligo[i] == 'U'){  reverse += 'A'; }
-            
-            else if(oligo[i] == 'G'){  reverse += 'C'; }
-            else if(oligo[i] == 'C'){  reverse += 'G'; }
-            // two-base ambiguity codes: R<->Y and M<->K swap, W and S map to themselves
-            else if(oligo[i] == 'R'){  reverse += 'Y'; }
-            else if(oligo[i] == 'Y'){  reverse += 'R'; }
-            
-            else if(oligo[i] == 'M'){  reverse += 'K'; }
-            else if(oligo[i] == 'K'){  reverse += 'M'; }
-            
-            else if(oligo[i] == 'W'){  reverse += 'W'; }
-            else if(oligo[i] == 'S'){  reverse += 'S'; }
-            // three-base ambiguity codes: B<->V and D<->H swap
-            else if(oligo[i] == 'B'){  reverse += 'V'; }
-            else if(oligo[i] == 'V'){  reverse += 'B'; }
-            
-            else if(oligo[i] == 'D'){  reverse += 'H'; }
-            else if(oligo[i] == 'H'){  reverse += 'D'; }
-            // only the uppercase letters above are matched; every other character (including 'N' and lowercase) is emitted as 'N'
-            else                                               {       reverse += 'N'; }
-        }
-        
-        
-        return reverse;
-    }
-       catch(exception& e) { // report through m->errorOut, then terminate the process
-               m->errorOut(e, "SffInfoCommand", "reverseOligo");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
-
-
-                               
-                               
+/*\r
+ *  sffinfocommand.cpp\r
+ *  Mothur\r
+ *\r
+ *  Created by westcott on 7/7/10.\r
+ *  Copyright 2010 Schloss Lab. All rights reserved.\r
+ *\r
+ */\r
+\r
+#include "sffinfocommand.h"\r
+#include "endiannessmacros.h"\r
+#include "trimoligos.h"\r
+#include "sequence.hpp"\r
+#include "qualityscores.h"\r
+\r
+//**********************************************************************************************************************\r
+vector<string> SffInfoCommand::setParameters(){        /* Appends one CommandParameter per sffinfo option to the 'parameters' list and returns the option names in the order they are stored there. */\r
+       try {           /* NOTE(review): the meaning of the positional CommandParameter arguments is not visible here - confirm against its declaration before editing them */\r
+               CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff); /* only sff passes the extra trailing 'true'; presumably the "required" flag (help text says sff is required) - TODO confirm */\r
+        CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos); /* oligos and group share the "oligosGroup" value; presumably marks them as alternatives - TODO confirm */\r
+        CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);\r
+               CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);\r
+               CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt);\r
+               CommandParameter pflow("flow", "Boolean", "", "T", "", "", "","flow",false,false); parameters.push_back(pflow);\r
+               CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptrim);\r
+               CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);\r
+               CommandParameter pqfile("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqfile);\r
+        CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);\r
+               CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);\r
+        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);\r
+               CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);\r
+        CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);\r
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);\r
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);\r
+               /* collect just the .name of every entry currently in 'parameters' */\r
+               vector<string> myArray;\r
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }\r
+               return myArray;\r
+       }\r
+       catch(exception& e) { /* report through m->errorOut, then terminate the process */\r
+               m->errorOut(e, "SffInfoCommand", "setParameters");\r
+               exit(1);\r
+       }\r
+}\r
+//**********************************************************************************************************************\r
+string SffInfoCommand::getHelpString(){        /* Returns the usage text for the sffinfo command as one string, built from newline-terminated sentences. */\r
+       try {\r
+               string helpString = ""; /* each += below appends one sentence; the literals are user-facing text */\r
+               helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";\r
+               helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n";\r
+               helpString += "The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n";\r
+               helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n";\r
+               helpString += "The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n";\r
+        helpString += "The oligos parameter allows you to provide an oligos file to split your sff file into separate sff files by barcode. \n";\r
+        helpString += "The group parameter allows you to provide a group file to split your sff file into separate sff files by group. \n";\r
+        helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";\r
+               helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";\r
+               helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";\r
+        helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";\r
+               helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";\r
+               helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=True. \n";\r
+               helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n"; /* NOTE(review): this sentence gives sfftxt a True/False default while the next one says it takes a file name; setParameters registers it as a "String" - confirm both usages are intended */\r
+               helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";\r
+               helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n";\r
+               helpString += "The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n";\r
+               helpString += "Example sffinfo(sff=mySffFile.sff, trim=F).\n";\r
+               helpString += "Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n";\r
+               return helpString;\r
+       }\r
+       catch(exception& e) { /* report through m->errorOut, then terminate the process */\r
+               m->errorOut(e, "SffInfoCommand", "getHelpString");\r
+               exit(1);\r
+       }\r
+}\r
+\r
+//**********************************************************************************************************************\r
+string SffInfoCommand::getOutputPattern(string type) { /* Returns the file-name pattern string for the given output type ("fasta", "flow", "sfftxt", "sff", "qfile"); any other type logs an error, sets m->control_pressed and returns "". */\r
+    try {\r
+        string pattern = ""; /* stays empty when 'type' matches none of the branches below */\r
+        /* NOTE(review): the pattern syntax is interpreted elsewhere; presumably ',' joins pieces, '-' separates alternative patterns and [name] is a placeholder filled from a variables map - confirm against getOutputFileName */\r
+        if (type == "fasta")            {   pattern =  "[filename],fasta-[filename],[tag],fasta";   }\r
+        else if (type == "flow")    {   pattern =  "[filename],flow";   }\r
+        else if (type == "sfftxt")        {   pattern =  "[filename],sff.txt";   }\r
+        else if (type == "sff")        {   pattern =  "[filename],[group],sff";   }\r
+        else if (type == "qfile")       {   pattern =  "[filename],qual-[filename],[tag],qual";   }\r
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  } /* unknown type: no exception is thrown, the flag is raised instead */\r
+        \r
+        return pattern;\r
+    }\r
+    catch(exception& e) { /* report through m->errorOut, then terminate the process */\r
+        m->errorOut(e, "SffInfoCommand", "getOutputPattern");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+SffInfoCommand::SffInfoCommand(){      /* Default constructor: sets abort and calledHelp to true, registers the parameters, and creates an empty name list for each output type. */\r
+       try {\r
+               abort = true; calledHelp = true; /* both start true here; the SffInfoCommand(string option) constructor starts them false */\r
+               setParameters(); /* return value discarded; called for its effect of filling 'parameters' */\r
+               vector<string> tempOutNames; /* empty vector copied into every outputTypes entry below */\r
+               outputTypes["fasta"] = tempOutNames;\r
+               outputTypes["flow"] = tempOutNames;\r
+               outputTypes["sfftxt"] = tempOutNames;\r
+               outputTypes["qfile"] = tempOutNames;\r
+        outputTypes["sff"] = tempOutNames;\r
+       }\r
+       catch(exception& e) { /* report through m->errorOut, then terminate the process */\r
+               m->errorOut(e, "SffInfoCommand", "SffInfoCommand");\r
+               exit(1);\r
+       }\r
+}\r
+//**********************************************************************************************************************\r
+\r
+SffInfoCommand::SffInfoCommand(string option)  {\r
+       try {\r
+               abort = false; calledHelp = false;   \r
+               hasAccnos = false; hasOligos = false; hasGroup = false;\r
+        split = 1;\r
+               \r
+               //allow user to run help\r
+               if(option == "help") { help(); abort = true; calledHelp = true; }\r
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}\r
+               \r
+               else {\r
+                       //valid paramters for this command\r
+                       vector<string> myArray = setParameters();\r
+                       \r
+                       OptionParser parser(option);\r
+                       map<string, string> parameters = parser.getParameters();\r
+                       \r
+                       ValidParameters validParameter;\r
+                       //check to make sure all parameters are valid for command\r
+                       for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { \r
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }\r
+                       }\r
+                       \r
+                       //initialize outputTypes\r
+                       vector<string> tempOutNames;\r
+                       outputTypes["fasta"] = tempOutNames;\r
+                       outputTypes["flow"] = tempOutNames;\r
+                       outputTypes["sfftxt"] = tempOutNames;\r
+                       outputTypes["qfile"] = tempOutNames;\r
+            outputTypes["sff"] = tempOutNames;\r
+                       \r
+                       //if the user changes the output directory command factory will send this info to us in the output parameter \r
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }\r
+                       \r
+                       //if the user changes the input directory command factory will send this info to us in the output parameter \r
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }\r
+\r
+                       sffFilename = validParameter.validFile(parameters, "sff", false);\r
+                       if (sffFilename == "not found") { sffFilename = "";  }\r
+                       else { \r
+                               m->splitAtDash(sffFilename, filenames);\r
+                               \r
+                               //go through files and make sure they are good, if not, then disregard them\r
+                               for (int i = 0; i < filenames.size(); i++) {\r
+                                       bool ignore = false;\r
+                                       if (filenames[i] == "current") { \r
+                                               filenames[i] = m->getSFFFile(); \r
+                                               if (filenames[i] != "") {  m->mothurOut("Using " + filenames[i] + " as input file for the sff parameter where you had given current."); m->mothurOutEndLine(); }\r
+                                               else {  \r
+                                                       m->mothurOut("You have no current sfffile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
+                                                       //erase from file list\r
+                                                       filenames.erase(filenames.begin()+i);\r
+                                                       i--;\r
+                                               }\r
+                                       }\r
+                                       \r
+                                       if (!ignore) {\r
+                                               if (inputDir != "") {\r
+                                                       string path = m->hasPath(filenames[i]);\r
+                                                       //if the user has not given a path then, add inputdir. else leave path alone.\r
+                                                       if (path == "") {       filenames[i] = inputDir + filenames[i];         }\r
+                                               }\r
+               \r
+                                               ifstream in;\r
+                                               int ableToOpen = m->openInputFile(filenames[i], in, "noerror");\r
+                                       \r
+                                               //if you can't open it, try default location\r
+                                               if (ableToOpen == 1) {\r
+                                                       if (m->getDefaultPath() != "") { //default path is set\r
+                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(filenames[i]);\r
+                                                               m->mothurOut("Unable to open " + filenames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
+                                                               ifstream in2;\r
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+                                                               in2.close();\r
+                                                               filenames[i] = tryPath;\r
+                                                       }\r
+                                               }\r
+                                               \r
+                                               //if you can't open it, try default location\r
+                                               if (ableToOpen == 1) {\r
+                                                       if (m->getOutputDir() != "") { //default path is set\r
+                                                               string tryPath = m->getOutputDir() + m->getSimpleName(filenames[i]);\r
+                                                               m->mothurOut("Unable to open " + filenames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
+                                                               ifstream in2;\r
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+                                                               in2.close();\r
+                                                               filenames[i] = tryPath;\r
+                                                       }\r
+                                               }\r
+                                               \r
+                                               in.close();\r
+                                               \r
+                                               if (ableToOpen == 1) { \r
+                                                       m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
+                                                       //erase from file list\r
+                                                       filenames.erase(filenames.begin()+i);\r
+                                                       i--;\r
+                                               }else { m->setSFFFile(filenames[i]); }\r
+                                       }\r
+                               }\r
+                               \r
+                               //make sure there is at least one valid file left\r
+                               if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }\r
+                       }\r
+                       \r
+                       accnosName = validParameter.validFile(parameters, "accnos", false);\r
+                       if (accnosName == "not found") { accnosName = "";  }\r
+                       else { \r
+                               hasAccnos = true;\r
+                               m->splitAtDash(accnosName, accnosFileNames);\r
+                               \r
+                               //go through files and make sure they are good, if not, then disregard them\r
+                               for (int i = 0; i < accnosFileNames.size(); i++) {\r
+                                       bool ignore = false;\r
+                                       if (accnosFileNames[i] == "current") { \r
+                                               accnosFileNames[i] = m->getAccnosFile(); \r
+                                               if (accnosFileNames[i] != "") {  m->mothurOut("Using " + accnosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }\r
+                                               else {  \r
+                                                       m->mothurOut("You have no current accnosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
+                                                       //erase from file list\r
+                                                       accnosFileNames.erase(accnosFileNames.begin()+i);\r
+                                                       i--;\r
+                                               }\r
+                                       }\r
+                                       \r
+                                       if (!ignore) {\r
+                                       \r
+                                               if (inputDir != "") {\r
+                                                       string path = m->hasPath(accnosFileNames[i]);\r
+                                                       //if the user has not given a path then, add inputdir. else leave path alone.\r
+                                                       if (path == "") {       accnosFileNames[i] = inputDir + accnosFileNames[i];             }\r
+                                               }\r
+               \r
+                                               ifstream in;\r
+                                               int ableToOpen = m->openInputFile(accnosFileNames[i], in, "noerror");\r
+                                       \r
+                                               //if you can't open it, try default location\r
+                                               if (ableToOpen == 1) {\r
+                                                       if (m->getDefaultPath() != "") { //default path is set\r
+                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(accnosFileNames[i]);\r
+                                                               m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
+                                                               ifstream in2;\r
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+                                                               in2.close();\r
+                                                               accnosFileNames[i] = tryPath;\r
+                                                       }\r
+                                               }\r
+                                               //if you can't open it, try default location\r
+                                               if (ableToOpen == 1) {\r
+                                                       if (m->getOutputDir() != "") { //default path is set\r
+                                                               string tryPath = m->getOutputDir() + m->getSimpleName(accnosFileNames[i]);\r
+                                                               m->mothurOut("Unable to open " + accnosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
+                                                               ifstream in2;\r
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+                                                               in2.close();\r
+                                                               accnosFileNames[i] = tryPath;\r
+                                                       }\r
+                                               }\r
+                                               in.close();\r
+                                               \r
+                                               if (ableToOpen == 1) { \r
+                                                       m->mothurOut("Unable to open " + accnosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
+                                                       //erase from file list\r
+                                                       accnosFileNames.erase(accnosFileNames.begin()+i);\r
+                                                       i--;\r
+                                               }\r
+                                       }\r
+                               }\r
+                               \r
+                               //make sure there is at least one valid file left\r
+                               if (accnosFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }\r
+                       }\r
+            \r
+            oligosfile = validParameter.validFile(parameters, "oligos", false);\r
+                       if (oligosfile == "not found") { oligosfile = "";  }\r
+                       else { \r
+                               hasOligos = true;\r
+                               m->splitAtDash(oligosfile, oligosFileNames);\r
+                               \r
+                               //go through files and make sure they are good, if not, then disregard them\r
+                               for (int i = 0; i < oligosFileNames.size(); i++) {\r
+                                       bool ignore = false;\r
+                                       if (oligosFileNames[i] == "current") { \r
+                                               oligosFileNames[i] = m->getOligosFile(); \r
+                                               if (oligosFileNames[i] != "") {  m->mothurOut("Using " + oligosFileNames[i] + " as input file for the oligos parameter where you had given current."); m->mothurOutEndLine(); }\r
+                                               else {  \r
+                                                       m->mothurOut("You have no current oligosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
+                                                       //erase from file list\r
+                                                       oligosFileNames.erase(oligosFileNames.begin()+i);\r
+                                                       i--;\r
+                                               }\r
+                                       }\r
+                                       \r
+                                       if (!ignore) {\r
+                        \r
+                                               if (inputDir != "") {\r
+                                                       string path = m->hasPath(oligosFileNames[i]);\r
+                                                       //if the user has not given a path then, add inputdir. else leave path alone.\r
+                                                       if (path == "") {       oligosFileNames[i] = inputDir + oligosFileNames[i];             }\r
+                                               }\r
+                        \r
+                                               ifstream in;\r
+                                               int ableToOpen = m->openInputFile(oligosFileNames[i], in, "noerror");\r
+                        \r
+                                               //if you can't open it, try default location\r
+                                               if (ableToOpen == 1) {\r
+                                                       if (m->getDefaultPath() != "") { //default path is set\r
+                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(oligosFileNames[i]);\r
+                                                               m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
+                                                               ifstream in2;\r
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+                                                               in2.close();\r
+                                                               oligosFileNames[i] = tryPath;\r
+                                                       }\r
+                                               }\r
+                                               //if you can't open it, try default location\r
+                                               if (ableToOpen == 1) {\r
+                                                       if (m->getOutputDir() != "") { //default path is set\r
+                                                               string tryPath = m->getOutputDir() + m->getSimpleName(oligosFileNames[i]);\r
+                                                               m->mothurOut("Unable to open " + oligosFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
+                                                               ifstream in2;\r
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+                                                               in2.close();\r
+                                                               oligosFileNames[i] = tryPath;\r
+                                                       }\r
+                                               }\r
+                                               in.close();\r
+                                               \r
+                                               if (ableToOpen == 1) { \r
+                                                       m->mothurOut("Unable to open " + oligosFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
+                                                       //erase from file list\r
+                                                       oligosFileNames.erase(oligosFileNames.begin()+i);\r
+                                                       i--;\r
+                                               }\r
+                                       }\r
+                               }\r
+                               \r
+                               //make sure there is at least one valid file left\r
+                               if (oligosFileNames.size() == 0) { m->mothurOut("no valid oligos files."); m->mothurOutEndLine(); abort = true; }\r
+                       }\r
+            \r
+            groupfile = validParameter.validFile(parameters, "group", false);\r
+                       if (groupfile == "not found") { groupfile = "";  }\r
+                       else {\r
+                               hasGroup = true;\r
+                               m->splitAtDash(groupfile, groupFileNames);\r
+                               \r
+                               //go through files and make sure they are good, if not, then disregard them\r
+                               for (int i = 0; i < groupFileNames.size(); i++) {\r
+                                       bool ignore = false;\r
+                                       if (groupFileNames[i] == "current") {\r
+                                               groupFileNames[i] = m->getGroupFile();\r
+                                               if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }\r
+                                               else {\r
+                                                       m->mothurOut("You have no current group file, ignoring current."); m->mothurOutEndLine(); ignore=true;\r
+                                                       //erase from file list\r
+                                                       groupFileNames.erase(groupFileNames.begin()+i);\r
+                                                       i--;\r
+                                               }\r
+                                       }\r
+                                       \r
+                                       if (!ignore) {\r
+                        \r
+                                               if (inputDir != "") {\r
+                                                       string path = m->hasPath(groupFileNames[i]);\r
+                                                       //if the user has not given a path then, add inputdir. else leave path alone.\r
+                                                       if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }\r
+                                               }\r
+                        \r
+                                               ifstream in;\r
+                                               int ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");\r
+                        \r
+                                               //if you can't open it, try default location\r
+                                               if (ableToOpen == 1) {\r
+                                                       if (m->getDefaultPath() != "") { //default path is set\r
+                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);\r
+                                                               m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
+                                                               ifstream in2;\r
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+                                                               in2.close();\r
+                                                               groupFileNames[i] = tryPath;\r
+                                                       }\r
+                                               }\r
+                                               //if you can't open it, try default location\r
+                                               if (ableToOpen == 1) {\r
+                                                       if (m->getOutputDir() != "") { //default path is set\r
+                                                               string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);\r
+                                                               m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
+                                                               ifstream in2;\r
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+                                                               in2.close();\r
+                                                               groupFileNames[i] = tryPath;\r
+                                                       }\r
+                                               }\r
+                                               in.close();\r
+                                               \r
+                                               if (ableToOpen == 1) {\r
+                                                       m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
+                                                       //erase from file list\r
+                                                       groupFileNames.erase(groupFileNames.begin()+i);\r
+                                                       i--;\r
+                                               }\r
+                                       }\r
+                               }\r
+                               \r
+                               //make sure there is at least one valid file left\r
+                               if (groupFileNames.size() == 0) { m->mothurOut("no valid group files."); m->mothurOutEndLine(); abort = true; }\r
+                       }\r
+\r
+                       if (hasGroup) {\r
+                split = 2;\r
+                               if (groupFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a group file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
+                       }\r
+            \r
+            if (hasOligos) {\r
+                split = 2;\r
+                               if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide an oligos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
+                       }\r
+            \r
+            if (hasGroup && hasOligos) { m->mothurOut("You must enter ONLY ONE of the following: oligos or group."); m->mothurOutEndLine(); abort = true;}\r
+            \r
+                       if (hasAccnos) {\r
+                               if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
+                       }\r
+                       \r
+                       string temp = validParameter.validFile(parameters, "qfile", false);                     if (temp == "not found"){       temp = "T";                             }\r
+                       qual = m->isTrue(temp); \r
+                       \r
+                       temp = validParameter.validFile(parameters, "fasta", false);                            if (temp == "not found"){       temp = "T";                             }\r
+                       fasta = m->isTrue(temp); \r
+                       \r
+                       temp = validParameter.validFile(parameters, "flow", false);                                     if (temp == "not found"){       temp = "T";                             }\r
+                       flow = m->isTrue(temp); \r
+                       \r
+                       temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }\r
+                       trim = m->isTrue(temp); \r
+            \r
+            temp = validParameter.validFile(parameters, "bdiffs", false);              if (temp == "not found") { temp = "0"; }\r
+                       m->mothurConvert(temp, bdiffs);\r
+                       \r
+                       temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }\r
+                       m->mothurConvert(temp, pdiffs);\r
+            \r
+            temp = validParameter.validFile(parameters, "ldiffs", false);              if (temp == "not found") { temp = "0"; }\r
+                       m->mothurConvert(temp, ldiffs);\r
+            \r
+            temp = validParameter.validFile(parameters, "sdiffs", false);              if (temp == "not found") { temp = "0"; }\r
+                       m->mothurConvert(temp, sdiffs);\r
+                       \r
+                       temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }\r
+                       m->mothurConvert(temp, tdiffs);\r
+                       \r
+                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }\r
+            \r
+                       temp = validParameter.validFile(parameters, "sfftxt", false);                           \r
+                       if (temp == "not found")        {       temp = "F";      sfftxt = false; sfftxtFilename = "";           }\r
+                       else if (m->isTrue(temp))       {       sfftxt = true;          sfftxtFilename = "";                            }\r
+                       else {\r
+                               //you are a filename\r
+                               if (inputDir != "") {\r
+                                       map<string,string>::iterator it = parameters.find("sfftxt");\r
+                                       //user has given a template file\r
+                                       if(it != parameters.end()){ \r
+                                               string path = m->hasPath(it->second);\r
+                                               //if the user has not given a path then, add inputdir. else leave path alone.\r
+                                               if (path == "") {       parameters["sfftxt"] = inputDir + it->second;           }\r
+                                       }\r
+                               }\r
+                               \r
+                               sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true);\r
+                               if (sfftxtFilename == "not found") { sfftxtFilename = "";  }\r
+                               else if (sfftxtFilename == "not open") { sfftxtFilename = "";  }\r
+                       }\r
+                       \r
+                       if ((sfftxtFilename == "") && (filenames.size() == 0)) {  \r
+                               //if there is a current sff file, use it\r
+                               string filename = m->getSFFFile(); \r
+                               if (filename != "") { filenames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the sff parameter."); m->mothurOutEndLine(); }\r
+                               else {  m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true;  }\r
+                       }\r
+            \r
+            \r
+               }\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "SffInfoCommand");\r
+               exit(1);\r
+       }\r
+}\r
+//**********************************************************************************************************************\r
+//runs the command: extracts every sff file in filenames, parses the sfftxt file if one was given,\r
+//sets the first fasta, qfile and flow outputs as the current files and lists the output file names.\r
+//returns 0 on success, after help was called or when control_pressed is set (outputs are removed first);\r
+//returns 2 when abort was set for any other reason.\r
+int SffInfoCommand::execute(){\r
+       try {\r
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }\r
+\r
+               for (int s = 0; s < filenames.size(); s++) {\r
+\r
+                       //user interrupted, clean up whatever has been written so far\r
+                       if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }\r
+\r
+                       int start = time(NULL);\r
+\r
+                       filenames[s] = m->getFullPathName(filenames[s]);\r
+                       m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();\r
+\r
+                       //the accnos, oligos and group lists are parallel to filenames\r
+                       string accnos = "";\r
+                       if (hasAccnos) { accnos = accnosFileNames[s]; }\r
+\r
+                       //extractSffInfo takes the oligos file or the group file through the same argument\r
+                       string oligos = "";\r
+                       if (hasOligos) { oligos = oligosFileNames[s]; }\r
+                       if (hasGroup) { oligos = groupFileNames[s]; }\r
+\r
+                       int numReads = extractSffInfo(filenames[s], accnos, oligos);\r
+\r
+                       //end the line so the report for the next sff file does not run on from this one\r
+                       m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + "."); m->mothurOutEndLine();\r
+               }\r
+\r
+               if (sfftxtFilename != "") {  parseSffTxt(); }\r
+\r
+               if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }\r
+\r
+               //set fasta file as new current fastafile\r
+               string current = "";\r
+               itTypes = outputTypes.find("fasta");\r
+               if (itTypes != outputTypes.end()) {\r
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }\r
+               }\r
+\r
+               //set qual file as new current qualfile\r
+               itTypes = outputTypes.find("qfile");\r
+               if (itTypes != outputTypes.end()) {\r
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }\r
+               }\r
+\r
+               //set flow file as new current flowfile\r
+               itTypes = outputTypes.find("flow");\r
+               if (itTypes != outputTypes.end()) {\r
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFlowFile(current); }\r
+               }\r
+\r
+               //report output filenames\r
+               m->mothurOutEndLine();\r
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();\r
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }\r
+               m->mothurOutEndLine();\r
+\r
+               return 0;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "execute");\r
+               exit(1);\r
+       }\r
+}\r
+//**********************************************************************************************************************\r
+//extracts the reads of one sff file (input) into the requested sfftxt, fasta, qual and flow files.\r
+//accnos - file handed to readAccnosFile to select reads, "" clears seqNames so every read is printed.\r
+//oligos - file handed to readOligos (hasOligos) or readGroup (hasGroup) before the reads are processed.\r
+//returns the number of reads processed; 0 if the common header is rejected or control_pressed is set.\r
+int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){\r
+       try {\r
+               currentFileName = input;\r
+               if (outputDir == "") {  outputDir += m->hasPath(input); }\r
+\r
+               if (accnos != "")       {  readAccnosFile(accnos);  }\r
+               else                    {  seqNames.clear();        }\r
+\r
+               if (hasOligos)   {   readOligos(oligos);    split = 2;      }\r
+               if (hasGroup)    {   readGroup(oligos);     split = 2;      }\r
+\r
+               ofstream outSfftxt, outFasta, outQual, outFlow;\r
+               string outFastaFileName, outQualFileName;\r
+               string rootName = outputDir + m->getRootName(m->getSimpleName(input));\r
+               if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; }\r
+\r
+               //the "raw" tag is only added for the fasta and qual names, and only when not trimming\r
+               map<string, string> variables;\r
+               variables["[filename]"] = rootName;\r
+               string sfftxtFileName = getOutputFileName("sfftxt",variables);\r
+               string outFlowFileName = getOutputFileName("flow",variables);\r
+               if (!trim) { variables["[tag]"] = "raw"; }\r
+               outFastaFileName = getOutputFileName("fasta",variables);\r
+               outQualFileName = getOutputFileName("qfile",variables);\r
+\r
+               if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName);  outputTypes["sfftxt"].push_back(sfftxtFileName); }\r
+               if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }\r
+               if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }\r
+               if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }\r
+\r
+               ifstream in;\r
+               m->openInputFileBinary(input, in);\r
+\r
+               CommonHeader header;\r
+               readCommonHeader(in, header);\r
+\r
+               int count = 0;\r
+\r
+               //check magic number and version\r
+               if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }\r
+               if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }\r
+\r
+               //print common header\r
+               if (sfftxt) {   printCommonHeader(outSfftxt, header);           }\r
+               if (flow)       {       outFlow << header.numFlowsPerRead << endl;      }\r
+\r
+               //read through the sff file\r
+               while (!in.eof()) {\r
+\r
+                       bool print = true;\r
+\r
+                       //read data\r
+                       seqRead read;  Header readheader;\r
+                       readSeqData(in, read, header.numFlowsPerRead, readheader);\r
+\r
+                       //stop reading as soon as a read fails the sanity check\r
+                       bool okay = sanityCheck(readheader, read);\r
+                       if (!okay) { break; }\r
+\r
+                       //if you have provided an accosfile and this seq is not in it, then dont print\r
+                       if (seqNames.size() != 0) {   if (seqNames.count(readheader.name) == 0) { print = false; }  }\r
+\r
+                       //print\r
+                       if (print) {\r
+                               if (sfftxt) { printHeader(outSfftxt, readheader); printSffTxtSeqData(outSfftxt, read, readheader); }\r
+                               if (fasta)      {       printFastaSeqData(outFasta, read, readheader);  }\r
+                               if (qual)       {       printQualSeqData(outQual, read, readheader);    }\r
+                               if (flow)       {       printFlowSeqData(outFlow, read, readheader);    }\r
+                       }\r
+\r
+                       count++;\r
+\r
+                       //report progress, count already includes the read that was just processed\r
+                       if(count % 10000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine();           }\r
+\r
+                       if (m->control_pressed) { count = 0; break;   }\r
+\r
+                       if (count >= header.numReads) { break; }\r
+               }\r
+\r
+               //report the final total unless the loop just reported it\r
+               if (!m->control_pressed) {   if((count) % 10000 != 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }\r
+\r
+               in.close();\r
+\r
+               if (sfftxt) {  outSfftxt.close();       }\r
+               if (fasta)      {  outFasta.close();    }\r
+               if (qual)       {  outQual.close();             }\r
+               if (flow)       {  outFlow.close();             }\r
+\r
+               if (split > 1) {\r
+                       //create new common headers for each file with the correct number of reads\r
+                       adjustCommonHeader(header);\r
+\r
+                       if (hasGroup) { delete groupMap; }\r
+\r
+                       //remove the split files that are blank, together with their header files\r
+                       set<string> namesToRemove;\r
+                       for(int i=0;i<filehandles.size();i++){\r
+                               //bound j by this row, not by row 0, so the index is always valid\r
+                               for(int j=0;j<filehandles[i].size();j++){\r
+                                       if (filehandles[i][j] != "") {\r
+                                               if (namesToRemove.count(filehandles[i][j]) == 0) {\r
+                                                       if(m->isBlank(filehandles[i][j])){\r
+                                                               m->mothurRemove(filehandles[i][j]);\r
+                                                               m->mothurRemove(filehandlesHeaders[i][j]);\r
+                                                               namesToRemove.insert(filehandles[i][j]);\r
+                                                       }\r
+                                               }\r
+                                       }\r
+                               }\r
+                       }\r
+\r
+                       //append new header to reads\r
+                       for (int i = 0; i < filehandles.size(); i++) {\r
+                               for (int j = 0; j < filehandles[i].size(); j++) {\r
+                                       m->appendBinaryFiles(filehandles[i][j], filehandlesHeaders[i][j]);\r
+                                       m->renameFile(filehandlesHeaders[i][j], filehandles[i][j]);\r
+                                       m->mothurRemove(filehandlesHeaders[i][j]);\r
+                                       if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j]); }\r
+                               }\r
+                       }\r
+\r
+                       //remove names for outputFileNames, just cleans up the output\r
+                       for(int i = 0; i < outputNames.size(); i++) {\r
+                               if (namesToRemove.count(outputNames[i]) != 0) {\r
+                                       outputNames.erase(outputNames.begin()+i);\r
+                                       i--;\r
+                               }\r
+                       }\r
+\r
+                       //only keep the no match file when it holds something\r
+                       if(m->isBlank(noMatchFile)){  m->mothurRemove(noMatchFile); }\r
+                       else { outputNames.push_back(noMatchFile); outputTypes["sff"].push_back(noMatchFile); }\r
+               }\r
+\r
+               return count;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "extractSffInfo");\r
+               exit(1);\r
+       }\r
+}\r
+//**********************************************************************************************************************\r
+int SffInfoCommand::readCommonHeader(ifstream& in, CommonHeader& header){\r
+       try {\r
+        \r
+               if (!in.eof()) {\r
+\r
+                       //read magic number\r
+                       char buffer[4];\r
+                       in.read(buffer, 4);\r
+                       header.magicNumber = be_int4(*(unsigned int *)(&buffer));\r
+            \r
+                       //read version\r
+                       char buffer9[4];\r
+                       in.read(buffer9, 4);\r
+                       header.version = "";\r
+                       for (int i = 0; i < 4; i++) {  header.version += toString((int)(buffer9[i]));  }\r
+    \r
+                       //read offset\r
+                       char buffer2 [8];\r
+                       in.read(buffer2, 8);\r
+                       header.indexOffset =  be_int8(*(unsigned long long *)(&buffer2));\r
+                       \r
+                       //read index length\r
+                       char buffer3 [4];\r
+                       in.read(buffer3, 4);\r
+                       header.indexLength =  be_int4(*(unsigned int *)(&buffer3));\r
+            \r
+                       //read num reads\r
+                       char buffer4 [4];\r
+                       in.read(buffer4, 4);\r
+                       header.numReads =  be_int4(*(unsigned int *)(&buffer4));\r
+            \r
+            if (m->debug) { m->mothurOut("[DEBUG]: numReads = " + toString(header.numReads) + "\n"); }\r
+                               \r
+                       //read header length\r
+                       char buffer5 [2];\r
+                       in.read(buffer5, 2);\r
+                       header.headerLength =  be_int2(*(unsigned short *)(&buffer5));\r
+                                       \r
+                       //read key length\r
+                       char buffer6 [2];\r
+                       in.read(buffer6, 2);\r
+                       header.keyLength = be_int2(*(unsigned short *)(&buffer6));\r
+                       \r
+                       //read number of flow reads\r
+                       char buffer7 [2];\r
+                       in.read(buffer7, 2);\r
+                       header.numFlowsPerRead =  be_int2(*(unsigned short *)(&buffer7));\r
+                               \r
+                       //read format code\r
+                       char buffer8 [1];\r
+                       in.read(buffer8, 1);\r
+                       header.flogramFormatCode = (int)(buffer8[0]);\r
+                       \r
+                       //read flow chars\r
+                       char* tempBuffer = new char[header.numFlowsPerRead];\r
+                       in.read(&(*tempBuffer), header.numFlowsPerRead); \r
+                       header.flowChars = tempBuffer;\r
+                       if (header.flowChars.length() > header.numFlowsPerRead) { header.flowChars = header.flowChars.substr(0, header.numFlowsPerRead);  }\r
+                       delete[] tempBuffer;\r
+                       \r
+                       //read key\r
+                       char* tempBuffer2 = new char[header.keyLength];\r
+                       in.read(&(*tempBuffer2), header.keyLength);\r
+                       header.keySequence = tempBuffer2;\r
+                       if (header.keySequence.length() > header.keyLength) { header.keySequence = header.keySequence.substr(0, header.keyLength);  }\r
+                       delete[] tempBuffer2;\r
+                       \r
+                       /* Pad to 8 chars */\r
+                       unsigned long long spotInFile = in.tellg();\r
+                       unsigned long long spot = (spotInFile + 7)& ~7;  // ~ inverts\r
+                       in.seekg(spot);\r
+            \r
+        }else{\r
+                       m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();\r
+               }\r
+        \r
+               return 0;\r
+        \r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "readCommonHeader");\r
+               exit(1);\r
+       }\r
+}\r
+//**********************************************************************************************************************\r
+// Appends `length` bytes of `data` to every file named in the two-level table `files`.\r
+// Templated on the manager and table types so the helper relies only on the calls it makes.\r
+template<class Manager, class FileTable>\r
+static void sffAppendToHeaderFiles(Manager* manager, FileTable& files, const char* data, streamsize length) {\r
+    for (int i = 0; i < files.size(); i++) {\r
+        for (int j = 0; j < files[i].size(); j++) {\r
+            ofstream out;\r
+            manager->openOutputFileBinaryAppend(files[i][j], out);\r
+            out.write(data, length);\r
+            out.close();\r
+        }\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+// Reads `numBytes` from `in` and copies the bytes actually read (in.gcount()) to every header file, then to `noMatchOut`.\r
+template<class Manager, class FileTable>\r
+static void sffCopyHeaderField(Manager* manager, ifstream& in, FileTable& files, ofstream& noMatchOut, int numBytes) {\r
+    char* field = new char[numBytes];\r
+    in.read(field, numBytes);\r
+    sffAppendToHeaderFiles(manager, files, field, in.gcount());\r
+    noMatchOut.write(field, in.gcount());\r
+    delete[] field;\r
+}\r
+//**********************************************************************************************************************\r
+// Splits the low 32 bits of `count` into four bytes: most significant byte first when `bigEndianHost` is true,\r
+// least significant byte first otherwise.\r
+static void sffPackReadCount(unsigned int count, bool bigEndianHost, char* bytes) {\r
+    for (int k = 0; k < 4; k++) {\r
+        int shift = bigEndianHost ? (24 - 8 * k) : (8 * k);\r
+        bytes[k] = (count >> shift) & 0xFF;\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+// Writes an adjusted copy of the common header of currentFileName to every file in filehandlesHeaders and to a\r
+// temporary no-match header file. Fields are copied through unchanged except:\r
+//   - index offset (8 bytes) and index length (4 bytes) are written as zero,\r
+//   - the number of reads is replaced by numSplitReads[i][j] (per file) or numNoMatch (no-match file),\r
+//   - the padding up to the next 8-byte boundary is written as zero bytes.\r
+// Afterwards the temporary header file is combined with noMatchFile and renamed to noMatchFile.\r
+// `header` supplies numFlowsPerRead and keyLength, the sizes of the two variable-length fields.\r
+// Always returns 0; exceptions are reported through m->errorOut and terminate the program.\r
+int SffInfoCommand::adjustCommonHeader(CommonHeader header){\r
+    try {\r
+        // NOTE(review): the byte order used for the read counts below depends on this host check - confirm it\r
+        // matches how readers decode the number-of-reads field.\r
+        bool bigEndianHost = (m->findEdianness() == "BIG_ENDIAN");\r
+        ifstream in;\r
+        m->openInputFileBinary(currentFileName, in);\r
+\r
+        ofstream outNoMatchHeader;\r
+        string tempNoHeader = "tempNoMatchHeader";\r
+        m->openOutputFileBinary(tempNoHeader, outNoMatchHeader);\r
+\r
+        // scratch space for fields that are read only to advance the input, and zeros for fields that are blanked\r
+        char discarded[8];\r
+        const char zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };\r
+\r
+        //magic number\r
+        sffCopyHeaderField(m, in, filehandlesHeaders, outNoMatchHeader, 4);\r
+\r
+        //version\r
+        sffCopyHeaderField(m, in, filehandlesHeaders, outNoMatchHeader, 4);\r
+\r
+        //offset - skip the original value, write zero\r
+        in.read(discarded, 8);\r
+        sffAppendToHeaderFiles(m, filehandlesHeaders, zeros, 8);\r
+        outNoMatchHeader.write(zeros, 8);\r
+\r
+        //read index length - skip the original value, write zero\r
+        in.read(discarded, 4);\r
+        sffAppendToHeaderFiles(m, filehandlesHeaders, zeros, 4);\r
+        outNoMatchHeader.write(zeros, 4);\r
+\r
+        //change num reads - skip the original value, write each file's own count\r
+        in.read(discarded, 4);\r
+        for (int i = 0; i < filehandlesHeaders.size(); i++) {\r
+            for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
+                char countBytes[4];\r
+                sffPackReadCount(numSplitReads[i][j], bigEndianHost, countBytes);\r
+                ofstream out;\r
+                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+                out.write(countBytes, 4);\r
+                out.close();\r
+            }\r
+        }\r
+        char noMatchCountBytes[4];\r
+        sffPackReadCount(numNoMatch, bigEndianHost, noMatchCountBytes);\r
+        outNoMatchHeader.write(noMatchCountBytes, 4);\r
+\r
+        //read header length\r
+        sffCopyHeaderField(m, in, filehandlesHeaders, outNoMatchHeader, 2);\r
+\r
+        //read key length\r
+        sffCopyHeaderField(m, in, filehandlesHeaders, outNoMatchHeader, 2);\r
+\r
+        //read number of flow reads\r
+        sffCopyHeaderField(m, in, filehandlesHeaders, outNoMatchHeader, 2);\r
+\r
+        //read format code\r
+        sffCopyHeaderField(m, in, filehandlesHeaders, outNoMatchHeader, 1);\r
+\r
+        //read flow chars\r
+        sffCopyHeaderField(m, in, filehandlesHeaders, outNoMatchHeader, header.numFlowsPerRead);\r
+\r
+        //read key\r
+        sffCopyHeaderField(m, in, filehandlesHeaders, outNoMatchHeader, header.keyLength);\r
+\r
+        /* Pad to 8 chars */\r
+        unsigned long long spotInFile = in.tellg();\r
+        unsigned long long spot = (spotInFile + 7)& ~7;  // round up to the next multiple of 8\r
+        in.seekg(spot);\r
+\r
+        // spot - spotInFile is always in [0, 7], so the 8 zero bytes above are enough;\r
+        // writing zeros (rather than an uninitialized buffer) keeps the output deterministic\r
+        streamsize padLength = spot - spotInFile;\r
+        sffAppendToHeaderFiles(m, filehandlesHeaders, zeros, padLength);\r
+        outNoMatchHeader.write(zeros, padLength);\r
+        outNoMatchHeader.close();\r
+        in.close();\r
+\r
+        // presumably appends the reads already collected in noMatchFile after the new header, then swaps the\r
+        // combined file into place - TODO confirm the argument order of appendBinaryFiles\r
+        m->appendBinaryFiles(noMatchFile, tempNoHeader);\r
+        m->renameFile(tempNoHeader, noMatchFile);\r
+        m->mothurRemove(tempNoHeader);\r
+\r
+        return 0;\r
+\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "adjustCommonHeader");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+// Reads one read record from `in`: the read header (into `header`) followed by the flowgram, flow indexes, bases\r
+// and quality scores (into `read`). `numFlowReads` is the number of flowgram values per read.\r
+// When split > 1 the record's raw bytes are also appended to the output file selected by findGroup, or to\r
+// noMatchFile when the read is scrapped, and the matching counter is incremented.\r
+// Returns true when `in` is at end-of-file after the read, false otherwise.\r
+bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header){\r
+    try {\r
+        // start of this record, needed to copy it verbatim when splitting\r
+        unsigned long long startSpotInFile = in.tellg();\r
+        if (!in.eof()) {\r
+\r
+            /*****************************************/\r
+            //read header\r
+\r
+            //read header length\r
+            char buffer [2];\r
+            in.read(buffer, 2);\r
+            header.headerLength = be_int2(*(unsigned short *)(&buffer));\r
+\r
+            //read name length\r
+            char buffer2 [2];\r
+            in.read(buffer2, 2);\r
+            header.nameLength = be_int2(*(unsigned short *)(&buffer2));\r
+\r
+            //read num bases\r
+            char buffer3 [4];\r
+            in.read(buffer3, 4);\r
+            header.numBases =  be_int4(*(unsigned int *)(&buffer3));\r
+\r
+            //read clip qual left\r
+            char buffer4 [2];\r
+            in.read(buffer4, 2);\r
+            header.clipQualLeft =  be_int2(*(unsigned short *)(&buffer4));\r
+            // NOTE(review): the value just parsed is overridden with a fixed 5 - presumably to always clip a\r
+            // 4-base key; confirm this is intentional and not leftover debugging.\r
+            header.clipQualLeft = 5;\r
+\r
+            //read clip qual right\r
+            char buffer5 [2];\r
+            in.read(buffer5, 2);\r
+            header.clipQualRight =  be_int2(*(unsigned short *)(&buffer5));\r
+\r
+            //read clipAdapterLeft\r
+            char buffer6 [2];\r
+            in.read(buffer6, 2);\r
+            header.clipAdapterLeft = be_int2(*(unsigned short *)(&buffer6));\r
+\r
+            //read clipAdapterRight\r
+            char buffer7 [2];\r
+            in.read(buffer7, 2);\r
+            header.clipAdapterRight = be_int2(*(unsigned short *)(&buffer7));\r
+\r
+            //read name\r
+            // one extra zero-initialized byte guarantees a terminator, so the string assignment below can never\r
+            // read past the allocation and the name is at most nameLength characters long\r
+            char* tempBuffer = new char[header.nameLength + 1]();\r
+            in.read(tempBuffer, header.nameLength);\r
+            header.name = tempBuffer;\r
+            delete[] tempBuffer;\r
+\r
+            //extract info from name\r
+            decodeName(header.timestamp, header.region, header.xy, header.name);\r
+\r
+            /* Pad to 8 chars */\r
+            unsigned long long spotInFile = in.tellg();\r
+            unsigned long long spot = (spotInFile + 7)& ~7;  // round up to the next multiple of 8\r
+            in.seekg(spot);\r
+\r
+            /*****************************************/\r
+            //sequence read\r
+\r
+            //read flowgram\r
+            read.flowgram.resize(numFlowReads);\r
+            for (int i = 0; i < numFlowReads; i++) {\r
+                char buffer [2];\r
+                in.read(buffer, 2);\r
+                read.flowgram[i] = be_int2(*(unsigned short *)(&buffer));\r
+            }\r
+\r
+            //read flowIndex\r
+            read.flowIndex.resize(header.numBases);\r
+            for (int i = 0; i < header.numBases; i++) {\r
+                char temp[1];\r
+                in.read(temp, 1);\r
+                read.flowIndex[i] = be_int1(*(unsigned char *)(&temp));\r
+            }\r
+\r
+            //read bases\r
+            // same terminator guarantee as for the name above\r
+            char* tempBuffer6 = new char[header.numBases + 1]();\r
+            in.read(tempBuffer6, header.numBases);\r
+            read.bases = tempBuffer6;\r
+            delete[] tempBuffer6;\r
+\r
+            //read qual scores\r
+            read.qualScores.resize(header.numBases);\r
+            for (int i = 0; i < header.numBases; i++) {\r
+                char temp[1];\r
+                in.read(temp, 1);\r
+                read.qualScores[i] = be_int1(*(unsigned char *)(&temp));\r
+            }\r
+\r
+            /* Pad to 8 chars */\r
+            spotInFile = in.tellg();\r
+            spot = (spotInFile + 7)& ~7;\r
+            in.seekg(spot);\r
+\r
+            if (split > 1) {\r
+\r
+                // defaults: first barcode/primer slot, and "scrap" unless a findGroup call says otherwise, so\r
+                // nothing below is used uninitialized\r
+                int barcodeIndex = 0;\r
+                int primerIndex = 0;\r
+                int trashCodeLength = 1;\r
+\r
+                if (hasOligos)      {  trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex);                }\r
+                else if (hasGroup)  {  trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, "groupMode");   }\r
+                else {  m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }\r
+\r
+                // re-read the whole record (header + data + padding) as raw bytes through a second stream\r
+                char * mybuffer;\r
+                mybuffer = new char [spot-startSpotInFile];\r
+\r
+                ifstream in2;\r
+                m->openInputFileBinary(currentFileName, in2);\r
+                in2.seekg(startSpotInFile);\r
+                in2.read(mybuffer,spot-startSpotInFile);\r
+\r
+                if(trashCodeLength == 0){\r
+                    ofstream out;\r
+                    m->openOutputFileBinaryAppend(filehandles[barcodeIndex][primerIndex], out);\r
+                    out.write(mybuffer, in2.gcount());\r
+                    out.close();\r
+                    numSplitReads[barcodeIndex][primerIndex]++;\r
+                }\r
+                else{\r
+                    ofstream out;\r
+                    m->openOutputFileBinaryAppend(noMatchFile, out);\r
+                    out.write(mybuffer, in2.gcount());\r
+                    out.close();\r
+                    numNoMatch++;\r
+                }\r
+                delete[] mybuffer;\r
+                in2.close();\r
+            }\r
+\r
+        }else{\r
+            m->mothurOut("Error reading."); m->mothurOutEndLine();\r
+        }\r
+\r
+        if (in.eof()) {  return true; }\r
+\r
+        return false;\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "readSeqData");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+// Decides which barcode/primer group a read belongs to by stripping linker, barcode, spacer, forward primer and\r
+// reverse primer from its (optionally quality-clipped) bases.\r
+// `barcode` and `primer` are passed to stripBarcode / stripForward, which report the matched indexes through them.\r
+// Returns the number of trash codes collected; 0 means the read was matched, anything else means scrap it.\r
+int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer) {\r
+    try {\r
+        //find group read belongs to\r
+        TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);\r
+\r
+        int success = 1;\r
+        string trashCode = "";\r
+        int currentSeqsDiffs = 0;\r
+\r
+        string seq = read.bases;\r
+        int numBases = seq.length();\r
+\r
+        // 0-based [keepStart, keepEnd) range of bases to keep, clamped to the bases actually read so that a\r
+        // short or corrupted read can neither make substr throw nor index past the end of seq\r
+        int keepStart = header.clipQualLeft - 1;\r
+        if (keepStart < 0)        { keepStart = 0;        }\r
+        if (keepStart > numBases) { keepStart = numBases; }\r
+\r
+        int endValue = header.clipQualRight;\r
+        if (endValue == 0) { endValue = numBases; }  // 0 means "no right clip"\r
+        int keepEnd = endValue - 1;\r
+        if (keepEnd < 0)        { keepEnd = 0;        }\r
+        if (keepEnd > numBases) { keepEnd = numBases; }\r
+\r
+        if (trim) {\r
+            if(header.clipQualRight < header.clipQualLeft){\r
+                if (header.clipQualRight == 0) { //don't trim right\r
+                    seq = seq.substr(keepStart);\r
+                }else {\r
+                    seq = "NNNN";\r
+                }\r
+            }\r
+            else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){\r
+                // equals clipQualRight-clipQualLeft bases whenever both positions lie inside the read\r
+                seq = seq.substr(keepStart, keepEnd - keepStart);\r
+            }\r
+            else {\r
+                seq = seq.substr(keepStart);\r
+            }\r
+        }else{\r
+            //if you wanted the sfftxt then you already converted the bases to the right case\r
+            if (!sfftxt) {\r
+                //make the bases you want to clip lowercase and the bases you want to keep upper case\r
+                for (int i = 0; i < keepStart; i++)         { seq[i] = tolower(seq[i]);  }\r
+                for (int i = keepStart; i < keepEnd; i++)   { seq[i] = toupper(seq[i]);  }\r
+                for (int i = keepEnd; i < numBases; i++)    { seq[i] = tolower(seq[i]);  }\r
+            }\r
+        }\r
+\r
+        Sequence currSeq(header.name, seq);\r
+        QualityScores currQual;\r
+\r
+        if(numLinkers != 0){\r
+            success = trimOligos.stripLinker(currSeq, currQual);\r
+            if(success > ldiffs)    { trashCode += 'k'; }\r
+            else{ currentSeqsDiffs += success;  }\r
+        }\r
+\r
+        if(barcodes.size() != 0){\r
+            success = trimOligos.stripBarcode(currSeq, currQual, barcode);\r
+            if(success > bdiffs)    { trashCode += 'b'; }\r
+            else{ currentSeqsDiffs += success;  }\r
+        }\r
+\r
+        if(numSpacers != 0){\r
+            success = trimOligos.stripSpacer(currSeq, currQual);\r
+            if(success > sdiffs)    { trashCode += 's'; }\r
+            else{ currentSeqsDiffs += success;  }\r
+        }\r
+\r
+        if(numFPrimers != 0){\r
+            success = trimOligos.stripForward(currSeq, currQual, primer, true);\r
+            if(success > pdiffs)    { trashCode += 'f'; }\r
+            else{ currentSeqsDiffs += success;  }\r
+        }\r
+\r
+        // total differences over linker, barcode, spacer and forward primer\r
+        if (currentSeqsDiffs > tdiffs) { trashCode += 't'; }\r
+\r
+        if(revPrimer.size() != 0){\r
+            success = trimOligos.stripReverse(currSeq, currQual);\r
+            if(!success)            { trashCode += 'r'; }\r
+        }\r
+\r
+        return trashCode.length();\r
+    }\r
+    catch(exception& e) {\r
+        m->errorOut(e, "SffInfoCommand", "findGroup");\r
+        exit(1);\r
+    }\r
+}\r
+//**********************************************************************************************************************\r
+int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer, string groupMode) {\r
+       try {\r
+        string trashCode = "";\r
+        primer = 0;\r
+        \r
+        string group = groupMap->getGroup(header.name);\r
+        if (group == "not found") {     trashCode += "g";   } //scrap for group\r
+        else { //find file group\r
+            map<string, int>::iterator it = barcodes.find(group);\r
+            if (it != barcodes.end()) {\r
+                barcode = it->second;\r
+            }else { trashCode += "g"; }\r
+        }\r
+        \r
+        return trashCode.length();\r
+    }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "findGroup");\r
+               exit(1);\r
+       }\r
+}\r
+//**********************************************************************************************************************\r
+int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {\r
+       try {\r
+               \r
+               if (name.length() >= 6) {\r
+                       string time = name.substr(0, 6);\r
+                       unsigned int timeNum = m->fromBase36(time);\r
+                       \r
+                       int q1 = timeNum / 60;\r
+                       int sec = timeNum - 60 * q1;\r
+                       int q2 = q1 / 60;\r
+                       int minute = q1 - 60 * q2;\r
+                       int q3 = q2 / 24;\r
+                       int hr = q2 - 24 * q3;\r
+                       int q4 = q3 / 32;\r
+                       int day = q3 - 32 * q4;\r
+                       int q5 = q4 / 13;\r
+                       int mon = q4 - 13 * q5;\r
+                       int year = 2000 + q5;\r
+               \r
+                       timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);\r
+               }\r
+               \r
+               if (name.length() >= 9) {\r
+                       region = name.substr(7, 2);\r
+               \r
+                       string xyNum = name.substr(9);\r
+                       unsigned int myXy = m->fromBase36(xyNum);\r
+                       int x = myXy >> 12;\r
+                       int y = myXy & 4095;\r
+               \r
+                       xy = toString(x) + "_" + toString(y);\r
+               }\r
+               \r
+               return 0;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "decodeName");\r
+               exit(1);\r
+       }\r
+}\r
+//**********************************************************************************************************************\r
+int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader& header) {\r
+       try {\r
+       \r
+               out << "Common Header:\nMagic Number: " << header.magicNumber << endl;\r
+               out << "Version: " << header.version << endl;\r
+               out << "Index Offset: " << header.indexOffset << endl;\r
+               out << "Index Length: " << header.indexLength << endl;\r
+               out << "Number of Reads: " << header.numReads << endl;\r
+               out << "Header Length: " << header.headerLength << endl;\r
+               out << "Key Length: " << header.keyLength << endl;\r
+               out << "Number of Flows: " << header.numFlowsPerRead << endl;\r
+               out << "Format Code: " << header.flogramFormatCode << endl;\r
+               out << "Flow Chars: " << header.flowChars << endl;\r
+               out << "Key Sequence: " << header.keySequence << endl << endl;\r
+                       \r
+               return 0;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "printCommonHeader");\r
+               exit(1);\r
+       }\r
+}\r
+//**********************************************************************************************************************\r
+int SffInfoCommand::printHeader(ofstream& out, Header& header) {\r
+       try {\r
+               \r
+               out << ">" << header.name << endl;\r
+               out << "Run Prefix: " << header.timestamp << endl;\r
+               out << "Region #:  " << header.region << endl;\r
+               out << "XY Location: " << header.xy << endl << endl;\r
+               \r
+               out << "Run Name:  " << endl;\r
+               out << "Analysis Name:  " << endl;\r
+               out << "Full Path: " << endl << endl;\r
+               \r
+               out << "Read Header Len: " << header.headerLength << endl;\r
+               out << "Name Length: " << header.nameLength << endl;\r
+               out << "# of Bases: " << header.numBases << endl;\r
+               out << "Clip Qual Left: " << header.clipQualLeft << endl;\r
+               out << "Clip Qual Right: " << header.clipQualRight << endl;\r
+               out << "Clip Adap Left: " << header.clipAdapterLeft << endl;\r
+               out << "Clip Adap Right: " << header.clipAdapterRight << endl << endl;\r
+               \r
+               return 0;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "printHeader");\r
+               exit(1);\r
+       }\r
+}\r
+//**********************************************************************************************************************\r
+bool SffInfoCommand::sanityCheck(Header& header, seqRead& read) {\r
+       try {\r
+        bool okay = true;\r
+        string message = "[WARNING]: Your sff file may be corrupted! Sequence: " + header.name + "\n";\r
+        \r
+        if (header.clipQualLeft > read.bases.length()) {\r
+            okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.bases.length()) + " bases.\n";\r
+        }\r
+        if (header.clipQualRight > read.bases.length()) {\r
+            okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.bases.length()) + " bases.\n";\r
+        }\r
+        if (header.clipQualLeft > read.qualScores.size()) {\r
+            okay = false; message += "Clip Qual Left = " + toString(header.clipQualLeft) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";\r
+        }\r
+        if (header.clipQualRight > read.qualScores.size()) {\r
+            okay = false; message += "Clip Qual Right = " + toString(header.clipQualRight) + ", but we only read " + toString(read.qualScores.size()) + " quality scores.\n";\r
+        }\r
+        \r
+        if (okay == false) {\r
+            m->mothurOut(message); m->mothurOutEndLine();\r
+        }\r
+        \r
+               return okay;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "sanityCheck");\r
+               exit(1);\r
+       }\r
+}\r
+//**********************************************************************************************************************\r
+int SffInfoCommand::printSffTxtSeqData(ofstream& out, seqRead& read, Header& header) {\r
+       try {\r
+               out << "Flowgram: ";\r
+               for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << '\t';  }\r
+               \r
+               out << endl <<  "Flow Indexes: ";\r
+               int sum = 0;\r
+               for (int i = 0; i < read.flowIndex.size(); i++) {  sum +=  read.flowIndex[i];  out << sum << '\t'; }\r
+               \r
+               //make the bases you want to clip lowercase and the bases you want to keep upper case\r
+        int endValue = header.clipQualRight;\r
+               if(endValue == 0){      endValue = read.bases.length(); }\r
+               for (int i = 0; i < (header.clipQualLeft-1); i++) { read.bases[i] = tolower(read.bases[i]); }\r
+               for (int i = (header.clipQualLeft-1); i < (endValue-1); i++) {   read.bases[i] = toupper(read.bases[i]);  }\r
+               for (int i = (endValue-1); i < read.bases.length(); i++) {   read.bases[i] = tolower(read.bases[i]);  }\r
+               \r
+               out << endl <<  "Bases: " << read.bases << endl << "Quality Scores: ";\r
+               for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }\r
+       \r
+               \r
+               out << endl << endl;\r
+               \r
+               return 0;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "printSffTxtSeqData");\r
+               exit(1);\r
+       }\r
+}\r
+//**********************************************************************************************************************\r
+int SffInfoCommand::printFastaSeqData(ofstream& out, seqRead& read, Header& header) {\r
+       try {\r
+               string seq = read.bases;\r
+               \r
+        if (trim) {\r
+                       if(header.clipQualRight < header.clipQualLeft){\r
+                               if (header.clipQualRight == 0) { //don't trim right\r
+                    seq = seq.substr(header.clipQualLeft-1);\r
+                }else {\r
+                    seq = "NNNN";\r
+                }\r
+                       }\r
+                       else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){\r
+                               seq = seq.substr((header.clipQualLeft-1), (header.clipQualRight-header.clipQualLeft));\r
+                       }\r
+                       else {\r
+                               seq = seq.substr(header.clipQualLeft-1);\r
+                       }\r
+               }else{\r
+                       //if you wanted the sfftxt then you already converted the bases to the right case\r
+                       if (!sfftxt) {\r
+                int endValue = header.clipQualRight;\r
+                               //make the bases you want to clip lowercase and the bases you want to keep upper case\r
+                               if(endValue == 0){      endValue = seq.length();        }\r
+                               for (int i = 0; i < (header.clipQualLeft-1); i++) { seq[i] = tolower(seq[i]);  }\r
+                               for (int i = (header.clipQualLeft-1); i < (endValue-1); i++)  {   seq[i] = toupper(seq[i]);  }\r
+                               for (int i = (endValue-1); i < seq.length(); i++) {   seq[i] = tolower(seq[i]);  }\r
+                       }\r
+               }\r
+               \r
+               out << ">" << header.name  << " xy=" << header.xy << endl;\r
+               out << seq << endl;\r
+               \r
+               return 0;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "printFastaSeqData");\r
+               exit(1);\r
+       }\r
+}\r
+\r
+//**********************************************************************************************************************\r
+int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& header) {\r
+       try {\r
+               \r
+               if (trim) {\r
+                       if(header.clipQualRight < header.clipQualLeft){\r
+                if (header.clipQualRight == 0) { //don't trim right\r
+                    out << ">" << header.name << " xy=" << header.xy << " length=" << (read.qualScores.size()-header.clipQualLeft) << endl;\r
+                    for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';      }       \r
+                }else {\r
+                    out << ">" << header.name << " xy=" << header.xy << endl;\r
+                    out << "0\t0\t0\t0";\r
+                }\r
+                       }\r
+                       else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){\r
+                               out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;\r
+                               for (int i = (header.clipQualLeft-1); i < (header.clipQualRight-1); i++) {   out << read.qualScores[i] << '\t'; }\r
+                       }\r
+                       else{\r
+                               out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << endl;\r
+                               for (int i = (header.clipQualLeft-1); i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';   }                       \r
+                       }\r
+               }else{\r
+                       out << ">" << header.name << " xy=" << header.xy << " length=" << read.qualScores.size() << endl;\r
+                       for (int i = 0; i < read.qualScores.size(); i++) {   out << read.qualScores[i] << '\t';  }\r
+               }\r
+               \r
+               out << endl;\r
+               \r
+               return 0;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "printQualSeqData");\r
+               exit(1);\r
+       }\r
+}\r
+\r
+//**********************************************************************************************************************\r
+int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) {\r
+       try {\r
+        \r
+        int endValue = header.clipQualRight;\r
+        if (header.clipQualRight == 0) {\r
+            endValue = read.flowIndex.size();\r
+            if (m->debug) { m->mothurOut("[DEBUG]: " + header.name + " has clipQualRight=0.\n"); }\r
+        }\r
+        if(endValue > header.clipQualLeft){\r
+            \r
+            int rightIndex = 0;\r
+            for (int i = 0; i < endValue; i++) {  rightIndex +=  read.flowIndex[i];     }\r
+            \r
+            out << header.name << ' ' << rightIndex;\r
+            for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100);  }\r
+            out << endl;\r
+        }\r
+               \r
+               \r
+               return 0;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "printFlowSeqData");\r
+               exit(1);\r
+       }\r
+}\r
+//**********************************************************************************************************************\r
+int SffInfoCommand::readAccnosFile(string filename) {\r
+       try {\r
+               //remove old names\r
+               seqNames.clear();\r
+               \r
+               ifstream in;\r
+               m->openInputFile(filename, in);\r
+               string name;\r
+               \r
+               while(!in.eof()){\r
+                       in >> name; m->gobble(in);\r
+                                               \r
+                       seqNames.insert(name);\r
+                       \r
+                       if (m->control_pressed) { seqNames.clear(); break; }\r
+               }\r
+               in.close();             \r
+               \r
+               return 0;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "readAccnosFile");\r
+               exit(1);\r
+       }\r
+}\r
+//**********************************************************************************************************************\r
+int SffInfoCommand::parseSffTxt() {\r
+       //Reads the text file named by sfftxtFilename and, depending on the fasta / qual / flow flags,\r
+       //writes the matching fasta, quality and flow output files. The file is expected to start with a\r
+       //common header block followed by one header + data block per read.\r
+       //Reads not listed in seqNames (when that set is non-empty) are parsed but not printed.\r
+       //Always returns 0; any exception is reported through errorOut and ends the program.\r
+       try {\r
+               \r
+               ifstream inSFF;\r
+               m->openInputFile(sfftxtFilename, inSFF);\r
+               \r
+               //default the output location to the input file's directory\r
+               if (outputDir == "") {  outputDir += m->hasPath(sfftxtFilename); }\r
+               \r
+               //output file names\r
+               ofstream outFasta, outQual, outFlow;\r
+               string outFastaFileName, outQualFileName;\r
+               string fileRoot = m->getRootName(m->getSimpleName(sfftxtFilename));\r
+               if (fileRoot.length() > 0) {\r
+                       //rip off last .\r
+                       fileRoot = fileRoot.substr(0, fileRoot.length()-1);\r
+                       fileRoot = m->getRootName(fileRoot);\r
+               }\r
+               \r
+               map<string, string> variables; \r
+               variables["[filename]"] = fileRoot;\r
+               string sfftxtFileName = getOutputFileName("sfftxt",variables);\r
+               string outFlowFileName = getOutputFileName("flow",variables);\r
+               //untrimmed fasta and qual output is tagged "raw"; the flow name above is built before the tag is set\r
+               if (!trim) { variables["[tag]"] = "raw"; }\r
+               outFastaFileName = getOutputFileName("fasta",variables);\r
+               outQualFileName = getOutputFileName("qfile",variables);\r
+               \r
+               //open only the outputs that were requested and register them as results of the command\r
+               if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }\r
+               if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }\r
+               if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }\r
+               \r
+               //read common header\r
+               //lines are consumed strictly in file order; only the number of reads and number of flows are used below\r
+               string commonHeader = m->getline(inSFF);\r
+               string magicNumber = m->getline(inSFF); \r
+               string version = m->getline(inSFF);\r
+               string indexOffset = m->getline(inSFF);\r
+               string indexLength = m->getline(inSFF);\r
+               int numReads = parseHeaderLineToInt(inSFF);\r
+               string headerLength = m->getline(inSFF);\r
+               string keyLength = m->getline(inSFF);\r
+               int numFlows = parseHeaderLineToInt(inSFF);\r
+               string flowgramCode = m->getline(inSFF);\r
+               string flowChars = m->getline(inSFF);\r
+               string keySequence = m->getline(inSFF);\r
+               m->gobble(inSFF);\r
+               \r
+               string seqName;\r
+               \r
+               //the flow file starts with the number of flows per read\r
+               if (flow)       {       outFlow << numFlows << endl;    }\r
+               \r
+               for(int i=0;i<numReads;i++){\r
+                       \r
+                       //sanity check\r
+                       if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }\r
+                       \r
+                       Header header;\r
+                       \r
+                       //parse read header\r
+                       inSFF >> seqName;\r
+                       //drop the first character of the name token\r
+                       seqName = seqName.substr(1);\r
+                       m->gobble(inSFF);\r
+                       header.name = seqName;\r
+                       \r
+                       string runPrefix = parseHeaderLineToString(inSFF);              header.timestamp = runPrefix;\r
+                       string regionNumber = parseHeaderLineToString(inSFF);   header.region = regionNumber;\r
+                       string xyLocation = parseHeaderLineToString(inSFF);             header.xy = xyLocation;\r
+                       m->gobble(inSFF);\r
+                               \r
+                       //these three fields are read only to advance past them\r
+                       string runName = parseHeaderLineToString(inSFF);\r
+                       string analysisName = parseHeaderLineToString(inSFF);\r
+                       string fullPath = parseHeaderLineToString(inSFF);\r
+                       m->gobble(inSFF);\r
+                       \r
+                       string readHeaderLen = parseHeaderLineToString(inSFF);  convert(readHeaderLen, header.headerLength);\r
+                       string nameLength = parseHeaderLineToString(inSFF);             convert(nameLength, header.nameLength);\r
+                       int numBases = parseHeaderLineToInt(inSFF);                             header.numBases = numBases;\r
+                       string clipQualLeft = parseHeaderLineToString(inSFF);   convert(clipQualLeft, header.clipQualLeft);\r
+                       int clipQualRight = parseHeaderLineToInt(inSFF);                header.clipQualRight = clipQualRight;\r
+                       string clipAdapLeft = parseHeaderLineToString(inSFF);   convert(clipAdapLeft, header.clipAdapterLeft);\r
+                       string clipAdapRight = parseHeaderLineToString(inSFF);  convert(clipAdapRight, header.clipAdapterRight);\r
+                       m->gobble(inSFF);\r
+                               \r
+                       seqRead read;\r
+                       \r
+                       //parse read\r
+                       vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows);      read.flowgram = flowVector;\r
+                       vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases); \r
+                       \r
+                       //adjust for print\r
+                       //store each index as the difference from the previous one; the first entry is kept as is.\r
+                       //a read with no bases has no indices, so element 0 may only be read when the vector is not empty\r
+                       vector<unsigned int> flowIndicesAdjusted;\r
+                       if (!flowIndices.empty()) {  flowIndicesAdjusted.push_back(flowIndices[0]);  }\r
+                       for (int j = 1; j < flowIndices.size(); j++) {   flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]);   }\r
+                       read.flowIndex = flowIndicesAdjusted;\r
+                       \r
+                       string bases = parseHeaderLineToString(inSFF);                                                                          read.bases = bases;\r
+                       vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases);       read.qualScores = qualityScores;\r
+                       m->gobble(inSFF);\r
+                                       \r
+                       //if you have provided an accnos file and this seq is not in it, then don't print\r
+                       bool print = true;\r
+                       if (seqNames.size() != 0) {   if (seqNames.count(header.name) == 0) { print = false; }  }\r
+                       \r
+                       //print \r
+                       if (print) {\r
+                               if (fasta)      {       printFastaSeqData(outFasta, read, header);      }\r
+                               if (qual)       {       printQualSeqData(outQual, read, header);        }\r
+                               if (flow)       {       printFlowSeqData(outFlow, read, header);        }\r
+                       }\r
+                       \r
+                       //report progress\r
+                       if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }\r
+                       \r
+                       if (m->control_pressed) {  break;  }\r
+               }\r
+               \r
+               //report progress\r
+               //print the final count unless the loop above already printed it on a multiple of 10000\r
+               if (!m->control_pressed) {   if((numReads) % 10000 != 0){       m->mothurOut(toString(numReads)); m->mothurOutEndLine();                }  }\r
+               \r
+               inSFF.close();\r
+               \r
+               if (fasta)      {  outFasta.close();    }\r
+               if (qual)       {  outQual.close();             }\r
+               if (flow)       {  outFlow.close();             }\r
+               \r
+               return 0;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "parseSffTxt");\r
+               exit(1);\r
+       }\r
+}\r
+//**********************************************************************************************************************\r
+\r
+int SffInfoCommand::parseHeaderLineToInt(ifstream& file){\r
+       try {\r
+               int number;\r
+               \r
+               while (!file.eof())     {\r
+                       \r
+                       char c = file.get(); \r
+                       if (c == ':'){\r
+                               file >> number;\r
+                               break;\r
+                       }\r
+                       \r
+               }\r
+               m->gobble(file);\r
+               return number;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");\r
+               exit(1);\r
+       }\r
+       \r
+}\r
+\r
+//**********************************************************************************************************************\r
+\r
+string SffInfoCommand::parseHeaderLineToString(ifstream& file){\r
+       try {\r
+               string text;\r
+               \r
+               while (!file.eof())     {\r
+                       char c = file.get(); \r
+                       \r
+                       if (c == ':'){\r
+                               //m->gobble(file);\r
+                               //text = m->getline(file);      \r
+                               file >> text;\r
+                               break;\r
+                       }\r
+               }\r
+               m->gobble(file);\r
+               \r
+               return text;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");\r
+               exit(1);\r
+       }\r
+}\r
+\r
+//**********************************************************************************************************************\r
+\r
+vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){\r
+       try {\r
+               vector<unsigned short> floatVector(length);\r
+               \r
+               while (!file.eof())     {\r
+                       char c = file.get(); \r
+                       if (c == ':'){\r
+                               float temp;\r
+                               for(int i=0;i<length;i++){\r
+                                       file >> temp;\r
+                                       floatVector[i] = temp * 100;\r
+                               }\r
+                               break;\r
+                       }\r
+               }\r
+               m->gobble(file);        \r
+               return floatVector;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");\r
+               exit(1);\r
+       }\r
+}\r
+\r
+//**********************************************************************************************************************\r
+\r
+vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){\r
+       try {\r
+               vector<unsigned int> intVector(length);\r
+               \r
+               while (!file.eof())     {\r
+                       char c = file.get(); \r
+                       if (c == ':'){\r
+                               for(int i=0;i<length;i++){\r
+                                       file >> intVector[i];\r
+                               }\r
+                               break;\r
+                       }\r
+               }\r
+               m->gobble(file);        \r
+               return intVector;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");\r
+               exit(1);\r
+       }\r
+}\r
+//***************************************************************************************************************\r
+\r
+bool SffInfoCommand::readOligos(string oligoFile){\r
+       //Reads an oligos file and fills the primer, reverse primer, barcode, linker and spacer containers.\r
+       //When split > 1 it also works out one output sff file name per barcode/primer combination,\r
+       //registers those names as outputs and creates each file empty.\r
+       //Returns false (and sets split to 1) when the file names no groups at all, true otherwise.\r
+       try {\r
+               filehandles.clear();\r
+               numSplitReads.clear();\r
+               filehandlesHeaders.clear();\r
+               \r
+               ifstream inOligos;\r
+               m->openInputFile(oligoFile, inOligos);\r
+               \r
+               string type, oligo, group;\r
+               \r
+               //position of each primer / barcode in the order they appear in the file\r
+               int indexPrimer = 0;\r
+               int indexBarcode = 0;\r
+               \r
+               while(!inOligos.eof()){\r
+                       \r
+                       inOligos >> type;\r
+                       \r
+                       if(type[0] == '#'){\r
+                               //comment line: skip up to the line feed (10) or carriage return (13)\r
+                               while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there\r
+                               m->gobble(inOligos);\r
+                       }\r
+                       else{\r
+                               m->gobble(inOligos);\r
+                               //make type case insensitive\r
+                               for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }\r
+                               \r
+                               inOligos >> oligo;\r
+                               \r
+                               //upper case the oligo and write U as T\r
+                               for(int i=0;i<oligo.length();i++){\r
+                                       oligo[i] = toupper(oligo[i]);\r
+                                       if(oligo[i] == 'U')     {       oligo[i] = 'T'; }\r
+                               }\r
+                               \r
+                               if(type == "FORWARD"){\r
+                                       group = "";\r
+                                       \r
+                                       // get rest of line in case there is a primer name\r
+                                       while (!inOligos.eof()) {\r
+                                               char c = inOligos.get();\r
+                                               if (c == 10 || c == 13 || c == -1){     break;  }\r
+                                               else if (c == 32 || c == 9){;} //space or tab\r
+                                               else {  group += c;  }\r
+                                       }\r
+                                       \r
+                                       //check for repeat primers\r
+                                       map<string, int>::iterator itPrime = primers.find(oligo);\r
+                                       if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }\r
+                                       \r
+                                       primers[oligo]=indexPrimer; indexPrimer++;\r
+                                       primerNameVector.push_back(group);\r
+                                       \r
+                               }else if(type == "REVERSE"){\r
+                                       //reverse primers are stored as returned by reverseOligo\r
+                                       string oligoRC = reverseOligo(oligo);\r
+                                       revPrimer.push_back(oligoRC);\r
+                               }\r
+                               else if(type == "BARCODE"){\r
+                                       inOligos >> group;\r
+                                       \r
+                                       //check for repeat barcodes\r
+                                       map<string, int>::iterator itBar = barcodes.find(oligo);\r
+                                       if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }\r
+                                       \r
+                                       barcodes[oligo]=indexBarcode; indexBarcode++;\r
+                                       barcodeNameVector.push_back(group);\r
+                               }else if(type == "LINKER"){\r
+                                       linker.push_back(oligo);\r
+                               }else if(type == "SPACER"){\r
+                                       spacer.push_back(oligo);\r
+                               }\r
+                               else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }\r
+                       }\r
+                       m->gobble(inOligos);\r
+               }\r
+               inOligos.close();\r
+               \r
+               //no barcodes and no named first primer: there is nothing to split on.\r
+               //primerNameVector can be empty here, so its size is checked before element 0 is read\r
+               if(barcodeNameVector.size() == 0 && (primerNameVector.size() == 0 || primerNameVector[0] == "")){ split = 1;      }\r
+               \r
+               //add in potential combos\r
+               //an empty placeholder entry keeps the barcode x primer grid at least 1 x 1\r
+               if(barcodeNameVector.size() == 0){\r
+                       barcodes[""] = 0;\r
+                       barcodeNameVector.push_back("");\r
+               }\r
+               \r
+               if(primerNameVector.size() == 0){\r
+                       primers[""] = 0;\r
+                       primerNameVector.push_back("");\r
+               }\r
+               \r
+               //filehandles[barcode index][primer index] holds the output file name for that combination\r
+               filehandles.resize(barcodeNameVector.size());\r
+               for(int i=0;i<filehandles.size();i++){\r
+                       filehandles[i].assign(primerNameVector.size(), "");\r
+               }\r
+               \r
+               if(split > 1){\r
+                       set<string> uniqueNames; //used to cleanup outputFileNames\r
+                       for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
+                               for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){\r
+                                       \r
+                                       string primerName = primerNameVector[itPrimer->second];\r
+                                       string barcodeName = barcodeNameVector[itBar->second];\r
+                                       \r
+                                       string comboGroupName = "";\r
+                                       string fastaFileName = "";\r
+                                       string qualFileName = "";\r
+                                       string nameFileName = "";\r
+                                       \r
+                                       //group name is the barcode name, the primer name, or "barcode.primer" when both are set\r
+                                       if(primerName == ""){\r
+                                               comboGroupName = barcodeNameVector[itBar->second];\r
+                                       }\r
+                                       else{\r
+                                               if(barcodeName == ""){\r
+                                                       comboGroupName = primerNameVector[itPrimer->second];\r
+                                               }\r
+                                               else{\r
+                                                       comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];\r
+                                               }\r
+                                       }\r
+                                       \r
+                                       ofstream temp;\r
+                                       map<string, string> variables;\r
+                                       variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+                                       variables["[group]"] = comboGroupName;\r
+                                       string thisFilename = getOutputFileName("sff",variables);\r
+                                       //several combinations can map to one file name; register each name only once\r
+                                       if (uniqueNames.count(thisFilename) == 0) {\r
+                                               outputNames.push_back(thisFilename);\r
+                                               outputTypes["sff"].push_back(thisFilename);\r
+                                               uniqueNames.insert(thisFilename);\r
+                                       }\r
+                                       \r
+                                       filehandles[itBar->second][itPrimer->second] = thisFilename;\r
+                                       //create (or truncate) the file so it exists and starts empty\r
+                                       temp.open(thisFilename.c_str(), ios::binary);           temp.close();\r
+                               }\r
+                       }\r
+               }\r
+               numFPrimers = primers.size();\r
+               numLinkers = linker.size();\r
+               numSpacers = spacer.size();\r
+               //the "scrap" group file name is recorded in noMatchFile; any old copy is removed\r
+               map<string, string> variables;\r
+               variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+               variables["[group]"] = "scrap";\r
+               noMatchFile = getOutputFileName("sff",variables);\r
+               m->mothurRemove(noMatchFile);\r
+               numNoMatch = 0;\r
+               \r
+               \r
+               //allBlank stays true only if no barcode and no primer carries a group name\r
+               bool allBlank = true;\r
+               for (int i = 0; i < barcodeNameVector.size(); i++) {\r
+                       if (barcodeNameVector[i] != "") {\r
+                               allBlank = false;\r
+                               break;\r
+                       }\r
+               }\r
+               for (int i = 0; i < primerNameVector.size(); i++) {\r
+                       if (primerNameVector[i] != "") {\r
+                               allBlank = false;\r
+                               break;\r
+                       }\r
+               }\r
+               \r
+               //one "<file>headers" name and one read counter per entry of filehandles\r
+               filehandlesHeaders.resize(filehandles.size());\r
+               numSplitReads.resize(filehandles.size());\r
+               for (int i = 0; i < filehandles.size(); i++) {\r
+                       numSplitReads[i].resize(filehandles[i].size(), 0);\r
+                       for (int j = 0; j < filehandles[i].size(); j++) {\r
+                               filehandlesHeaders[i].push_back(filehandles[i][j]+"headers");\r
+                       }\r
+               }\r
+               \r
+               if (allBlank) {\r
+                       m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a split the sff file."); m->mothurOutEndLine();\r
+                       split = 1;\r
+                       return false;\r
+               }\r
+               \r
+               return true;\r
+               \r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "readOligos");\r
+               exit(1);\r
+       }\r
+}\r
+//***************************************************************************************************************\r
+\r
+bool SffInfoCommand::readGroup(string oligoFile){\r
+       //Reads a group file into a new GroupMap and prepares one output sff file per group:\r
+       //each group gets a single entry in filehandles, a matching "<file>headers" entry in filehandlesHeaders,\r
+       //a zeroed read counter, and its position recorded in barcodes under the group name.\r
+       //Both the sff file and its headers file are created empty. Always returns true.\r
+       try {\r
+               filehandles.clear();\r
+               numSplitReads.clear();\r
+               filehandlesHeaders.clear();\r
+               barcodes.clear();\r
+               \r
+               groupMap = new GroupMap();\r
+               groupMap->readMap(oligoFile);\r
+               \r
+               //like barcodeNameVector - no primer names\r
+               vector<string> groupNames = groupMap->getNamesOfGroups();\r
+               \r
+               filehandles.resize(groupNames.size());\r
+               for (int g = 0; g < filehandles.size(); g++) {\r
+                       //build this group's sff file name and register it as an output\r
+                       map<string, string> nameParts;\r
+                       nameParts["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+                       nameParts["[group]"] = groupNames[g];\r
+                       string groupSffName = getOutputFileName("sff",nameParts);\r
+                       outputNames.push_back(groupSffName);\r
+                       outputTypes["sff"].push_back(groupSffName);\r
+                       \r
+                       //open and close right away so the file exists and is empty\r
+                       ofstream blankSff;\r
+                       m->openOutputFileBinary(groupSffName, blankSff);\r
+                       blankSff.close();\r
+                       \r
+                       filehandles[g].push_back(groupSffName);\r
+                       barcodes[groupNames[g]] = g;\r
+               }\r
+               \r
+               //the "scrap" group file name is recorded in noMatchFile; any old copy is removed\r
+               map<string, string> variables;\r
+               variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+               variables["[group]"] = "scrap";\r
+               noMatchFile = getOutputFileName("sff",variables);\r
+               m->mothurRemove(noMatchFile);\r
+               numNoMatch = 0;\r
+               \r
+               filehandlesHeaders.resize(groupNames.size());\r
+               numSplitReads.resize(filehandles.size());\r
+               for (int g = 0; g < filehandles.size(); g++) {\r
+                       numSplitReads[g].resize(filehandles[g].size(), 0);\r
+                       for (int f = 0; f < filehandles[g].size(); f++) {\r
+                               //companion headers file, also created empty\r
+                               string headerFileName = filehandles[g][f]+"headers";\r
+                               ofstream blankHeader;\r
+                               m->openOutputFileBinary(headerFileName, blankHeader);\r
+                               blankHeader.close();\r
+                               filehandlesHeaders[g].push_back(headerFileName);\r
+                       }\r
+               }\r
+               \r
+               return true;\r
+               \r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "readGroup");\r
+               exit(1);\r
+       }\r
+}\r
+\r
+//********************************************************************/\r
+string SffInfoCommand::reverseOligo(string oligo){\r
+       try {\r
+        string reverse = "";\r
+        \r
+        for(int i=oligo.length()-1;i>=0;i--){\r
+            \r
+            if(oligo[i] == 'A')                {       reverse += 'T'; }\r
+            else if(oligo[i] == 'T'){  reverse += 'A'; }\r
+            else if(oligo[i] == 'U'){  reverse += 'A'; }\r
+            \r
+            else if(oligo[i] == 'G'){  reverse += 'C'; }\r
+            else if(oligo[i] == 'C'){  reverse += 'G'; }\r
+            \r
+            else if(oligo[i] == 'R'){  reverse += 'Y'; }\r
+            else if(oligo[i] == 'Y'){  reverse += 'R'; }\r
+            \r
+            else if(oligo[i] == 'M'){  reverse += 'K'; }\r
+            else if(oligo[i] == 'K'){  reverse += 'M'; }\r
+            \r
+            else if(oligo[i] == 'W'){  reverse += 'W'; }\r
+            else if(oligo[i] == 'S'){  reverse += 'S'; }\r
+            \r
+            else if(oligo[i] == 'B'){  reverse += 'V'; }\r
+            else if(oligo[i] == 'V'){  reverse += 'B'; }\r
+            \r
+            else if(oligo[i] == 'D'){  reverse += 'H'; }\r
+            else if(oligo[i] == 'H'){  reverse += 'D'; }\r
+            \r
+            else                                               {       reverse += 'N'; }\r
+        }\r
+        \r
+        \r
+        return reverse;\r
+    }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "reverseOligo");\r
+               exit(1);\r
+       }\r
+}\r
+\r
+//**********************************************************************************************************************\r
+\r
+\r
+                               \r
+                               \r
index 5ec6e72239d0772548ce10f584ebccb435230d25..1909e2d43e6d83b3d48ef06922b2d369a0e104d5 100644 (file)
@@ -11,7 +11,7 @@
  */
 
 #include "command.hpp"
-
+#include "groupmap.h"
 
 /**********************************************************/
 struct CommonHeader {
@@ -79,24 +79,27 @@ public:
        void help() { m->mothurOut(getHelpString()); }  
        
 private:
-       string sffFilename, sfftxtFilename, outputDir, accnosName, currentFileName, oligosfile, noMatchFile;
-       vector<string> filenames, outputNames, accnosFileNames, oligosFileNames;
-       bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos, hasOligos;
-       int mycount, split, numFPrimers, numLinkers, numSpacers, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs;
+       string sffFilename, sfftxtFilename, outputDir, accnosName, currentFileName, oligosfile, noMatchFile, groupfile;
+       vector<string> filenames, outputNames, accnosFileNames, oligosFileNames, groupFileNames;
+       bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos, hasOligos, hasGroup;
+       int mycount, split, numFPrimers, numLinkers, numSpacers, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, numNoMatch;
        set<string> seqNames;
     map<string, int> barcodes;
     map<string, int> primers;
+    GroupMap* groupMap;
     vector<string> linker, spacer, primerNameVector, barcodeNameVector, revPrimer;
     vector<vector<int> > numSplitReads;
-    vector<vector<string> > filehandles, filehandlesHeaders;
+    vector<vector<string> > filehandles;
+    vector<vector<string> > filehandlesHeaders;
     
        //extract sff file functions
        int extractSffInfo(string, string, string);
        int readCommonHeader(ifstream&, CommonHeader&);
-       //int readHeader(ifstream&, Header&);
-       int readSeqData(ifstream&, seqRead&, int, Header&);
+       int readHeader(ifstream&, Header&);
+       bool readSeqData(ifstream&, seqRead&, int, Header&);
        int decodeName(string&, string&, string&, string);
     bool readOligos(string oligosFile);
+    bool readGroup(string oligosFile);
        
        int printCommonHeader(ofstream&, CommonHeader&); 
        int printHeader(ofstream&, Header&);
@@ -109,6 +112,7 @@ private:
        bool sanityCheck(Header&, seqRead&);
     int adjustCommonHeader(CommonHeader);
     int findGroup(Header header, seqRead read, int& barcode, int& primer);
+    int findGroup(Header header, seqRead read, int& barcode, int& primer, string);
     string reverseOligo(string oligo);
     
        //parsesfftxt file functions
index d8a4b96ec155d155c0ca6dae946b821cb1a238e5..f75662b30f258ecf30b3e20e79a76362f633c069 100644 (file)
@@ -166,7 +166,7 @@ SffMultipleCommand::SffMultipleCommand(string option)  {
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
-                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       inputDir = validParameter.validFile(parameters, "inputdir", false);             
                        if (inputDir == "not found"){   inputDir = "";          }
                        else {
                                string path;
@@ -303,54 +303,9 @@ SffMultipleCommand::SffMultipleCommand(string option)  {
                 path += "lookupFiles\\";
 #endif
                                lookupFileName = m->getFullPathName(path) + "LookUp_Titanium.pat";
-                               
-                               int ableToOpen;
-                               ifstream in;
-                               ableToOpen = m->openInputFile(lookupFileName, in, "noerror");
-                               in.close();     
-                               
-                               //if you can't open it, try input location
-                               if (ableToOpen == 1) {
-                                       if (inputDir != "") { //default path is set
-                                               string tryPath = inputDir + m->getSimpleName(lookupFileName);
-                                               m->mothurOut("Unable to open " + lookupFileName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
-                                               ifstream in2;
-                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
-                                               in2.close();
-                                               lookupFileName = tryPath;
-                                       }
-                               }
-                               
-                               //if you can't open it, try default location
-                               if (ableToOpen == 1) {
-                                       if (m->getDefaultPath() != "") { //default path is set
-                                               string tryPath = m->getDefaultPath() + m->getSimpleName(lookupFileName);
-                                               m->mothurOut("Unable to open " + lookupFileName + ". Trying default " + tryPath); m->mothurOutEndLine();
-                                               ifstream in2;
-                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");
-                                               in2.close();
-                                               lookupFileName = tryPath;
-                                       }
-                               }
-                               
-                               //if you can't open it its not in current working directory or inputDir, try mothur excutable location
-                               if (ableToOpen == 1) {
-                                       string exepath = m->argv;
-                                       string tempPath = exepath;
-                                       for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
-                                       exepath = exepath.substr(0, (tempPath.find_last_of('m')));
-                                       
-                                       string tryPath = m->getFullPathName(exepath) + m->getSimpleName(lookupFileName);
-                                       m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
-                                       ifstream in2;
-                                       ableToOpen = m->openInputFile(tryPath, in2, "noerror");
-                                       in2.close();
-                                       lookupFileName = tryPath;
-                               }
-                               
-                               if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
-                       }
-                       else if(temp == "not open")     {       
+                               bool ableToOpen = m->checkLocations(lookupFileName, inputDir);
+                if (!ableToOpen) { abort=true; }
+                       }else if(temp == "not open")    {       
                                
                                lookupFileName = validParameter.validFile(parameters, "lookup", false);
                                
@@ -384,8 +339,9 @@ int SffMultipleCommand::execute(){
                vector<string> sffFiles, oligosFiles;
         readFile(sffFiles, oligosFiles);
         
-        outputDir = m->hasPath(filename);
-        string fileroot = outputDir + m->getRootName(m->getSimpleName(filename));
+        string thisOutputDir = outputDir;
+        if (thisOutputDir == "") { thisOutputDir = m->hasPath(filename); }
+        string fileroot = thisOutputDir + m->getRootName(m->getSimpleName(filename));
         map<string, string> variables; 
                variables["[filename]"] = fileroot;
         string fasta = getOutputFileName("fasta",variables);
@@ -414,6 +370,8 @@ int SffMultipleCommand::execute(){
             if (makeGroup) { outputNames.push_back(group); outputTypes["group"].push_back(group); m->setGroupFile(group); }
         }
         
+        m->setProcessors(toString(processors));
+        
                //report output filenames
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -443,11 +401,11 @@ int SffMultipleCommand::readFile(vector<string>& sffFiles, vector<string>& oligo
             
             in >> sff;
             
-            sff = m->getFullPathName(sff);
-            
             //ignore file pairing
             if(sff[0] == '#'){ while (!in.eof())       {       char c = in.get();  if (c == 10 || c == 13){    break;  }       } m->gobble(in); }
             else { //check for oligos file
+                bool ableToOpenSff = m->checkLocations(sff, inputDir);
+                
                 oligos = "";
             
                 // get rest of line in case there is a oligos filename
@@ -456,11 +414,18 @@ int SffMultipleCommand::readFile(vector<string>& sffFiles, vector<string>& oligo
                     if (c == 10 || c == 13 || c == -1){        break;  }
                     else if (c == 32 || c == 9){;} //space or tab
                     else {     oligos += c;  }
-                } 
-                sffFiles.push_back(sff);
-                if (oligos != "") { oligos = m->getFullPathName(oligos); allBlank = false;  }
-                if (oligos == "") { allFull = false;  }
-                oligosFiles.push_back(oligos); //will push a blank if there is not an oligos for this sff file
+                }
+                
+                if (ableToOpenSff) {
+                    sffFiles.push_back(sff);
+                    if (oligos != "") {
+                        bool ableToOpenOligos = m->checkLocations(oligos, inputDir);
+                        if (ableToOpenOligos) {  allBlank = false; }
+                        else { m->mothurOut("Can not find " + oligos + ". Ignoring.\n"); oligos = ""; }
+                    }
+                    if (oligos == "") { allFull = false;  }
+                    oligosFiles.push_back(oligos); //will push a blank if there is not an oligos for this sff file
+                }else { m->mothurOut("Can not find " + sff + ". Ignoring.\n"); }
             }
             m->gobble(in);
         }
@@ -490,8 +455,12 @@ int SffMultipleCommand::driver(vector<string> sffFiles, vector<string> oligosFil
             m->mothurOut("\n>>>>>\tProcessing " + sff + " (file " + toString(s+1) + " of " + toString(sffFiles.size()) + ")\t<<<<<\n");
             
             //run sff.info
+            string redirects = "";
+            if (inputDir != "")     { redirects += ", inputdir=" + inputDir;    }
+            if (outputDir != "")    { redirects += ", outputdir=" + outputDir;  }
             string inputString = "sff=" + sff + ", flow=T";
             if (trim) { inputString += ", trim=T"; }
+            if (redirects != "") { inputString += redirects; }
             m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
             m->mothurOut("Running command: sffinfo(" + inputString + ")"); m->mothurOutEndLine(); 
             m->mothurCalling = true;
@@ -507,6 +476,9 @@ int SffMultipleCommand::driver(vector<string> sffFiles, vector<string> oligosFil
             m->mothurCalling = false;
             m->mothurOutEndLine(); 
             
+            redirects = "";
+            if (outputDir != "")    { redirects += ", outputdir=" + outputDir;  }
+
             //run summary.seqs on the fasta file
             string fastaFile = "";
             map<string, vector<string> >::iterator it = filenames.find("fasta");
@@ -514,6 +486,7 @@ int SffMultipleCommand::driver(vector<string> sffFiles, vector<string> oligosFil
             else {  m->mothurOut("[ERROR]: sffinfo did not create a fasta file, quitting.\n"); m->control_pressed = true; break;  }
             
             inputString = "fasta=" + fastaFile + ", processors=1";
+            if (redirects != "") { inputString += redirects; }
             m->mothurOutEndLine(); 
             m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
             m->mothurCalling = true;
@@ -542,7 +515,7 @@ int SffMultipleCommand::driver(vector<string> sffFiles, vector<string> oligosFil
             inputString += ", maxhomop=" + toString(maxHomoP) + ", maxflows=" + toString(maxFlows) + ", minflows=" + toString(minFlows);
             inputString += ", pdiffs=" + toString(pdiffs) + ", bdiffs=" + toString(bdiffs) + ", ldiffs=" + toString(ldiffs) + ", sdiffs=" + toString(sdiffs);
             inputString += ", tdiffs=" + toString(tdiffs) + ", signal=" + toString(signal) + ", noise=" + toString(noise) + ", order=" + flowOrder + ", processors=1";
-            
+            if (redirects != "") { inputString += redirects; }
             m->mothurOutEndLine(); 
             m->mothurOut("Running command: trim.flows(" + inputString + ")"); m->mothurOutEndLine(); 
             m->mothurCalling = true;
@@ -589,7 +562,7 @@ int SffMultipleCommand::driver(vector<string> sffFiles, vector<string> oligosFil
             inputString += ", sigma=" +toString(sigma);
             inputString += ", mindelta=" + toString(minDelta);  
             inputString += ", order=" + flowOrder + ", processors=1";
-            
+            if (redirects != "") { inputString += redirects; }
             //run shhh.flows
             m->mothurOutEndLine(); 
             m->mothurOut("Running command: shhh.flows(" + inputString + ")"); m->mothurOutEndLine(); 
@@ -637,7 +610,7 @@ int SffMultipleCommand::driver(vector<string> sffFiles, vector<string> oligosFil
             if (keepFirst != 0) { inputString += ", keepfirst=" + toString(keepFirst); }
             if (removeLast != 0) { inputString += ", removelast=" + toString(removeLast); }
             inputString += ", processors=1";
-            
+            if (redirects != "") { inputString += redirects; }
             //run trim.seqs
             m->mothurOutEndLine(); 
             m->mothurOut("Running command: trim.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
@@ -688,6 +661,7 @@ int SffMultipleCommand::driver(vector<string> sffFiles, vector<string> oligosFil
             }
             
             inputString = "fasta=" + fastaFile + ", processors=1, name=" + nameFile;
+            if (redirects != "") { inputString += redirects; }
             m->mothurOutEndLine(); 
             m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
             m->mothurCalling = true;
@@ -711,13 +685,14 @@ int SffMultipleCommand::driver(vector<string> sffFiles, vector<string> oligosFil
                 m->appendFiles(nameFile, name);
                 if (makeGroup) { m->appendFiles(groupFile, group);  }
             }
-            count++;
+            
             
             for (it = filenames.begin(); it != filenames.end(); it++) {
                 for (int i = 0; i < (it->second).size(); i++) {
                     outputNames.push_back((it->second)[i]); outputTypes[it->first].push_back((it->second)[i]);
                 }
             }
+            count++;
         }
         
         return count;
index ee83d727ad3b3a1e910f4b3cb633eae53938f87a..1a492a82155fe0a7314b8c2ec54a9a4b37017909 100644 (file)
@@ -42,7 +42,8 @@ private:
                int end;
                linePair(int i, int j) : start(i), end(j) {}
        };
-
+    
+    string inputDir;
        string filename, outputDir, flowOrder, lookupFileName, minDelta;
        vector<string> outputNames;
        bool abort, trim, large, flip, allFiles, keepforward, append, makeGroup;
index 1e078b3f82080ec554462a12f2564b5a5f470d04..3be9bb16f6cab3275edf0bbaf5f4372d002ff7b1 100644 (file)
@@ -60,7 +60,7 @@ string SharedCommand::getOutputPattern(string type) {
     try {
         string pattern = "";
         
-        if (type == "shared") {  pattern = "[filename],shared"; } 
+        if (type == "shared") {  pattern = "[filename],shared-[filename],[distance],shared"; }
         else if (type == "rabund") {  pattern = "[filename],[group],rabund"; } 
         else if (type == "group") {  pattern = "[filename],[group],groups"; }
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
@@ -249,21 +249,9 @@ int SharedCommand::execute(){
        try {
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
-                       
-               //getting output filename
-        string filename = "";
-               if (listfile != "") { filename = listfile; }
-        else { filename = biomfile; }
-               
-               if (outputDir == "") { outputDir += m->hasPath(filename); }
-               
-        map<string, string> variables; 
-               variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
-               filename = getOutputFileName("shared",variables);
-               outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
-               
-        if (listfile != "") {  createSharedFromListGroup(filename);  }
-        else {   createSharedFromBiom(filename);  }
+        
+        if (listfile != "") {  createSharedFromListGroup();  }
+        else {   createSharedFromBiom();  }
         
         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); }  }
         
@@ -297,8 +285,17 @@ int SharedCommand::execute(){
        }
 }
 //**********************************************************************************************************************
-int SharedCommand::createSharedFromBiom(string filename) {
+int SharedCommand::createSharedFromBiom() {
        try {
+        //getting output filename
+        string filename = biomfile;
+               if (outputDir == "") { outputDir += m->hasPath(filename); }
+               
+        map<string, string> variables;
+               variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
+               filename = getOutputFileName("shared",variables);
+               outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
+        
         ofstream out;
         m->openOutputFile(filename, out);
         
@@ -458,7 +455,7 @@ int SharedCommand::createSharedFromBiom(string filename) {
         if (it == fileLines.end()) { m->mothurOut("[ERROR]: you file does not have a data provided.\n"); }
         else {
             string thisLine = it->second;
-            m->currentBinLabels = otuNames;
+            m->currentSharedBinLabels = otuNames;
             
             //read data
             vector<SharedRAbundVector*> lookup = readData(matrixFormat, thisLine, matrixElementType, groupNames, otuNames.size());
@@ -611,7 +608,7 @@ int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
                     for (int h = 0; h < diff; h++) { binLabel += "0"; }
                 }
                 binLabel += sbinNumber; 
-                if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
+                if (i < m->currentSharedBinLabels.size()) {  binLabel = m->currentSharedBinLabels[i]; }
                 
                 newBinLabels.push_back(binLabel);
             }
@@ -620,7 +617,7 @@ int SharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
         for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
         
         thislookup = newLookup;
-        m->currentBinLabels = newBinLabels;
+        m->currentSharedBinLabels = newBinLabels;
         
         return 0;
         
@@ -750,10 +747,8 @@ string SharedCommand::getTag(string& line) {
        }
 }
 //**********************************************************************************************************************
-int SharedCommand::createSharedFromListGroup(string filename) {
+int SharedCommand::createSharedFromListGroup() {
        try {
-        ofstream out;
-        m->openOutputFile(filename, out);
         
         GroupMap* groupMap = NULL;
         CountTable* countTable = NULL;
@@ -780,6 +775,20 @@ int SharedCommand::createSharedFromListGroup(string filename) {
             m->setGroups(Groups);
         }else { pickedGroups = true; }
         
+        
+        ofstream out;
+        string filename = "";
+        if (!pickedGroups) {
+            string filename = listfile;
+            if (outputDir == "") { outputDir += m->hasPath(filename); }
+            
+            map<string, string> variables;
+            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
+            filename = getOutputFileName("shared",variables);
+            outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
+            m->openOutputFile(filename, out);
+        }
+        
         //fill filehandles with neccessary ofstreams
         int i;
         ofstream* temp;
@@ -814,7 +823,7 @@ int SharedCommand::createSharedFromListGroup(string filename) {
         if (m->control_pressed) { 
             delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
             for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
-            out.close(); m->mothurRemove(filename); 
+            out.close(); if (!pickedGroups) { m->mothurRemove(filename); }
             for (int i=0; i<Groups.size(); i++) {  variables["[group]"] = Groups[i];
                 string rabundFIleName = getOutputFileName("rabund",variables);
                 m->mothurRemove(rabundFIleName);               }
@@ -831,7 +840,7 @@ int SharedCommand::createSharedFromListGroup(string filename) {
         if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) {  //if the user has not specified any groups and their files don't match exit with error
             m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); m->control_pressed = true;
             
-            out.close(); m->mothurRemove(filename); //remove blank shared file you made
+            out.close(); if (!pickedGroups) { m->mothurRemove(filename); } //remove blank shared file you made
             
             //delete memory
             for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { delete it3->second; }
@@ -845,9 +854,10 @@ int SharedCommand::createSharedFromListGroup(string filename) {
         if ((pickedGroups) && (m->groupMode == "group")) { //make new group file
             string groups = "";
             if (m->getNumGroups() < 4) {
-                for (int i = 0; i < m->getNumGroups(); i++) {
+                for (int i = 0; i < m->getNumGroups()-1; i++) {
                     groups += (m->getGroups())[i] + ".";
                 }
+                groups+=(m->getGroups())[m->getNumGroups()-1];
             }else { groups = "merge"; }
             map<string, string> variables; 
             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
@@ -877,7 +887,7 @@ int SharedCommand::createSharedFromListGroup(string filename) {
             if (m->control_pressed) { 
                 delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
-                out.close(); m->mothurRemove(filename); 
+                if (!pickedGroups) { out.close(); m->mothurRemove(filename); }
                 for (int i=0; i<Groups.size(); i++) {  variables["[group]"] = Groups[i];
                     string rabundFIleName = getOutputFileName("rabund",variables);
                     m->mothurRemove(rabundFIleName);           }
@@ -889,23 +899,42 @@ int SharedCommand::createSharedFromListGroup(string filename) {
                 lookup = SharedList->getSharedRAbundVector();
                 
                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                if (pickedGroups) { //check for otus with no seqs in them
-                    eliminateZeroOTUS(lookup);
-                }
                 
                 if (m->control_pressed) { 
                     delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                     for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
                     for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
-                    out.close(); m->mothurRemove(filename); 
+                    if (!pickedGroups) { out.close(); m->mothurRemove(filename); }
                     for (int i=0; i<Groups.size(); i++) {  variables["[group]"] = Groups[i];
                         string rabundFIleName = getOutputFileName("rabund",variables);
                         m->mothurRemove(rabundFIleName);               }
                     return 0; 
                 }
                 
-                if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
-                printSharedData(lookup, out); //prints info to the .shared file
+                //if picked groups must split the shared file by label
+                if (pickedGroups) {
+                    string filename = listfile;
+                    if (outputDir == "") { outputDir += m->hasPath(filename); }
+                    
+                    map<string, string> variables;
+                    variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
+                    variables["[distance]"] = lookup[0]->getLabel();
+                    filename = getOutputFileName("shared",variables);
+                    outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
+                    ofstream out2;
+                    m->openOutputFile(filename, out2);
+                    
+                    vector<string> savedLabels = m->currentSharedBinLabels;
+                    eliminateZeroOTUS(lookup);
+                    lookup[0]->printHeaders(out2);
+                    printSharedData(lookup, out2);
+                    out2.close();
+                    m->currentSharedBinLabels = savedLabels; //restore old labels
+
+                }else {
+                    if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
+                    printSharedData(lookup, out); //prints info to the .shared file
+                }
                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
                 
                 processedLabels.insert(SharedList->getLabel());
@@ -920,24 +949,43 @@ int SharedCommand::createSharedFromListGroup(string filename) {
                 
                 lookup = SharedList->getSharedRAbundVector();
                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                if (pickedGroups) { //check for otus with no seqs in them
-                    eliminateZeroOTUS(lookup);
-                }
-                
                 
                 if (m->control_pressed) { 
                     delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                     for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
                     for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;  }
-                    out.close(); m->mothurRemove(filename); 
+                    if (!pickedGroups) { out.close(); m->mothurRemove(filename); }
                     for (int i=0; i<Groups.size(); i++) {  variables["[group]"] = Groups[i];
                         string rabundFIleName = getOutputFileName("rabund",variables);
                         m->mothurRemove(rabundFIleName);               }
                     return 0; 
                 }
                 
-                if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
-                printSharedData(lookup, out); //prints info to the .shared file
+                //if picked groups must split the shared file by label
+                if (pickedGroups) {
+                    string filename = listfile;
+                    if (outputDir == "") { outputDir += m->hasPath(filename); }
+                    
+                    map<string, string> variables;
+                    variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
+                    variables["[distance]"] = lookup[0]->getLabel();
+                    filename = getOutputFileName("shared",variables);
+                    outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
+                    ofstream out2;
+                    m->openOutputFile(filename, out2);
+                    
+                    vector<string> savedLabels = m->currentSharedBinLabels;
+                    eliminateZeroOTUS(lookup);
+                    lookup[0]->printHeaders(out2);
+                    printSharedData(lookup, out2);
+                    out2.close();
+                    m->currentSharedBinLabels = savedLabels; //restore old labels
+                    
+                }else {
+                    if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
+                    printSharedData(lookup, out); //prints info to the .shared file
+                }
+
                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
                 
                 processedLabels.insert(SharedList->getLabel());
@@ -970,27 +1018,46 @@ int SharedCommand::createSharedFromListGroup(string filename) {
             
             lookup = SharedList->getSharedRAbundVector();
             m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-            if (pickedGroups) { //check for otus with no seqs in them
-                eliminateZeroOTUS(lookup);
-            }
             
             if (m->control_pressed) { 
                 if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {  delete it3->second;   }
-                out.close(); m->mothurRemove(filename); 
+                if (!pickedGroups) { out.close(); m->mothurRemove(filename); }
                 for (int i=0; i<Groups.size(); i++) {  variables["[group]"] = Groups[i];
                     string rabundFIleName = getOutputFileName("rabund",variables);
                     m->mothurRemove(rabundFIleName);           }
                 return 0; 
             }
             
-            if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
-            printSharedData(lookup, out); //prints info to the .shared file
+            //if picked groups must split the shared file by label
+            if (pickedGroups) {
+                string filename = listfile;
+                if (outputDir == "") { outputDir += m->hasPath(filename); }
+                
+                map<string, string> variables;
+                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(filename));
+                variables["[distance]"] = lookup[0]->getLabel();
+                filename = getOutputFileName("shared",variables);
+                outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
+                ofstream out2;
+                m->openOutputFile(filename, out2);
+                
+                vector<string> savedLabels = m->currentSharedBinLabels;
+                eliminateZeroOTUS(lookup);
+                lookup[0]->printHeaders(out2);
+                printSharedData(lookup, out2);
+                out2.close();
+                m->currentSharedBinLabels = savedLabels; //restore old labels
+                
+            }else {
+                if (!m->printedSharedHeaders) { lookup[0]->printHeaders(out); }
+                printSharedData(lookup, out); //prints info to the .shared file
+            }
             for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
             delete SharedList;
         }
         
-        out.close();
+        if (!pickedGroups) { out.close(); }
         
         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
             delete it3->second;
@@ -999,7 +1066,7 @@ int SharedCommand::createSharedFromListGroup(string filename) {
         if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; }
                
         if (m->control_pressed) { 
-            m->mothurRemove(filename); 
+            if (!pickedGroups) { m->mothurRemove(filename); }
             for (int i=0; i<Groups.size(); i++) {  variables["[group]"] = Groups[i];
                 string rabundFIleName = getOutputFileName("rabund",variables);
                 m->mothurRemove(rabundFIleName);               }
index 39ef9a7a1ebc17f830103cf33d84520aca975f97..a916c854280c74d035968bfc84ec4ab88a4f70e6 100644 (file)
@@ -45,8 +45,8 @@ private:
        bool isValidGroup(string, vector<string>);
        int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
        int ListGroupSameSeqs(vector<string>&, SharedListVector*);
-    int createSharedFromListGroup(string);
-    int createSharedFromBiom(string);
+    int createSharedFromListGroup();
+    int createSharedFromBiom();
     string getTag(string&);
     vector<string> readRows(string, int&); 
     int getDims(string, int&, int&);
index 2ce250ba908b4b45fa200b845275af22c3ca3be6..8fabc5b605b40f06ea1e7217a766f36bc8bfded9 100644 (file)
@@ -35,16 +35,70 @@ SharedListVector::SharedListVector(ifstream& f) : DataVector(), maxRank(0), numB
             countTable->readTable(m->getCountTableFile(), true, false);
         }
 
-               int hold;
-               string inputData;
-               f >> label >> hold;
-       
-               data.assign(hold, "");
+        int hold;
+        
+               //are we at the beginning of the file??
+               if (m->saveNextLabel == "") {
+                       f >> label;
+            
+                       //is this a shared file that has headers
+                       if (label == "label") {
+                               
+                               //gets "numOtus"
+                               f >> label; m->gobble(f);
+                               
+                               //eat rest of line
+                               label = m->getline(f); m->gobble(f);
+                               
+                               //parse labels to save
+                               istringstream iStringStream(label);
+                               m->listBinLabelsInFile.clear();
+                               while(!iStringStream.eof()){
+                                       if (m->control_pressed) { break; }
+                                       string temp;
+                                       iStringStream >> temp;  m->gobble(iStringStream);
+                    
+                                       m->listBinLabelsInFile.push_back(temp);
+                               }
+                               
+                               f >> label >> hold;
+                       }else {
+                //read in first row
+                f >> hold;
+                
+                //make binlabels because we don't have any
+                string snumBins = toString(hold);
+                m->listBinLabelsInFile.clear();
+                for (int i = 0; i < hold; i++) {
+                    //if there is a bin label use it otherwise make one
+                    string binLabel = "Otu";
+                    string sbinNumber = toString(i+1);
+                    if (sbinNumber.length() < snumBins.length()) {
+                        int diff = snumBins.length() - sbinNumber.length();
+                        for (int h = 0; h < diff; h++) { binLabel += "0"; }
+                    }
+                    binLabel += sbinNumber;
+                    m->listBinLabelsInFile.push_back(binLabel);
+                }
+            }
+            m->saveNextLabel = label;
+               }else {
+            f >> label >> hold;
+            m->saveNextLabel = label;
+        }
+        
+        binLabels.assign(m->listBinLabelsInFile.begin(), m->listBinLabelsInFile.begin()+hold);
                
+               data.assign(hold, "");
+               string inputData = "";
+        
                for(int i=0;i<hold;i++){
                        f >> inputData;
                        set(i, inputData);
                }
+               m->gobble(f);
+        
+        if (f.eof()) { m->saveNextLabel = ""; }
                
        }
        catch(exception& e) {
@@ -79,7 +133,59 @@ void SharedListVector::set(int binNumber, string seqNames){
 string SharedListVector::get(int index){ //returns a copy of data[index]; no range check on index is done here
        return data[index];
 }
+/***********************************************************************/
+
+// Replaces this list's bin labels with a copy of the labels passed in.
+void SharedListVector::setLabels(vector<string> labels){
+       try {
+               binLabels.assign(labels.begin(), labels.end());
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SharedListVector", "setLabels"); exit(1);
+       }
+}
 
+/***********************************************************************/
+// Returns the bin labels for this list. When binLabels holds fewer entries
+// than data, the first numBins labels are rebuilt/appended as the tag "Otu"
+// (or "PhyloType" when m->sharedHeaderMode is "tax") followed by a suffix
+// left-padded with zeros to the digit count of numBins.
+// Caveat: duplicate label names are possible here. Current callers do not
+// trigger that, but a list file that was subsampled and then added to could.
+vector<string> SharedListVector::getLabels(){
+    try {
+        string prefix = "Otu";
+        if (m->sharedHeaderMode == "tax") { prefix = "PhyloType"; }
+        
+        if (binLabels.size() < data.size()) {
+            const string widest = toString(numBins); //its length sets the padded width
+            
+            for (int bin = 0; bin < numBins; bin++) {
+                const bool known = (bin < binLabels.size());
+                
+                //existing label: reuse getSimpleLabel's result (presumably its bare number), else number by position
+                string number;
+                if (known) { number = m->getSimpleLabel(binLabels[bin]); }
+                else       { number = toString(bin+1); }
+                
+                //left pad with zeros so short suffixes reach the width of numBins
+                string padded = prefix;
+                if (number.length() < widest.length()) {
+                    padded.append(widest.length() - number.length(), '0');
+                }
+                padded += number;
+                
+                if (known) { binLabels[bin] = padded; }
+                else       { binLabels.push_back(padded); }
+            }
+        }
+        return binLabels;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SharedListVector", "getLabels");
+               exit(1);
+       }
+}
 /***********************************************************************/
 
 void SharedListVector::push_back(string seqNames){
@@ -237,6 +343,8 @@ SharedOrderVector* SharedListVector::getSharedOrderVector(){
 /***********************************************************************/
 SharedRAbundVector SharedListVector::getSharedRAbundVector(string groupName) {
        try {
+        m->currentSharedBinLabels = binLabels;
+        
                SharedRAbundVector rav(data.size());
                
                for(int i=0;i<numBins;i++){
@@ -272,6 +380,8 @@ SharedRAbundVector SharedListVector::getSharedRAbundVector(string groupName) {
 /***********************************************************************/
 vector<SharedRAbundVector*> SharedListVector::getSharedRAbundVector() {
        try {
+        m->currentSharedBinLabels = binLabels;
+        
                SharedUtil* util;
                util = new SharedUtil();
                vector<SharedRAbundVector*> lookup;  //contains just the groups the user selected
index 81779257346d4b25dbecf092a6c730162d7d4991..13fed9791fb347806f33723893aacd15eb535690 100644 (file)
@@ -33,7 +33,7 @@ public:
        SharedListVector();
        SharedListVector(int);
        SharedListVector(ifstream&);
-       SharedListVector(const SharedListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs){ groupmap = NULL; countTable = NULL; };
+       SharedListVector(const SharedListVector& lv) : DataVector(lv.label), data(lv.data), maxRank(lv.maxRank), numBins(lv.numBins), numSeqs(lv.numSeqs), binLabels(lv.binLabels) { groupmap = NULL; countTable = NULL; }; //copy ctor: copies label, data, counts and binLabels; groupmap/countTable are set to NULL, not taken from lv
        ~SharedListVector(){ if (groupmap != NULL) { delete groupmap; } if (countTable != NULL) { delete countTable; } }; //deletes groupmap and countTable only when they are non-NULL
        
        int getNumBins()                                                        {       return numBins;         }
@@ -42,6 +42,8 @@ public:
 
        void set(int, string);  
        string get(int);
+    vector<string> getLabels();
+    void setLabels(vector<string>);
        void push_back(string);
        void resize(int);
        void clear();
@@ -63,6 +65,7 @@ private:
        int maxRank;
        int numBins;
        int numSeqs;
+    vector<string> binLabels;
 
 };
 
index 5231b3cd654ff6f1321bbc12f2cf4fe602a30553..80b8fb9bd45a8596e91afe30fdec3984b2411de6 100644 (file)
@@ -55,13 +55,13 @@ SharedOrderVector::SharedOrderVector(ifstream& f) : DataVector() {  //reads in a
                                
                                //parse labels to save
                                istringstream iStringStream(label);
-                               m->binLabelsInFile.clear();
+                               m->sharedBinLabelsInFile.clear();
                                while(!iStringStream.eof()){
                                        if (m->control_pressed) { break; }
                                        string temp;
                                        iStringStream >> temp;  m->gobble(iStringStream);
                                        
-                                       m->binLabelsInFile.push_back(temp);
+                                       m->sharedBinLabelsInFile.push_back(temp);
                                }
                                
                                f >> label;
@@ -69,7 +69,7 @@ SharedOrderVector::SharedOrderVector(ifstream& f) : DataVector() {  //reads in a
                }else { label = m->saveNextLabel; }
                
                //reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling
-               m->currentBinLabels = m->binLabelsInFile;
+               m->currentSharedBinLabels = m->sharedBinLabelsInFile;
                
                //read in first row since you know there is at least 1 group.
                f >> groupN >> num;
index 71c868b158ab41c2b8d72e7dac301998e2c39537..9eeb0f21bf3626620806c49372ab36efd0ff0831 100644 (file)
@@ -62,13 +62,13 @@ SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f) : DataVector(), ma
                                
                                //parse labels to save
                                istringstream iStringStream(label);
-                               m->binLabelsInFile.clear();
+                               m->sharedBinLabelsInFile.clear();
                                while(!iStringStream.eof()){
                                        if (m->control_pressed) { break; }
                                        string temp;
                                        iStringStream >> temp;  m->gobble(iStringStream);
                     
-                                       m->binLabelsInFile.push_back(temp);
+                                       m->sharedBinLabelsInFile.push_back(temp);
                                }
                                
                                f >> label >> groupN >> num;
@@ -78,7 +78,7 @@ SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f) : DataVector(), ma
                 
                 //make binlabels because we don't have any
                 string snumBins = toString(num);
-                m->binLabelsInFile.clear();
+                m->sharedBinLabelsInFile.clear();
                 for (int i = 0; i < num; i++) {  
                     //if there is a bin label use it otherwise make one
                     string binLabel = "Otu";
@@ -88,7 +88,7 @@ SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f) : DataVector(), ma
                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
                     }
                     binLabel += sbinNumber;
-                    m->binLabelsInFile.push_back(binLabel);
+                    m->sharedBinLabelsInFile.push_back(binLabel);
                 }
             }
                }else { 
@@ -99,7 +99,7 @@ SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f) : DataVector(), ma
         }
                
                //reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling
-               m->currentBinLabels = m->binLabelsInFile;
+               m->currentSharedBinLabels = m->sharedBinLabelsInFile;
                
                holdLabel = label;
                
@@ -295,7 +295,7 @@ void SharedRAbundFloatVector::printHeaders(ostream& output){
                                        for (int h = 0; h < diff; h++) { binLabel += "0"; }
                                }
                                binLabel += sbinNumber;
-                               if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
+                               if (i < m->currentSharedBinLabels.size()) {  binLabel = m->currentSharedBinLabels[i]; }
                                
                                output << binLabel << '\t'; 
                        }
@@ -310,7 +310,7 @@ void SharedRAbundFloatVector::printHeaders(ostream& output){
                                        for (int h = 0; h < diff; h++) { binLabel += "0"; }
                                }
                                binLabel += sbinNumber;
-                               if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
+                               if (i < m->currentSharedBinLabels.size()) {  binLabel = m->currentSharedBinLabels[i]; }
                                
                                output << binLabel << '\t'; 
                        }
@@ -318,7 +318,7 @@ void SharedRAbundFloatVector::printHeaders(ostream& output){
                        output << endl;
                }
                
-               m->printedHeaders = true;
+               m->printedSharedHeaders = true;
        }
        catch(exception& e) {
                m->errorOut(e, "SharedRAbundVector", "printHeaders");
@@ -550,7 +550,7 @@ int SharedRAbundFloatVector::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>&
                                        for (int h = 0; h < diff; h++) { binLabel += "0"; }
                                }
                                binLabel += sbinNumber; 
-                               if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
+                               if (i < m->currentSharedBinLabels.size()) {  binLabel = m->currentSharedBinLabels[i]; }
                                
                                newBinLabels.push_back(binLabel);
                        }
@@ -559,7 +559,7 @@ int SharedRAbundFloatVector::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>&
                for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
                
                thislookup = newLookup;
-               m->currentBinLabels = newBinLabels;
+               m->currentSharedBinLabels = newBinLabels;
                
                return 0;
                
index 9b2bb678ca7688777aa87a9f60015baba5d336d9..0f2e48e8ac7bbdc18b10f15f1b764aed7ee58890 100644 (file)
@@ -86,13 +86,13 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0),
                                
                                //parse labels to save
                                istringstream iStringStream(label);
-                               m->binLabelsInFile.clear();
+                               m->sharedBinLabelsInFile.clear();
                                while(!iStringStream.eof()){
                                        if (m->control_pressed) { break; }
                                        string temp;
                                        iStringStream >> temp;  m->gobble(iStringStream);
                
-                                       m->binLabelsInFile.push_back(temp);
+                                       m->sharedBinLabelsInFile.push_back(temp);
                                }
                                
                                f >> label >> groupN >> num;
@@ -102,7 +102,7 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0),
                 
                 //make binlabels because we don't have any
                 string snumBins = toString(num);
-                m->binLabelsInFile.clear();
+                m->sharedBinLabelsInFile.clear();
                 for (int i = 0; i < num; i++) {  
                     //if there is a bin label use it otherwise make one
                     string binLabel = "Otu";
@@ -112,7 +112,7 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0),
                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
                     }
                     binLabel += sbinNumber;
-                    m->binLabelsInFile.push_back(binLabel);
+                    m->sharedBinLabelsInFile.push_back(binLabel);
                 }
             }
                }else { 
@@ -125,7 +125,7 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0),
         }
                
                //reset labels, currentLabels may have gotten changed as otus were eliminated because of group choices or sampling
-               m->currentBinLabels = m->binLabelsInFile;
+               m->currentSharedBinLabels = m->sharedBinLabelsInFile;
                
                holdLabel = label;
                
@@ -385,7 +385,7 @@ void SharedRAbundVector::printHeaders(ostream& output){
                                        for (int h = 0; h < diff; h++) { binLabel += "0"; }
                                }
                                binLabel += sbinNumber;
-                               if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
+                               if (i < m->currentSharedBinLabels.size()) {  binLabel = m->currentSharedBinLabels[i]; }
                                
                                output << binLabel << '\t'; 
                        }
@@ -400,14 +400,14 @@ void SharedRAbundVector::printHeaders(ostream& output){
                                        for (int h = 0; h < diff; h++) { binLabel += "0"; }
                                }
                                binLabel += sbinNumber;
-                               if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
+                               if (i < m->currentSharedBinLabels.size()) {  binLabel = m->currentSharedBinLabels[i]; }
                                
                                output << binLabel << '\t'; 
                        }
                        
                        output << endl;
                }
-               m->printedHeaders = true;
+               m->printedSharedHeaders = true;
        }
        catch(exception& e) {
                m->errorOut(e, "SharedRAbundVector", "printHeaders");
@@ -533,7 +533,7 @@ int SharedRAbundVector::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislooku
                                                for (int h = 0; h < diff; h++) { binLabel += "0"; }
                                        }
                                        binLabel += sbinNumber; 
-                                       if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
+                                       if (i < m->currentSharedBinLabels.size()) {  binLabel = m->currentSharedBinLabels[i]; }
                                        
                                        newBinLabels.push_back(binLabel);
                                }
@@ -542,7 +542,7 @@ int SharedRAbundVector::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislooku
                        for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
                        
                        thislookup = newLookup;
-                       m->currentBinLabels = newBinLabels;
+                       m->currentSharedBinLabels = newBinLabels;
                        
                        return 0;
                        
index b53b09afc0091e91baf6609aa9c8d5b0ab528b63..204b61c64d779ebe2946d537e5803d9ed36d3d76 100644 (file)
@@ -57,7 +57,7 @@ EstOutput SharedSobsCS::getValues(vector<SharedRAbundVector*> shared, vector<str
                        }
                        
                        //they are shared
-                       if (sharedByAll == true) {  observed++;  labels.push_back(m->currentBinLabels[i]); }
+                       if (sharedByAll == true) {  observed++;  labels.push_back(m->currentSharedBinLabels[i]); }
                }
         
                data[0] = observed;
index a7f5d78ad37c4c515821fd4686fb769d74ecd68f..4c9049a8ebf6247a5be965f21317aba881e82e51 100644 (file)
@@ -317,7 +317,7 @@ int SparccCommand::process(vector<SharedRAbundVector*>& lookup){
         cout.setf(ios::showpoint);
         
         vector<vector<float> > sharedVector;
-        vector<string> otuNames = m->currentBinLabels;
+        vector<string> otuNames = m->currentSharedBinLabels;
         
         //fill sharedVector to pass to CalcSparcc
         for (int i = 0; i < lookup.size(); i++) {
index 0fff42804cf702b0ba74cbbe245b51a0f91c1bff..47372cce680a9fa907200db12926194afe8c6b00 100644 (file)
@@ -501,17 +501,19 @@ int SplitAbundCommand::writeList(ListVector* thisList, string tag, int numRareBi
             variables["[tag]"] = tag;
             variables["[tag2]"] = "rare";
                        string rare = getOutputFileName("list",variables);
-                       m->openOutputFile(rare, rout);
+                       m->openOutputFile(rare+".temp", rout);
                        outputNames.push_back(rare); outputTypes["list"].push_back(rare);
                        
             variables["[tag2]"] = "abund";
                        string abund = getOutputFileName("list",variables);
-                       m->openOutputFile(abund, aout);
+                       m->openOutputFile(abund+".temp", aout);
                        outputNames.push_back(abund); outputTypes["list"].push_back(abund);
 
                        if (rareNames.size() != 0)      {  rout << thisList->getLabel() << '\t' << numRareBins << '\t';         }
                        if (abundNames.size() != 0) {   aout << thisList->getLabel() << '\t' << numAbundBins << '\t';   }
-
+            
+            vector<string> binLabels = thisList->getLabels();
+            string rareHeader = "label\tnumOtus\t"; string abundHeader = "label\tnumOtus\t";
                        for (int i = 0; i < thisList->getNumBins(); i++) {
                                if (m->control_pressed) { break; }
                        
@@ -525,8 +527,8 @@ int SplitAbundCommand::writeList(ListVector* thisList, string tag, int numRareBi
                     for (int j = 0; j < names.size(); j++) {  size += ct.getNumSeqs(names[j]); }
                 }
                        
-                               if (size <= cutoff) {  rout << bin << '\t';  }
-                               else                            {  aout << bin << '\t'; }
+                               if (size <= cutoff) {  rout << bin << '\t';  rareHeader += binLabels[i] + '\t'; }
+                               else                            {  aout << bin << '\t';  abundHeader += binLabels[i] + '\t'; }
                        }
                        
                        if (rareNames.size() != 0)      { rout << endl; }
@@ -534,6 +536,21 @@ int SplitAbundCommand::writeList(ListVector* thisList, string tag, int numRareBi
                        
                        rout.close();
                        aout.close();
+            
+            //add headers
+            ofstream r;
+            m->openOutputFile(rare, r);
+            r << rareHeader << endl;
+            r.close();
+            m->appendFiles(rare+".temp", rare);
+            m->mothurRemove(rare+".temp");
+            
+            ofstream a;
+            m->openOutputFile(abund, a);
+            a << abundHeader << endl;
+            a.close();
+            m->appendFiles(abund+".temp", abund);
+            m->mothurRemove(abund+".temp");
                        
                }else{ //parse names by abundance and group
                        string fileroot =  outputDir + m->getRootName(m->getSimpleName(listfile));
@@ -564,14 +581,16 @@ int SplitAbundCommand::writeList(ListVector* thisList, string tag, int numRareBi
                        }
                        
                        map<string, string> groupVector;
+            map<string, string> groupLabels;
                        map<string, string>::iterator itGroup;
                        map<string, int> groupNumBins;
                
                        for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
                                groupNumBins[it3->first] = 0;
                                groupVector[it3->first] = "";
+                groupLabels[it3->first] = "label\tnumOtus\t";
                        }
-               
+            vector<string> binLabels = thisList->getLabels();
                        for (int i = 0; i < thisList->getNumBins(); i++) {
                                if (m->control_pressed) { break; }
                        
@@ -622,12 +641,14 @@ int SplitAbundCommand::writeList(ListVector* thisList, string tag, int numRareBi
                        
                        
                                for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
-                                       groupVector[itGroup->first] +=  itGroup->second + '\t'; 
+                                       groupVector[itGroup->first] +=  itGroup->second + '\t';
+                    groupLabels[itGroup->first] += binLabels[i] + '\t';
                                }
                        }
                        
                        //end list vector
                        for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
+                (*(filehandles[it3->first])) << groupLabels[it3->first] << endl;
                                (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
                                (*(filehandles[it3->first])).close();
                                delete it3->second;
diff --git a/sracommand.cpp b/sracommand.cpp
new file mode 100644 (file)
index 0000000..65ccda4
--- /dev/null
@@ -0,0 +1,370 @@
+//
+//  sracommand.cpp
+//  Mothur
+//
+//  Created by SarahsWork on 10/28/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "sracommand.h"
+#include "sffinfocommand.h"
+#include "parsefastaqcommand.h"
+
+//**********************************************************************************************************************
+vector<string> SRACommand::setParameters(){
+    // Registers every parameter accepted by the sra command on the
+    // `parameters` list and returns their names in registration order.
+    try {
+        // input files
+        CommandParameter psff("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false);
+        parameters.push_back(psff);
+        CommandParameter pgroup("group", "InputTypes", "", "", "groupOligos", "none", "none","sra",false,false);
+        parameters.push_back(pgroup);
+        CommandParameter poligos("oligos", "InputTypes", "", "", "groupOligos", "none", "none","sra",false,false);
+        parameters.push_back(poligos);
+        CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false);
+        parameters.push_back(pfile);
+        CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false);
+        parameters.push_back(pfastq);
+
+        // sequencing platform: exactly one of the listed options may be chosen
+        CommandParameter pplatform("platform", "Multiple", "454-???-???", "454", "", "", "","",false,false);
+        parameters.push_back(pplatform);
+
+        // allowed differences, each defaulting to "0"
+        CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false);
+        parameters.push_back(ppdiffs);
+        CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false);
+        parameters.push_back(pbdiffs);
+        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false);
+        parameters.push_back(pldiffs);
+        CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false);
+        parameters.push_back(psdiffs);
+        CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false);
+        parameters.push_back(ptdiffs);
+
+        // every command must have inputdir and outputdir so that mothur users can redirect input and output files
+        CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false);
+        parameters.push_back(pinputdir);
+        CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false);
+        parameters.push_back(poutputdir);
+
+        // collect the registered names for the caller
+        vector<string> names;
+        for (size_t idx = 0; idx < parameters.size(); ++idx) {
+            names.push_back(parameters[idx].name);
+        }
+        return names;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "setParameters");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+string SRACommand::getHelpString(){
+    // Returns the help text printed by help(). The parameter list mirrors the
+    // names registered in setParameters(), and the usage line names the sra
+    // command itself rather than the "new" placeholder of the command template.
+    try {
+        string helpString = "";
+        helpString += "The sra command creates a sequence read archive from sff or fastq files.\n";
+        helpString += "The sra command parameters are: sff, fastq, file, group, oligos, platform, pdiffs, bdiffs, ldiffs, sdiffs and tdiffs.\n";
+        helpString += "The sff parameter is used to provide an sff file to be split by sample.\n";
+        helpString += "The fastq parameter is used to provide a fastq file to be split by sample.\n";
+        helpString += "The group and oligos parameters are used to provide the group file or oligos file used to split the sff or fastq file. You may use only one of them.\n";
+        helpString += "The platform parameter is used to specify the sequencing platform. The default is 454.\n";
+        helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
+        helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
+        helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+        helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
+        helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
+
+        helpString += "The sra command should be in the following format: \n";
+        helpString += "sra(sff=yourSffFile, oligos=yourOligosFile)\n";
+        return helpString;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "getHelpString");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+string SRACommand::getOutputPattern(string type) {
+    // Maps an output type to its file name pattern. Only "sra" is defined;
+    // any other type logs an error, sets control_pressed and yields "".
+    try {
+        if (type != "sra") {
+            m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n");
+            m->control_pressed = true;
+            return "";
+        }
+
+        return "[filename],sra";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+SRACommand::SRACommand(){
+    // Option-less construction: flags the command as aborted with calledHelp
+    // set, registers the parameters and creates an empty "sra" output type.
+    try {
+        calledHelp = true;
+        abort = true;
+
+        setParameters();
+
+        outputTypes["sra"] = vector<string>();
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "SRACommand");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+SRACommand::SRACommand(string option)  {
+    // Builds the command from the user's option string.
+    //   "help" / "citation" print their text and mark the command aborted.
+    //   Anything else is parsed and validated; abort is set when a parameter
+    //   is invalid, a file cannot be opened, or required inputs are missing.
+    try {
+        abort = false; calledHelp = false;
+
+        //allow user to run help
+        if(option == "help") { help(); abort = true; calledHelp = true; }
+        else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+        else {
+            //valid parameters for this command
+            vector<string> myArray = setParameters();
+
+            OptionParser parser(option);
+            map<string,string> parameters = parser.getParameters();
+
+            ValidParameters validParameter;
+            map<string,string>::iterator it;
+            //check to make sure all parameters are valid for command
+            for (it = parameters.begin(); it != parameters.end(); it++) {
+                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+            }
+
+            //if the user changes the input directory command factory will send this info to us in the output parameter
+            string inputDir = validParameter.validFile(parameters, "inputdir", false);
+            if (inputDir == "not found"){ inputDir = ""; }
+            else {
+                //for each file parameter the user gave without a path, add inputdir. else leave path alone.
+                const char* fileParameters[] = { "sff", "fastq", "file", "group", "oligos" };
+                const int numFileParameters = sizeof(fileParameters) / sizeof(fileParameters[0]);
+                for (int i = 0; i < numFileParameters; i++) {
+                    it = parameters.find(fileParameters[i]);
+                    if (it != parameters.end()) {
+                        string path = m->hasPath(it->second);
+                        if (path == "") { it->second = inputDir + it->second; }
+                    }
+                }
+            }
+
+            //check for parameters
+            fastqfile = validParameter.validFile(parameters, "fastq", true);
+            if (fastqfile == "not open") { fastqfile = "";  abort = true; }
+            else if (fastqfile == "not found") { fastqfile = ""; }
+
+            sfffile = validParameter.validFile(parameters, "sff", true);
+            if (sfffile == "not open") {  sfffile = "";  abort = true; }
+            else if (sfffile == "not found") { sfffile = ""; }
+
+            //validated once only; a second validation would just repeat the work
+            file = validParameter.validFile(parameters, "file", true);
+            if (file == "not open") {  file = "";  abort = true; }
+            else if (file == "not found") { file = ""; }
+
+            groupfile = validParameter.validFile(parameters, "group", true);
+            if (groupfile == "not open") {  groupfile = "";  abort = true; }
+            else if (groupfile == "not found") { groupfile = ""; }
+            else {  m->setGroupFile(groupfile); }
+
+            oligosfile = validParameter.validFile(parameters, "oligos", true);
+            if (oligosfile == "not found") { oligosfile = ""; }
+            //reset the name like the other file parameters so "not open" is never treated as a file name below
+            else if (oligosfile == "not open") { oligosfile = "";  abort = true; }
+            else {  m->setOligosFile(oligosfile); }
+
+            //at least one of file, sff or fastq is required
+            if ((fastqfile == "") && (sfffile == "") && (file == "")) {
+                m->mothurOut("[ERROR]: You must provide a file, sff file or fastq file before you can use the sra command."); m->mothurOutEndLine(); abort = true;
+            }
+
+            if ((groupfile != "") && (oligosfile != "")) {
+                m->mothurOut("[ERROR]: You may not use a group file and an oligos file, only one."); m->mothurOutEndLine(); abort = true;
+            }
+
+            //splitting an sff or fastq file needs an oligos or group file; fall back to mothur's current ones
+            if ((fastqfile != "") || (sfffile != "")) {
+                if ((groupfile == "") && (oligosfile == "")) {
+                    oligosfile = m->getOligosFile();
+                    if (oligosfile != "") {  m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter."); m->mothurOutEndLine(); }
+                    else {
+                        groupfile = m->getGroupFile();
+                        if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+                        else {
+                            m->mothurOut("[ERROR]: You must provide groupfile or oligos file if splitting a fastq or sff file."); m->mothurOutEndLine(); abort = true;
+                        }
+                    }
+                }
+            }
+
+            //use only one Multiple type
+            platform = validParameter.validFile(parameters, "platform", false);
+            if (platform == "not found") { platform = "454"; }
+
+            if ((platform == "454") || (platform == "????") || (platform == "????") || (platform == "????")) { }
+            else { m->mothurOut("Not a valid platform option.  Valid platform options are 454, ...."); m->mothurOutEndLine(); abort = true; }
+
+            string temp = validParameter.validFile(parameters, "bdiffs", false);
+            if (temp == "not found") { temp = "0"; }
+            m->mothurConvert(temp, bdiffs);
+
+            temp = validParameter.validFile(parameters, "pdiffs", false);
+            if (temp == "not found") { temp = "0"; }
+            m->mothurConvert(temp, pdiffs);
+
+            temp = validParameter.validFile(parameters, "ldiffs", false);
+            if (temp == "not found") { temp = "0"; }
+            m->mothurConvert(temp, ldiffs);
+
+            temp = validParameter.validFile(parameters, "sdiffs", false);
+            if (temp == "not found") { temp = "0"; }
+            m->mothurConvert(temp, sdiffs);
+
+            //tdiffs defaults to the sum of the individual allowances
+            temp = validParameter.validFile(parameters, "tdiffs", false);
+            if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
+            m->mothurConvert(temp, tdiffs);
+
+            if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
+        }
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "SRACommand");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+int SRACommand::execute(){
+    // Runs the command: returns 0 after help/citation, 2 when construction
+    // aborted, otherwise parses the first available input (file, then sff,
+    // then fastq) into per-sample files and lists the output file names.
+    try {
+
+        if (abort == true) { if (calledHelp) { return 0; }  return 2; }
+
+        //parse files
+        vector<string> filesBySample;
+
+        if (file != "")             {       readFile(filesBySample);        }
+        else if (sfffile != "")     {       parseSffFile(filesBySample);    }
+        else if (fastqfile != "")   {       parseFastqFile(filesBySample);  }
+
+        //output files created by command
+        m->mothurOutEndLine();
+        m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+        for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+        m->mothurOutEndLine();
+        return 0;
+
+    }
+    catch(exception& e) {
+        //report this function, not the constructor, as the failure site
+        m->errorOut(e, "SRACommand", "execute");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+int SRACommand::readFile(vector<string>& files){
+    // Not implemented yet: leaves `files` untouched and reports success.
+    try {
+        const int status = 0;
+        return status;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SRACommand", "readFile");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+int SRACommand::parseSffFile(vector<string>& files){
+    // Runs sffinfo on sfffile (with the group file, or the oligos file plus
+    // any non-zero diffs settings) and stores its "sff" output files in
+    // `files`. Sets control_pressed when sffinfo reports no "sff" output.
+    // Always returns 0.
+    try {
+        //run sffinfo to parse sff file into individual sampled sff files
+        string commandString = "sff=" + sfffile;
+        if (groupfile != "") { commandString += ", group=" + groupfile; }
+        else if (oligosfile != "") {
+            commandString += ", oligos=" + oligosfile;
+            //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
+            if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
+            if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
+            if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
+            if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
+            if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
+        }
+        m->mothurOutEndLine();
+        m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+        m->mothurOut("Running command: sffinfo(" + commandString + ")"); m->mothurOutEndLine();
+        m->mothurCalling = true;
+
+        Command* sffinfoCommand = new SffInfoCommand(commandString);
+        sffinfoCommand->execute();
+
+        map<string, vector<string> > filenames = sffinfoCommand->getOutputFiles();
+        map<string, vector<string> >::iterator it = filenames.find("sff");
+        if (it != filenames.end()) { files = it->second; }
+        else { m->control_pressed = true; } // error in sffinfo
+
+        delete sffinfoCommand;
+        m->mothurCalling = false;
+        m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+
+        return 0;
+    }
+    catch(exception& e) {
+        //report this function rather than readFile as the failure site
+        m->errorOut(e, "SRACommand", "parseSffFile");
+        exit(1);
+    }
+}
+
+//**********************************************************************************************************************
+int SRACommand::parseFastqFile(vector<string>& files){
+    // Runs fastq.info on fastqfile (with the group file, or the oligos file
+    // plus any non-zero diffs settings) and stores its "fastq" output files
+    // in `files`. Sets control_pressed when fastq.info reports no "fastq"
+    // output. Always returns 0.
+    try {
+
+        //run fastq.info to parse fastq file into individual sampled fastq files
+        string commandString = "fastq=" + fastqfile;
+        if (groupfile != "") { commandString += ", group=" + groupfile; }
+        else if (oligosfile != "") {
+            commandString += ", oligos=" + oligosfile;
+            //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
+            if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
+            if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
+            if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
+            if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
+            if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
+        }
+        m->mothurOutEndLine();
+        m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+        m->mothurOut("Running command: fastq.info(" + commandString + ")"); m->mothurOutEndLine();
+        m->mothurCalling = true;
+
+        Command* fastqinfoCommand = new ParseFastaQCommand(commandString);
+        fastqinfoCommand->execute();
+
+        map<string, vector<string> > filenames = fastqinfoCommand->getOutputFiles();
+        map<string, vector<string> >::iterator it = filenames.find("fastq");
+        if (it != filenames.end()) { files = it->second; }
+        else { m->control_pressed = true; } // error in fastq.info
+
+        delete fastqinfoCommand;
+        m->mothurCalling = false;
+        m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+
+        return 0;
+    }
+    catch(exception& e) {
+        //report this function rather than readFile as the failure site
+        m->errorOut(e, "SRACommand", "parseFastqFile");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+
+
diff --git a/sracommand.h b/sracommand.h
new file mode 100644 (file)
index 0000000..7bf764b
--- /dev/null
@@ -0,0 +1,52 @@
+//
+//  sracommand.h
+//  Mothur
+//
+//  Created by SarahsWork on 10/28/13.
+//  Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_sracommand_h
+#define Mothur_sracommand_h
+
+#include "command.hpp"
+
+
+/**************************************************************************************************/
+
+class SRACommand : public Command {  // the "sra" command: create a Sequence Read Archive from sff or fastq input
+public:
+    SRACommand(string);  // parses and validates a user option string; sets abort on invalid input
+    SRACommand();  // option-less form; sets abort and calledHelp to true
+    ~SRACommand(){}
+    
+    vector<string> setParameters();  // registers the accepted parameters and returns their names
+    string getCommandName()                    { return "sra";                 }
+    string getCommandCategory()                { return "Sequence Processing";         }
+    
+    string getOutputPattern(string);  // file name pattern for an output type; only "sra" is defined
+    
+       string getHelpString();
+    string getCitation() { return "http://www.mothur.org/wiki/sra"; }
+    string getDescription()            { return "create a Sequence Read Archive / SRA"; }
+    
+    int execute();  // returns 0 after help/citation or a normal run, 2 when construction aborted
+    void help() { m->mothurOut(getHelpString()); }
+    
+private:
+    bool abort;  // when true, execute() returns without processing any input
+    int tdiffs, bdiffs, pdiffs, sdiffs, ldiffs;  // allowed differences: total, barcode, primer, spacer, linker
+    string sfffile, fastqfile, platform, outputDir, groupfile, file, oligosfile;  // NOTE(review): outputDir is not read in the sracommand.cpp shown in this commit - confirm
+    vector<string> outputNames;  // output file names listed at the end of execute()
+    
+    int readFile(vector<string>&);  // stub in this commit: returns 0 without touching its argument
+    int parseSffFile(vector<string>&);  // runs sffinfo and stores its "sff" output files in the argument
+    int parseFastqFile(vector<string>&);  // runs fastq.info and stores its "fastq" output files in the argument
+    
+};
+
+/**************************************************************************************************/
+
+
+
+#endif
index 392f97bd51f1d2ac67d6d0a810a408ab507fdcee..2eb1d497494849e316b58103f84a2c95a475a832 100644 (file)
@@ -108,7 +108,7 @@ vector<string> SubSample::getSample(vector<SharedRAbundVector*>& thislookup, int
        try {
                
                //save mothurOut's binLabels to restore for next label
-               vector<string> saveBinLabels = m->currentBinLabels;
+               vector<string> saveBinLabels = m->currentSharedBinLabels;
                
                int numBins = thislookup[0]->getNumBins();
                for (int i = 0; i < thislookup.size(); i++) {           
@@ -136,7 +136,7 @@ vector<string> SubSample::getSample(vector<SharedRAbundVector*>& thislookup, int
                                
                                for (int j = 0; j < size; j++) {
                                        
-                                       if (m->control_pressed) {  return m->currentBinLabels; }
+                                       if (m->control_pressed) {  return m->currentSharedBinLabels; }
                                        
                                        int bin = order.get(j);
                                        
@@ -149,11 +149,11 @@ vector<string> SubSample::getSample(vector<SharedRAbundVector*>& thislookup, int
                //subsampling may have created some otus with no sequences in them
                eliminateZeroOTUS(thislookup);
         
-               if (m->control_pressed) { return m->currentBinLabels; }
+               if (m->control_pressed) { return m->currentSharedBinLabels; }
                
                //save mothurOut's binLabels to restore for next label
-        vector<string> subsampleBinLabels = m->currentBinLabels;
-               m->currentBinLabels = saveBinLabels;
+        vector<string> subsampleBinLabels = m->currentSharedBinLabels;
+               m->currentSharedBinLabels = saveBinLabels;
                
                return subsampleBinLabels;
                
@@ -200,7 +200,7 @@ int SubSample::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
                                        for (int h = 0; h < diff; h++) { binLabel += "0"; }
                                }
                                binLabel += sbinNumber; 
-                               if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
+                               if (i < m->currentSharedBinLabels.size()) {  binLabel = m->currentSharedBinLabels[i]; }
                                
                                newBinLabels.push_back(binLabel);
                        }
@@ -210,7 +210,7 @@ int SubSample::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
                thislookup.clear();
                
                thislookup = newLookup;
-               m->currentBinLabels = newBinLabels;
+               m->currentSharedBinLabels = newBinLabels;
                
                return 0;
                
index 6c0d1bcbe71a3c3c2a7fccda9695f7aef6aec25b..3e954759b078ff9371f5c28b64b3de86ef3c89cc 100644 (file)
@@ -76,7 +76,7 @@ string SubSampleCommand::getOutputPattern(string type) {
         else if (type == "name")        {   pattern = "[filename],subsample,[extension]";    }
         else if (type == "group")       {   pattern = "[filename],subsample,[extension]";    }
         else if (type == "count")       {   pattern = "[filename],subsample,[extension]";    }
-        else if (type == "list")        {   pattern = "[filename],subsample,[extension]";    }
+        else if (type == "list")        {   pattern = "[filename],[distance],subsample,[extension]";    }
         else if (type == "taxonomy")    {   pattern = "[filename],subsample,[extension]";    }
         else if (type == "shared")      {   pattern = "[filename],[distance],subsample,[extension]";    }
         else if (type == "rabund")      {   pattern = "[filename],subsample,[extension]";    }
@@ -947,7 +947,7 @@ int SubSampleCommand::processShared(vector<SharedRAbundVector*>& thislookup) {
        try {
                
                //save mothurOut's binLabels to restore for next label
-               vector<string> saveBinLabels = m->currentBinLabels;
+               vector<string> saveBinLabels = m->currentSharedBinLabels;
                
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
@@ -965,7 +965,7 @@ int SubSampleCommand::processShared(vector<SharedRAbundVector*>& thislookup) {
                m->openOutputFile(outputFileName, out);
                outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
                
-        m->currentBinLabels = subsampledLabels;
+        m->currentSharedBinLabels = subsampledLabels;
         
                thislookup[0]->printHeaders(out);
                
@@ -977,7 +977,7 @@ int SubSampleCommand::processShared(vector<SharedRAbundVector*>& thislookup) {
         
         
         //save mothurOut's binLabels to restore for next label
-               m->currentBinLabels = saveBinLabels;
+               m->currentSharedBinLabels = saveBinLabels;
                
                return 0;
                
@@ -993,16 +993,6 @@ int SubSampleCommand::getSubSampleList() {
         
                if (namefile != "") { m->readNames(namefile, nameMap); }
         
-               string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
-               map<string, string> variables; 
-        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
-        variables["[extension]"] = m->getExtension(listfile);
-               string outputFileName = getOutputFileName("list", variables);   
-               ofstream out;
-               m->openOutputFile(outputFileName, out);
-               outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
-               
                InputData* input = new InputData(listfile, "list");
                ListVector* list = input->getListVector();
                string lastLabel = list->getLabel();
@@ -1029,7 +1019,7 @@ int SubSampleCommand::getSubSampleList() {
                        //file mismatch quit
                        if (list->getNumSeqs() != groupMap.getNumSeqs()) { 
                                m->mothurOut("[ERROR]: your list file contains " + toString(list->getNumSeqs()) + " sequences, and your groupfile contains " + toString(groupMap.getNumSeqs()) + ", please correct."); 
-                               m->mothurOutEndLine(); delete list; delete input; out.close(); outGroup.close(); return 0;
+                               m->mothurOutEndLine(); delete list; delete input;  outGroup.close(); return 0;
                        }                       
                }else if (countfile != "") {
             if (ct.hasGroupInfo()) {
@@ -1193,13 +1183,13 @@ int SubSampleCommand::getSubSampleList() {
                //as long as you are not at the end of the file or done wih the lines you want
                while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        
-                       if (m->control_pressed) {  delete list; delete input;  out.close();  return 0;  }
+                       if (m->control_pressed) {  delete list; delete input;  return 0;  }
                        
                        if(allLines == 1 || labels.count(list->getLabel()) == 1){                       
                                
                                m->mothurOut(list->getLabel()); m->mothurOutEndLine();
                                
-                               processList(list, out, subset);
+                               processList(list,  subset);
                                
                                processedLabels.insert(list->getLabel());
                                userLabels.erase(list->getLabel());
@@ -1213,7 +1203,7 @@ int SubSampleCommand::getSubSampleList() {
                                list = input->getListVector(lastLabel);
                                m->mothurOut(list->getLabel()); m->mothurOutEndLine();
                                
-                               processList(list, out, subset);
+                               processList(list,  subset);
                                
                                processedLabels.insert(list->getLabel());
                                userLabels.erase(list->getLabel());
@@ -1231,7 +1221,7 @@ int SubSampleCommand::getSubSampleList() {
                }
                
                
-               if (m->control_pressed) {  if (list != NULL) { delete list; } delete input; out.close(); return 0;  }
+               if (m->control_pressed) {  if (list != NULL) { delete list; } delete input;  return 0;  }
                
                //output error messages about any remaining user labels
                set<string>::iterator it;
@@ -1254,12 +1244,11 @@ int SubSampleCommand::getSubSampleList() {
                        
                        m->mothurOut(list->getLabel()); m->mothurOutEndLine();
                        
-                       processList(list, out, subset);
+                       processList(list, subset);
                        
                        delete list; list = NULL;
                }
                
-               out.close();  
                if (list != NULL) { delete list; }
                delete input;
         
@@ -1336,14 +1325,26 @@ int SubSampleCommand::getSubSampleList() {
        }
 }
 //**********************************************************************************************************************
-int SubSampleCommand::processList(ListVector*& list, ofstream& out, set<string>& subset) {
+int SubSampleCommand::processList(ListVector*& list, set<string>& subset) {
        try {
-                               
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
+               map<string, string> variables;
+        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
+        variables["[extension]"] = m->getExtension(listfile);
+        variables["[distance]"] = list->getLabel();
+               string outputFileName = getOutputFileName("list", variables);
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
+               
                int numBins = list->getNumBins();
 
                ListVector* temp = new ListVector();
                temp->setLabel(list->getLabel());
                
+        vector<string> binLabels = list->getLabels();
+        vector<string> newLabels;
                for (int i = 0; i < numBins; i++) {
                        
                        if (m->control_pressed) { break; }
@@ -1359,15 +1360,19 @@ int SubSampleCommand::processList(ListVector*& list, ofstream& out, set<string>&
                        if (newNames != "") { 
                                newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
                                temp->push_back(newNames);
+                newLabels.push_back(binLabels[i]);
                        }
                }
                
+        temp->setLabels(newLabels);
                delete list;
                list = temp;
                
-               if (m->control_pressed) { return 0; }
+               if (m->control_pressed) { out.close(); return 0; }
                
+        list->printHeaders(out);
                list->print(out);
+        out.close();
                
                return 0;
                
index 7b251eee11c2fa7ad5be7baa6cea4427c90265c7..db4001a5c07337d51ea96aedfa9d3f20458b6685 100644 (file)
@@ -57,7 +57,7 @@ private:
        int processShared(vector<SharedRAbundVector*>&);
        int processRabund(RAbundVector*&, ofstream&);
        int processSabund(SAbundVector*&, ofstream&);
-       int processList(ListVector*&, ofstream&, set<string>&);
+       int processList(ListVector*&, set<string>&);
        int getNames();
        int readNames();
     int getTax(set<string>&);
index d93fb6e775392b5436b6dadbb99c88761e862de8..2c9399bda1dd363d04c1fed89ff9b12ea8c3aace 100644 (file)
@@ -184,9 +184,6 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                        string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
                        all = m->isTrue(temp);
                        
-                       temp = validParameter.validFile(parameters, "distance", false);                                 if (temp == "not found") { temp = "false"; }
-                       createPhylip = m->isTrue(temp);
-                       
             temp = validParameter.validFile(parameters, "iters", false);                       if (temp == "not found") { temp = "1000"; }
                        m->mothurConvert(temp, iters); 
             
@@ -204,6 +201,10 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
             
             if (subsample == false) { iters = 0; }
             
+            temp = validParameter.validFile(parameters, "distance", false);                                    if (temp == "not found") { temp = "false"; }
+                       createPhylip = m->isTrue(temp);
+            if (subsample) { createPhylip = true; }
+            
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors); 
index f96264f4af2ad46a478ec9ada9b26ec03cea8865..4883c48741717c677641c6b48bf7fe4ada0245f1 100644 (file)
@@ -359,7 +359,7 @@ int UnifracWeightedCommand::execute() {
                 variables["[tag]"] = toString(i+1);
                 string wFileName = getOutputFileName("weighted", variables);
                 output = new ColumnFile(wFileName, itersString);
-                               outputNames.push_back(wFileName); outputTypes["wweighted"].push_back(wFileName);
+                               outputNames.push_back(wFileName); outputTypes["weighted"].push_back(wFileName);
             } 
             
             userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
index 66dbb8eac3ee13c069b85374503dd87cc0ae538f..8e91b8a93c03e83687ebf574fab99e6b731bd306 100644 (file)
--- a/venn.cpp
+++ b/venn.cpp
@@ -162,7 +162,9 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                if (vCalcs[i]->getName() == "sharedsobs") {
                                        singleCalc = new Sobs();
                     if (sharedOtus &&  (labels.size() != 0)) {
-                        string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + ".sharedotus";
+                        string groupsTag = "";
+                        for (int h = 0; h < lookup.size()-1; h++) { groupsTag += lookup[h]->getGroup() + "-"; }  groupsTag += lookup[lookup.size()-1]->getGroup();
+                        string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + "." + groupsTag + ".sharedotus";
                         
                         outputNames.push_back(filenameShared);
                         ofstream outShared;
@@ -455,7 +457,9 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                                        
                     ofstream outShared;
                     if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
-                        string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + ".sharedotus";
+                        string groupsTag = "";
+                        for (int h = 0; h < lookup.size()-1; h++) { groupsTag += lookup[h]->getGroup() + "-"; }  groupsTag += lookup[lookup.size()-1]->getGroup();
+                        string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + "." + groupsTag + ".sharedotus";
                         
                         outputNames.push_back(filenameShared);
                        
@@ -660,7 +664,9 @@ vector<string> Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculato
                     
                     ofstream outShared;
                     if (sharedOtus && (vCalcs[i]->getName() == "sharedsobs")) {
-                        string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + ".sharedotus";
+                        string groupsTag = "";
+                        for (int h = 0; h < lookup.size()-1; h++) { groupsTag += lookup[h]->getGroup() + "-"; }  groupsTag += lookup[lookup.size()-1]->getGroup();
+                        string filenameShared = outputDir + m->getRootName(m->getSimpleName(inputfile)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + "." + groupsTag + ".sharedotus";
                         
                         outputNames.push_back(filenameShared);
                         
index 98796b3466fcc344b5e4c990015316a5ae2a0582..3767687bff20dd66f754d8e55920135d7609fe68 100644 (file)
@@ -29,8 +29,7 @@ vector<string> VennCommand::setParameters(){
                CommandParameter pnseqs("nseqs", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pnseqs);
         CommandParameter psharedotus("sharedotus", "Boolean", "", "t", "", "", "","",false,false); parameters.push_back(psharedotus);
                CommandParameter pfontsize("fontsize", "Number", "", "24", "", "", "","",false,false); parameters.push_back(pfontsize);
-               CommandParameter ppermute("permute", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppermute);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+        CommandParameter ppermute("permute", "Multiple", "1-2-3-4", "4", "", "", "","",false,false); parameters.push_back(ppermute);           CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
@@ -56,7 +55,7 @@ string VennCommand::getHelpString(){
                helpString += "The default value for calc is sobs if you have only read a list file or if you have selected only one group, and sharedsobs if you have multiple groups.\n";
                helpString += "The default available estimators for calc are sobs, chao and ace if you have only read a list file, and sharedsobs, sharedchao and sharedace if you have read a shared file.\n";
                helpString += "The nseqs parameter will output the number of sequences represented by the otus in the picture, default=F.\n";
-               helpString += "If you have more than 4 groups, the permute parameter will find all possible combos of 4 of your groups and create pictures for them, default=F.\n";
+               helpString += "If you have more than 4 groups, you can use the permute parameter to set the number of groups you would like mothur to divide the samples into to draw the venn diagrams for all possible combos. Default=4.\n";
                helpString += "The only estimators available four 4 groups are sharedsobs and sharedchao.\n";
         helpString += "The sharedotus parameter can be used with the sharedsobs calculator to get the names of the OTUs in each section of the venn diagram. Default=t.\n";
                helpString += "The venn command outputs a .svg file for each calculator you specify at each distance you choose.\n";
@@ -215,8 +214,10 @@ VennCommand::VennCommand(string option)  {
                        temp = validParameter.validFile(parameters, "nseqs", false);            if (temp == "not found"){       temp = "f";                             }
                        nseqs = m->isTrue(temp); 
 
-                       temp = validParameter.validFile(parameters, "permute", false);          if (temp == "not found"){       temp = "f";                             }
-                       perm = m->isTrue(temp); 
+                       temp = validParameter.validFile(parameters, "permute", false);          if (temp == "not found"){       temp = "4";                             }
+                       m->mothurConvert(temp, perm);
+            if ((perm == 1) || (perm == 2) || (perm == 3) || (perm == 4)) { }
+            else { m->mothurOut("[ERROR]: Not a valid permute value.  Valid values are 1, 2, 3, and 4."); m->mothurOutEndLine(); abort = true;  }
             
             temp = validParameter.validFile(parameters, "sharedotus", false);          if (temp == "not found"){       temp = "t";                             }
                        sharedOtus = m->isTrue(temp); 
@@ -280,7 +281,7 @@ int VennCommand::execute(){
                        lookup = input->getSharedRAbundVectors();
                        lastLabel = lookup[0]->getLabel();
                        
-                       if ((lookup.size() > 4) && (perm)) { combosOfFour = findCombinations(lookup.size()); }
+                       if ((lookup.size() > 4)) { combos = findCombinations(lookup.size()); }
                }else if (format == "list") {
                        sabund = input->getSAbundVector();
                        lastLabel = sabund->getLabel();
@@ -308,17 +309,12 @@ int VennCommand::execute(){
                                        processedLabels.insert(lookup[0]->getLabel());
                                        userLabels.erase(lookup[0]->getLabel());
                                        
-                                       if ((lookup.size() > 4) && (!perm)){
-                                               m->mothurOut("Error: Too many groups chosen.  You may use up to 4 groups with the venn command.  I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
-                                               for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor
                                        
-                                               vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
-                                               for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]);  outputTypes["svg"].push_back(outfilenames[i]);  } }
 
-                                       }else if ((lookup.size() > 4) && (perm)) {
+                                       if (lookup.size() > 4) {
                                                set< set<int> >::iterator it3;
                                                set<int>::iterator it2;
-                                               for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {  
+                                               for (it3 = combos.begin(); it3 != combos.end(); it3++) {  
                        
                                                        set<int> poss = *it3;
                                                        vector<SharedRAbundVector*> subset;
@@ -343,17 +339,10 @@ int VennCommand::execute(){
                                        processedLabels.insert(lookup[0]->getLabel());
                                        userLabels.erase(lookup[0]->getLabel());
 
-                                       if ((lookup.size() > 4) && (!perm)){
-                                               m->mothurOut("Error: Too many groups chosen.  You may use up to 4 groups with the venn command.  I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
-                                               for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor
-                                       
-                                               vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
-                                               for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]);  outputTypes["svg"].push_back(outfilenames[i]);  }  }
-
-                                       }else if ((lookup.size() > 4) && (perm)) {
+                                       if (lookup.size() > 4) {
                                                set< set<int> >::iterator it3;
                                                set<int>::iterator it2;
-                                               for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {  
+                                               for (it3 = combos.begin(); it3 != combos.end(); it3++) {  
                        
                                                        set<int> poss = *it3;
                                                        vector<SharedRAbundVector*> subset;
@@ -409,17 +398,10 @@ int VennCommand::execute(){
                                        processedLabels.insert(lookup[0]->getLabel());
                                        userLabels.erase(lookup[0]->getLabel());
 
-                                       if ((lookup.size() > 4) && (!perm)){
-                                               m->mothurOut("Error: Too many groups chosen.  You may use up to 4 groups with the venn command.  I will use the first four groups in your groupfile. If you set perm=t, I will find all possible combos of 4 groups."); m->mothurOutEndLine();
-                                               for (int i = lookup.size(); i > 4; i--) { lookup.pop_back(); } //no memmory leak because pop_back calls destructor
-                                       
-                                               vector<string> outfilenames = venn->getPic(lookup, vennCalculators);
-                                               for(int i = 0; i < outfilenames.size(); i++) { if (outfilenames[i] != "control" ) { outputNames.push_back(outfilenames[i]);  outputTypes["svg"].push_back(outfilenames[i]); }  }
-
-                                       }else if ((lookup.size() > 4) && (perm)) {
+                                       if (lookup.size() > 4) {
                                                set< set<int> >::iterator it3;
                                                set<int>::iterator it2;
-                                               for (it3 = combosOfFour.begin(); it3 != combosOfFour.end(); it3++) {  
+                                               for (it3 = combos.begin(); it3 != combos.end(); it3++) {  
                        
                                                        set<int> poss = *it3;
                                                        vector<SharedRAbundVector*> subset;
@@ -552,7 +534,7 @@ int VennCommand::execute(){
        }
 }
 //**********************************************************************************************************************
-//returns a vector of sets containing the group combinations
+//returns a set of sets, each holding the group indices of one combination
 set< set<int> > VennCommand::findCombinations(int lookupSize){
        try {
                set< set<int> > combos;
@@ -561,7 +543,7 @@ set< set<int> > VennCommand::findCombinations(int lookupSize){
                for (int i = 0; i < lookupSize; i++) {  possibles.insert(i);  }
                
                getCombos(possibles, combos);
-               
+        
                return combos;
                
        }
@@ -571,11 +553,11 @@ set< set<int> > VennCommand::findCombinations(int lookupSize){
        }
 }
 //**********************************************************************************************************************
-//recusively finds combos of 4
+//recursively finds combos of length perm
 int VennCommand::getCombos(set<int> possibles, set< set<int> >& combos){
        try {
                
-               if (possibles.size() == 4) { //done
+               if (possibles.size() == perm) { //done
                        if (combos.count(possibles) == 0) { //no dups
                                combos.insert(possibles);
                        }
index 98a59d404099dcb32052e07e93644be9f01e2869..4762902c51d4f1195e8f3fc604b1781ddb1df39f 100644 (file)
@@ -40,11 +40,11 @@ private:
        Venn* venn;
        vector<Calculator*> vennCalculators;    
        vector<SharedRAbundVector*> lookup;
-       set< set<int> > combosOfFour;
+       set< set<int> > combos;
        SAbundVector* sabund;
-       int abund, fontsize;
+       int abund, fontsize, perm;
        
-       bool abort, allLines, nseqs, perm, sharedOtus;
+       bool abort, allLines, nseqs, sharedOtus;
        set<string> labels; //holds labels to be used
        string format, groups, calc, label, outputDir, sharedfile, listfile, inputfile;
        vector<string> Estimators, Groups, outputNames;