]> git.donarmstrong.com Git - mothur.git/commitdiff
removed parse.sff command and made its functionality part of sffinfo command. fixed...
authorwestcott <westcott>
Tue, 4 Jan 2011 15:40:55 +0000 (15:40 +0000)
committerwestcott <westcott>
Tue, 4 Jan 2011 15:40:55 +0000 (15:40 +0000)
14 files changed:
Mothur.xcodeproj/project.pbxproj
chimeraslayer.cpp
chimeraslayer.h
chimeraslayercommand.cpp
chimeraslayercommand.h
commandfactory.cpp
corraxescommand.cpp
getseqscommand.cpp
makefile
mothur
parsesffcommand.cpp [deleted file]
parsesffcommand.h [deleted file]
sffinfocommand.cpp
sffinfocommand.h

index 72be7732a88856925aa54c085ed92389c1272d0a..edb26d37714cb4702183fc00dc182ba632f15506 100644 (file)
@@ -29,8 +29,6 @@
                7E962A41121F76B1007464B5 /* invsimpson.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = invsimpson.cpp; sourceTree = "<group>"; };
                7E99CA1312C8B0970041246B /* shhhercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = shhhercommand.h; sourceTree = "<group>"; };
                7E99CA1412C8B0970041246B /* shhhercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = shhhercommand.cpp; sourceTree = "<group>"; };
-               7E99D77C12CBAD780041246B /* untitled.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = untitled.h; path = ../untitled.h; sourceTree = SOURCE_ROOT; };
-               7E99D77D12CBAD780041246B /* untitled.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = untitled.cpp; path = ../untitled.cpp; sourceTree = SOURCE_ROOT; };
                7EA299BA11E384940022D8D3 /* sensspeccommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sensspeccommand.h; sourceTree = "<group>"; };
                7EA299BB11E384940022D8D3 /* sensspeccommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sensspeccommand.cpp; sourceTree = "<group>"; };
                7EC61A0911BEA6AF00F668D9 /* weightedlinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = weightedlinkage.cpp; sourceTree = "<group>"; };
                A7DF0AE0121EBB14004A03EA /* getopt_long.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getopt_long.h; sourceTree = "<group>"; };
                A7DF0AE1121EBB14004A03EA /* prng.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = prng.cpp; sourceTree = "<group>"; };
                A7DF0AE2121EBB14004A03EA /* prng.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = prng.h; sourceTree = "<group>"; };
-               A7E8338B115BBDAA00739EC4 /* parsesffcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parsesffcommand.cpp; sourceTree = "<group>"; };
-               A7E8338C115BBDAA00739EC4 /* parsesffcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parsesffcommand.h; sourceTree = "<group>"; };
                A7F0C06412B7D64F0048BC64 /* odum.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = odum.h; sourceTree = "<group>"; };
                A7F0C06512B7D64F0048BC64 /* odum.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = odum.cpp; sourceTree = "<group>"; };
                A7F0C08A12B7EAE80048BC64 /* canberra.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = canberra.h; sourceTree = "<group>"; };
                08FB7794FE84155DC02AAC07 /* mothur */ = {
                        isa = PBXGroup;
                        children = (
-                               7E99D77C12CBAD780041246B /* untitled.h */,
-                               7E99D77D12CBAD780041246B /* untitled.cpp */,
                                A768D95D1248FEAA008AB1D0 /* mothur */,
                                A7639F8D1175DF35008F5578 /* makefile */,
                                A7DA1FF0113FECD400BF472F /* alignment.cpp */,
                                A724C2B71254961E006BB1C7 /* parsefastaqcommand.cpp */,
                                A7DA20BD113FECD400BF472F /* parselistscommand.h */,
                                A7DA20BC113FECD400BF472F /* parselistscommand.cpp */,
-                               A7E8338C115BBDAA00739EC4 /* parsesffcommand.h */,
-                               A7E8338B115BBDAA00739EC4 /* parsesffcommand.cpp */,
                                A7DA20C1113FECD400BF472F /* parsimonycommand.h */,
                                A7DA20C0113FECD400BF472F /* parsimonycommand.cpp */,
                                A7DA20C3113FECD400BF472F /* pcacommand.h */,
index ee449cc9fd30f7ec521a03311473a805739159d1..78c9f9e9e24d0138b158791d7d3bce2f39eca026 100644 (file)
@@ -44,7 +44,7 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num
        }
 }
 //***************************************************************************************************************
-ChimeraSlayer::ChimeraSlayer(string file, string temp, string name, string mode, int k, int ms, int mms, int win, float div, 
+ChimeraSlayer::ChimeraSlayer(string file, string temp, string name, string mode, string abunds, int k, int ms, int mms, int win, float div, 
                                                         int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {      
        try {
                fastafile = file; templateSeqs = readSeqs(fastafile);
@@ -64,6 +64,7 @@ ChimeraSlayer::ChimeraSlayer(string file, string temp, string name, string mode,
                increment = inc;
                numWanted = numw;
                realign = r; 
+               includeAbunds = abunds;
                
                //read name file and create nameMapRank
                readNameFile(name);
@@ -283,10 +284,26 @@ vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q) {
                //create list of names we want to put into the template
                set<string> namesToAdd;
                for (itRank = nameMapRank.begin(); itRank != nameMapRank.end(); itRank++) {
-                       if ((itRank->second).size() > thisRank) {
-                               //you are more abundant than me
-                               for (int i = 0; i < (itRank->second).size(); i++) {
-                                       namesToAdd.insert((itRank->second)[i]);
+                       if (itRank->first != thisName) {
+                               if (includeAbunds == "greaterequal") {
+                                       if ((itRank->second).size() >= thisRank) {
+                                               //you are more abundant than me or equal to my abundance
+                                               for (int i = 0; i < (itRank->second).size(); i++) {
+                                                       namesToAdd.insert((itRank->second)[i]);
+                                               }
+                                       }
+                               }else if (includeAbunds == "greater") {
+                                       if ((itRank->second).size() > thisRank) {
+                                               //you are more abundant than me
+                                               for (int i = 0; i < (itRank->second).size(); i++) {
+                                                       namesToAdd.insert((itRank->second)[i]);
+                                               }
+                                       }
+                               }else if (includeAbunds == "all") {
+                                       //add everyone
+                                       for (int i = 0; i < (itRank->second).size(); i++) {
+                                               namesToAdd.insert((itRank->second)[i]);
+                                       }
                                }
                        }
                }
index b6cae49f0709922ac9067ef1d39effab77d3d5be..fbe880fde4ed0723dff1e3ec8e982942360e993a 100644 (file)
@@ -26,7 +26,7 @@ class ChimeraSlayer : public Chimera {
        
        public:
                ChimeraSlayer(string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool);
-               ChimeraSlayer(string, string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool);
+               ChimeraSlayer(string, string, string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool);
 
                ~ChimeraSlayer();
                
@@ -48,7 +48,7 @@ class ChimeraSlayer : public Chimera {
                map<string, vector<string> > nameMapRank;  //sequence name to rank so you can construct a template of the abundant sequences if the user uses itself as template
                
                vector<data_struct>  chimeraResults;
-               string chimeraFlags, searchMethod, fastafile;
+               string chimeraFlags, searchMethod, fastafile, includeAbunds;
                bool realign;
                int window, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters, increment;
                float divR;
index 394172c34cc80feaa85f4ced1b6883d63058bb87..b94d4450e321df7af165cdcf917de1e7cd7d1a02 100644 (file)
@@ -14,7 +14,7 @@
 //**********************************************************************************************************************
 vector<string> ChimeraSlayerCommand::getValidParameters(){     
        try {
-               string AlignArray[] =  {"fasta", "processors", "name","window", "template","numwanted", "ksize", "match","mismatch", 
+               string AlignArray[] =  {"fasta", "processors", "name","window", "include","template","numwanted", "ksize", "match","mismatch", 
                        "divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir", "search","realign" };
                vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
                return myArray;
@@ -69,7 +69,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"fasta", "processors","name", "window", "template","numwanted", "ksize", "match","mismatch", 
+                       string Array[] =  {"fasta", "processors","name", "include","window", "template","numwanted", "ksize", "match","mismatch", 
                        "divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir", "search","realign" };
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
@@ -231,6 +231,9 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
                        string temp = validParameter.validFile(parameters, "processors", false);                if (temp == "not found") { temp = "1"; }
                        convert(temp, processors);
                        
+                       includeAbunds = validParameter.validFile(parameters, "include", false);         if (includeAbunds == "not found") { includeAbunds = "greater"; }
+                       if ((includeAbunds != "greater") && (includeAbunds != "greaterequal") && (includeAbunds != "all")) { includeAbunds = "greater"; m->mothurOut("Invalid include setting. options are greater, greaterequal or all. using greater."); m->mothurOutEndLine(); }
+                       
                        temp = validParameter.validFile(parameters, "ksize", false);                    if (temp == "not found") { temp = "7"; }
                        convert(temp, ksize);
                                                
@@ -346,7 +349,7 @@ int ChimeraSlayerCommand::execute(){
                                chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);   
                        }else {
                                if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
-                                       chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, nameFileNames[s], search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); 
+                                       chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, nameFileNames[s], search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);  
                                }else {
                                        
                                        m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
@@ -368,7 +371,7 @@ int ChimeraSlayerCommand::execute(){
                                        string nameFile = filenames["name"][0];
                                        fastaFileNames[s] = filenames["fasta"][0];
                        
-                                       chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, nameFile, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign); 
+                                       chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, nameFile, search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);  
                                }
                        }
                                
index 9ff0c8ffb0753f799133b3ee54424f0a7aff8345..9567fcab31c20f22987d04eb0a2a409dd1dfc50b 100644 (file)
@@ -49,7 +49,7 @@ private:
        #endif
 
        bool abort, realign;
-       string fastafile, templatefile, outputDir, search, namefile;
+       string fastafile, templatefile, outputDir, search, namefile, includeAbunds;
        int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
        float divR;
        Chimera* chimera;
index 7d47491f7e32a3dc2adf518aa5d97cc2265f7121..5a7b23712c3c3503c44e31e2276123fb7b5c3c56 100644 (file)
@@ -65,7 +65,6 @@
 #include "otuhierarchycommand.h"
 #include "setdircommand.h"
 #include "parselistscommand.h"
-#include "parsesffcommand.h"
 #include "chimeraccodecommand.h"
 #include "chimeracheckcommand.h"
 #include "chimeraslayercommand.h"
@@ -186,7 +185,6 @@ CommandFactory::CommandFactory(){
        commands["set.dir"]                             = "set.dir";
        commands["merge.files"]                 = "merge.files";
        commands["parse.list"]                  = "parse.list";
-       commands["parse.sff"]                   = "parse.sff";
        commands["set.logfile"]                 = "set.logfile";
        commands["phylo.diversity"]             = "phylo.diversity";
        commands["make.group"]                  = "make.group";
@@ -340,7 +338,6 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "set.dir")                               {       command = new SetDirectoryCommand(optionString);                        }
                else if(commandName == "set.logfile")                   {       command = new SetLogFileCommand(optionString);                          }
                else if(commandName == "parse.list")                    {       command = new ParseListCommand(optionString);                           }
-               else if(commandName == "parse.sff")                             {       command = new ParseSFFCommand(optionString);                            }
                else if(commandName == "phylo.diversity")               {       command = new PhyloDiversityCommand(optionString);                      }
                else if(commandName == "make.group")                    {       command = new MakeGroupCommand(optionString);                           }
                else if(commandName == "chop.seqs")                             {       command = new ChopSeqsCommand(optionString);                            }
@@ -465,7 +462,6 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
                else if(commandName == "set.dir")                               {       pipecommand = new SetDirectoryCommand(optionString);                    }
                else if(commandName == "set.logfile")                   {       pipecommand = new SetLogFileCommand(optionString);                              }
                else if(commandName == "parse.list")                    {       pipecommand = new ParseListCommand(optionString);                               }
-               else if(commandName == "parse.sff")                             {       pipecommand = new ParseSFFCommand(optionString);                                }
                else if(commandName == "phylo.diversity")               {       pipecommand = new PhyloDiversityCommand(optionString);                  }
                else if(commandName == "make.group")                    {       pipecommand = new MakeGroupCommand(optionString);                               }
                else if(commandName == "chop.seqs")                             {       pipecommand = new ChopSeqsCommand(optionString);                                }
@@ -577,7 +573,6 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "set.dir")                               {       shellcommand = new SetDirectoryCommand();                       }
                else if(commandName == "set.logfile")                   {       shellcommand = new SetLogFileCommand();                         }
                else if(commandName == "parse.list")                    {       shellcommand = new ParseListCommand();                          }
-               else if(commandName == "parse.sff")                             {       shellcommand = new ParseSFFCommand();                           }
                else if(commandName == "phylo.diversity")               {       shellcommand = new PhyloDiversityCommand();                     }
                else if(commandName == "make.group")                    {       shellcommand = new MakeGroupCommand();                          }
                else if(commandName == "chop.seqs")                             {       shellcommand = new ChopSeqsCommand();                           }
index ccdd7402389d0f761e069ff1d3171c5cdfcb3824..429ef141b54b621bb9bff43904e78038c31df438 100644 (file)
@@ -185,7 +185,17 @@ CorrAxesCommand::CorrAxesCommand(string option)  {
 
 void CorrAxesCommand::help(){
        try {
-
+               m->mothurOut("The corr.axes command reads a shared or relabund file as well as a pcoa file and calculates the correlation coefficient.\n");
+               m->mothurOut("The corr.axes command parameters are shared, relabund, axes, metadata, groups, method, numaxes and label.  The shared or relabund and axes parameters are required.  If shared is given the relative abundance is calculated.\n");
+               m->mothurOut("The metadata parameter.....\n");
+               m->mothurOut("The groups parameter allows you to specify which of the groups you would like included. The group names are separated by dashes.\n");
+               m->mothurOut("The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n");
+               m->mothurOut("The method parameter allows you to select what method you would like to use. Options are pearson, spearman and kendall. Default=pearson.\n");
+               m->mothurOut("The numaxes parameter allows you to select the number of axes you would like to use. Default=3.\n");
+               m->mothurOut("The corr.axes command should be in the following format: corr.axes(axes=yourPcoaFile, shared=yourSharedFile, method=yourMethod).\n");
+               m->mothurOut("Example corr.axes(axes=genus.pool.thetayc.genus.lt.pcoa, shared=genus.pool.shared, method=kendall).\n");
+               m->mothurOut("The corr.axes command outputs a .corr.axes file.\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
        }
        catch(exception& e) {
                m->errorOut(e, "CorrAxesCommand", "help");      
@@ -254,7 +264,7 @@ int CorrAxesCommand::execute(){
                // calc the r values                                                                                                                            //
                /************************************************************************************/
                
-               string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + "corr.axes";
+               string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputFileName)) + method + ".corr.axes";
                outputNames.push_back(outputFileName); outputTypes["corr.axes"].push_back(outputFileName);      
                ofstream out;
                m->openOutputFile(outputFileName, out);
@@ -478,21 +488,20 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                //sort each axis
                for (int i = 0; i < numaxes; i++) {  sort(scores[i].begin(), scores[i].end(), compareSpearman); }
                
-               //find ranks of xi in each axis
-               map<string, vector<float> > rankAxes;
+               //convert scores to ranks of xi in each axis
                for (int i = 0; i < numaxes; i++) {
                        
-                       vector<spearmanRank> ties;
+                       vector<spearmanRank*> ties;
                        int rankTotal = 0;
                        for (int j = 0; j < scores[i].size(); j++) {
                                rankTotal += j;
-                               ties.push_back(scores[i][j]);
+                               ties.push_back(&(scores[i][j]));
                                
                                if (j != scores.size()-1) { // you are not the last so you can look ahead
                                        if (scores[i][j].score != scores[i][j+1].score) { // you are done with ties, rank them and continue
                                                for (int k = 0; k < ties.size(); k++) {
                                                        float thisrank = rankTotal / (float) ties.size();
-                                                       rankAxes[ties[k].name].push_back(thisrank);
+                                                       (*ties[k]).score = thisrank;
                                                }
                                                ties.clear();
                                                rankTotal = 0;
@@ -500,17 +509,15 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                                }else { // you are the last one
                                        for (int k = 0; k < ties.size(); k++) {
                                                float thisrank = rankTotal / (float) ties.size();
-                                               rankAxes[ties[k].name].push_back(thisrank);
+                                               (*ties[k]).score = thisrank;
                                        }
                                }
                        }
                }
                
-               
-               
                //for each otu
                for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) {
-                       
+               
                        out << i+1 << '\t';
                        
                        //find the ranks of this otu - Y
@@ -519,7 +526,7 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                                spearmanRank member(lookupFloat[j]->getGroup(), lookupFloat[j]->getAbundance(i));
                                otuScores.push_back(member);
                        }
-                       
+                                               
                        sort(otuScores.begin(), otuScores.end(), compareSpearman);
                        
                        map<string, float> rankOtus;
@@ -546,37 +553,36 @@ int CorrAxesCommand::calcKendall(map<string, vector<float> >& axes, ofstream& ou
                                }
                        }
                        
-                       //calc kendall coeffient for each axis for this otu
+                       //calc spearman ranks for each axis for this otu
                        for (int j = 0; j < numaxes; j++) {
+                       
+                               int P = 0;
+                               //assemble otus ranks in same order as axis ranks
+                               vector<spearmanRank> otus; 
+                               for (int l = 0; l < scores[j].size(); l++) {   
+                                       spearmanRank member(scores[j][l].name, rankOtus[scores[j][l].name]);
+                                       otus.push_back(member);
+                               }
                                
-                               int numConcordant = 0;
-                               int numDiscordant = 0;
-                               
-                               for (int f = 0; f < j; f++) {
+                               for (int l = 0; l < scores[j].size(); l++) {
                                        
-                                       for (int k = 0; k < lookupFloat.size(); k++) {
-                                               
-                                               float xi = rankAxes[lookupFloat[k]->getGroup()][j];
-                                               float yi = rankOtus[lookupFloat[k]->getGroup()];
-                                               
-                                               for (int h = 0; h < k; h++) {
-                                                       
-                                                       float xj = rankAxes[lookupFloat[k]->getGroup()][f];
-                                                       float yj = rankOtus[lookupFloat[h]->getGroup()];
-                                                       
-                                                       if ( ((xi > xj) && (yi < yj)) || ((xi < xj) && (yi > yj)) ){  numDiscordant++;  }
-                                                       if ( ((xi > xj) && (yi > yj)) || ((xi < xj) && (yi < yj)) ){  numConcordant++;  }
-                                               }
+                                       int numWithHigherRank = 0;
+                                       float thisrank = otus[l].score;
+                                       
+                                       for (int u = l; u < scores[j].size(); u++) {
+                                               if (otus[u].score > thisrank) { numWithHigherRank++; }
                                        }
+                                       
+                                       P += numWithHigherRank;
                                }
                                
                                int n = lookupFloat.size();
-                               double p = (numConcordant - numDiscordant) / (float) (0.5 * n * (n - 1));
+                               
+                               double p = ( (4 * P) / (float) (n * (n - 1)) ) - 1.0;
                                
                                out << p << '\t';
                        }
                        
-                       
                        out << endl;
                }
                
@@ -840,6 +846,8 @@ map<string, vector<float> > CorrAxesCommand::readAxes(){
                        }else { done = true; }
                }
                
+               if (numaxes > count) { m->mothurOut("You requested " + toString(numaxes) + " axes, but your file only includes " + toString(count) + ". Using " + toString(count) + "."); m->mothurOutEndLine(); numaxes = count; }
+               
                while (!in.eof()) {
                        
                        if (m->control_pressed) { in.close(); return axes; }
index d40dcc6a30d20080770ac806855295f98ed83df3..a16accbb8ac724b1fce6579a7bcad9b2f5b261c7 100644 (file)
@@ -779,6 +779,30 @@ int GetSeqsCommand::compareAccnos(){
                set<string> namesDups;
                set<string> namesAccnos = names;
                
+               map<string, int> nameCount;
+               
+               if (namefile != "") {
+                       ifstream inName;
+                       m->openInputFile(namefile, inName);
+                       
+                       
+                       while(!inName.eof()){
+                               
+                               if (m->control_pressed) { inName.close(); return 0; }
+                               
+                               string thisname, repnames;
+                               
+                               inName >> thisname;             m->gobble(inName);              //read from first column
+                               inName >> repnames;                     //read from second column
+                               
+                               int num = m->getNumNames(repnames);
+                               nameCount[thisname] = num;
+                               
+                               m->gobble(inName);
+                       }
+                       inName.close(); 
+               }
+               
                while(!in.eof()){
                        in >> name;
                        
@@ -797,21 +821,27 @@ int GetSeqsCommand::compareAccnos(){
                m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine();
                
                for (set<string>::iterator it = namesDups.begin(); it != namesDups.end(); it++) {
-                       out << (*it) << endl;
+                       out << (*it);
+                       if (namefile != "") { out << '\t' << nameCount[(*it)]; }
+                       out << endl;
                }
                
                out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl;
                m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine();
                
                for (set<string>::iterator it = namesAccnos.begin(); it != namesAccnos.end(); it++) {
-                       out << (*it) << endl;
+                       out << (*it);
+                       if (namefile != "") { out << '\t' << nameCount[(*it)]; }
+                       out << endl;
                }
                
                out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl;
                m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine();
                
                for (set<string>::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) {
-                       out << (*it) << endl;
+                       out << (*it);
+                       if (namefile != "") { out << '\t' << nameCount[(*it)]; }
+                       out << endl;
                }
 
                out.close(); 
index 5eb8daaab6d8163b1b48852ccef7e21a406b28dc..266c0c347d82ad54aa55b2c0edf5ae8a5a3d6c03 100644 (file)
--- a/makefile
+++ b/makefile
@@ -58,7 +58,7 @@ ifeq  ($(strip $(USEREADLINE)),yes)
       -lncurses
 endif
 
-USEMPI ?= yes
+USEMPI ?= no
 
 ifeq  ($(strip $(USEMPI)),yes)
     CXX = mpic++
diff --git a/mothur b/mothur
index a425fb1254c4a3431571e1bde7e23bc48e8de236..2619f5bf96b7405d9891d4c425f6bddc0d10c23e 100755 (executable)
Binary files a/mothur and b/mothur differ
diff --git a/parsesffcommand.cpp b/parsesffcommand.cpp
deleted file mode 100644 (file)
index e2f9d61..0000000
+++ /dev/null
@@ -1,614 +0,0 @@
-/*
- *  parsesffcommand.cpp
- *  Mothur
- *
- *  Created by Pat Schloss on 2/6/10.
- *  Copyright 2010 Patrick D. Schloss. All rights reserved.
- *
- */
-
-#include "parsesffcommand.h"
-#include "sequence.hpp"
-
-//**********************************************************************************************************************
-vector<string> ParseSFFCommand::getValidParameters(){  
-       try {
-               string Array[] =  {"sff", "oligos", "minlength", "outputdir", "inputdir"};
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-               return myArray;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ParseSFFCommand", "getValidParameters");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-ParseSFFCommand::ParseSFFCommand(){    
-       try {
-               abort = true;
-               //initialize outputTypes
-               vector<string> tempOutNames;
-               outputTypes["flow"] = tempOutNames;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ParseSFFCommand", "ParseSFFCommand");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-vector<string> ParseSFFCommand::getRequiredParameters(){       
-       try {
-               string Array[] =  {"sff"};
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-               return myArray;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ParseSFFCommand", "getRequiredParameters");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-vector<string> ParseSFFCommand::getRequiredFiles(){    
-       try {
-               vector<string> myArray;
-               return myArray;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ParseSFFCommand", "getRequiredFiles");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-
-ParseSFFCommand::ParseSFFCommand(string option){
-       try {
-               abort = false;
-               
-               if(option == "help") {
-                       help();
-                       abort = true; 
-               }
-               else {
-                       //valid paramters for this command
-                       string Array[] =  {"sff", "oligos", "minlength", "outputdir", "inputdir"};
-                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-                       
-                       OptionParser parser(option);
-                       map<string,string> parameters = parser.getParameters();
-                       
-                       ValidParameters validParameter;
-                       map<string,string>::iterator it;
-
-                       //check to make sure all parameters are valid for command
-                       for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
-                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
-                       }
-                       
-                       //initialize outputTypes
-                       vector<string> tempOutNames;
-                       outputTypes["flow"] = tempOutNames;
-                       
-                       //if the user changes the input directory command factory will send this info to us in the output parameter 
-                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
-                       if (inputDir == "not found"){   inputDir = "";          }
-                       else {
-                               string path;
-                               it = parameters.find("sff");
-                               //user has given a template file
-                               if(it != parameters.end()){ 
-                                       path = m->hasPath(it->second);
-                                       //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["sff"] = inputDir + it->second;              }
-                               }
-                               
-                               it = parameters.find("oligos");
-                               //user has given an oligos file
-                               if(it != parameters.end()){ 
-                                       path = m->hasPath(it->second);
-                                       //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
-                               }
-                       }
-                       
-                       
-                       //check for required parameters
-                       sffFile = validParameter.validFile(parameters, "sff", true);
-                       if (sffFile == "not found"){
-                               m->mothurOut("sff is a required parameter for the parse.sff command.");
-                               m->mothurOutEndLine();
-                               abort = true;
-                       }
-                       else if (sffFile == "not open")         {       abort = true;   }       
-                       
-                       //if the user changes the output directory command factory will send this info to us in the output parameter 
-                       outputDir = validParameter.validFile(parameters, "outputdir", false);
-                       if (outputDir == "not found"){  
-                               outputDir = ""; 
-                               outputDir += m->hasPath(sffFile); //if user entered a file with a path then preserve it 
-                       }
-
-                       //check for optional parameter and set defaults
-                       // ...at some point should added some additional type checking...                       
-                       oligoFile = validParameter.validFile(parameters, "oligos", true);
-                       if (oligoFile == "not found")   {       oligoFile = "";         }
-                       else if(oligoFile == "not open"){       abort = true;           } 
-                       
-                       string temp = validParameter.validFile(parameters, "minlength", false);
-                       if (temp == "not found") { temp = "0"; }
-                       convert(temp, minLength); 
-               }               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ParseSFFCommand", "ParseSFFCommand");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
-
-ParseSFFCommand::~ParseSFFCommand()    {       /*      do nothing      */      }
-
-//**********************************************************************************************************************
-
-int ParseSFFCommand::execute(){
-       try {
-               if (abort == true) {    return 0;       }
-
-               ifstream inSFF;
-               m->openInputFile(sffFile, inSFF);
-               
-               cout.setf(ios::fixed, ios::floatfield);
-               cout.setf(ios::showpoint);
-               cout << setprecision(2);
-                       
-               vector<ofstream*> flowFileNames;
-               if(oligoFile != ""){
-                       getOligos(flowFileNames);
-               }
-               else{
-                       flowFileNames.push_back(new ofstream((outputDir + m->getRootName(m->getSimpleName(sffFile)) + "flow").c_str(), ios::ate));
-                       outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(sffFile)) + "flow")); outputTypes["flow"].push_back((outputDir + m->getRootName(m->getSimpleName(sffFile)) + "flow"));
-               }
-               
-               for(int i=0;i<flowFileNames.size();i++){
-                       flowFileNames[i]->setf(ios::fixed, ios::floatfield);
-                       flowFileNames[i]->setf(ios::showpoint);
-                       *flowFileNames[i] << setprecision(2);
-               }                       
-               
-               if (m->control_pressed) { outputTypes.clear(); for(int i=0;i<flowFileNames.size();i++){ flowFileNames[i]->close();  } return 0; }
-               
-//             ofstream fastaFile;
-//             m->openOutputFile(m->getRootName(sffFile) + "fasta", fastaFile);
-
-//             ofstream qualFile;
-//             m->openOutputFile(m->getRootName(sffFile) + "qual", qualFile);
-               
-               string commonHeader = m->getline(inSFF);
-               string magicNumber = m->getline(inSFF);         
-               string version = m->getline(inSFF);
-               string indexOffset = m->getline(inSFF);
-               string indexLength = m->getline(inSFF);
-               int numReads = parseHeaderLineToInt(inSFF);
-               string headerLength = m->getline(inSFF);
-               string keyLength = m->getline(inSFF);
-               int numFlows = parseHeaderLineToInt(inSFF);
-               string flowgramCode = m->getline(inSFF);
-               string flowChars = m->getline(inSFF);
-               string keySequence = m->getline(inSFF);
-               m->gobble(inSFF);
-
-               string seqName;
-               bool good = 0;
-               
-               for(int i=0;i<numReads;i++){
-                       
-                       if (m->control_pressed) { outputTypes.clear(); for(int i=0;i<flowFileNames.size();i++){ flowFileNames[i]->close();  } return 0; }
-                       
-                       inSFF >> seqName;
-                       seqName = seqName.substr(1);
-                       m->gobble(inSFF);
-                       
-                       string runPrefix = parseHeaderLineToString(inSFF);
-                       string regionNumber = parseHeaderLineToString(inSFF);
-                       string xyLocation = parseHeaderLineToString(inSFF);
-                       m->gobble(inSFF);
-                       
-                       string runName = parseHeaderLineToString(inSFF);
-                       string analysisName = parseHeaderLineToString(inSFF);
-                       string fullPath = parseHeaderLineToString(inSFF);
-                       m->gobble(inSFF);
-                       
-                       string readHeaderLen = parseHeaderLineToString(inSFF);
-                       string nameLength = parseHeaderLineToString(inSFF);
-                       int numBases = parseHeaderLineToInt(inSFF);
-                       string clipQualLeft = parseHeaderLineToString(inSFF);
-                       int clipQualRight = parseHeaderLineToInt(inSFF);
-                       string clipAdapLeft = parseHeaderLineToString(inSFF);
-                       string clipAdapRight = parseHeaderLineToString(inSFF);
-                       m->gobble(inSFF);
-                       
-                       vector<float> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows);
-                       vector<int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases);
-                       string bases = parseHeaderLineToString(inSFF);
-                       string qualityScores = parseHeaderLineToString(inSFF);
-                       m->gobble(inSFF);
-                       
-
-                       
-                       int flowLength = flowIndices[clipQualRight-1];
-                                               
-                       screenFlow(flowVector, flowLength);
-                       string sequence = flow2seq(flowVector, flowLength);
-                       
-                       int group = 0;
-       
-                       if(minLength != 0 || numFPrimers != 0  || numBarcodes != 0 || numRPrimers != 0){                
-                               good = screenSeq(sequence, group);
-                       }
-
-                       if(good){
-                               *flowFileNames[group] << seqName << ' ' << flowLength;
-                               for(int i=0;i<numFlows;i++){
-                                       *flowFileNames[group] << ' ' << flowVector[i];
-                               }
-                               *flowFileNames[group] << endl;                          
-                       }
-                       
-//                     string fastaHeader = '>' + seqName + "\tregion=" + regionNumber + " xy=" + xyLocation;
-//                     fastaFile << fastaHeader << endl;
-//                     fastaFile << stripSeqQual(bases, clipQualLeft, clipQualRight) << endl;
-//
-//                     qualFile << fastaHeader << endl;
-//                     qualFile << stripQualQual(qualityScores, clipQualLeft, clipQualRight) << endl;
-
-               }
-               for(int i=0;i<flowFileNames.size();i++){
-                       flowFileNames[i]->close();
-               }
-
-               m->mothurOutEndLine();
-               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
-               m->mothurOutEndLine();
-
-//             fastaFile.close();
-//             qualFile.close();
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ParseSFFCommand", "execute");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
-
-void ParseSFFCommand::help(){
-       try {
-               m->mothurOut("The parse.sff command...");
-               m->mothurOutEndLine();
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ParseSFFCommand", "help");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
-
-void ParseSFFCommand::getOligos(vector<ofstream*>& outSFFFlowVec){
-       try {
-
-               ifstream inOligos;
-               m->openInputFile(oligoFile, inOligos);
-               
-               string type, oligo, group;
-               
-               int index = 0;
-               
-               while(!inOligos.eof()){
-                       inOligos >> type;
-
-                       if(type[0] == '#'){     m->getline(inOligos);   } // get rest of line if there's any crap there
-                       else{
-                               inOligos >> oligo;
-                               
-                               for(int i=0;i<oligo.length();i++){
-                                       oligo[i] = toupper(oligo[i]);
-                                       if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
-                               }
-                               if(type == "forward"){
-                                       forPrimer.push_back(oligo);
-                               }
-                               else if(type == "reverse"){
-                                       Sequence oligoRC("reverse", oligo);
-                                       oligoRC.reverseComplement();
-                                       revPrimer.push_back(oligoRC.getUnaligned());
-                               }
-                               else if(type == "barcode"){
-                                       inOligos >> group;
-                                       barcodes[oligo]=index++;
-                                       groupVector.push_back(group);
-                                       
-                                       outSFFFlowVec.push_back(new ofstream((outputDir + m->getRootName(m->getSimpleName(sffFile)) + group + ".flow").c_str(), ios::ate));
-                                       outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(sffFile)) + group + "flow"));
-                                       outputTypes["flow"].push_back((outputDir + m->getRootName(m->getSimpleName(sffFile)) + group + "flow"));
-                               }
-                       }
-                       m->gobble(inOligos);
-               }
-               
-               inOligos.close();
-               
-               numFPrimers = forPrimer.size();
-               numRPrimers = revPrimer.size();
-               numBarcodes = barcodes.size();
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ParseSFFCommand", "getOligos");
-               exit(1);
-       }
-       
-}
-
-//**********************************************************************************************************************
-
-int ParseSFFCommand::parseHeaderLineToInt(ifstream& file){
-       
-       int number;
-
-       while (!file.eof())     {
-
-               char c = file.get(); 
-               if (c == ':'){
-                       file >> number;
-                       break;
-               }
-               
-       }
-       m->gobble(file);
-       return number;
-}
-
-//**********************************************************************************************************************
-
-string ParseSFFCommand::parseHeaderLineToString(ifstream& file){
-       
-       string text;
-       
-       while (!file.eof())     {
-               char c = file.get(); 
-               
-               if (c == ':'){
-                       m->gobble(file);
-                       text = m->getline(file);                        
-                       break;
-               }
-       }
-       m->gobble(file);
-
-       return text;
-}
-
-//**********************************************************************************************************************
-
-vector<float> ParseSFFCommand::parseHeaderLineToFloatVector(ifstream& file, int length){
-       
-       vector<float> floatVector(length);
-       
-       while (!file.eof())     {
-               char c = file.get(); 
-               if (c == ':'){
-                       for(int i=0;i<length;i++){
-                               file >> floatVector[i];
-                       }
-                       break;
-               }
-       }
-       m->gobble(file);        
-       return floatVector;
-}
-
-//**********************************************************************************************************************
-
-vector<int> ParseSFFCommand::parseHeaderLineToIntVector(ifstream& file, int length){
-       
-       vector<int> intVector(length);
-       
-       while (!file.eof())     {
-               char c = file.get(); 
-               if (c == ':'){
-                       for(int i=0;i<length;i++){
-                               file >> intVector[i];
-                       }
-                       break;
-               }
-       }
-       m->gobble(file);        
-       return intVector;
-}
-
-//**********************************************************************************************************************
-
-
-void ParseSFFCommand::screenFlow(vector<float> flowgram, int& length){
-       try{
-
-               int newLength = 0;
-
-               while(newLength * 4 < length){
-                       
-                       int signal = 0;
-                       int noise = 0;
-                       for(int i=0;i<4;i++){
-                               float flow = flowgram[i + 4 * newLength];
-
-                               if(flow > 0.50){
-                                       signal++;
-                                       if(flow <= 0.69){ // not sure why, but if i make it <0.70 it doesn't work...
-                                               noise++;
-                                       }
-                               }
-                       }
-                       if(noise > 0 || signal == 0){
-                               break;
-                       }                       
-                       newLength++;
-               }
-               length = newLength * 4;
-       }
-       
-       catch(exception& e) {
-               m->errorOut(e, "ParseSFFCommand", "screenFlow");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
-
-string ParseSFFCommand::flow2seq(vector<float> flowgram, int length){
-
-       string flow = "TACG";
-       string sequence = "";
-       for(int i=8;i<length;i++){
-               int signal = int(flowgram[i] + 0.5);
-               char base = flow[ i % 4 ];
-               for(int j=0;j<signal;j++){
-                       sequence += base;
-               }
-       }
-       return sequence;
-}
-
-//**********************************************************************************************************************
-
-bool ParseSFFCommand::screenSeq(string& sequence, int& group){
-
-       int length = 1;
-       group = -1;
-       
-       if(sequence.length() < minLength){      length = 0;     }
-       
-       int barcode = 1;
-       int barcodeLength = 0;
-
-       for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
-               if(compareDNASeq(it->first, sequence.substr(0,(it->first).length()))){
-                       barcode = 1;
-                       barcodeLength = (it->first).size();
-                       group = it->second;
-                       break;
-               }
-               else{
-                       barcode = 0;
-               }
-       }
-       
-       int fPrimer = 1;
-       for(int i=0;i<numFPrimers;i++){
-               if(compareDNASeq(forPrimer[i], sequence.substr(barcodeLength,forPrimer[i].length()))){
-                       fPrimer = 1;
-                       break;
-               }
-               else{
-                       fPrimer = 0;
-               }
-       }
-
-       int rPrimer = 1;
-       for(int i=0;i<numRPrimers;i++){
-               if(compareDNASeq(revPrimer[i], sequence.substr(sequence.length()-revPrimer[i].length(),revPrimer[i].length()))){
-                       rPrimer = 1;
-                       break;
-               }
-               else{
-                       rPrimer = 0;
-               }
-       }
-
-       return fPrimer * rPrimer * length * barcode;
-               
-}
-
-//**********************************************************************************************************************
-          
-bool ParseSFFCommand::compareDNASeq(string oligo, string seq){
-   try {
-          bool success = 1;
-          int length = oligo.length();
-          
-          for(int i=0;i<length;i++){
-                  
-                  if(oligo[i] != seq[i]){
-                          if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')         {       success = 0;    }
-                          else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                     {       success = 0;    }
-                          else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                         {       success = 0;    }
-                          else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                         {       success = 0;    }
-                          else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                         {       success = 0;    }
-                          else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                         {       success = 0;    }
-                          else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                         {       success = 0;    }
-                          else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                         {       success = 0;    }
-                          else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))        {       success = 0;    }
-                          else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))        {       success = 0;    }
-                          else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))        {       success = 0;    }
-                          else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))        {       success = 0;    }                       
-                          
-                          if(success == 0)     {       break;  }
-                  }
-                  else{
-                          success = 1;
-                  }
-          }
-          
-          return success;
-   }
-   catch(exception& e) {
-          m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
-          exit(1);
-   }
-}
-          
-//**********************************************************************************************************************
-
-//string ParseSFFCommand::stripSeqQual(string qScores, int start, int end){
-//     
-//     
-//     return qScores.substr(start-1, end-start+1);
-//
-//}
-
-//**********************************************************************************************************************
-
-//string ParseSFFCommand::stripQualQual(string qScores, int start, int end){
-//     
-//     start--;
-//     
-//     int startCount = 0;
-//     int startIndex = 0;
-//     
-//     while(startCount < start && startIndex < qScores.length()){
-//             if(isspace(qScores[startIndex])){
-//                     startCount++;
-//             }
-//        startIndex++;
-//     }
-//     
-//     int endCount = startCount;
-//     int endIndex = startIndex;
-//     
-//     while(endCount < end && endIndex < qScores.length()){
-//             if(isspace(qScores[endIndex])){
-//                     endCount++;
-//             }
-//             endIndex++;
-//     }
-//     
-//   return qScores.substr(startIndex, endIndex-startIndex-1);//, endCount-startCount);
-//     
-//}
-
-//**********************************************************************************************************************
-
-
diff --git a/parsesffcommand.h b/parsesffcommand.h
deleted file mode 100644 (file)
index 9dfa083..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef PARSESFFCOMMAND_H
-#define PARSESFFCOMMAND_H
-
-/*
- *  parsesffcommand.h
- *  Mothur
- *
- *  Created by Pat Schloss on 2/6/10.
- *  Copyright 2010 Patrick D. Schloss. All rights reserved.
- *
- */
-
-
-#include "command.hpp"
-
-class ParseSFFCommand : public Command {
-public:
-       ParseSFFCommand(string);
-       ParseSFFCommand();
-       ~ParseSFFCommand();
-       vector<string> getRequiredParameters();
-       vector<string> getValidParameters();
-       vector<string> getRequiredFiles();
-       map<string, vector<string> > getOutputFiles() { return outputTypes; }
-       int execute();
-       void help();    
-       
-private:
-
-       int parseHeaderLineToInt(ifstream&);
-       vector<float> parseHeaderLineToFloatVector(ifstream&, int);
-       vector<int> parseHeaderLineToIntVector(ifstream&, int);
-       string parseHeaderLineToString(ifstream&);
-       void screenFlow(vector<float>, int&);
-       string flow2seq(vector<float>, int);
-       bool screenSeq(string&, int&);
-       bool compareDNASeq(string, string);
-       void getOligos(vector<ofstream*>&);
-       
-       
-       string sffFile;
-       string oligoFile;
-
-       int minLength;
-       int numFPrimers, numRPrimers, numBarcodes;
-       vector<string> forPrimer, revPrimer;
-       map<string, int> barcodes;
-       vector<string> groupVector;
-       vector<string> outputNames;
-       map<string, vector<string> > outputTypes;
-
-//     string stripSeqQual(string, int, int);
-//     string stripQualQual(string, int, int);
-       
-       string outputDir;
-       bool abort;
-};
-
-#endif
-
-
index 3562dcc3e88265a93c23af38fa69b3b2c776e4c1..c9bc469cf80c0871e7a4e8868ae1866960f4afcd 100644 (file)
@@ -41,7 +41,7 @@ SffInfoCommand::SffInfoCommand(){
 //**********************************************************************************************************************
 vector<string> SffInfoCommand::getRequiredParameters(){        
        try {
-               string Array[] =  {"sff"};
+               string Array[] =  {"sff", "sfftxt", "or"};
                vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                return myArray;
        }
@@ -99,7 +99,7 @@ SffInfoCommand::SffInfoCommand(string option)  {
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }
 
                        sffFilename = validParameter.validFile(parameters, "sff", false);
-                       if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true;  }
+                       if (sffFilename == "not found") { sffFilename = "";  }
                        else { 
                                m->splitAtDash(sffFilename, filenames);
                                
@@ -221,8 +221,27 @@ SffInfoCommand::SffInfoCommand(string option)  {
                        temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }
                        trim = m->isTrue(temp); 
                        
-                       temp = validParameter.validFile(parameters, "sfftxt", false);                           if (temp == "not found"){       temp = "F";                             }
-                       sfftxt = m->isTrue(temp); 
+                       temp = validParameter.validFile(parameters, "sfftxt", false);                           
+                       if (temp == "not found")        {       temp = "F";      sfftxt = false; sfftxtFilename = "";           }
+                       else if (m->isTrue(temp))       {       sfftxt = true;          sfftxtFilename = "";                            }
+                       else {
+                               //you are a filename
+                               if (inputDir != "") {
+                                       map<string,string>::iterator it = parameters.find("sfftxt");
+                                       //user has given a template file
+                                       if(it != parameters.end()){ 
+                                               string path = m->hasPath(it->second);
+                                               //if the user has not given a path then, add inputdir. else leave path alone.
+                                               if (path == "") {       parameters["sfftxt"] = inputDir + it->second;           }
+                                       }
+                               }
+                               
+                               sfftxtFilename = validParameter.validFile(parameters, "sfftxt", true);
+                               if (sfftxtFilename == "not found") { sfftxtFilename = "";  }
+                               else if (sfftxtFilename == "not open") { sfftxtFilename = "";  }
+                       }
+                       
+                       if ((sfftxtFilename == "") && (filenames.size() == 0)) {  m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true; }
                }
        }
        catch(exception& e) {
@@ -234,13 +253,14 @@ SffInfoCommand::SffInfoCommand(string option)  {
 
 void SffInfoCommand::help(){
        try {
-               m->mothurOut("The sffinfo command reads a sff file and extracts the sequence data.\n");
+               m->mothurOut("The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file..\n");
                m->mothurOut("The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, and trim. sff is required. \n");
                m->mothurOut("The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n");
                m->mothurOut("The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n");
                m->mothurOut("The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n");
                m->mothurOut("The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=False. \n");
                m->mothurOut("The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n");
+               m->mothurOut("If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n");
                m->mothurOut("The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n");
                m->mothurOut("The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n");
                m->mothurOut("Example sffinfo(sff=mySffFile.sff, trim=F).\n");
@@ -277,6 +297,8 @@ int SffInfoCommand::execute(){
                        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");
                }
                
+               if (sfftxtFilename != "") {  parseSffTxt(); }
+               
                if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
                
                //report output filenames
@@ -823,4 +845,233 @@ int SffInfoCommand::readAccnosFile(string filename) {
                exit(1);
        }
 }
-//**********************************************************************************************************************/
+//**********************************************************************************************************************
+int SffInfoCommand::parseSffTxt() {
+       try {
+               
+               ifstream inSFF;
+               m->openInputFile(sfftxtFilename, inSFF);
+               
+               if (outputDir == "") {  outputDir += m->hasPath(sfftxtFilename); }
+               
+               //output file names
+               ofstream outFasta, outQual, outFlow;
+               string outFastaFileName, outQualFileName;
+               string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "flow";
+               if (trim) {
+                       outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "fasta";
+                       outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "qual";
+               }else{
+                       outFastaFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.fasta";
+                       outQualFileName = outputDir + m->getRootName(m->getSimpleName(sfftxtFilename)) + "raw.qual";
+               }
+               
+               if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
+               if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName);  }
+               if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
+               
+               //read common header
+               string commonHeader = m->getline(inSFF);
+               string magicNumber = m->getline(inSFF); 
+               string version = m->getline(inSFF);
+               string indexOffset = m->getline(inSFF);
+               string indexLength = m->getline(inSFF);
+               int numReads = parseHeaderLineToInt(inSFF);
+               string headerLength = m->getline(inSFF);
+               string keyLength = m->getline(inSFF);
+               int numFlows = parseHeaderLineToInt(inSFF);
+               string flowgramCode = m->getline(inSFF);
+               string flowChars = m->getline(inSFF);
+               string keySequence = m->getline(inSFF);
+               m->gobble(inSFF);
+               
+               string seqName;
+               
+               if (flow)       {       outFlow << numFlows << endl;    }
+               
+               for(int i=0;i<numReads;i++){
+                       
+                       //sanity check
+                       if (inSFF.eof()) { m->mothurOut("[ERROR]: Expected " + toString(numReads) + " but reached end of file at " + toString(i+1) + "."); m->mothurOutEndLine(); break; }
+                       
+                       Header header;
+                       
+                       //parse read header
+                       inSFF >> seqName;
+                       seqName = seqName.substr(1);
+                       m->gobble(inSFF);
+                       header.name = seqName;
+                       
+                       string runPrefix = parseHeaderLineToString(inSFF);              header.timestamp = runPrefix;
+                       string regionNumber = parseHeaderLineToString(inSFF);   header.region = regionNumber;
+                       string xyLocation = parseHeaderLineToString(inSFF);             header.xy = xyLocation;
+                       m->gobble(inSFF);
+                               
+                       string runName = parseHeaderLineToString(inSFF);
+                       string analysisName = parseHeaderLineToString(inSFF);
+                       string fullPath = parseHeaderLineToString(inSFF);
+                       m->gobble(inSFF);
+                       
+                       string readHeaderLen = parseHeaderLineToString(inSFF);  convert(readHeaderLen, header.headerLength);
+                       string nameLength = parseHeaderLineToString(inSFF);             convert(nameLength, header.nameLength);
+                       int numBases = parseHeaderLineToInt(inSFF);                             header.numBases = numBases;
+                       string clipQualLeft = parseHeaderLineToString(inSFF);   convert(clipQualLeft, header.clipQualLeft);
+                       int clipQualRight = parseHeaderLineToInt(inSFF);                header.clipQualRight = clipQualRight;
+                       string clipAdapLeft = parseHeaderLineToString(inSFF);   convert(clipAdapLeft, header.clipAdapterLeft);
+                       string clipAdapRight = parseHeaderLineToString(inSFF);  convert(clipAdapRight, header.clipAdapterRight);
+                       m->gobble(inSFF);
+                               
+                       seqRead read;
+                       
+                       //parse read
+                       vector<unsigned short> flowVector = parseHeaderLineToFloatVector(inSFF, numFlows);      read.flowgram = flowVector;
+                       vector<unsigned int> flowIndices = parseHeaderLineToIntVector(inSFF, numBases); 
+                       
+                       //adjust for print
+                       vector<unsigned int> flowIndicesAdjusted; flowIndicesAdjusted.push_back(flowIndices[0]);
+                       for (int j = 1; j < flowIndices.size(); j++) {   flowIndicesAdjusted.push_back(flowIndices[j] - flowIndices[j-1]);   }
+                       read.flowIndex = flowIndicesAdjusted;
+                       
+                       string bases = parseHeaderLineToString(inSFF);                                                                          read.bases = bases;
+                       vector<unsigned int> qualityScores = parseHeaderLineToIntVector(inSFF, numBases);       read.qualScores = qualityScores;
+                       m->gobble(inSFF);
+                                       
+                       //if you have provided an accosfile and this seq is not in it, then dont print
+                       bool print = true;
+                       if (seqNames.size() != 0) {   if (seqNames.count(header.name) == 0) { print = false; }  }
+                       
+                       //print 
+                       if (print) {
+                               if (fasta)      {       printFastaSeqData(outFasta, read, header);      }
+                               if (qual)       {       printQualSeqData(outQual, read, header);        }
+                               if (flow)       {       printFlowSeqData(outFlow, read, header);        }
+                       }
+                       
+                       //report progress
+                       if((i+1) % 10000 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
+                       
+                       if (m->control_pressed) {  break;  }
+               }
+               
+               //report progress
+               if (!m->control_pressed) {   if((numReads) % 10000 != 0){       m->mothurOut(toString(numReads)); m->mothurOutEndLine();                }  }
+               
+               inSFF.close();
+               
+               if (fasta)      {  outFasta.close();    }
+               if (qual)       {  outQual.close();             }
+               if (flow)       {  outFlow.close();             }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "parseSffTxt");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int SffInfoCommand::parseHeaderLineToInt(ifstream& file){
+       try {
+               int number;
+               
+               while (!file.eof())     {
+                       
+                       char c = file.get(); 
+                       if (c == ':'){
+                               file >> number;
+                               break;
+                       }
+                       
+               }
+               m->gobble(file);
+               return number;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "parseHeaderLineToInt");
+               exit(1);
+       }
+       
+}
+
+//**********************************************************************************************************************
+
+string SffInfoCommand::parseHeaderLineToString(ifstream& file){
+       try {
+               string text;
+               
+               while (!file.eof())     {
+                       char c = file.get(); 
+                       
+                       if (c == ':'){
+                               //m->gobble(file);
+                               //text = m->getline(file);      
+                               file >> text;
+                               break;
+                       }
+               }
+               m->gobble(file);
+               
+               return text;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "parseHeaderLineToString");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+vector<unsigned short> SffInfoCommand::parseHeaderLineToFloatVector(ifstream& file, int length){
+       try {
+               vector<unsigned short> floatVector(length);
+               
+               while (!file.eof())     {
+                       char c = file.get(); 
+                       if (c == ':'){
+                               float temp;
+                               for(int i=0;i<length;i++){
+                                       file >> temp;
+                                       floatVector[i] = temp * 100;
+                               }
+                               break;
+                       }
+               }
+               m->gobble(file);        
+               return floatVector;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "parseHeaderLineToFloatVector");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+vector<unsigned int> SffInfoCommand::parseHeaderLineToIntVector(ifstream& file, int length){
+       try {
+               vector<unsigned int> intVector(length);
+               
+               while (!file.eof())     {
+                       char c = file.get(); 
+                       if (c == ':'){
+                               for(int i=0;i<length;i++){
+                                       file >> intVector[i];
+                               }
+                               break;
+                       }
+               }
+               m->gobble(file);        
+               return intVector;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "parseHeaderLineToIntVector");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+
+                               
+                               
index d8eb853dae6892711a9004ae9ad670032f47228e..d98edbd3e04f0213a24ed51471d09aab1b74d46a 100644 (file)
@@ -73,12 +73,13 @@ public:
        void help();
        
 private:
-       string sffFilename, outputDir, accnosName;
+       string sffFilename, sfftxtFilename, outputDir, accnosName;
        vector<string> filenames, outputNames, accnosFileNames;
        bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos;
        set<string> seqNames;
        map<string, vector<string> > outputTypes;
        
+       //extract sff file functions
        int extractSffInfo(string, string);
        int readCommonHeader(ifstream&, CommonHeader&);
        int readHeader(ifstream&, Header&);
@@ -92,7 +93,13 @@ private:
        int printFastaSeqData(ofstream&, seqRead&, Header&);
        int printQualSeqData(ofstream&, seqRead&, Header&);
        int readAccnosFile(string);
-               
+       int parseSffTxt();
+       
+       //parsesfftxt file functions
+       int parseHeaderLineToInt(ifstream&);
+       vector<unsigned short> parseHeaderLineToFloatVector(ifstream&, int);
+       vector<unsigned int> parseHeaderLineToIntVector(ifstream&, int);
+       string parseHeaderLineToString(ifstream&);
 };
 
 /**********************************************************/