git.donarmstrong.com Git - mothur.git/blobdiff - sffinfocommand.cpp
removed read.dist, read.otu, read.tree and globaldata. added current to defaults...
[mothur.git] / sffinfocommand.cpp
index 4a9271ce94df57f99e9734ba2b4d9d30d20872e9..fda9604fc9fd1128094076d315549accb77fab5e 100644 (file)
 #include "endiannessmacros.h"
 
 //**********************************************************************************************************************
-vector<string> SffInfoCommand::getValidParameters(){   
-       try {
-               string Array[] =  {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir", "outputdir"};
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+vector<string> SffInfoCommand::setParameters(){        
+       try {           
+               CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(psff);
+               CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
+               CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "",false,false); parameters.push_back(psfftxt);
+               CommandParameter pflow("flow", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflow);
+               CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(ptrim);
+               CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pfasta);
+               CommandParameter pqfile("name", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqfile);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
        }
        catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "getValidParameters");
+               m->errorOut(e, "SffInfoCommand", "setParameters");
                exit(1);
        }
 }
 //**********************************************************************************************************************
-SffInfoCommand::SffInfoCommand(){      
+string SffInfoCommand::getHelpString(){        // returns the sffinfo usage text as one string instead of printing it
        try {
-               abort = true; calledHelp = true; 
-               vector<string> tempOutNames;
-               outputTypes["fasta"] = tempOutNames;
-               outputTypes["flow"] = tempOutNames;
-               outputTypes["sfftxt"] = tempOutNames;
-               outputTypes["qual"] = tempOutNames;
+               string helpString = ""; // accumulated one newline-terminated sentence at a time
+               helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";
+               helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, and trim. sff is required. \n"; // NOTE(review): "sff is required" looks stale - the option constructor also accepts sfftxt or falls back to m->getSFFFile(); confirm wording
+               helpString += "The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n";
+               helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n";
+               helpString += "The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n";
+               helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=False. \n";
+               helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n";
+               helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";
+               helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n";
+               helpString += "The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n";
+               helpString += "Example sffinfo(sff=mySffFile.sff, trim=F).\n";
+               helpString += "Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n";
+               return helpString;
        }
        catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-vector<string> SffInfoCommand::getRequiredParameters(){        
-       try {
-               string Array[] =  {"sff", "sfftxt", "or"};
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-               return myArray;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "getRequiredParameters");
+               m->errorOut(e, "SffInfoCommand", "getHelpString"); // log with class and function name before exiting
                exit(1);
        }
 }
+
+
 //**********************************************************************************************************************
-vector<string> SffInfoCommand::getRequiredFiles(){     
+SffInfoCommand::SffInfoCommand(){      // no-argument constructor: sets abort/calledHelp, registers parameters, and creates empty output-type entries
        try {
-               vector<string> myArray;
-               return myArray;
+               abort = true; calledHelp = true; // presumably marks this instance as not runnable (help/introspection only) - confirm against callers
+               setParameters(); // returned name list is discarded; called for its effect of filling `parameters`
+               vector<string> tempOutNames; // empty list used as the initial value for every output type below
+               outputTypes["fasta"] = tempOutNames;
+               outputTypes["flow"] = tempOutNames;
+               outputTypes["sfftxt"] = tempOutNames;
+               outputTypes["qfile"] = tempOutNames;
        }
        catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "getRequiredFiles");
+               m->errorOut(e, "SffInfoCommand", "SffInfoCommand"); // log with class and function name before exiting
                exit(1);
        }
 }
@@ -72,8 +85,7 @@ SffInfoCommand::SffInfoCommand(string option)  {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"sff","qfile","fasta","flow","trim","accnos","sfftxt","outputdir","inputdir", "outputdir"};
-                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       vector<string> myArray = setParameters();
                        
                        OptionParser parser(option);
                        map<string, string> parameters = parser.getParameters();
@@ -89,7 +101,7 @@ SffInfoCommand::SffInfoCommand(string option)  {
                        outputTypes["fasta"] = tempOutNames;
                        outputTypes["flow"] = tempOutNames;
                        outputTypes["sfftxt"] = tempOutNames;
-                       outputTypes["qual"] = tempOutNames;
+                       outputTypes["qfile"] = tempOutNames;
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
@@ -240,7 +252,12 @@ SffInfoCommand::SffInfoCommand(string option)  {
                                else if (sfftxtFilename == "not open") { sfftxtFilename = "";  }
                        }
                        
-                       if ((sfftxtFilename == "") && (filenames.size() == 0)) {  m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true; }
+                       if ((sfftxtFilename == "") && (filenames.size() == 0)) {  
+                               //if there is a current sff file, use it
+                               string filename = m->getSFFFile(); 
+                               if (filename != "") { filenames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the sff parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true;  }
+                       }
                }
        }
        catch(exception& e) {
@@ -248,32 +265,6 @@ SffInfoCommand::SffInfoCommand(string option)  {
                exit(1);
        }
 }
-//**********************************************************************************************************************
-
-void SffInfoCommand::help(){
-       try {
-               m->mothurOut("The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file..\n");
-               m->mothurOut("The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, and trim. sff is required. \n");
-               m->mothurOut("The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n");
-               m->mothurOut("The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n");
-               m->mothurOut("The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n");
-               m->mothurOut("The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=False. \n");
-               m->mothurOut("The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n");
-               m->mothurOut("If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n");
-               m->mothurOut("The trim parameter allows you to indicate if you would like a sequences and quality scores trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n");
-               m->mothurOut("The accnos parameter allows you to provide a accnos file containing the names of the sequences you would like extracted. You may enter multiple files by separating them by -'s. \n");
-               m->mothurOut("Example sffinfo(sff=mySffFile.sff, trim=F).\n");
-               m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n");
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SffInfoCommand", "help");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-
-SffInfoCommand::~SffInfoCommand(){}
-
 //**********************************************************************************************************************
 int SffInfoCommand::execute(){
        try {
@@ -300,6 +291,18 @@ int SffInfoCommand::execute(){
                
                if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
                
+               //set fasta file as new current fastafile
+               string current = "";
+               itTypes = outputTypes.find("fasta");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
+               }
+               
+               itTypes = outputTypes.find("qfile");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
+               }       
+               
                //report output filenames
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -336,7 +339,7 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){
                
                if (sfftxt) { m->openOutputFile(sfftxtFileName, outSfftxt); outSfftxt.setf(ios::fixed, ios::floatfield); outSfftxt.setf(ios::showpoint);  outputNames.push_back(sfftxtFileName);  outputTypes["sfftxt"].push_back(sfftxtFileName); }
                if (fasta)      { m->openOutputFile(outFastaFileName, outFasta);        outputNames.push_back(outFastaFileName); outputTypes["fasta"].push_back(outFastaFileName); }
-               if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qual"].push_back(outQualFileName);  }
+               if (qual)       { m->openOutputFile(outQualFileName, outQual);          outputNames.push_back(outQualFileName); outputTypes["qfile"].push_back(outQualFileName);  }
                if (flow)       { m->openOutputFile(outFlowFileName, outFlow);          outputNames.push_back(outFlowFileName);  outFlow.setf(ios::fixed, ios::floatfield); outFlow.setf(ios::showpoint); outputTypes["flow"].push_back(outFlowFileName);  }
                
                ifstream in;
@@ -344,8 +347,9 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){
                
                CommonHeader header; 
                readCommonHeader(in, header);
-               
+       
                int count = 0;
+               mycount = 0;
                
                //check magic number and version
                if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }
@@ -380,6 +384,7 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){
                        }
                        
                        count++;
+                       mycount++;
                
                        //report progress
                        if((count+1) % 10000 == 0){     m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }
@@ -613,32 +618,36 @@ int SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, i
 int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {
        try {
                
-               string time = name.substr(0, 6);
-               unsigned int timeNum = m->fromBase36(time);
-                       
-               int q1 = timeNum / 60;
-               int sec = timeNum - 60 * q1;
-               int q2 = q1 / 60;
-               int minute = q1 - 60 * q2;
-               int q3 = q2 / 24;
-               int hr = q2 - 24 * q3;
-               int q4 = q3 / 32;
-               int day = q3 - 32 * q4;
-               int q5 = q4 / 13;
-               int mon = q4 - 13 * q5;
-               int year = 2000 + q5;
-               
-               timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);
-               
-               region = name.substr(7, 2);
-               
-               string xyNum = name.substr(9);
-               unsigned int myXy = m->fromBase36(xyNum);
-               int x = myXy >> 12;
-               int y = myXy & 4095;
-               
-               xy = toString(x) + "_" + toString(y);
+               if (name.length() >= 6) {
+                       string time = name.substr(0, 6);
+                       unsigned int timeNum = m->fromBase36(time);
                        
+                       int q1 = timeNum / 60;
+                       int sec = timeNum - 60 * q1;
+                       int q2 = q1 / 60;
+                       int minute = q1 - 60 * q2;
+                       int q3 = q2 / 24;
+                       int hr = q2 - 24 * q3;
+                       int q4 = q3 / 32;
+                       int day = q3 - 32 * q4;
+                       int q5 = q4 / 13;
+                       int mon = q4 - 13 * q5;
+                       int year = 2000 + q5;
+               
+                       timestamp = toString(year) + "_" + toString(mon) + "_" + toString(day) + "_" + toString(hr) + "_" + toString(minute) + "_" + toString(sec);
+               }
+               
+               if (name.length() >= 9) {
+                       region = name.substr(7, 2);
+               
+                       string xyNum = name.substr(9);
+                       unsigned int myXy = m->fromBase36(xyNum);
+                       int x = myXy >> 12;
+                       int y = myXy & 4095;
+               
+                       xy = toString(x) + "_" + toString(y);
+               }
+               
                return 0;
        }
        catch(exception& e) {
@@ -772,6 +781,7 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade
                
                if (trim) {
                        if(header.clipQualRight < header.clipQualLeft){
+                               out << ">" << header.name << " xy=" << header.xy << endl;
                                out << "0\t0\t0\t0";
                        }
                        else if((header.clipQualRight != 0) && ((header.clipQualRight-header.clipQualLeft) >= 0)){