]> git.donarmstrong.com Git - mothur.git/blobdiff - seqsummarycommand.cpp
added name parameter to phylotype command
[mothur.git] / seqsummarycommand.cpp
index 18eddec2469cc6ea34ddc7663620f6ead6021006..4514e01713b5d1e3e51a6b7c60c4ed4155a0e5ad 100644 (file)
@@ -8,22 +8,79 @@
  */
 
 #include "seqsummarycommand.h"
+#include "sequence.hpp"
 
 //***************************************************************************************************************
 
-SeqSummaryCommand::SeqSummaryCommand(){
+SeqSummaryCommand::SeqSummaryCommand(string option)  {
        try {
-               globaldata = GlobalData::getInstance();
-               if(globaldata->getFastaFile() == "")            {       cout << "you need to at least enter a fasta file name" << endl; }
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid parameters for this command
+                       string Array[] =  {"fasta","outputdir","inputdir"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //if the user changes the input directory, the command factory will send this info to us in the inputdir parameter
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("fasta");
+                               //user has given a fasta file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
+                               }
+                       }
+                       
+                       //check for required parameters
+                       fastafile = validParameter.validFile(parameters, "fasta", true);
+                       if (fastafile == "not open") { abort = true; }
+                       else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the summary.seqs command."); m->mothurOutEndLine(); abort = true;  }       
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = ""; 
+                               outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it  
+                       }
+
+               }
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the SeqCoordCommand class Function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "SeqSummaryCommand", "SeqSummaryCommand");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the SeqCoordCommand class function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+}
+//**********************************************************************************************************************
+
+void SeqSummaryCommand::help(){
+       try {
+               m->mothurOut("The summary.seqs command reads a fastafile and ....\n");
+               m->mothurOut("The summary.seqs command parameter is fasta and it is required.\n");
+               m->mothurOut("The summary.seqs command should be in the following format: \n");
+               m->mothurOut("summary.seqs(fasta=yourFastaFile) \n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SeqSummaryCommand", "help");
                exit(1);
-       }       
+       }
 }
 
 //***************************************************************************************************************
@@ -34,13 +91,15 @@ SeqSummaryCommand::~SeqSummaryCommand(){    /*      do nothing      */      }
 
 int SeqSummaryCommand::execute(){
        try{
-
+               
+               if (abort == true) { return 0; }
+               
                ifstream inFASTA;
-               openInputFile(globaldata->getFastaFile(), inFASTA);
+               openInputFile(fastafile, inFASTA);
                int numSeqs = 0;
 
                ofstream outSummary;
-               string summaryFile = globaldata->getFastaFile() + ".summary";
+               string summaryFile = outputDir + getSimpleName(fastafile) + ".summary";
                openOutputFile(summaryFile, outSummary);
                
                vector<int> startPosition;
@@ -52,19 +111,23 @@ int SeqSummaryCommand::execute(){
                outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer" << endl;                   
 
                while(!inFASTA.eof()){
-                       Sequence current(inFASTA);
-                       startPosition.push_back(current.getStartPos());
-                       endPosition.push_back(current.getEndPos());
-                       seqLength.push_back(current.getNumBases());
-                       ambigBases.push_back(current.getAmbigBases());
-                       longHomoPolymer.push_back(current.getLongHomoPolymer());
-
-                       outSummary << current.getName() << '\t';
-                       outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
-                       outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
-                       outSummary << current.getLongHomoPolymer() << endl;
+                       if (m->control_pressed) { inFASTA.close(); outSummary.close(); remove(summaryFile.c_str()); return 0; }
                        
-                       numSeqs++;
+                       Sequence current(inFASTA);
+                       if (current.getName() != "") {
+                               startPosition.push_back(current.getStartPos());
+                               endPosition.push_back(current.getEndPos());
+                               seqLength.push_back(current.getNumBases());
+                               ambigBases.push_back(current.getAmbigBases());
+                               longHomoPolymer.push_back(current.getLongHomoPolymer());
+                               
+                               outSummary << current.getName() << '\t';
+                               outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
+                               outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
+                               outSummary << current.getLongHomoPolymer() << endl;
+                               
+                               numSeqs++;
+                       }
                        gobble(inFASTA);
                }
                inFASTA.close();
@@ -82,28 +145,38 @@ int SeqSummaryCommand::execute(){
                int ptile97_5   = int(numSeqs * 0.975);
                int ptile100    = numSeqs - 1;
                
-               cout << endl;
-               cout << "\t\tStart\tEnd\tNBases\tAmbigs\tPolymer" << endl;
-               cout << "Minimum:\t" << startPosition[0] << '\t' << endPosition[0] << '\t' << seqLength[0] << '\t' << ambigBases[0] << '\t' << longHomoPolymer[0] << endl;
-               cout << "2.5%-tile:\t" << startPosition[ptile0_25] << '\t' << endPosition[ptile0_25] << '\t' << seqLength[ptile0_25] << '\t' << ambigBases[ptile0_25] << '\t' << longHomoPolymer[ptile0_25] << endl;
-               cout << "25%-tile:\t" << startPosition[ptile25] << '\t' << endPosition[ptile25] << '\t' << seqLength[ptile25] << '\t' << ambigBases[ptile25] << '\t' << longHomoPolymer[ptile25] << endl;
-               cout << "Median: \t" << startPosition[ptile50] << '\t' << endPosition[ptile50] << '\t' << seqLength[ptile50] << '\t' << ambigBases[ptile50] << '\t' << longHomoPolymer[ptile50] << endl;
-               cout << "75%-tile:\t" << startPosition[ptile75] << '\t' << endPosition[ptile75] << '\t' << seqLength[ptile75] << '\t' << ambigBases[ptile75] << '\t' << longHomoPolymer[ptile75] << endl;
-               cout << "97.5%-tile:\t" << startPosition[ptile97_5] << '\t' << endPosition[ptile97_5] << '\t' << seqLength[ptile97_5] << '\t' << ambigBases[ptile97_5] << '\t' << longHomoPolymer[ptile97_5] << endl;
-               cout << "Maximum:\t" << startPosition[ptile100] << '\t' << endPosition[ptile100] << '\t' << seqLength[ptile100] << '\t' << ambigBases[ptile100] << '\t' << longHomoPolymer[ptile100] << endl;
-               cout << "# of Seqs:\t" << numSeqs << endl;
+               //to compensate for blank sequences that would result in startPosition and endPosition equalling -1
+               if (startPosition[0] == -1) {  startPosition[0] = 0;    }
+               if (endPosition[0] == -1)       {  endPosition[0] = 0;          }
                
+               if (m->control_pressed) {  outSummary.close(); remove(summaryFile.c_str()); return 0; }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("\t\tStart\tEnd\tNBases\tAmbigs\tPolymer"); m->mothurOutEndLine();
+               m->mothurOut("Minimum:\t" + toString(startPosition[0]) + "\t" + toString(endPosition[0]) + "\t" + toString(seqLength[0]) + "\t" + toString(ambigBases[0]) + "\t" + toString(longHomoPolymer[0])); m->mothurOutEndLine();
+               m->mothurOut("2.5%-tile:\t" + toString(startPosition[ptile0_25]) + "\t" + toString(endPosition[ptile0_25]) + "\t" + toString(seqLength[ptile0_25]) + "\t" + toString(ambigBases[ptile0_25]) + "\t"+ toString(longHomoPolymer[ptile0_25])); m->mothurOutEndLine();
+               m->mothurOut("25%-tile:\t" + toString(startPosition[ptile25]) + "\t" + toString(endPosition[ptile25]) + "\t" + toString(seqLength[ptile25]) + "\t" + toString(ambigBases[ptile25]) + "\t" + toString(longHomoPolymer[ptile25])); m->mothurOutEndLine();
+               m->mothurOut("Median: \t" + toString(startPosition[ptile50]) + "\t" + toString(endPosition[ptile50]) + "\t" + toString(seqLength[ptile50]) + "\t" + toString(ambigBases[ptile50]) + "\t" + toString(longHomoPolymer[ptile50])); m->mothurOutEndLine();
+               m->mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75])); m->mothurOutEndLine();
+               m->mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5])); m->mothurOutEndLine();
+               m->mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100])); m->mothurOutEndLine();
+               m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine();
+               
+               outSummary.close();
+               
+               if (m->control_pressed) {  remove(summaryFile.c_str()); return 0; }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Name: "); m->mothurOutEndLine();
+               m->mothurOut(summaryFile); m->mothurOutEndLine();       
+               m->mothurOutEndLine();
+
                return 0;
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "SeqSummaryCommand", "execute");
                exit(1);
        }
-       
 }
 
 //***************************************************************************************************************