]> git.donarmstrong.com Git - mothur.git/blobdiff - seqsummarycommand.cpp
added checks for ^C to quit command instead of program
[mothur.git] / seqsummarycommand.cpp
index 2bffecb30c1e3c7af065ffee6add227e70e6124c..4514e01713b5d1e3e51a6b7c60c4ed4155a0e5ad 100644 (file)
  */
 
 #include "seqsummarycommand.h"
+#include "sequence.hpp"
 
 //***************************************************************************************************************
 
-SeqSummaryCommand::SeqSummaryCommand(){
+SeqSummaryCommand::SeqSummaryCommand(string option)  {
        try {
-               globaldata = GlobalData::getInstance();
+               abort = false;
                
-               if(globaldata->getFastaFile() != "")            {       readSeqs = new ReadFasta(globaldata->inputFileName);    }
-               else if(globaldata->getNexusFile() != "")       {       readSeqs = new ReadNexus(globaldata->inputFileName);    }
-               else if(globaldata->getClustalFile() != "") {   readSeqs = new ReadClustal(globaldata->inputFileName);  }
-               else if(globaldata->getPhylipFile() != "")      {       readSeqs = new ReadPhylip(globaldata->inputFileName);   }
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
                
-               readSeqs->read();
-               db = readSeqs->getDB();
-               numSeqs = db->size();
+               else {
+                       //valid paramters for this command
+                       string Array[] =  {"fasta","outputdir","inputdir"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("fasta");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
+                               }
+                       }
+                       
+                       //check for required parameters
+                       fastafile = validParameter.validFile(parameters, "fasta", true);
+                       if (fastafile == "not open") { abort = true; }
+                       else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the summary.seqs command."); m->mothurOutEndLine(); abort = true;  }       
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = ""; 
+                               outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it  
+                       }
+
+               }
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the SeqCoordCommand class Function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "SeqSummaryCommand", "SeqSummaryCommand");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the SeqCoordCommand class function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+}
+//**********************************************************************************************************************
+
+void SeqSummaryCommand::help(){
+       try {
+               m->mothurOut("The summary.seqs command reads a fastafile and ....\n");
+               m->mothurOut("The summary.seqs command parameter is fasta and it is required.\n");
+               m->mothurOut("The summary.seqs command should be in the following format: \n");
+               m->mothurOut("summary.seqs(fasta=yourFastaFile) \n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SeqSummaryCommand", "help");
                exit(1);
-       }       
+       }
 }
 
 //***************************************************************************************************************
 
-SeqSummaryCommand::~SeqSummaryCommand(){
-       delete readSeqs;
-}
+SeqSummaryCommand::~SeqSummaryCommand(){       /*      do nothing      */      }
 
 //***************************************************************************************************************
 
 int SeqSummaryCommand::execute(){
        try{
                
-               ofstream outfile;
-               string summaryFile = getRootName(globaldata->inputFileName) + "summary";
-               openOutputFile(summaryFile, outfile);
+               if (abort == true) { return 0; }
+               
+               ifstream inFASTA;
+               openInputFile(fastafile, inFASTA);
+               int numSeqs = 0;
 
-               vector<int> startPosition(numSeqs, 0);
-               vector<int> endPosition(numSeqs, 0);
-               vector<int> seqLength(numSeqs, 0);
-               vector<int> ambigBases(numSeqs, 0);
-               vector<int> longHomoPolymer(numSeqs, 0);
+               ofstream outSummary;
+               string summaryFile = outputDir + getSimpleName(fastafile) + ".summary";
+               openOutputFile(summaryFile, outSummary);
                
-               if(db->get(0).getIsAligned() == 1){
-                       outfile << "seqname\tstart\tend\tlength\tambiguities\tlonghomopolymer" << endl;                 
-                       for(int i = 0; i < numSeqs; i++) {
-                               Sequence current = db->get(i);
-                               startPosition[i] = current.getStartPos();
-                               endPosition[i] = current.getEndPos();
-                               seqLength[i] = current.getNumBases();
-                               ambigBases[i] = current.getAmbigBases();
-                               longHomoPolymer[i] = current.getLongHomoPolymer();
-                               outfile << current.getName() << '\t' << startPosition[i] << '\t' << endPosition[i] << '\t' << seqLength[i] << '\t' << ambigBases[i] << '\t' << longHomoPolymer[i] << endl;
-                       }
-               }
-               else{
-                       outfile << "seqname\tlength\tambiguities\tlonghomopolymer" << endl;
-                       for(int i=0;i<numSeqs;i++){
-                               Sequence current = db->get(i);
-                               seqLength[i] = current.getNumBases();
-                               ambigBases[i] = current.getAmbigBases();
-                               longHomoPolymer[i] = current.getLongHomoPolymer();
-                               outfile << current.getName() << '\t' << seqLength[i] << '\t' << ambigBases[i] << '\t' << longHomoPolymer[i] << endl;
+               vector<int> startPosition;
+               vector<int> endPosition;
+               vector<int> seqLength;
+               vector<int> ambigBases;
+               vector<int> longHomoPolymer;
+               
+               outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer" << endl;                   
+
+               while(!inFASTA.eof()){
+                       if (m->control_pressed) { inFASTA.close(); outSummary.close(); remove(summaryFile.c_str()); return 0; }
+                       
+                       Sequence current(inFASTA);
+                       if (current.getName() != "") {
+                               startPosition.push_back(current.getStartPos());
+                               endPosition.push_back(current.getEndPos());
+                               seqLength.push_back(current.getNumBases());
+                               ambigBases.push_back(current.getAmbigBases());
+                               longHomoPolymer.push_back(current.getLongHomoPolymer());
+                               
+                               outSummary << current.getName() << '\t';
+                               outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
+                               outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
+                               outSummary << current.getLongHomoPolymer() << endl;
+                               
+                               numSeqs++;
                        }
+                       gobble(inFASTA);
                }
+               inFASTA.close();
                
+               sort(startPosition.begin(), startPosition.end());
+               sort(endPosition.begin(), endPosition.end());
                sort(seqLength.begin(), seqLength.end());
                sort(ambigBases.begin(), ambigBases.end());
                sort(longHomoPolymer.begin(), longHomoPolymer.end());
                
-               int median                      = int(numSeqs * 0.500);
-               int lowestPtile         = int(numSeqs * 0.025);
-               int lowPtile            = int(numSeqs * 0.250);
-               int highPtile           = int(numSeqs * 0.750);
-               int highestPtile        = int(numSeqs * 0.975);
-               int max                         = numSeqs - 1;
+               int ptile0_25   = int(numSeqs * 0.025);
+               int ptile25             = int(numSeqs * 0.250);
+               int ptile50             = int(numSeqs * 0.500);
+               int ptile75             = int(numSeqs * 0.750);
+               int ptile97_5   = int(numSeqs * 0.975);
+               int ptile100    = numSeqs - 1;
                
-               cout << endl;
-               if(db->get(0).getIsAligned() == 1){
-                       sort(startPosition.begin(), startPosition.end());
-                       sort(endPosition.begin(), endPosition.end());
-                                       
-                       cout << "\t\tStart\tEnd\tLength\tN's\tPolymer" << endl;
-                       cout << "Minimum:\t" << startPosition[0] << '\t' << endPosition[0] << '\t' << seqLength[0] << '\t' << ambigBases[0] << '\t' << longHomoPolymer[0] << endl;
-                       cout << "2.5%-tile:\t" << startPosition[lowestPtile] << '\t' << endPosition[lowestPtile] << '\t' << seqLength[lowestPtile] << '\t' << ambigBases[lowestPtile] << '\t' << longHomoPolymer[lowestPtile] << endl;
-                       cout << "25%-tile:\t" << startPosition[lowPtile] << '\t' << endPosition[lowPtile] << '\t' << seqLength[lowPtile] << '\t' << ambigBases[lowPtile] << '\t' << longHomoPolymer[lowPtile] << endl;
-                       cout << "Median: \t" << startPosition[median] << '\t' << endPosition[median] << '\t' << seqLength[median] << '\t' << ambigBases[median] << '\t' << longHomoPolymer[median] << endl;
-                       cout << "75%-tile:\t" << startPosition[highPtile] << '\t' << endPosition[highPtile] << '\t' << seqLength[highPtile] << '\t' << ambigBases[highPtile] << '\t' << longHomoPolymer[highPtile] << endl;
-                       cout << "97.5%-tile:\t" << startPosition[highestPtile] << '\t' << endPosition[highestPtile] << '\t' << seqLength[highestPtile] << '\t' << ambigBases[highestPtile] << '\t' << longHomoPolymer[highestPtile] << endl;
-                       cout << "Maximum:\t" << startPosition[max] << '\t' << endPosition[max] << '\t' << seqLength[max] << '\t' << ambigBases[max] << '\t' << longHomoPolymer[max] << endl;
-               }
-               else{
-                       cout << "\t\tLength\tN's\tPolymer" << endl;
-                       cout << "Minimum:\t" << seqLength[0] << '\t' << ambigBases[0] << '\t' << longHomoPolymer[0] << endl;
-                       cout << "2.5%-tile:\t" << seqLength[lowestPtile] << '\t' << ambigBases[lowestPtile] << '\t' << longHomoPolymer[lowestPtile] << endl;
-                       cout << "25%-tile:\t" << seqLength[lowPtile] << '\t' << ambigBases[lowPtile] << '\t' << longHomoPolymer[lowPtile] << endl;
-                       cout << "Median: \t" << seqLength[median] << '\t' << ambigBases[median] << '\t' << longHomoPolymer[median] << endl;
-                       cout << "75%-tile:\t"<< seqLength[highPtile] << '\t' << ambigBases[highPtile] << '\t' << longHomoPolymer[highPtile] << endl;
-                       cout << "97.5%-tile:\t"<< seqLength[highestPtile] << '\t' << ambigBases[highestPtile] << '\t' << longHomoPolymer[highestPtile] << endl;
-                       cout << "Maximum:\t" << seqLength[max] << '\t' << ambigBases[max] << '\t' << longHomoPolymer[max] << endl;
-               }
-               cout << "# of Seqs:\t" << numSeqs << endl;
+               //to compensate for blank sequences that would result in startPosition and endPostion equalling -1
+               if (startPosition[0] == -1) {  startPosition[0] = 0;    }
+               if (endPosition[0] == -1)       {  endPosition[0] = 0;          }
+               
+               if (m->control_pressed) {  outSummary.close(); remove(summaryFile.c_str()); return 0; }
                
+               m->mothurOutEndLine();
+               m->mothurOut("\t\tStart\tEnd\tNBases\tAmbigs\tPolymer"); m->mothurOutEndLine();
+               m->mothurOut("Minimum:\t" + toString(startPosition[0]) + "\t" + toString(endPosition[0]) + "\t" + toString(seqLength[0]) + "\t" + toString(ambigBases[0]) + "\t" + toString(longHomoPolymer[0])); m->mothurOutEndLine();
+               m->mothurOut("2.5%-tile:\t" + toString(startPosition[ptile0_25]) + "\t" + toString(endPosition[ptile0_25]) + "\t" + toString(seqLength[ptile0_25]) + "\t" + toString(ambigBases[ptile0_25]) + "\t"+ toString(longHomoPolymer[ptile0_25])); m->mothurOutEndLine();
+               m->mothurOut("25%-tile:\t" + toString(startPosition[ptile25]) + "\t" + toString(endPosition[ptile25]) + "\t" + toString(seqLength[ptile25]) + "\t" + toString(ambigBases[ptile25]) + "\t" + toString(longHomoPolymer[ptile25])); m->mothurOutEndLine();
+               m->mothurOut("Median: \t" + toString(startPosition[ptile50]) + "\t" + toString(endPosition[ptile50]) + "\t" + toString(seqLength[ptile50]) + "\t" + toString(ambigBases[ptile50]) + "\t" + toString(longHomoPolymer[ptile50])); m->mothurOutEndLine();
+               m->mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75])); m->mothurOutEndLine();
+               m->mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5])); m->mothurOutEndLine();
+               m->mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100])); m->mothurOutEndLine();
+               m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine();
+               
+               outSummary.close();
+               
+               if (m->control_pressed) {  remove(summaryFile.c_str()); return 0; }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Name: "); m->mothurOutEndLine();
+               m->mothurOut(summaryFile); m->mothurOutEndLine();       
+               m->mothurOutEndLine();
+
                return 0;
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "SeqSummaryCommand", "execute");
                exit(1);
        }
-       
 }
 
 //***************************************************************************************************************