*/
#include "seqsummarycommand.h"
+#include "sequence.hpp"
//***************************************************************************************************************
// NOTE: this span is rendered as a diff. Lines starting with '-' are the removed
// implementation, lines starting with '+' are the replacement, and unmarked
// lines are shared by both versions.
//
// Replacement constructor: receives the command's option string.
//   * option == "help": prints the usage text via help() and sets abort.
//   * otherwise: parses the options, checks every supplied parameter name
//     against the allowed list ({"fasta"}), and resolves the required fasta
//     file through validParameter.validFile().
// Every failure path sets abort = true; execute() tests that flag before doing
// any work. A std::exception is reported through errorOut() and the process
// exits with status 1.
-SeqSummaryCommand::SeqSummaryCommand(){
+SeqSummaryCommand::SeqSummaryCommand(string option){
try {
- globaldata = GlobalData::getInstance();
+ abort = false;
- if(globaldata->getFastaFile() != "") { readSeqs = new ReadFasta(globaldata->inputFileName); }
- else if(globaldata->getNexusFile() != "") { readSeqs = new ReadNexus(globaldata->inputFileName); }
- else if(globaldata->getClustalFile() != "") { readSeqs = new ReadClustal(globaldata->inputFileName); }
- else if(globaldata->getPhylipFile() != "") { readSeqs = new ReadPhylip(globaldata->inputFileName); }
+ //allow user to run help
+ if(option == "help") { help(); abort = true; }
- readSeqs->read();
- db = readSeqs->getDB();
- numSeqs = db->size();
+ else {
+ //valid parameters for this command
+ string Array[] = {"fasta"};
// sizeof(Array)/sizeof(string) is the element count, so myArray copies the whole array.
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+
+ //check to make sure all parameters are valid for command
// An invalid parameter only sets abort; the loop still checks the remaining ones.
+ for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ //check for required parameters
// validFile() appears to signal failure through the sentinel strings "not open"
// and "not found" (helper not visible here - confirm). Only the "not found"
// case prints a message from this function.
+ fastafile = validParameter.validFile(parameters, "fasta", true);
+ if (fastafile == "not open") { abort = true; }
+ else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the summary.seqs command."); mothurOutEndLine(); abort = true; }
+
+ }
}
catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the SeqCoordCommand class Function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ errorOut(e, "SeqSummaryCommand", "SeqSummaryCommand");
exit(1);
}
- catch(...) {
- cout << "An unknown error has occurred in the SeqCoordCommand class function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+}
+//**********************************************************************************************************************
+
// help(): prints the usage text for the summary.seqs command through mothurOut().
// A std::exception raised while printing is reported via errorOut() and the
// process exits with status 1.
// NOTE(review): the first message still ends in the placeholder "and ....";
// the description of what the command does looks unfinished.
+void SeqSummaryCommand::help(){
+ try {
+ mothurOut("The summary.seqs command reads a fastafile and ....\n");
+ mothurOut("The summary.seqs command parameter is fasta and it is required.\n");
+ mothurOut("The summary.seqs command should be in the following format: \n");
+ mothurOut("summary.seqs(fasta=yourFastaFile) \n");
+ mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
+ }
+ catch(exception& e) {
+ errorOut(e, "SeqSummaryCommand", "help");
// The unmarked "exit(1);" and final "}" below are diff context: in the removed
// code they closed the old constructor's catch(...) handler, in the new code
// they close this function's catch block and body.
exit(1);
- }
+ }
}
//***************************************************************************************************************
// Destructor with an empty body in both versions; the change only collapses it
// onto a single line with an explicit "do nothing" marker.
-SeqSummaryCommand::~SeqSummaryCommand(){
-}
+SeqSummaryCommand::~SeqSummaryCommand(){ /* do nothing */ }
//***************************************************************************************************************
// execute(): diff rendering - '-' lines are the removed implementation, '+'
// lines the replacement, unmarked lines are shared.
//
// Replacement behaviour:
//   * returns 0 immediately when abort is set;
//   * otherwise constructs one Sequence per iteration from fastafile and writes
//     one tab-separated row per sequence (seqname, start, end, nbases, ambigs,
//     polymer) to "<fastafile>.summary";
//   * then prints minimum / 2.5% / 25% / median / 75% / 97.5% / maximum of each
//     column, plus the number of sequences, through mothurOut().
// Returns 0 on success. A std::exception is reported via errorOut() and the
// process exits with status 1.
int SeqSummaryCommand::execute(){
try{
- ofstream outfile;
- string summaryFile = getRootName(globaldata->inputFileName) + "summary";
- openOutputFile(summaryFile, outfile);
+ if (abort == true) { return 0; }
+
+ ifstream inFASTA;
+ openInputFile(fastafile, inFASTA);
+ int numSeqs = 0;
- vector<int> startPosition(numSeqs, 0);
- vector<int> endPosition(numSeqs, 0);
- vector<int> seqLength(numSeqs, 0);
- vector<int> ambigBases(numSeqs, 0);
- vector<int> longHomoPolymer(numSeqs, 0);
+ ofstream outSummary;
+ string summaryFile = fastafile + ".summary";
+ openOutputFile(summaryFile, outSummary);
- if(db->get(0).getIsAligned() == 1){
- outfile << "seqname\tstart\tend\tlength\tambiguities\tlonghomopolymer" << endl;
- for(int i = 0; i < numSeqs; i++) {
- Sequence current = db->get(i);
- startPosition[i] = current.getStartPos();
- endPosition[i] = current.getEndPos();
- seqLength[i] = current.getNumBases();
- ambigBases[i] = current.getAmbigBases();
- longHomoPolymer[i] = current.getLongHomoPolymer();
- outfile << current.getName() << '\t' << startPosition[i] << '\t' << endPosition[i] << '\t' << seqLength[i] << '\t' << ambigBases[i] << '\t' << longHomoPolymer[i] << endl;
- }
- }
- else{
- outfile << "seqname\tlength\tambiguities\tlonghomopolymer" << endl;
- for(int i=0;i<numSeqs;i++){
- Sequence current = db->get(i);
- seqLength[i] = current.getNumBases();
- ambigBases[i] = current.getAmbigBases();
- longHomoPolymer[i] = current.getLongHomoPolymer();
- outfile << current.getName() << '\t' << seqLength[i] << '\t' << ambigBases[i] << '\t' << longHomoPolymer[i] << endl;
- }
// The replacement vectors start empty and grow by one entry per sequence read.
+ vector<int> startPosition;
+ vector<int> endPosition;
+ vector<int> seqLength;
+ vector<int> ambigBases;
+ vector<int> longHomoPolymer;
+
+ outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer" << endl;
+
// The loop tests eof() before each read, and gobble() runs after every record -
// presumably to consume trailing whitespace so eof() becomes true after the last
// sequence (Sequence(ifstream&) and gobble() are not visible here; confirm).
+ while(!inFASTA.eof()){
+ Sequence current(inFASTA);
+ startPosition.push_back(current.getStartPos());
+ endPosition.push_back(current.getEndPos());
+ seqLength.push_back(current.getNumBases());
+ ambigBases.push_back(current.getAmbigBases());
+ longHomoPolymer.push_back(current.getLongHomoPolymer());
+
+ outSummary << current.getName() << '\t';
+ outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
+ outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
+ outSummary << current.getLongHomoPolymer() << endl;
+
+ numSeqs++;
+ gobble(inFASTA);
}
+ inFASTA.close();
// Each column is sorted independently, so a percentile row below combines
// values that may come from different sequences.
+ sort(startPosition.begin(), startPosition.end());
+ sort(endPosition.begin(), endPosition.end());
sort(seqLength.begin(), seqLength.end());
sort(ambigBases.begin(), ambigBases.end());
sort(longHomoPolymer.begin(), longHomoPolymer.end());
- int median = int(numSeqs * 0.500);
- int lowestPtile = int(numSeqs * 0.025);
- int lowPtile = int(numSeqs * 0.250);
- int highPtile = int(numSeqs * 0.750);
- int highestPtile = int(numSeqs * 0.975);
- int max = numSeqs - 1;
// Percentile positions are indices into the sorted vectors; int() truncates the
// product toward zero.
// NOTE(review): if the loop above pushed nothing (numSeqs stays 0), ptile100 is
// -1 and the [0] / [ptile100] lookups below index empty vectors - confirm how
// empty input is meant to be handled.
+ int ptile0_25 = int(numSeqs * 0.025);
+ int ptile25 = int(numSeqs * 0.250);
+ int ptile50 = int(numSeqs * 0.500);
+ int ptile75 = int(numSeqs * 0.750);
+ int ptile97_5 = int(numSeqs * 0.975);
+ int ptile100 = numSeqs - 1;
- cout << endl;
- if(db->get(0).getIsAligned() == 1){
- sort(startPosition.begin(), startPosition.end());
- sort(endPosition.begin(), endPosition.end());
-
- cout << "\t\tStart\tEnd\tLength\tN's\tPolymer" << endl;
- cout << "Minimum:\t" << startPosition[0] << '\t' << endPosition[0] << '\t' << seqLength[0] << '\t' << ambigBases[0] << '\t' << longHomoPolymer[0] << endl;
- cout << "2.5%-tile:\t" << startPosition[lowestPtile] << '\t' << endPosition[lowestPtile] << '\t' << seqLength[lowestPtile] << '\t' << ambigBases[lowestPtile] << '\t' << longHomoPolymer[lowestPtile] << endl;
- cout << "25%-tile:\t" << startPosition[lowPtile] << '\t' << endPosition[lowPtile] << '\t' << seqLength[lowPtile] << '\t' << ambigBases[lowPtile] << '\t' << longHomoPolymer[lowPtile] << endl;
- cout << "Median: \t" << startPosition[median] << '\t' << endPosition[median] << '\t' << seqLength[median] << '\t' << ambigBases[median] << '\t' << longHomoPolymer[median] << endl;
- cout << "75%-tile:\t" << startPosition[highPtile] << '\t' << endPosition[highPtile] << '\t' << seqLength[highPtile] << '\t' << ambigBases[highPtile] << '\t' << longHomoPolymer[highPtile] << endl;
- cout << "97.5%-tile:\t" << startPosition[highestPtile] << '\t' << endPosition[highestPtile] << '\t' << seqLength[highestPtile] << '\t' << ambigBases[highestPtile] << '\t' << longHomoPolymer[highestPtile] << endl;
- cout << "Maximum:\t" << startPosition[max] << '\t' << endPosition[max] << '\t' << seqLength[max] << '\t' << ambigBases[max] << '\t' << longHomoPolymer[max] << endl;
- }
- else{
- cout << "\t\tLength\tN's\tPolymer" << endl;
- cout << "Minimum:\t" << seqLength[0] << '\t' << ambigBases[0] << '\t' << longHomoPolymer[0] << endl;
- cout << "2.5%-tile:\t" << seqLength[lowestPtile] << '\t' << ambigBases[lowestPtile] << '\t' << longHomoPolymer[lowestPtile] << endl;
- cout << "25%-tile:\t" << seqLength[lowPtile] << '\t' << ambigBases[lowPtile] << '\t' << longHomoPolymer[lowPtile] << endl;
- cout << "Median: \t" << seqLength[median] << '\t' << ambigBases[median] << '\t' << longHomoPolymer[median] << endl;
- cout << "75%-tile:\t"<< seqLength[highPtile] << '\t' << ambigBases[highPtile] << '\t' << longHomoPolymer[highPtile] << endl;
- cout << "97.5%-tile:\t"<< seqLength[highestPtile] << '\t' << ambigBases[highestPtile] << '\t' << longHomoPolymer[highestPtile] << endl;
- cout << "Maximum:\t" << seqLength[max] << '\t' << ambigBases[max] << '\t' << longHomoPolymer[max] << endl;
- }
- cout << "# of Seqs:\t" << numSeqs << endl;
// The replacement always prints all five columns (the removed code printed
// Start/End only when the first sequence reported itself as aligned).
+ mothurOutEndLine();
+ mothurOut("\t\tStart\tEnd\tNBases\tAmbigs\tPolymer"); mothurOutEndLine();
+ mothurOut("Minimum:\t" + toString(startPosition[0]) + "\t" + toString(endPosition[0]) + "\t" + toString(seqLength[0]) + "\t" + toString(ambigBases[0]) + "\t" + toString(longHomoPolymer[0])); mothurOutEndLine();
+ mothurOut("2.5%-tile:\t" + toString(startPosition[ptile0_25]) + "\t" + toString(endPosition[ptile0_25]) + "\t" + toString(seqLength[ptile0_25]) + "\t" + toString(ambigBases[ptile0_25]) + "\t"+ toString(longHomoPolymer[ptile0_25])); mothurOutEndLine();
+ mothurOut("25%-tile:\t" + toString(startPosition[ptile25]) + "\t" + toString(endPosition[ptile25]) + "\t" + toString(seqLength[ptile25]) + "\t" + toString(ambigBases[ptile25]) + "\t" + toString(longHomoPolymer[ptile25])); mothurOutEndLine();
+ mothurOut("Median: \t" + toString(startPosition[ptile50]) + "\t" + toString(endPosition[ptile50]) + "\t" + toString(seqLength[ptile50]) + "\t" + toString(ambigBases[ptile50]) + "\t" + toString(longHomoPolymer[ptile50])); mothurOutEndLine();
+ mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75])); mothurOutEndLine();
+ mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5])); mothurOutEndLine();
+ mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100])); mothurOutEndLine();
+ mothurOut("# of Seqs:\t" + toString(numSeqs)); mothurOutEndLine();
// The summary file is closed only after the report has been printed.
+ outSummary.close();
return 0;
}
catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
- catch(...) {
- cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ errorOut(e, "SeqSummaryCommand", "execute");
exit(1);
}
-
}
//***************************************************************************************************************