5 * Created by Pat Schloss on 5/30/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "seqsummarycommand.h"
11 #include "sequence.hpp"
13 //***************************************************************************************************************
15 SeqSummaryCommand::SeqSummaryCommand(string option){
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string Array[] = {"fasta"};
25 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27 OptionParser parser(option);
28 map<string,string> parameters = parser.getParameters();
30 ValidParameters validParameter;
32 //check to make sure all parameters are valid for command
33 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
34 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
37 //check for required parameters
38 fastafile = validParameter.validFile(parameters, "fasta", true);
39 if (fastafile == "not open") { abort = true; }
40 else if (fastafile == "not found") { fastafile = ""; cout << "fasta is a required parameter for the summary.seqs command." << endl; abort = true; }
45 cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function SeqSummaryCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
49 cout << "An unknown error has occurred in the SeqSummaryCommand class function SeqSummaryCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
53 //**********************************************************************************************************************
55 void SeqSummaryCommand::help(){
57 cout << "The summary.seqs command reads a fastafile and ...." << "\n";
58 cout << "The summary.seqs command parameter is fasta and it is required." << "\n";
59 cout << "The summary.seqs command should be in the following format: " << "\n";
60 cout << "summary.seqs(fasta=yourFastaFile) " << "\n";
61 cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n";
64 cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
68 cout << "An unknown error has occurred in the SeqSummaryCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
73 //***************************************************************************************************************
75 SeqSummaryCommand::~SeqSummaryCommand(){ /* do nothing */ }
77 //***************************************************************************************************************
79 int SeqSummaryCommand::execute(){
82 if (abort == true) { return 0; }
85 openInputFile(fastafile, inFASTA);
89 string summaryFile = fastafile + ".summary";
90 openOutputFile(summaryFile, outSummary);
92 vector<int> startPosition;
93 vector<int> endPosition;
94 vector<int> seqLength;
95 vector<int> ambigBases;
96 vector<int> longHomoPolymer;
98 outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer" << endl;
100 while(!inFASTA.eof()){
101 Sequence current(inFASTA);
102 startPosition.push_back(current.getStartPos());
103 endPosition.push_back(current.getEndPos());
104 seqLength.push_back(current.getNumBases());
105 ambigBases.push_back(current.getAmbigBases());
106 longHomoPolymer.push_back(current.getLongHomoPolymer());
108 outSummary << current.getName() << '\t';
109 outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
110 outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
111 outSummary << current.getLongHomoPolymer() << endl;
118 sort(startPosition.begin(), startPosition.end());
119 sort(endPosition.begin(), endPosition.end());
120 sort(seqLength.begin(), seqLength.end());
121 sort(ambigBases.begin(), ambigBases.end());
122 sort(longHomoPolymer.begin(), longHomoPolymer.end());
124 int ptile0_25 = int(numSeqs * 0.025);
125 int ptile25 = int(numSeqs * 0.250);
126 int ptile50 = int(numSeqs * 0.500);
127 int ptile75 = int(numSeqs * 0.750);
128 int ptile97_5 = int(numSeqs * 0.975);
129 int ptile100 = numSeqs - 1;
132 cout << "\t\tStart\tEnd\tNBases\tAmbigs\tPolymer" << endl;
133 cout << "Minimum:\t" << startPosition[0] << '\t' << endPosition[0] << '\t' << seqLength[0] << '\t' << ambigBases[0] << '\t' << longHomoPolymer[0] << endl;
134 cout << "2.5%-tile:\t" << startPosition[ptile0_25] << '\t' << endPosition[ptile0_25] << '\t' << seqLength[ptile0_25] << '\t' << ambigBases[ptile0_25] << '\t' << longHomoPolymer[ptile0_25] << endl;
135 cout << "25%-tile:\t" << startPosition[ptile25] << '\t' << endPosition[ptile25] << '\t' << seqLength[ptile25] << '\t' << ambigBases[ptile25] << '\t' << longHomoPolymer[ptile25] << endl;
136 cout << "Median: \t" << startPosition[ptile50] << '\t' << endPosition[ptile50] << '\t' << seqLength[ptile50] << '\t' << ambigBases[ptile50] << '\t' << longHomoPolymer[ptile50] << endl;
137 cout << "75%-tile:\t" << startPosition[ptile75] << '\t' << endPosition[ptile75] << '\t' << seqLength[ptile75] << '\t' << ambigBases[ptile75] << '\t' << longHomoPolymer[ptile75] << endl;
138 cout << "97.5%-tile:\t" << startPosition[ptile97_5] << '\t' << endPosition[ptile97_5] << '\t' << seqLength[ptile97_5] << '\t' << ambigBases[ptile97_5] << '\t' << longHomoPolymer[ptile97_5] << endl;
139 cout << "Maximum:\t" << startPosition[ptile100] << '\t' << endPosition[ptile100] << '\t' << seqLength[ptile100] << '\t' << ambigBases[ptile100] << '\t' << longHomoPolymer[ptile100] << endl;
140 cout << "# of Seqs:\t" << numSeqs << endl;
145 catch(exception& e) {
146 cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
150 cout << "An unknown error has occurred in the SeqSummaryCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
156 //***************************************************************************************************************