5 * Created by Pat Schloss on 5/30/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "seqsummarycommand.h"
11 #include "sequence.hpp"
13 //***************************************************************************************************************
15 SeqSummaryCommand::SeqSummaryCommand(string option){
17 globaldata = GlobalData::getInstance();
20 //allow user to run help
21 if(option == "help") { help(); abort = true; }
24 //valid paramters for this command
25 string Array[] = {"fasta"};
26 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
28 parser = new OptionParser();
29 parser->parse(option, parameters); delete parser;
31 ValidParameters* validParameter = new ValidParameters();
33 //check to make sure all parameters are valid for command
34 for (it = parameters.begin(); it != parameters.end(); it++) {
35 if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
38 //check for required parameters
39 fastafile = validParameter->validFile(parameters, "fasta", true);
40 if (fastafile == "not open") { abort = true; }
41 else if (fastafile == "not found") { fastafile = ""; cout << "fasta is a required parameter for the summary.seqs command." << endl; abort = true; }
42 else { globaldata->setFastaFile(fastafile); globaldata->setFormat("fasta"); }
44 delete validParameter;
48 cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function SeqSummaryCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
52 cout << "An unknown error has occurred in the SeqSummaryCommand class function SeqSummaryCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
56 //**********************************************************************************************************************
58 void SeqSummaryCommand::help(){
60 cout << "The summary.seqs command reads a fastafile and ...." << "\n";
61 cout << "The summary.seqs command parameter is fasta and it is required." << "\n";
62 cout << "The summary.seqs command should be in the following format: " << "\n";
63 cout << "summary.seqs(fasta=yourFastaFile) " << "\n";
64 cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n";
67 cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
71 cout << "An unknown error has occurred in the SeqSummaryCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
76 //***************************************************************************************************************
78 SeqSummaryCommand::~SeqSummaryCommand(){ /* do nothing */ }
80 //***************************************************************************************************************
82 int SeqSummaryCommand::execute(){
85 if (abort == true) { return 0; }
88 openInputFile(fastafile, inFASTA);
92 string summaryFile = fastafile + ".summary";
93 openOutputFile(summaryFile, outSummary);
95 vector<int> startPosition;
96 vector<int> endPosition;
97 vector<int> seqLength;
98 vector<int> ambigBases;
99 vector<int> longHomoPolymer;
101 outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer" << endl;
103 while(!inFASTA.eof()){
104 Sequence current(inFASTA);
105 startPosition.push_back(current.getStartPos());
106 endPosition.push_back(current.getEndPos());
107 seqLength.push_back(current.getNumBases());
108 ambigBases.push_back(current.getAmbigBases());
109 longHomoPolymer.push_back(current.getLongHomoPolymer());
111 outSummary << current.getName() << '\t';
112 outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
113 outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
114 outSummary << current.getLongHomoPolymer() << endl;
121 sort(startPosition.begin(), startPosition.end());
122 sort(endPosition.begin(), endPosition.end());
123 sort(seqLength.begin(), seqLength.end());
124 sort(ambigBases.begin(), ambigBases.end());
125 sort(longHomoPolymer.begin(), longHomoPolymer.end());
127 int ptile0_25 = int(numSeqs * 0.025);
128 int ptile25 = int(numSeqs * 0.250);
129 int ptile50 = int(numSeqs * 0.500);
130 int ptile75 = int(numSeqs * 0.750);
131 int ptile97_5 = int(numSeqs * 0.975);
132 int ptile100 = numSeqs - 1;
135 cout << "\t\tStart\tEnd\tNBases\tAmbigs\tPolymer" << endl;
136 cout << "Minimum:\t" << startPosition[0] << '\t' << endPosition[0] << '\t' << seqLength[0] << '\t' << ambigBases[0] << '\t' << longHomoPolymer[0] << endl;
137 cout << "2.5%-tile:\t" << startPosition[ptile0_25] << '\t' << endPosition[ptile0_25] << '\t' << seqLength[ptile0_25] << '\t' << ambigBases[ptile0_25] << '\t' << longHomoPolymer[ptile0_25] << endl;
138 cout << "25%-tile:\t" << startPosition[ptile25] << '\t' << endPosition[ptile25] << '\t' << seqLength[ptile25] << '\t' << ambigBases[ptile25] << '\t' << longHomoPolymer[ptile25] << endl;
139 cout << "Median: \t" << startPosition[ptile50] << '\t' << endPosition[ptile50] << '\t' << seqLength[ptile50] << '\t' << ambigBases[ptile50] << '\t' << longHomoPolymer[ptile50] << endl;
140 cout << "75%-tile:\t" << startPosition[ptile75] << '\t' << endPosition[ptile75] << '\t' << seqLength[ptile75] << '\t' << ambigBases[ptile75] << '\t' << longHomoPolymer[ptile75] << endl;
141 cout << "97.5%-tile:\t" << startPosition[ptile97_5] << '\t' << endPosition[ptile97_5] << '\t' << seqLength[ptile97_5] << '\t' << ambigBases[ptile97_5] << '\t' << longHomoPolymer[ptile97_5] << endl;
142 cout << "Maximum:\t" << startPosition[ptile100] << '\t' << endPosition[ptile100] << '\t' << seqLength[ptile100] << '\t' << ambigBases[ptile100] << '\t' << longHomoPolymer[ptile100] << endl;
143 cout << "# of Seqs:\t" << numSeqs << endl;
147 catch(exception& e) {
148 cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
152 cout << "An unknown error has occurred in the SeqSummaryCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
158 //***************************************************************************************************************