5 * Created by Pat Schloss on 5/30/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "seqsummarycommand.h"
11 #include "sequence.hpp"
13 //***************************************************************************************************************
15 SeqSummaryCommand::SeqSummaryCommand(string option){
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string Array[] = {"fasta"};
25 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27 OptionParser parser(option);
28 map<string,string> parameters = parser.getParameters();
30 ValidParameters validParameter;
32 //check to make sure all parameters are valid for command
33 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
34 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
37 //check for required parameters
38 fastafile = validParameter.validFile(parameters, "fasta", true);
39 if (fastafile == "not open") { abort = true; }
40 else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the summary.seqs command."); mothurOutEndLine(); abort = true; }
45 errorOut(e, "SeqSummaryCommand", "SeqSummaryCommand");
49 //**********************************************************************************************************************
51 void SeqSummaryCommand::help(){
53 mothurOut("The summary.seqs command reads a fastafile and ....\n");
54 mothurOut("The summary.seqs command parameter is fasta and it is required.\n");
55 mothurOut("The summary.seqs command should be in the following format: \n");
56 mothurOut("summary.seqs(fasta=yourFastaFile) \n");
57 mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
60 errorOut(e, "SeqSummaryCommand", "help");
65 //***************************************************************************************************************
67 SeqSummaryCommand::~SeqSummaryCommand(){ /* do nothing */ }
69 //***************************************************************************************************************
71 int SeqSummaryCommand::execute(){
74 if (abort == true) { return 0; }
77 openInputFile(fastafile, inFASTA);
81 string summaryFile = fastafile + ".summary";
82 openOutputFile(summaryFile, outSummary);
84 vector<int> startPosition;
85 vector<int> endPosition;
86 vector<int> seqLength;
87 vector<int> ambigBases;
88 vector<int> longHomoPolymer;
90 outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer" << endl;
92 while(!inFASTA.eof()){
93 Sequence current(inFASTA);
94 startPosition.push_back(current.getStartPos());
95 endPosition.push_back(current.getEndPos());
96 seqLength.push_back(current.getNumBases());
97 ambigBases.push_back(current.getAmbigBases());
98 longHomoPolymer.push_back(current.getLongHomoPolymer());
100 outSummary << current.getName() << '\t';
101 outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
102 outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
103 outSummary << current.getLongHomoPolymer() << endl;
110 sort(startPosition.begin(), startPosition.end());
111 sort(endPosition.begin(), endPosition.end());
112 sort(seqLength.begin(), seqLength.end());
113 sort(ambigBases.begin(), ambigBases.end());
114 sort(longHomoPolymer.begin(), longHomoPolymer.end());
116 int ptile0_25 = int(numSeqs * 0.025);
117 int ptile25 = int(numSeqs * 0.250);
118 int ptile50 = int(numSeqs * 0.500);
119 int ptile75 = int(numSeqs * 0.750);
120 int ptile97_5 = int(numSeqs * 0.975);
121 int ptile100 = numSeqs - 1;
124 mothurOut("\t\tStart\tEnd\tNBases\tAmbigs\tPolymer"); mothurOutEndLine();
125 mothurOut("Minimum:\t" + toString(startPosition[0]) + "\t" + toString(endPosition[0]) + "\t" + toString(seqLength[0]) + "\t" + toString(ambigBases[0]) + "\t" + toString(longHomoPolymer[0])); mothurOutEndLine();
126 mothurOut("2.5%-tile:\t" + toString(startPosition[ptile0_25]) + "\t" + toString(endPosition[ptile0_25]) + "\t" + toString(seqLength[ptile0_25]) + "\t" + toString(ambigBases[ptile0_25]) + "\t"+ toString(longHomoPolymer[ptile0_25])); mothurOutEndLine();
127 mothurOut("25%-tile:\t" + toString(startPosition[ptile25]) + "\t" + toString(endPosition[ptile25]) + "\t" + toString(seqLength[ptile25]) + "\t" + toString(ambigBases[ptile25]) + "\t" + toString(longHomoPolymer[ptile25])); mothurOutEndLine();
128 mothurOut("Median: \t" + toString(startPosition[ptile50]) + "\t" + toString(endPosition[ptile50]) + "\t" + toString(seqLength[ptile50]) + "\t" + toString(ambigBases[ptile50]) + "\t" + toString(longHomoPolymer[ptile50])); mothurOutEndLine();
129 mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75])); mothurOutEndLine();
130 mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5])); mothurOutEndLine();
131 mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100])); mothurOutEndLine();
132 mothurOut("# of Seqs:\t" + toString(numSeqs)); mothurOutEndLine();
137 catch(exception& e) {
138 errorOut(e, "SeqSummaryCommand", "execute");
143 //***************************************************************************************************************