5 * Created by Pat Schloss on 5/30/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "seqsummarycommand.h"
11 #include "sequence.hpp"
13 //***************************************************************************************************************
15 SeqSummaryCommand::SeqSummaryCommand(string option) {
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string Array[] = {"fasta","outputdir","inputdir"};
25 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27 OptionParser parser(option);
28 map<string,string> parameters = parser.getParameters();
30 ValidParameters validParameter;
31 map<string,string>::iterator it;
33 //check to make sure all parameters are valid for command
34 for (it = parameters.begin(); it != parameters.end(); it++) {
35 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
38 //if the user changes the input directory command factory will send this info to us in the output parameter
39 string inputDir = validParameter.validFile(parameters, "inputdir", false);
40 if (inputDir == "not found"){ inputDir = ""; }
43 it = parameters.find("fasta");
44 //user has given a template file
45 if(it != parameters.end()){
46 path = hasPath(it->second);
47 //if the user has not given a path then, add inputdir. else leave path alone.
48 if (path == "") { parameters["fasta"] = inputDir + it->second; }
52 //check for required parameters
53 fastafile = validParameter.validFile(parameters, "fasta", true);
54 if (fastafile == "not open") { abort = true; }
55 else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the summary.seqs command."); m->mothurOutEndLine(); abort = true; }
57 //if the user changes the output directory command factory will send this info to us in the output parameter
58 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
60 outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it
66 m->errorOut(e, "SeqSummaryCommand", "SeqSummaryCommand");
70 //**********************************************************************************************************************
72 void SeqSummaryCommand::help(){
74 m->mothurOut("The summary.seqs command reads a fastafile and ....\n");
75 m->mothurOut("The summary.seqs command parameter is fasta and it is required.\n");
76 m->mothurOut("The summary.seqs command should be in the following format: \n");
77 m->mothurOut("summary.seqs(fasta=yourFastaFile) \n");
78 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
81 m->errorOut(e, "SeqSummaryCommand", "help");
86 //***************************************************************************************************************
88 SeqSummaryCommand::~SeqSummaryCommand(){ /* do nothing */ }
90 //***************************************************************************************************************
92 int SeqSummaryCommand::execute(){
95 if (abort == true) { return 0; }
98 openInputFile(fastafile, inFASTA);
102 string summaryFile = outputDir + getSimpleName(fastafile) + ".summary";
103 openOutputFile(summaryFile, outSummary);
105 vector<int> startPosition;
106 vector<int> endPosition;
107 vector<int> seqLength;
108 vector<int> ambigBases;
109 vector<int> longHomoPolymer;
111 outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer" << endl;
113 while(!inFASTA.eof()){
114 if (m->control_pressed) { inFASTA.close(); outSummary.close(); remove(summaryFile.c_str()); return 0; }
116 Sequence current(inFASTA);
117 if (current.getName() != "") {
118 startPosition.push_back(current.getStartPos());
119 endPosition.push_back(current.getEndPos());
120 seqLength.push_back(current.getNumBases());
121 ambigBases.push_back(current.getAmbigBases());
122 longHomoPolymer.push_back(current.getLongHomoPolymer());
124 outSummary << current.getName() << '\t';
125 outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
126 outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
127 outSummary << current.getLongHomoPolymer() << endl;
135 sort(startPosition.begin(), startPosition.end());
136 sort(endPosition.begin(), endPosition.end());
137 sort(seqLength.begin(), seqLength.end());
138 sort(ambigBases.begin(), ambigBases.end());
139 sort(longHomoPolymer.begin(), longHomoPolymer.end());
141 int ptile0_25 = int(numSeqs * 0.025);
142 int ptile25 = int(numSeqs * 0.250);
143 int ptile50 = int(numSeqs * 0.500);
144 int ptile75 = int(numSeqs * 0.750);
145 int ptile97_5 = int(numSeqs * 0.975);
146 int ptile100 = numSeqs - 1;
148 //to compensate for blank sequences that would result in startPosition and endPostion equalling -1
149 if (startPosition[0] == -1) { startPosition[0] = 0; }
150 if (endPosition[0] == -1) { endPosition[0] = 0; }
152 if (m->control_pressed) { outSummary.close(); remove(summaryFile.c_str()); return 0; }
154 m->mothurOutEndLine();
155 m->mothurOut("\t\tStart\tEnd\tNBases\tAmbigs\tPolymer"); m->mothurOutEndLine();
156 m->mothurOut("Minimum:\t" + toString(startPosition[0]) + "\t" + toString(endPosition[0]) + "\t" + toString(seqLength[0]) + "\t" + toString(ambigBases[0]) + "\t" + toString(longHomoPolymer[0])); m->mothurOutEndLine();
157 m->mothurOut("2.5%-tile:\t" + toString(startPosition[ptile0_25]) + "\t" + toString(endPosition[ptile0_25]) + "\t" + toString(seqLength[ptile0_25]) + "\t" + toString(ambigBases[ptile0_25]) + "\t"+ toString(longHomoPolymer[ptile0_25])); m->mothurOutEndLine();
158 m->mothurOut("25%-tile:\t" + toString(startPosition[ptile25]) + "\t" + toString(endPosition[ptile25]) + "\t" + toString(seqLength[ptile25]) + "\t" + toString(ambigBases[ptile25]) + "\t" + toString(longHomoPolymer[ptile25])); m->mothurOutEndLine();
159 m->mothurOut("Median: \t" + toString(startPosition[ptile50]) + "\t" + toString(endPosition[ptile50]) + "\t" + toString(seqLength[ptile50]) + "\t" + toString(ambigBases[ptile50]) + "\t" + toString(longHomoPolymer[ptile50])); m->mothurOutEndLine();
160 m->mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75])); m->mothurOutEndLine();
161 m->mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5])); m->mothurOutEndLine();
162 m->mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100])); m->mothurOutEndLine();
163 m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine();
167 if (m->control_pressed) { remove(summaryFile.c_str()); return 0; }
169 m->mothurOutEndLine();
170 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
171 m->mothurOut(summaryFile); m->mothurOutEndLine();
172 m->mothurOutEndLine();
176 catch(exception& e) {
177 m->errorOut(e, "SeqSummaryCommand", "execute");
182 //***************************************************************************************************************