5 * Created by Pat Schloss on 5/30/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "seqsummarycommand.h"
11 #include "sequence.hpp"
13 //***************************************************************************************************************
// Constructor: caches the GlobalData singleton in `globaldata` and prints a
// warning to stdout when no FASTA file name has been set.
//
// NOTE(review): this listing is incomplete (the embedded line numbers skip), so
// the handler scaffolding around the two error messages below is not visible
// here; only the visible statements are annotated.
// NOTE(review): an empty FASTA file name only produces a message in the visible
// code - nothing shown here stops execute() from running afterwards; confirm
// that the caller checks for this.
15 SeqSummaryCommand::SeqSummaryCommand(){
17 globaldata = GlobalData::getInstance();
18 if(globaldata->getFastaFile() == "") { cout << "you need to at least enter a fasta file name" << endl; }
// The two diagnostics below previously named "SeqCoordCommand"; they now name
// this class so that a reported failure points at the right source file.
21 cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function SeqSummaryCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
25 cout << "An unknown error has occurred in the SeqSummaryCommand class function SeqSummaryCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
30 //***************************************************************************************************************
32 SeqSummaryCommand::~SeqSummaryCommand(){ /* do nothing */ }
34 //***************************************************************************************************************
// Summarizes every sequence read from the FASTA file named by
// globaldata->getFastaFile().
//
// Output:
//   * "<fasta file name>.summary": a header row followed by one tab-separated
//     row per sequence (seqname, start, end, nbases, ambigs, polymer).
//   * stdout: a table giving the minimum, the 2.5/25/50/75/97.5 percentiles and
//     the maximum of each statistic, followed by the number of sequences.
//
// NOTE(review): this listing is incomplete (the embedded line numbers skip).
// The declarations of inFASTA, outSummary and numSeqs, the tail of the read
// loop, the handler scaffolding around the error messages and the return
// statement are not visible here, so only the visible statements are annotated.
36 int SeqSummaryCommand::execute(){
40 openInputFile(globaldata->getFastaFile(), inFASTA);
44 string summaryFile = globaldata->getFastaFile() + ".summary";
45 openOutputFile(summaryFile, outSummary);
// One vector per statistic; each receives one entry per sequence read.
47 vector<int> startPosition;
48 vector<int> endPosition;
49 vector<int> seqLength;
50 vector<int> ambigBases;
51 vector<int> longHomoPolymer;
53 outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer" << endl;
// NOTE(review): eof() only becomes true after a read has hit end-of-file, so
// this condition can admit one extra, empty record unless trailing whitespace
// is consumed after each sequence - confirm the unseen loop tail does that.
55 while(!inFASTA.eof()){
56 Sequence current(inFASTA);
57 startPosition.push_back(current.getStartPos());
58 endPosition.push_back(current.getEndPos());
59 seqLength.push_back(current.getNumBases());
60 ambigBases.push_back(current.getAmbigBases());
61 longHomoPolymer.push_back(current.getLongHomoPolymer());
// Per-sequence row of the summary file, in the same column order as the header.
63 outSummary << current.getName() << '\t';
64 outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
65 outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
66 outSummary << current.getLongHomoPolymer() << endl;
// Each statistic is sorted on its own, so a row of the table printed below
// does not describe any single sequence.
73 sort(startPosition.begin(), startPosition.end());
74 sort(endPosition.begin(), endPosition.end());
75 sort(seqLength.begin(), seqLength.end());
76 sort(ambigBases.begin(), ambigBases.end());
77 sort(longHomoPolymer.begin(), longHomoPolymer.end());
// Percentile positions in the sorted vectors, truncated toward zero.
// NOTE(review): presumably numSeqs equals the size of each vector - confirm.
// If numSeqs were 0, ptile100 would be -1 and every subscript below would be
// out of range; confirm an empty input cannot reach this point.
79 int ptile0_25 = int(numSeqs * 0.025);
80 int ptile25 = int(numSeqs * 0.250);
81 int ptile50 = int(numSeqs * 0.500);
82 int ptile75 = int(numSeqs * 0.750);
83 int ptile97_5 = int(numSeqs * 0.975);
84 int ptile100 = numSeqs - 1;
87 cout << "\t\tStart\tEnd\tNBases\tAmbigs\tPolymer" << endl;
88 cout << "Minimum:\t" << startPosition[0] << '\t' << endPosition[0] << '\t' << seqLength[0] << '\t' << ambigBases[0] << '\t' << longHomoPolymer[0] << endl;
89 cout << "2.5%-tile:\t" << startPosition[ptile0_25] << '\t' << endPosition[ptile0_25] << '\t' << seqLength[ptile0_25] << '\t' << ambigBases[ptile0_25] << '\t' << longHomoPolymer[ptile0_25] << endl;
90 cout << "25%-tile:\t" << startPosition[ptile25] << '\t' << endPosition[ptile25] << '\t' << seqLength[ptile25] << '\t' << ambigBases[ptile25] << '\t' << longHomoPolymer[ptile25] << endl;
91 cout << "Median: \t" << startPosition[ptile50] << '\t' << endPosition[ptile50] << '\t' << seqLength[ptile50] << '\t' << ambigBases[ptile50] << '\t' << longHomoPolymer[ptile50] << endl;
92 cout << "75%-tile:\t" << startPosition[ptile75] << '\t' << endPosition[ptile75] << '\t' << seqLength[ptile75] << '\t' << ambigBases[ptile75] << '\t' << longHomoPolymer[ptile75] << endl;
93 cout << "97.5%-tile:\t" << startPosition[ptile97_5] << '\t' << endPosition[ptile97_5] << '\t' << seqLength[ptile97_5] << '\t' << ambigBases[ptile97_5] << '\t' << longHomoPolymer[ptile97_5] << endl;
94 cout << "Maximum:\t" << startPosition[ptile100] << '\t' << endPosition[ptile100] << '\t' << seqLength[ptile100] << '\t' << ambigBases[ptile100] << '\t' << longHomoPolymer[ptile100] << endl;
95 cout << "# of Seqs:\t" << numSeqs << endl;
// The two diagnostics below previously blamed "FilterSeqsCommand"; they now
// name this class so that a reported failure points at the right source file.
100 cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
104 cout << "An unknown error has occurred in the SeqSummaryCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
110 //***************************************************************************************************************