]> git.donarmstrong.com Git - mothur.git/blob - seqsummarycommand.cpp
broke up globaldata and moved error checking and help into commands
[mothur.git] / seqsummarycommand.cpp
1 /*
2  *  seqcoordcommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 5/30/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "seqsummarycommand.h"
11 #include "sequence.hpp"
12
13 //***************************************************************************************************************
14
15 SeqSummaryCommand::SeqSummaryCommand(string option){
16         try {
17                 globaldata = GlobalData::getInstance();
18                 abort = false;
19                 
20                 //allow user to run help
21                 if(option == "help") { help(); abort = true; }
22                 
23                 else {
24                         //valid paramters for this command
25                         string Array[] =  {"fasta"};
26                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27                         
28                         parser = new OptionParser();
29                         parser->parse(option, parameters);  delete parser;
30                         
31                         ValidParameters* validParameter = new ValidParameters();
32                 
33                         //check to make sure all parameters are valid for command
34                         for (it = parameters.begin(); it != parameters.end(); it++) { 
35                                 if (validParameter->isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
36                         }
37                         
38                         //check for required parameters
39                         fastafile = validParameter->validFile(parameters, "fasta", true);
40                         if (fastafile == "not open") { abort = true; }
41                         else if (fastafile == "not found") { fastafile = ""; cout << "fasta is a required parameter for the summary.seqs command." << endl; abort = true;  }    
42                         else {  globaldata->setFastaFile(fastafile);  globaldata->setFormat("fasta");   }
43                         
44                         delete validParameter;
45                 }
46         }
47         catch(exception& e) {
48                 cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function SeqSummaryCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
49                 exit(1);
50         }
51         catch(...) {
52                 cout << "An unknown error has occurred in the SeqSummaryCommand class function SeqSummaryCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
53                 exit(1);
54         }       
55 }
56 //**********************************************************************************************************************
57
58 void SeqSummaryCommand::help(){
59         try {
60                 cout << "The summary.seqs command reads a fastafile and ...." << "\n";
61                 cout << "The summary.seqs command parameter is fasta and it is required." << "\n";
62                 cout << "The summary.seqs command should be in the following format: " << "\n";
63                 cout << "summary.seqs(fasta=yourFastaFile) " << "\n";
64                 cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n";       
65         }
66         catch(exception& e) {
67                 cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
68                 exit(1);
69         }
70         catch(...) {
71                 cout << "An unknown error has occurred in the SeqSummaryCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
72                 exit(1);
73         }       
74 }
75
76 //***************************************************************************************************************
77
78 SeqSummaryCommand::~SeqSummaryCommand(){        /*      do nothing      */      }
79
80 //***************************************************************************************************************
81
82 int SeqSummaryCommand::execute(){
83         try{
84                 
85                 if (abort == true) { return 0; }
86                 
87                 ifstream inFASTA;
88                 openInputFile(fastafile, inFASTA);
89                 int numSeqs = 0;
90
91                 ofstream outSummary;
92                 string summaryFile = fastafile + ".summary";
93                 openOutputFile(summaryFile, outSummary);
94                 
95                 vector<int> startPosition;
96                 vector<int> endPosition;
97                 vector<int> seqLength;
98                 vector<int> ambigBases;
99                 vector<int> longHomoPolymer;
100                 
101                 outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer" << endl;                   
102
103                 while(!inFASTA.eof()){
104                         Sequence current(inFASTA);
105                         startPosition.push_back(current.getStartPos());
106                         endPosition.push_back(current.getEndPos());
107                         seqLength.push_back(current.getNumBases());
108                         ambigBases.push_back(current.getAmbigBases());
109                         longHomoPolymer.push_back(current.getLongHomoPolymer());
110
111                         outSummary << current.getName() << '\t';
112                         outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
113                         outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
114                         outSummary << current.getLongHomoPolymer() << endl;
115                         
116                         numSeqs++;
117                         gobble(inFASTA);
118                 }
119                 inFASTA.close();
120                 
121                 sort(startPosition.begin(), startPosition.end());
122                 sort(endPosition.begin(), endPosition.end());
123                 sort(seqLength.begin(), seqLength.end());
124                 sort(ambigBases.begin(), ambigBases.end());
125                 sort(longHomoPolymer.begin(), longHomoPolymer.end());
126                 
127                 int ptile0_25   = int(numSeqs * 0.025);
128                 int ptile25             = int(numSeqs * 0.250);
129                 int ptile50             = int(numSeqs * 0.500);
130                 int ptile75             = int(numSeqs * 0.750);
131                 int ptile97_5   = int(numSeqs * 0.975);
132                 int ptile100    = numSeqs - 1;
133                 
134                 cout << endl;
135                 cout << "\t\tStart\tEnd\tNBases\tAmbigs\tPolymer" << endl;
136                 cout << "Minimum:\t" << startPosition[0] << '\t' << endPosition[0] << '\t' << seqLength[0] << '\t' << ambigBases[0] << '\t' << longHomoPolymer[0] << endl;
137                 cout << "2.5%-tile:\t" << startPosition[ptile0_25] << '\t' << endPosition[ptile0_25] << '\t' << seqLength[ptile0_25] << '\t' << ambigBases[ptile0_25] << '\t' << longHomoPolymer[ptile0_25] << endl;
138                 cout << "25%-tile:\t" << startPosition[ptile25] << '\t' << endPosition[ptile25] << '\t' << seqLength[ptile25] << '\t' << ambigBases[ptile25] << '\t' << longHomoPolymer[ptile25] << endl;
139                 cout << "Median: \t" << startPosition[ptile50] << '\t' << endPosition[ptile50] << '\t' << seqLength[ptile50] << '\t' << ambigBases[ptile50] << '\t' << longHomoPolymer[ptile50] << endl;
140                 cout << "75%-tile:\t" << startPosition[ptile75] << '\t' << endPosition[ptile75] << '\t' << seqLength[ptile75] << '\t' << ambigBases[ptile75] << '\t' << longHomoPolymer[ptile75] << endl;
141                 cout << "97.5%-tile:\t" << startPosition[ptile97_5] << '\t' << endPosition[ptile97_5] << '\t' << seqLength[ptile97_5] << '\t' << ambigBases[ptile97_5] << '\t' << longHomoPolymer[ptile97_5] << endl;
142                 cout << "Maximum:\t" << startPosition[ptile100] << '\t' << endPosition[ptile100] << '\t' << seqLength[ptile100] << '\t' << ambigBases[ptile100] << '\t' << longHomoPolymer[ptile100] << endl;
143                 cout << "# of Seqs:\t" << numSeqs << endl;
144                 
145                 return 0;
146         }
147         catch(exception& e) {
148                 cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
149                 exit(1);
150         }
151         catch(...) {
152                 cout << "An unknown error has occurred in the SeqSummaryCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
153                 exit(1);
154         }
155         
156 }
157
158 //***************************************************************************************************************
159
160