]> git.donarmstrong.com Git - mothur.git/blob - seqsummarycommand.cpp
fixed some bugs
[mothur.git] / seqsummarycommand.cpp
1 /*
2  *  seqcoordcommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 5/30/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "seqsummarycommand.h"
11 #include "sequence.hpp"
12
13 //***************************************************************************************************************
14
15 SeqSummaryCommand::SeqSummaryCommand(string option){
16         try {
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"fasta"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string,string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter;
31                         
32                         //check to make sure all parameters are valid for command
33                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         //check for required parameters
38                         fastafile = validParameter.validFile(parameters, "fasta", true);
39                         if (fastafile == "not open") { abort = true; }
40                         else if (fastafile == "not found") { fastafile = ""; cout << "fasta is a required parameter for the summary.seqs command." << endl; abort = true;  }    
41                         
42                 }
43         }
44         catch(exception& e) {
45                 cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function SeqSummaryCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
46                 exit(1);
47         }
48         catch(...) {
49                 cout << "An unknown error has occurred in the SeqSummaryCommand class function SeqSummaryCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
50                 exit(1);
51         }       
52 }
53 //**********************************************************************************************************************
54
55 void SeqSummaryCommand::help(){
56         try {
57                 cout << "The summary.seqs command reads a fastafile and ...." << "\n";
58                 cout << "The summary.seqs command parameter is fasta and it is required." << "\n";
59                 cout << "The summary.seqs command should be in the following format: " << "\n";
60                 cout << "summary.seqs(fasta=yourFastaFile) " << "\n";
61                 cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile)." << "\n" << "\n";       
62         }
63         catch(exception& e) {
64                 cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
65                 exit(1);
66         }
67         catch(...) {
68                 cout << "An unknown error has occurred in the SeqSummaryCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
69                 exit(1);
70         }       
71 }
72
73 //***************************************************************************************************************
74
75 SeqSummaryCommand::~SeqSummaryCommand(){        /*      do nothing      */      }
76
77 //***************************************************************************************************************
78
79 int SeqSummaryCommand::execute(){
80         try{
81                 
82                 if (abort == true) { return 0; }
83                 
84                 ifstream inFASTA;
85                 openInputFile(fastafile, inFASTA);
86                 int numSeqs = 0;
87
88                 ofstream outSummary;
89                 string summaryFile = fastafile + ".summary";
90                 openOutputFile(summaryFile, outSummary);
91                 
92                 vector<int> startPosition;
93                 vector<int> endPosition;
94                 vector<int> seqLength;
95                 vector<int> ambigBases;
96                 vector<int> longHomoPolymer;
97                 
98                 outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer" << endl;                   
99
100                 while(!inFASTA.eof()){
101                         Sequence current(inFASTA);
102                         startPosition.push_back(current.getStartPos());
103                         endPosition.push_back(current.getEndPos());
104                         seqLength.push_back(current.getNumBases());
105                         ambigBases.push_back(current.getAmbigBases());
106                         longHomoPolymer.push_back(current.getLongHomoPolymer());
107
108                         outSummary << current.getName() << '\t';
109                         outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
110                         outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
111                         outSummary << current.getLongHomoPolymer() << endl;
112                         
113                         numSeqs++;
114                         gobble(inFASTA);
115                 }
116                 inFASTA.close();
117                 
118                 sort(startPosition.begin(), startPosition.end());
119                 sort(endPosition.begin(), endPosition.end());
120                 sort(seqLength.begin(), seqLength.end());
121                 sort(ambigBases.begin(), ambigBases.end());
122                 sort(longHomoPolymer.begin(), longHomoPolymer.end());
123                 
124                 int ptile0_25   = int(numSeqs * 0.025);
125                 int ptile25             = int(numSeqs * 0.250);
126                 int ptile50             = int(numSeqs * 0.500);
127                 int ptile75             = int(numSeqs * 0.750);
128                 int ptile97_5   = int(numSeqs * 0.975);
129                 int ptile100    = numSeqs - 1;
130                 
131                 cout << endl;
132                 cout << "\t\tStart\tEnd\tNBases\tAmbigs\tPolymer" << endl;
133                 cout << "Minimum:\t" << startPosition[0] << '\t' << endPosition[0] << '\t' << seqLength[0] << '\t' << ambigBases[0] << '\t' << longHomoPolymer[0] << endl;
134                 cout << "2.5%-tile:\t" << startPosition[ptile0_25] << '\t' << endPosition[ptile0_25] << '\t' << seqLength[ptile0_25] << '\t' << ambigBases[ptile0_25] << '\t' << longHomoPolymer[ptile0_25] << endl;
135                 cout << "25%-tile:\t" << startPosition[ptile25] << '\t' << endPosition[ptile25] << '\t' << seqLength[ptile25] << '\t' << ambigBases[ptile25] << '\t' << longHomoPolymer[ptile25] << endl;
136                 cout << "Median: \t" << startPosition[ptile50] << '\t' << endPosition[ptile50] << '\t' << seqLength[ptile50] << '\t' << ambigBases[ptile50] << '\t' << longHomoPolymer[ptile50] << endl;
137                 cout << "75%-tile:\t" << startPosition[ptile75] << '\t' << endPosition[ptile75] << '\t' << seqLength[ptile75] << '\t' << ambigBases[ptile75] << '\t' << longHomoPolymer[ptile75] << endl;
138                 cout << "97.5%-tile:\t" << startPosition[ptile97_5] << '\t' << endPosition[ptile97_5] << '\t' << seqLength[ptile97_5] << '\t' << ambigBases[ptile97_5] << '\t' << longHomoPolymer[ptile97_5] << endl;
139                 cout << "Maximum:\t" << startPosition[ptile100] << '\t' << endPosition[ptile100] << '\t' << seqLength[ptile100] << '\t' << ambigBases[ptile100] << '\t' << longHomoPolymer[ptile100] << endl;
140                 cout << "# of Seqs:\t" << numSeqs << endl;
141                 
142                 outSummary.close();
143                 return 0;
144         }
145         catch(exception& e) {
146                 cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
147                 exit(1);
148         }
149         catch(...) {
150                 cout << "An unknown error has occurred in the SeqSummaryCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
151                 exit(1);
152         }
153         
154 }
155
156 //***************************************************************************************************************
157
158