]> git.donarmstrong.com Git - mothur.git/blob - seqsummarycommand.cpp
finished work on classify.seqs bayesian method and various bug fixes
[mothur.git] / seqsummarycommand.cpp
1 /*
2  *  seqcoordcommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 5/30/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "seqsummarycommand.h"
11 #include "sequence.hpp"
12
13 //***************************************************************************************************************
14
15 SeqSummaryCommand::SeqSummaryCommand(string option){
16         try {
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"fasta"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string,string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter;
31                         
32                         //check to make sure all parameters are valid for command
33                         for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         //check for required parameters
38                         fastafile = validParameter.validFile(parameters, "fasta", true);
39                         if (fastafile == "not open") { abort = true; }
40                         else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the summary.seqs command."); mothurOutEndLine(); abort = true;  }     
41                         
42                 }
43         }
44         catch(exception& e) {
45                 errorOut(e, "SeqSummaryCommand", "SeqSummaryCommand");
46                 exit(1);
47         }
48 }
49 //**********************************************************************************************************************
50
51 void SeqSummaryCommand::help(){
52         try {
53                 mothurOut("The summary.seqs command reads a fastafile and ....\n");
54                 mothurOut("The summary.seqs command parameter is fasta and it is required.\n");
55                 mothurOut("The summary.seqs command should be in the following format: \n");
56                 mothurOut("summary.seqs(fasta=yourFastaFile) \n");
57                 mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");        
58         }
59         catch(exception& e) {
60                 errorOut(e, "SeqSummaryCommand", "help");
61                 exit(1);
62         }
63 }
64
65 //***************************************************************************************************************
66
67 SeqSummaryCommand::~SeqSummaryCommand(){        /*      do nothing      */      }
68
69 //***************************************************************************************************************
70
71 int SeqSummaryCommand::execute(){
72         try{
73                 
74                 if (abort == true) { return 0; }
75                 
76                 ifstream inFASTA;
77                 openInputFile(fastafile, inFASTA);
78                 int numSeqs = 0;
79
80                 ofstream outSummary;
81                 string summaryFile = fastafile + ".summary";
82                 openOutputFile(summaryFile, outSummary);
83                 
84                 vector<int> startPosition;
85                 vector<int> endPosition;
86                 vector<int> seqLength;
87                 vector<int> ambigBases;
88                 vector<int> longHomoPolymer;
89                 
90                 outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer" << endl;                   
91
92                 while(!inFASTA.eof()){
93                         Sequence current(inFASTA);
94                         startPosition.push_back(current.getStartPos());
95                         endPosition.push_back(current.getEndPos());
96                         seqLength.push_back(current.getNumBases());
97                         ambigBases.push_back(current.getAmbigBases());
98                         longHomoPolymer.push_back(current.getLongHomoPolymer());
99
100                         outSummary << current.getName() << '\t';
101                         outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
102                         outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
103                         outSummary << current.getLongHomoPolymer() << endl;
104                         
105                         numSeqs++;
106                         gobble(inFASTA);
107                 }
108                 inFASTA.close();
109                 
110                 sort(startPosition.begin(), startPosition.end());
111                 sort(endPosition.begin(), endPosition.end());
112                 sort(seqLength.begin(), seqLength.end());
113                 sort(ambigBases.begin(), ambigBases.end());
114                 sort(longHomoPolymer.begin(), longHomoPolymer.end());
115                 
116                 int ptile0_25   = int(numSeqs * 0.025);
117                 int ptile25             = int(numSeqs * 0.250);
118                 int ptile50             = int(numSeqs * 0.500);
119                 int ptile75             = int(numSeqs * 0.750);
120                 int ptile97_5   = int(numSeqs * 0.975);
121                 int ptile100    = numSeqs - 1;
122                 
123                 //to compensate for blank sequences that would result in startPosition and endPostion equalling -1
124                 if (startPosition[0] == -1) {  startPosition[0] = 0;    }
125                 if (endPosition[0] == -1)       {  endPosition[0] = 0;          }
126                 
127                 mothurOutEndLine();
128                 mothurOut("\t\tStart\tEnd\tNBases\tAmbigs\tPolymer"); mothurOutEndLine();
129                 mothurOut("Minimum:\t" + toString(startPosition[0]) + "\t" + toString(endPosition[0]) + "\t" + toString(seqLength[0]) + "\t" + toString(ambigBases[0]) + "\t" + toString(longHomoPolymer[0])); mothurOutEndLine();
130                 mothurOut("2.5%-tile:\t" + toString(startPosition[ptile0_25]) + "\t" + toString(endPosition[ptile0_25]) + "\t" + toString(seqLength[ptile0_25]) + "\t" + toString(ambigBases[ptile0_25]) + "\t"+ toString(longHomoPolymer[ptile0_25])); mothurOutEndLine();
131                 mothurOut("25%-tile:\t" + toString(startPosition[ptile25]) + "\t" + toString(endPosition[ptile25]) + "\t" + toString(seqLength[ptile25]) + "\t" + toString(ambigBases[ptile25]) + "\t" + toString(longHomoPolymer[ptile25])); mothurOutEndLine();
132                 mothurOut("Median: \t" + toString(startPosition[ptile50]) + "\t" + toString(endPosition[ptile50]) + "\t" + toString(seqLength[ptile50]) + "\t" + toString(ambigBases[ptile50]) + "\t" + toString(longHomoPolymer[ptile50])); mothurOutEndLine();
133                 mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75])); mothurOutEndLine();
134                 mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5])); mothurOutEndLine();
135                 mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100])); mothurOutEndLine();
136                 mothurOut("# of Seqs:\t" + toString(numSeqs)); mothurOutEndLine();
137                 
138                 outSummary.close();
139                 return 0;
140         }
141         catch(exception& e) {
142                 errorOut(e, "SeqSummaryCommand", "execute");
143                 exit(1);
144         }
145 }
146
147 //***************************************************************************************************************
148
149