]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
pat's changes to trim.seqs
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "QualityScores", "QualityScores");
22                 exit(1);
23         }                                                       
24 }
25
26 /**************************************************************************************************/
27
28 QualityScores::QualityScores(ifstream& qFile){
29         try {
30                 
31                 m = MothurOut::getInstance();
32
33                 seqName = "";
34                 seqLength = -1;
35                 int score;
36                 
37                 string line;
38                 getline(qFile, line);
39                 istringstream nameStream(line);
40         
41                 nameStream >> seqName;
42                 seqName = seqName.substr(1); 
43
44                 getline(qFile, line);
45                 istringstream qualStream(line);
46         
47                 while(qualStream){
48                         qualStream >> score;
49                         qScores.push_back(score);
50                 }
51                 qScores.pop_back();
52                 
53                 seqLength = qScores.size();     
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "QualityScores", "QualityScores");
57                 exit(1);
58         }                                                       
59
60 }
61
62 /**************************************************************************************************/
63
64 string QualityScores::getName(){
65         
66         try {
67                 return seqName;
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "QualityScores", "getName");
71                 exit(1);
72         }                                                       
73 }
74
75 /**************************************************************************************************/
76
77 void QualityScores::printQScores(ofstream& qFile){
78         try {
79                 
80                 double aveQScore = calculateAverage();
81                 
82                 qFile << '>' << seqName << '\t' << aveQScore << endl;
83                 
84                 for(int i=0;i<seqLength;i++){
85                         qFile << qScores[i] << ' ';
86                 }
87                 qFile << endl;
88                                 
89         }
90         catch(exception& e) {
91                 m->errorOut(e, "QualityScores", "printQScores");
92                 exit(1);
93         }                                                       
94 }
95
96 /**************************************************************************************************/
97
98 void QualityScores::trimQScores(int start, int end){
99         try {
100                 vector<int> hold;
101                 
102                 if(end == -1){          
103                         hold = vector<int>(qScores.begin()+start, qScores.end());
104                         qScores = hold;         
105                 }
106                 if(start == -1){
107                         hold = vector<int>(qScores.begin(), qScores.begin()+end);       //not sure if indexing is correct
108                         qScores = hold;         
109                 }
110
111                 seqLength = qScores.size();
112         }
113         catch(exception& e) {
114                 m->errorOut(e, "QualityScores", "trimQScores");
115                 exit(1);
116         }                                                       
117 }
118
119 /**************************************************************************************************/
120
121 void QualityScores::flipQScores(){
122         try {
123                 
124                 vector<int> temp = qScores;
125                 for(int i=0;i<seqLength;i++){
126                         qScores[seqLength - i - 1] = temp[i];
127                 }
128                 
129         }
130         catch(exception& e) {
131                 m->errorOut(e, "QualityScores", "flipQScores");
132                 exit(1);
133         }                                                       
134 }
135
136 /**************************************************************************************************/
137
138 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
139         try {
140                 string rawSequence = sequence.getUnaligned();
141                 int seqLength = sequence.getNumBases();
142                 
143                 if(seqName != sequence.getName()){
144                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
145                         m->mothurOutEndLine();  
146                 }
147                 
148                 int end;
149                 for(int i=0;i<seqLength;i++){
150                         end = i;
151                         if(qScores[i] < qThreshold){
152                                 break;
153                         }
154                 }
155                 
156                 sequence.setUnaligned(rawSequence.substr(0,end));
157                 trimQScores(-1, end);
158         
159                 return 1;
160         }
161         catch(exception& e) {
162                 m->errorOut(e, "QualityScores", "flipQScores");
163                 exit(1);
164         }                                                       
165         
166 }
167
168 /**************************************************************************************************/
169
170 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
171         try {
172                 string rawSequence = sequence.getUnaligned();
173                 int seqLength = sequence.getNumBases();
174                 
175                 if(seqName != sequence.getName()){
176                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
177                         m->mothurOutEndLine();  
178                 }
179                 
180                 int end = -1;
181                 double rollingSum = 0.0000;
182                 
183                 for(int i=0;i<seqLength;i++){
184
185                         rollingSum += (double)qScores[i];
186                         
187                         if(rollingSum / (double)(i+1) < qThreshold){
188                                 end = i;
189 //                              cout << i+1 << '\t' << seqName << '\t' << rollingSum / (double)(i+1) << endl;
190                                 break;
191                         }
192                 }
193                 
194                 if(end == -1){  end = seqLength;        }
195                 
196                 sequence.setUnaligned(rawSequence.substr(0,end));
197                 trimQScores(-1, end);
198                 
199                 return 1;
200         }
201         catch(exception& e) {
202                 m->errorOut(e, "QualityScores", "flipQScores");
203                 exit(1);
204         }                                                       
205         
206 }
207
208 /**************************************************************************************************/
209
210 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
211         try {
212                 string rawSequence = sequence.getUnaligned();
213                 int seqLength = sequence.getNumBases();
214                 
215                 if(seqName != sequence.getName()){
216                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
217                         m->mothurOutEndLine();  
218                 }
219                 
220                 int end = windowSize;
221                 int start = 0;
222                 
223
224                 while(start < seqLength){
225                         double windowSum = 0.0000;
226
227                         for(int i=start;i<end;i++){
228                                 windowSum += qScores[i];                                
229                         }
230                         double windowAverage = windowSum / (double)(end-start);
231                         
232                         if(windowAverage < qThreshold){
233                                 end = start;
234                                 break;
235                         }
236                         start += stepSize;
237                         end = start + windowSize;
238                         if(end >= seqLength){   end = seqLength - 1;    }
239                 }
240                 
241                 
242                 if(end == -1){  end = seqLength;        }
243                 
244                 sequence.setUnaligned(rawSequence.substr(0,end));
245                 trimQScores(-1, end);
246                 
247                 return 1;
248         }
249         catch(exception& e) {
250                 m->errorOut(e, "QualityScores", "flipQScores");
251                 exit(1);
252         }                                                       
253         
254 }
255
256 /**************************************************************************************************/
257
258 double QualityScores::calculateAverage(){
259         
260         double aveQScore = 0.0000;
261         
262         for(int i=0;i<seqLength;i++){
263                 aveQScore += (double) qScores[i];
264         }
265         aveQScore /= (double) seqLength;
266         
267         return aveQScore;
268 }
269
270 /**************************************************************************************************/
271
272 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
273         try {
274                 string rawSequence = sequence.getUnaligned();
275                 bool success = 0;       //guilty until proven innocent
276                 
277                 if(seqName != sequence.getName())       {
278                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
279                         m->mothurOutEndLine();  
280                 } 
281                         
282                 double aveQScore = calculateAverage();
283                 
284                 if(aveQScore >= qAverage)       {       success = 1;    }
285                 else                                            {       success = 0;    }
286                 
287                 return success;
288         }
289         catch(exception& e) {
290                 m->errorOut(e, "TrimSeqsCommand", "cullQualAverage");
291                 exit(1);
292         }
293 }
294
295 /**************************************************************************************************/