]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
small bug with trimming of quality scores over a window. lost last base if sequence...
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19                 
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "QualityScores", "QualityScores");
23                 exit(1);
24         }                                                       
25 }
26
27 /**************************************************************************************************/
28
29 QualityScores::QualityScores(ifstream& qFile){
30         try {
31                 
32                 m = MothurOut::getInstance();
33                 
34                 seqName = "";
35                 int score;
36                 
37                 qFile >> seqName; 
38                 m->getline(qFile);
39                 
40                 if (seqName == "")      {
41                         m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
42                         m->mothurOutEndLine(); 
43                 }
44                 else{
45                         seqName = seqName.substr(1);
46                 }
47                 
48                 string qScoreString = m->getline(qFile);
49                 while(qFile.peek() != '>' && qFile.peek() != EOF){
50                         qScoreString +=  ' ' + m->getline(qFile);
51                 }
52                 
53                 istringstream qScoreStringStream(qScoreString);
54                 int count = 0;
55                 while(!qScoreStringStream.eof()){
56                         if (m->control_pressed) { break; }
57                         qScoreStringStream >> score;  m->gobble(qScoreStringStream);
58
59                         qScores.push_back(score);
60                         count++;
61                 }
62                 //qScores.pop_back();
63                 
64 //              string scores = "";
65 //              
66 //              while(!qFile.eof()){    
67 //                      
68 //                      qFile >> seqName; 
69 //                      
70 //                      //get name
71 //                      if (seqName.length() != 0) { 
72 //                              seqName = seqName.substr(1);
73 //                              while (!qFile.eof())    {       
74 //                                      char c = qFile.get(); 
75 //                                      //gobble junk on line
76 //                                      if (c == 10 || c == 13){        break;  }
77 //                              } 
78 //                              m->gobble(qFile);
79 //                      }
80 //                      
81 //                      //get scores
82 //                      while(qFile){
83 //                              char letter=qFile.get();
84 //                              if((letter == '>')){    qFile.putback(letter);  break;  }
85 //                              else if (isprint(letter)) { scores += letter; }
86 //                      }
87 //                      m->gobble(qFile);
88 //                      
89 //                      break;
90 //              }
91 //              
92 //              //convert scores string to qScores
93 //              istringstream qScoreStringStream(scores);
94 //              
95 //              int score;
96 //              while(!qScoreStringStream.eof()){
97 //                      
98 //                      if (m->control_pressed) { break; }
99 //                      
100 //                      qScoreStringStream >> score;
101 //                      qScores.push_back(score);
102 //              }
103 //              
104 //              qScores.pop_back();
105
106                 seqLength = qScores.size();
107                 //cout << "seqlenght = " <<     seqLength << '\t' << count << endl;
108                 
109         }
110         catch(exception& e) {
111                 m->errorOut(e, "QualityScores", "QualityScores");
112                 exit(1);
113         }                                                       
114         
115 }
116
117 /**************************************************************************************************/
118
119 string QualityScores::getName(){
120         
121         try {
122                 return seqName;
123         }
124         catch(exception& e) {
125                 m->errorOut(e, "QualityScores", "getName");
126                 exit(1);
127         }                                                       
128 }
129
130 /**************************************************************************************************/
131
132 void QualityScores::printQScores(ofstream& qFile){
133         try {
134                 
135                 double aveQScore = calculateAverage();
136                 
137                 qFile << '>' << seqName << '\t' << aveQScore << endl;
138                 
139                 for(int i=0;i<seqLength;i++){
140                         qFile << qScores[i] << ' ';
141                 }
142                 qFile << endl;
143                                 
144         }
145         catch(exception& e) {
146                 m->errorOut(e, "QualityScores", "printQScores");
147                 exit(1);
148         }                                                       
149 }
150
151 /**************************************************************************************************/
152
153 void QualityScores::trimQScores(int start, int end){
154         try {
155                 vector<int> hold;
156                 
157
158                 //cout << seqName << '\t' << start << '\t' << end << '\t' << qScores.size() << endl;
159                 //for (int i = 0; i < qScores.size(); i++) { cout << qScores[i] << end; }
160                 if(end == -1){          
161                         hold = vector<int>(qScores.begin()+start, qScores.end());
162                         qScores = hold;         
163                 }
164                 if(start == -1){
165                         if(qScores.size() > end){
166                                 hold = vector<int>(qScores.begin(), qScores.begin()+end);
167                                 qScores = hold;         
168                         }
169                 }
170
171                 seqLength = qScores.size();
172         }
173         catch(exception& e) {
174                 m->errorOut(e, "QualityScores", "trimQScores");
175                 exit(1);
176         }                                                       
177 }
178
179 /**************************************************************************************************/
180
181 void QualityScores::flipQScores(){
182         try {
183                 
184                 vector<int> temp = qScores;
185                 for(int i=0;i<seqLength;i++){
186                         qScores[seqLength - i - 1] = temp[i];
187                 }
188                 
189         }
190         catch(exception& e) {
191                 m->errorOut(e, "QualityScores", "flipQScores");
192                 exit(1);
193         }                                                       
194 }
195
196 /**************************************************************************************************/
197
198 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
199         try {
200                 string rawSequence = sequence.getUnaligned();
201                 int seqLength = sequence.getNumBases();
202                 
203                 if(seqName != sequence.getName()){
204                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
205                         m->mothurOutEndLine();  
206                 }
207                 
208                 int end;
209                 for(int i=0;i<seqLength;i++){
210                         end = i;
211                         if(qScores[i] < qThreshold){
212                                 break;
213                         }
214                 }
215                 
216                 //every score passed
217                 if (end == (seqLength-1)) { end = seqLength; }
218                 
219                 sequence.setUnaligned(rawSequence.substr(0,end));
220                 trimQScores(-1, end);
221                 
222                 return 1;
223         }
224         catch(exception& e) {
225                 m->errorOut(e, "QualityScores", "flipQScores");
226                 exit(1);
227         }                                                       
228         
229 }
230
231 /**************************************************************************************************/
232
233 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
234         try {
235                 string rawSequence = sequence.getUnaligned();
236                 int seqLength = sequence.getNumBases();
237                 
238                 if(seqName != sequence.getName()){
239                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
240                         m->mothurOutEndLine();  
241                 }
242                 
243                 int end = -1;
244                 double rollingSum = 0.0000;
245                 
246                 for(int i=0;i<seqLength;i++){
247
248                         rollingSum += (double)qScores[i];
249                         
250                         if(rollingSum / (double)(i+1) < qThreshold){
251                                 end = i;
252                                 break;
253                         }
254                 }
255                 
256                 if(end == -1){  end = seqLength;        }
257                 
258                 
259                 sequence.setUnaligned(rawSequence.substr(0,end));
260                 trimQScores(-1, end);
261                 
262                 
263                 return 1;
264         }
265         catch(exception& e) {
266                 m->errorOut(e, "QualityScores", "flipQScores");
267                 exit(1);
268         }                                                       
269         
270 }
271
272 /**************************************************************************************************/
273
274 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
275         try {
276                 string rawSequence = sequence.getUnaligned();
277                 int seqLength = sequence.getNumBases();
278                 
279                 if(seqName != sequence.getName()){
280                         m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
281                         m->mothurOutEndLine();
282                 }
283                 
284                 int end = windowSize;
285                 int start = 0;
286
287                 if(seqLength < windowSize) {    return 0;       }
288                         
289                 while((start+windowSize) < seqLength){
290                         double windowSum = 0.0000;
291
292                         for(int i=start;i<end;i++){
293                                 windowSum += qScores[i];
294                         }
295                         double windowAverage = windowSum / (double)(end-start);
296                                 
297                         if(windowAverage < qThreshold){
298                                 end = end - stepSize;
299                                 break;
300                         }
301                         
302                         start += stepSize;
303                         end = start + windowSize;
304                                 
305                         if(end >= seqLength){   end = seqLength;        }
306                                 
307                 }
308         
309                 if(end == -1){  end = seqLength;        }
310                 
311                 //failed first window
312                 if (end < windowSize) { return 0; }
313                         
314                 sequence.setUnaligned(rawSequence.substr(0,end));
315                 trimQScores(-1, end);
316                 
317                 return 1;
318         }
319         catch(exception& e) {
320                 m->errorOut(e, "QualityScores", "flipQScores");
321                 exit(1);
322         }                                                       
323         
324 }
325
326 /**************************************************************************************************/
327
328 double QualityScores::calculateAverage(){
329         
330         double aveQScore = 0.0000;
331         
332         for(int i=0;i<seqLength;i++){
333                 aveQScore += (double) qScores[i];
334         }
335         aveQScore /= (double) seqLength;
336         
337         return aveQScore;
338 }
339
340 /**************************************************************************************************/
341
342 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
343         try {
344                 string rawSequence = sequence.getUnaligned();
345                 bool success = 0;       //guilty until proven innocent
346                 
347                 if(seqName != sequence.getName())       {
348                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
349                         m->mothurOutEndLine();  
350                 } 
351                         
352                 double aveQScore = calculateAverage();
353                 
354                 if(aveQScore >= qAverage)       {       success = 1;    }
355                 else                                            {       success = 0;    }
356                 
357                 return success;
358         }
359         catch(exception& e) {
360                 m->errorOut(e, "QualityScores", "cullQualAverage");
361                 exit(1);
362         }
363 }
364
365 /**************************************************************************************************/
366
367 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
368         try {
369
370                 int seqLength = errorSeq.size();
371                 
372                 int qIndex = start - 1;
373
374                 for(int i=0;i<seqLength;i++){
375                         
376                         if(errorSeq[i] == 'm')          {       qualErrorMap['m'][qScores[qIndex]] += weight;   }
377                         else if(errorSeq[i] == 's')     {       qualErrorMap['s'][qScores[qIndex]] += weight;   }
378                         else if(errorSeq[i] == 'i')     {       qualErrorMap['i'][qScores[qIndex]] += weight;   }
379                         else if(errorSeq[i] == 'a')     {       qualErrorMap['a'][qScores[qIndex]] += weight;   }
380                         else if(errorSeq[i] == 'd')     {       /*      there are no qScores for deletions      */              }
381
382                         if(errorSeq[i] != 'd')          {       qIndex++;       }
383                         if(qIndex > stop){      break;  }
384                 }       
385         }
386         catch(exception& e) {
387                 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
388                 exit(1);
389         }
390 }
391
392 /**************************************************************************************************/
393
394 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
395         try {
396                 
397                 int index = 0;
398                 for(int i=start-1;i<stop;i++){
399                         forwardMap[index++][qScores[i]] += weight;
400                 }
401                 
402         }
403         catch(exception& e) {
404                 m->errorOut(e, "QualityScores", "updateForwardMap");
405                 exit(1);
406         }
407 }
408
409 /**************************************************************************************************/
410
411 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
412         try {
413                 
414                 int index = 0;
415                 for(int i=stop-1;i>=start;i--){
416                         reverseMap[index++][qScores[i]] += weight;
417                 }
418                 
419         }       
420         catch(exception& e) {
421                 m->errorOut(e, "QualityScores", "updateForwardMap");
422                 exit(1);
423         }
424 }
425
426 /**************************************************************************************************/