]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
fix trim.seqs / qualscores bug
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19                 
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "QualityScores", "QualityScores");
23                 exit(1);
24         }                                                       
25 }
26
27 /**************************************************************************************************/
28
29 QualityScores::QualityScores(ifstream& qFile){
30         try {
31                 
32                 m = MothurOut::getInstance();
33                 
34                 seqName = "";
35                 int score;
36                 
37                 qFile >> seqName; 
38                 m->getline(qFile);
39                 
40                 if (seqName == "")      {
41                         m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
42                         m->mothurOutEndLine(); 
43                 }
44                 else{
45                         seqName = seqName.substr(1);
46                 }
47                 string qScoreString = m->getline(qFile);
48                 while(qFile.peek() != '>' && qFile.peek() != EOF){
49                         qScoreString += ' ' + m->getline(qFile);
50                 }
51                 
52                 istringstream qScoreStringStream(qScoreString);
53                 int count = 0;
54                 while(!qScoreStringStream.eof()){
55                         if (m->control_pressed) { break; }
56                         qScoreStringStream >> score;
57                         qScores.push_back(score);
58                         count++;
59                 }
60                 qScores.pop_back();
61                 
62 //              string scores = "";
63 //              
64 //              while(!qFile.eof()){    
65 //                      
66 //                      qFile >> seqName; 
67 //                      
68 //                      //get name
69 //                      if (seqName.length() != 0) { 
70 //                              seqName = seqName.substr(1);
71 //                              while (!qFile.eof())    {       
72 //                                      char c = qFile.get(); 
73 //                                      //gobble junk on line
74 //                                      if (c == 10 || c == 13){        break;  }
75 //                              } 
76 //                              m->gobble(qFile);
77 //                      }
78 //                      
79 //                      //get scores
80 //                      while(qFile){
81 //                              char letter=qFile.get();
82 //                              if((letter == '>')){    qFile.putback(letter);  break;  }
83 //                              else if (isprint(letter)) { scores += letter; }
84 //                      }
85 //                      cout << scores << endl;
86 //                      m->gobble(qFile);
87 //                      
88 //                      break;
89 //              }
90 //              
91 //              //convert scores string to qScores
92 //              istringstream qScoreStringStream(scores);
93 //              
94 //              int score;
95 //              while(!qScoreStringStream.eof()){
96 //                      
97 //                      if (m->control_pressed) { break; }
98 //                      
99 //                      qScoreStringStream >> score;
100 //                      qScores.push_back(score);
101 //              }
102 //              
103 //              qScores.pop_back();
104
105                 seqLength = qScores.size();
106                 
107                 
108         }
109         catch(exception& e) {
110                 m->errorOut(e, "QualityScores", "QualityScores");
111                 exit(1);
112         }                                                       
113         
114 }
115
116 /**************************************************************************************************/
117
118 string QualityScores::getName(){
119         
120         try {
121                 cout << qScores.size() << '\t';
122                 return seqName;
123         }
124         catch(exception& e) {
125                 m->errorOut(e, "QualityScores", "getName");
126                 exit(1);
127         }                                                       
128 }
129
130 /**************************************************************************************************/
131
132 void QualityScores::printQScores(ofstream& qFile){
133         try {
134                 
135                 double aveQScore = calculateAverage();
136                 
137                 qFile << '>' << seqName << '\t' << aveQScore << endl;
138                 
139                 for(int i=0;i<seqLength;i++){
140                         qFile << qScores[i] << ' ';
141                 }
142                 qFile << endl;
143                                 
144         }
145         catch(exception& e) {
146                 m->errorOut(e, "QualityScores", "printQScores");
147                 exit(1);
148         }                                                       
149 }
150
151 /**************************************************************************************************/
152
153 void QualityScores::trimQScores(int start, int end){
154         try {
155                 vector<int> hold;
156                 
157                 cout << seqName << '\t' << qScores.size() << '\t' << start << '\t' << end << endl;
158                 
159                 if(end == -1){          
160                         hold = vector<int>(qScores.begin()+start, qScores.end());
161                         qScores = hold;         
162                 }
163                 if(start == -1){
164                         if(qScores.size() > end){
165                                 hold = vector<int>(qScores.begin(), qScores.begin()+end);
166                                 qScores = hold;         
167                         }
168                 }
169
170                 seqLength = qScores.size();
171         }
172         catch(exception& e) {
173                 m->errorOut(e, "QualityScores", "trimQScores");
174                 exit(1);
175         }                                                       
176 }
177
178 /**************************************************************************************************/
179
180 void QualityScores::flipQScores(){
181         try {
182                 
183                 vector<int> temp = qScores;
184                 for(int i=0;i<seqLength;i++){
185                         qScores[seqLength - i - 1] = temp[i];
186                 }
187                 
188         }
189         catch(exception& e) {
190                 m->errorOut(e, "QualityScores", "flipQScores");
191                 exit(1);
192         }                                                       
193 }
194
195 /**************************************************************************************************/
196
197 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
198         try {
199                 string rawSequence = sequence.getUnaligned();
200                 int seqLength = sequence.getNumBases();
201                 
202                 if(seqName != sequence.getName()){
203                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
204                         m->mothurOutEndLine();  
205                 }
206                 
207                 int end;
208                 for(int i=0;i<seqLength;i++){
209                         end = i;
210                         if(qScores[i] < qThreshold){
211                                 break;
212                         }
213                 }
214                 
215                 //every score passed
216                 if (end == (seqLength-1)) { end = seqLength; }
217                 
218                 sequence.setUnaligned(rawSequence.substr(0,end));
219                 trimQScores(-1, end);
220                 
221                 return 1;
222         }
223         catch(exception& e) {
224                 m->errorOut(e, "QualityScores", "flipQScores");
225                 exit(1);
226         }                                                       
227         
228 }
229
230 /**************************************************************************************************/
231
232 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
233         try {
234                 string rawSequence = sequence.getUnaligned();
235                 int seqLength = sequence.getNumBases();
236                 
237                 if(seqName != sequence.getName()){
238                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
239                         m->mothurOutEndLine();  
240                 }
241                 
242                 int end = -1;
243                 double rollingSum = 0.0000;
244                 
245                 for(int i=0;i<seqLength;i++){
246
247                         rollingSum += (double)qScores[i];
248                         
249                         if(rollingSum / (double)(i+1) < qThreshold){
250                                 end = i;
251                                 break;
252                         }
253                 }
254                 
255                 if(end == -1){  end = seqLength;        }
256                 
257                 
258                 sequence.setUnaligned(rawSequence.substr(0,end));
259                 trimQScores(-1, end);
260                 
261                 
262                 return 1;
263         }
264         catch(exception& e) {
265                 m->errorOut(e, "QualityScores", "flipQScores");
266                 exit(1);
267         }                                                       
268         
269 }
270
271 /**************************************************************************************************/
272
273 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
274         try {
275                 string rawSequence = sequence.getUnaligned();
276                 int seqLength = sequence.getNumBases();
277                 
278                 if(seqName != sequence.getName()){
279                         m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
280                         m->mothurOutEndLine();
281                 }
282                 
283                 int end = windowSize;
284                 int start = 0;
285                 
286                 if(seqLength < windowSize) {    return 0;       }
287                         
288                 while(start < seqLength){
289                         double windowSum = 0.0000;
290
291                         for(int i=start;i<end;i++){
292                                 windowSum += qScores[i];
293                         }
294                         double windowAverage = windowSum / (double)(end-start);
295                         
296                         if(windowAverage < qThreshold){
297                                 end = end - stepSize;
298                                 break;
299                         }
300                         start += stepSize;
301                         end = start + windowSize;
302                         if(end >= seqLength){   end = seqLength - 1;    }
303                 }
304                 
305                 if(end == -1){  end = seqLength;        }
306                 
307                 sequence.setUnaligned(rawSequence.substr(0,end));
308                 trimQScores(-1, end);
309                 
310                 return 1;
311         }
312         catch(exception& e) {
313                 m->errorOut(e, "QualityScores", "flipQScores");
314                 exit(1);
315         }                                                       
316         
317 }
318
319 /**************************************************************************************************/
320
321 double QualityScores::calculateAverage(){
322         
323         double aveQScore = 0.0000;
324         
325         for(int i=0;i<seqLength;i++){
326                 aveQScore += (double) qScores[i];
327         }
328         aveQScore /= (double) seqLength;
329         
330         return aveQScore;
331 }
332
333 /**************************************************************************************************/
334
335 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
336         try {
337                 string rawSequence = sequence.getUnaligned();
338                 bool success = 0;       //guilty until proven innocent
339                 
340                 if(seqName != sequence.getName())       {
341                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
342                         m->mothurOutEndLine();  
343                 } 
344                         
345                 double aveQScore = calculateAverage();
346                 
347                 if(aveQScore >= qAverage)       {       success = 1;    }
348                 else                                            {       success = 0;    }
349                 
350                 return success;
351         }
352         catch(exception& e) {
353                 m->errorOut(e, "QualityScores", "cullQualAverage");
354                 exit(1);
355         }
356 }
357
358 /**************************************************************************************************/
359
360 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
361         try {
362
363                 int seqLength = errorSeq.size();
364                 
365                 int qIndex = start - 1;
366
367                 for(int i=0;i<seqLength;i++){
368                         
369                         if(errorSeq[i] == 'm')          {       qualErrorMap['m'][qScores[qIndex]] += weight;   }
370                         else if(errorSeq[i] == 's')     {       qualErrorMap['s'][qScores[qIndex]] += weight;   }
371                         else if(errorSeq[i] == 'i')     {       qualErrorMap['i'][qScores[qIndex]] += weight;   }
372                         else if(errorSeq[i] == 'a')     {       qualErrorMap['a'][qScores[qIndex]] += weight;   }
373                         else if(errorSeq[i] == 'd')     {       /*      there are no qScores for deletions      */              }
374
375                         if(errorSeq[i] != 'd')          {       qIndex++;       }
376                         if(qIndex > stop){      break;  }
377                 }       
378         }
379         catch(exception& e) {
380                 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
381                 exit(1);
382         }
383 }
384
385 /**************************************************************************************************/
386
387 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
388         try {
389                 
390                 int index = 0;
391                 for(int i=start-1;i<stop;i++){
392                         forwardMap[index++][qScores[i]] += weight;
393                 }
394                 
395         }
396         catch(exception& e) {
397                 m->errorOut(e, "QualityScores", "updateForwardMap");
398                 exit(1);
399         }
400 }
401
402 /**************************************************************************************************/
403
404 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
405         try {
406                 
407                 int index = 0;
408                 for(int i=stop-1;i>=start;i--){
409                         reverseMap[index++][qScores[i]] += weight;
410                 }
411                 
412         }       
413         catch(exception& e) {
414                 m->errorOut(e, "QualityScores", "updateForwardMap");
415                 exit(1);
416         }
417 }
418
419 /**************************************************************************************************/