]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
trim.seqs fix
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19                 
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "QualityScores", "QualityScores");
23                 exit(1);
24         }                                                       
25 }
26
27 /**************************************************************************************************/
28
29 QualityScores::QualityScores(ifstream& qFile){
30         try {
31                 
32                 m = MothurOut::getInstance();
33                 
34                 seqName = "";
35                 int score;
36                 
37                 qFile >> seqName; 
38                 m->getline(qFile);
39                 
40                 if (seqName == "")      {
41                         m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
42                         m->mothurOutEndLine(); 
43                 }
44                 else{
45                         seqName = seqName.substr(1);
46                 }
47                 cout << seqName << endl;
48                 string qScoreString = m->getline(qFile);
49                 while(qFile.peek() != '>' && qFile.peek() != EOF){
50                         qScoreString += ' ' + m->getline(qFile);
51                 }
52                 
53                 istringstream qScoreStringStream(qScoreString);
54                 int count = 0;
55                 while(!qScoreStringStream.eof()){
56                         if (m->control_pressed) { break; }
57                         qScoreStringStream >> score;
58                         qScores.push_back(score);
59                         cout << score << '\t' << count << endl;
60                         count++;
61                 }
62                 qScores.pop_back();
63                 
64 //              string scores = "";
65 //              
66 //              while(!qFile.eof()){    
67 //                      
68 //                      qFile >> seqName; 
69 //                      
70 //                      //get name
71 //                      if (seqName.length() != 0) { 
72 //                              seqName = seqName.substr(1);
73 //                              while (!qFile.eof())    {       
74 //                                      char c = qFile.get(); 
75 //                                      //gobble junk on line
76 //                                      if (c == 10 || c == 13){        break;  }
77 //                              } 
78 //                              m->gobble(qFile);
79 //                      }
80 //                      
81 //                      //get scores
82 //                      while(qFile){
83 //                              char letter=qFile.get();
84 //                              if((letter == '>')){    qFile.putback(letter);  break;  }
85 //                              else if (isprint(letter)) { scores += letter; }
86 //                      }
87 //                      cout << scores << endl;
88 //                      m->gobble(qFile);
89 //                      
90 //                      break;
91 //              }
92 //              
93 //              //convert scores string to qScores
94 //              istringstream qScoreStringStream(scores);
95 //              
96 //              int score;
97 //              while(!qScoreStringStream.eof()){
98 //                      
99 //                      if (m->control_pressed) { break; }
100 //                      
101 //                      qScoreStringStream >> score;
102 //                      qScores.push_back(score);
103 //              }
104 //              
105 //              qScores.pop_back();
106
107                 seqLength = qScores.size();
108                 
109                 
110         }
111         catch(exception& e) {
112                 m->errorOut(e, "QualityScores", "QualityScores");
113                 exit(1);
114         }                                                       
115         
116 }
117
118 /**************************************************************************************************/
119
120 string QualityScores::getName(){
121         
122         try {
123                 cout << qScores.size() << '\t';
124                 return seqName;
125         }
126         catch(exception& e) {
127                 m->errorOut(e, "QualityScores", "getName");
128                 exit(1);
129         }                                                       
130 }
131
132 /**************************************************************************************************/
133
134 void QualityScores::printQScores(ofstream& qFile){
135         try {
136                 
137                 double aveQScore = calculateAverage();
138                 
139                 qFile << '>' << seqName << '\t' << aveQScore << endl;
140                 
141                 for(int i=0;i<seqLength;i++){
142                         qFile << qScores[i] << ' ';
143                 }
144                 qFile << endl;
145                                 
146         }
147         catch(exception& e) {
148                 m->errorOut(e, "QualityScores", "printQScores");
149                 exit(1);
150         }                                                       
151 }
152
153 /**************************************************************************************************/
154
155 void QualityScores::trimQScores(int start, int end){
156         try {
157                 vector<int> hold;
158                 
159                 cout << seqName << '\t' << qScores.size() << '\t' << start << '\t' << end << endl;
160                 
161                 if(end == -1){          
162                         hold = vector<int>(qScores.begin()+start, qScores.end());
163                         qScores = hold;         
164                 }
165                 if(start == -1){
166                         if(qScores.size() > end){
167                                 hold = vector<int>(qScores.begin(), qScores.begin()+end);
168                                 qScores = hold;         
169                         }
170                 }
171
172                 seqLength = qScores.size();
173         }
174         catch(exception& e) {
175                 m->errorOut(e, "QualityScores", "trimQScores");
176                 exit(1);
177         }                                                       
178 }
179
180 /**************************************************************************************************/
181
182 void QualityScores::flipQScores(){
183         try {
184                 
185                 vector<int> temp = qScores;
186                 for(int i=0;i<seqLength;i++){
187                         qScores[seqLength - i - 1] = temp[i];
188                 }
189                 
190         }
191         catch(exception& e) {
192                 m->errorOut(e, "QualityScores", "flipQScores");
193                 exit(1);
194         }                                                       
195 }
196
197 /**************************************************************************************************/
198
199 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
200         try {
201                 string rawSequence = sequence.getUnaligned();
202                 int seqLength = sequence.getNumBases();
203                 
204                 if(seqName != sequence.getName()){
205                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
206                         m->mothurOutEndLine();  
207                 }
208                 
209                 int end;
210                 for(int i=0;i<seqLength;i++){
211                         end = i;
212                         if(qScores[i] < qThreshold){
213                                 break;
214                         }
215                 }
216                 
217                 //every score passed
218                 if (end == (seqLength-1)) { end = seqLength; }
219                 
220                 sequence.setUnaligned(rawSequence.substr(0,end));
221                 trimQScores(-1, end);
222                 
223                 return 1;
224         }
225         catch(exception& e) {
226                 m->errorOut(e, "QualityScores", "flipQScores");
227                 exit(1);
228         }                                                       
229         
230 }
231
232 /**************************************************************************************************/
233
234 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
235         try {
236                 string rawSequence = sequence.getUnaligned();
237                 int seqLength = sequence.getNumBases();
238                 
239                 if(seqName != sequence.getName()){
240                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
241                         m->mothurOutEndLine();  
242                 }
243                 
244                 int end = -1;
245                 double rollingSum = 0.0000;
246                 
247                 for(int i=0;i<seqLength;i++){
248
249                         rollingSum += (double)qScores[i];
250                         
251                         if(rollingSum / (double)(i+1) < qThreshold){
252                                 end = i;
253                                 break;
254                         }
255                 }
256                 
257                 if(end == -1){  end = seqLength;        }
258                 
259                 
260                 sequence.setUnaligned(rawSequence.substr(0,end));
261                 trimQScores(-1, end);
262                 
263                 
264                 return 1;
265         }
266         catch(exception& e) {
267                 m->errorOut(e, "QualityScores", "flipQScores");
268                 exit(1);
269         }                                                       
270         
271 }
272
273 /**************************************************************************************************/
274
275 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
276         try {
277                 string rawSequence = sequence.getUnaligned();
278                 int seqLength = sequence.getNumBases();
279                 
280                 if(seqName != sequence.getName()){
281                         m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
282                         m->mothurOutEndLine();
283                 }
284                 
285                 int end = windowSize;
286                 int start = 0;
287                 
288                 if(seqLength < windowSize) {    return 0;       }
289                         
290                 while(start < seqLength){
291                         double windowSum = 0.0000;
292
293                         for(int i=start;i<end;i++){
294                                 windowSum += qScores[i];
295                         }
296                         double windowAverage = windowSum / (double)(end-start);
297                         
298                         if(windowAverage < qThreshold){
299                                 end = end - stepSize;
300                                 break;
301                         }
302                         start += stepSize;
303                         end = start + windowSize;
304                         if(end >= seqLength){   end = seqLength - 1;    }
305                 }
306                 
307                 if(end == -1){  end = seqLength;        }
308                 
309                 sequence.setUnaligned(rawSequence.substr(0,end));
310                 trimQScores(-1, end);
311                 
312                 return 1;
313         }
314         catch(exception& e) {
315                 m->errorOut(e, "QualityScores", "flipQScores");
316                 exit(1);
317         }                                                       
318         
319 }
320
321 /**************************************************************************************************/
322
323 double QualityScores::calculateAverage(){
324         
325         double aveQScore = 0.0000;
326         
327         for(int i=0;i<seqLength;i++){
328                 aveQScore += (double) qScores[i];
329         }
330         aveQScore /= (double) seqLength;
331         
332         return aveQScore;
333 }
334
335 /**************************************************************************************************/
336
337 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
338         try {
339                 string rawSequence = sequence.getUnaligned();
340                 bool success = 0;       //guilty until proven innocent
341                 
342                 if(seqName != sequence.getName())       {
343                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
344                         m->mothurOutEndLine();  
345                 } 
346                         
347                 double aveQScore = calculateAverage();
348                 
349                 if(aveQScore >= qAverage)       {       success = 1;    }
350                 else                                            {       success = 0;    }
351                 
352                 return success;
353         }
354         catch(exception& e) {
355                 m->errorOut(e, "QualityScores", "cullQualAverage");
356                 exit(1);
357         }
358 }
359
360 /**************************************************************************************************/
361
362 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
363         try {
364
365                 int seqLength = errorSeq.size();
366                 
367                 int qIndex = start - 1;
368
369                 for(int i=0;i<seqLength;i++){
370                         
371                         if(errorSeq[i] == 'm')          {       qualErrorMap['m'][qScores[qIndex]] += weight;   }
372                         else if(errorSeq[i] == 's')     {       qualErrorMap['s'][qScores[qIndex]] += weight;   }
373                         else if(errorSeq[i] == 'i')     {       qualErrorMap['i'][qScores[qIndex]] += weight;   }
374                         else if(errorSeq[i] == 'a')     {       qualErrorMap['a'][qScores[qIndex]] += weight;   }
375                         else if(errorSeq[i] == 'd')     {       /*      there are no qScores for deletions      */              }
376
377                         if(errorSeq[i] != 'd')          {       qIndex++;       }
378                         if(qIndex > stop){      break;  }
379                 }       
380         }
381         catch(exception& e) {
382                 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
383                 exit(1);
384         }
385 }
386
387 /**************************************************************************************************/
388
389 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
390         try {
391                 
392                 int index = 0;
393                 for(int i=start-1;i<stop;i++){
394                         forwardMap[index++][qScores[i]] += weight;
395                 }
396                 
397         }
398         catch(exception& e) {
399                 m->errorOut(e, "QualityScores", "updateForwardMap");
400                 exit(1);
401         }
402 }
403
404 /**************************************************************************************************/
405
406 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
407         try {
408                 
409                 int index = 0;
410                 for(int i=stop-1;i>=start;i--){
411                         reverseMap[index++][qScores[i]] += weight;
412                 }
413                 
414         }       
415         catch(exception& e) {
416                 m->errorOut(e, "QualityScores", "updateForwardMap");
417                 exit(1);
418         }
419 }
420
421 /**************************************************************************************************/