]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
bug fixes
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19                 
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "QualityScores", "QualityScores");
23                 exit(1);
24         }                                                       
25 }
26
27 /**************************************************************************************************/
28
29 QualityScores::QualityScores(ifstream& qFile){
30         try {
31                 
32                 m = MothurOut::getInstance();
33                 
34                 /*seqName = "";
35                 int score;
36                 
37                 qFile >> seqName; 
38                 m->getline(qFile);
39                 
40                 if (seqName == "")      {
41                         m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
42                         m->mothurOutEndLine(); 
43                 }
44                 else{
45                         seqName = seqName.substr(1);
46                 }
47                 cout << seqName << endl;        
48                 string qScoreString = m->getline(qFile);
49                 cout << qScoreString << endl;   
50                 istringstream qScoreStringStream(qScoreString);
51                 int count = 0;
52                 while(!qScoreStringStream.eof()){
53                         if (m->control_pressed) { break; }
54                         qScoreStringStream >> score;
55                         qScores.push_back(score);
56                         cout << score << '\t' << count << endl;
57                         count++;
58                 }
59                 qScores.pop_back();*/
60                 string scores = "";
61                 
62                 while(!qFile.eof()){    
63                         
64                         qFile >> seqName; 
65                         
66                         //get name
67                         if (seqName.length() != 0) { 
68                                 seqName = seqName.substr(1);
69                                 while (!qFile.eof())    {       
70                                         char c = qFile.get(); 
71                                         //gobble junk on line
72                                         if (c == 10 || c == 13){        break;  }
73                                 } 
74                                 m->gobble(qFile);
75                         }
76                         
77                         //get scores
78                         while(qFile){
79                                 char letter=qFile.get();
80                                 if((letter == '>')){    qFile.putback(letter);  break;  }
81                                 else if (isprint(letter)) { scores += letter; }
82                         }
83                         
84                         m->gobble(qFile);
85                         
86                         break;
87                 }
88                 
89                 //convert scores string to qScores
90                 istringstream qScoreStringStream(scores);
91                 
92                 int score;
93                 while(!qScoreStringStream.eof()){
94                         
95                         if (m->control_pressed) { break; }
96                         
97                         qScoreStringStream >> score;
98                         qScores.push_back(score);
99                 }
100                 
101                 qScores.pop_back();
102                 seqLength = qScores.size();
103                 
104                 
105         }
106         catch(exception& e) {
107                 m->errorOut(e, "QualityScores", "QualityScores");
108                 exit(1);
109         }                                                       
110         
111 }
112
113 /**************************************************************************************************/
114
115 string QualityScores::getName(){
116         
117         try {
118                 return seqName;
119         }
120         catch(exception& e) {
121                 m->errorOut(e, "QualityScores", "getName");
122                 exit(1);
123         }                                                       
124 }
125
126 /**************************************************************************************************/
127
128 void QualityScores::printQScores(ofstream& qFile){
129         try {
130                 
131                 double aveQScore = calculateAverage();
132                 
133                 qFile << '>' << seqName << '\t' << aveQScore << endl;
134                 
135                 for(int i=0;i<seqLength;i++){
136                         qFile << qScores[i] << ' ';
137                 }
138                 qFile << endl;
139                                 
140         }
141         catch(exception& e) {
142                 m->errorOut(e, "QualityScores", "printQScores");
143                 exit(1);
144         }                                                       
145 }
146
147 /**************************************************************************************************/
148
149 void QualityScores::trimQScores(int start, int end){
150         try {
151                 vector<int> hold;
152                 
153                 if(end == -1){          
154                         hold = vector<int>(qScores.begin()+start, qScores.end());
155                         qScores = hold;         
156                 }
157                 if(start == -1){
158                         if(qScores.size() > end){
159                                 hold = vector<int>(qScores.begin(), qScores.begin()+end);
160                                 qScores = hold;         
161                         }
162                 }
163
164                 seqLength = qScores.size();
165         }
166         catch(exception& e) {
167                 m->errorOut(e, "QualityScores", "trimQScores");
168                 exit(1);
169         }                                                       
170 }
171
172 /**************************************************************************************************/
173
174 void QualityScores::flipQScores(){
175         try {
176                 
177                 vector<int> temp = qScores;
178                 for(int i=0;i<seqLength;i++){
179                         qScores[seqLength - i - 1] = temp[i];
180                 }
181                 
182         }
183         catch(exception& e) {
184                 m->errorOut(e, "QualityScores", "flipQScores");
185                 exit(1);
186         }                                                       
187 }
188
189 /**************************************************************************************************/
190
191 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
192         try {
193                 string rawSequence = sequence.getUnaligned();
194                 int seqLength = sequence.getNumBases();
195                 
196                 if(seqName != sequence.getName()){
197                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
198                         m->mothurOutEndLine();  
199                 }
200                 
201                 int end;
202                 for(int i=0;i<seqLength;i++){
203                         end = i;
204                         if(qScores[i] < qThreshold){
205                                 break;
206                         }
207                 }
208                 
209                 //every score passed
210                 if (end == (seqLength-1)) { end = seqLength; }
211                 
212                 sequence.setUnaligned(rawSequence.substr(0,end));
213                 trimQScores(-1, end);
214                 
215                 return 1;
216         }
217         catch(exception& e) {
218                 m->errorOut(e, "QualityScores", "flipQScores");
219                 exit(1);
220         }                                                       
221         
222 }
223
224 /**************************************************************************************************/
225
226 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
227         try {
228                 string rawSequence = sequence.getUnaligned();
229                 int seqLength = sequence.getNumBases();
230                 
231                 if(seqName != sequence.getName()){
232                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
233                         m->mothurOutEndLine();  
234                 }
235                 
236                 int end = -1;
237                 double rollingSum = 0.0000;
238                 
239                 for(int i=0;i<seqLength;i++){
240
241                         rollingSum += (double)qScores[i];
242                         
243                         if(rollingSum / (double)(i+1) < qThreshold){
244                                 end = i;
245                                 break;
246                         }
247                 }
248                 
249                 if(end == -1){  end = seqLength;        }
250                 
251                 
252                 sequence.setUnaligned(rawSequence.substr(0,end));
253                 trimQScores(-1, end);
254                 
255                 
256                 return 1;
257         }
258         catch(exception& e) {
259                 m->errorOut(e, "QualityScores", "flipQScores");
260                 exit(1);
261         }                                                       
262         
263 }
264
265 /**************************************************************************************************/
266
267 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
268         try {
269                 string rawSequence = sequence.getUnaligned();
270                 int seqLength = sequence.getNumBases();
271                 
272                 if(seqName != sequence.getName()){
273                         m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
274                         m->mothurOutEndLine();
275                 }
276                 
277                 int end = windowSize;
278                 int start = 0;
279                 
280                 if(seqLength < windowSize) {    return 0;       }
281                         
282                 while(start < seqLength){
283                         double windowSum = 0.0000;
284
285                         for(int i=start;i<end;i++){
286                                 windowSum += qScores[i];
287                         }
288                         double windowAverage = windowSum / (double)(end-start);
289                         
290                         if(windowAverage < qThreshold){
291                                 end = end - stepSize;
292                                 break;
293                         }
294                         start += stepSize;
295                         end = start + windowSize;
296                         if(end >= seqLength){   end = seqLength - 1;    }
297                 }
298                 
299                 if(end == -1){  end = seqLength;        }
300                 
301                 sequence.setUnaligned(rawSequence.substr(0,end));
302                 trimQScores(-1, end);
303                 
304                 return 1;
305         }
306         catch(exception& e) {
307                 m->errorOut(e, "QualityScores", "flipQScores");
308                 exit(1);
309         }                                                       
310         
311 }
312
313 /**************************************************************************************************/
314
315 double QualityScores::calculateAverage(){
316         
317         double aveQScore = 0.0000;
318         
319         for(int i=0;i<seqLength;i++){
320                 aveQScore += (double) qScores[i];
321         }
322         aveQScore /= (double) seqLength;
323         
324         return aveQScore;
325 }
326
327 /**************************************************************************************************/
328
329 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
330         try {
331                 string rawSequence = sequence.getUnaligned();
332                 bool success = 0;       //guilty until proven innocent
333                 
334                 if(seqName != sequence.getName())       {
335                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
336                         m->mothurOutEndLine();  
337                 } 
338                         
339                 double aveQScore = calculateAverage();
340                 
341                 if(aveQScore >= qAverage)       {       success = 1;    }
342                 else                                            {       success = 0;    }
343                 
344                 return success;
345         }
346         catch(exception& e) {
347                 m->errorOut(e, "QualityScores", "cullQualAverage");
348                 exit(1);
349         }
350 }
351
352 /**************************************************************************************************/
353
354 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
355         try {
356
357                 int seqLength = errorSeq.size();
358                 
359                 int qIndex = start - 1;
360
361                 for(int i=0;i<seqLength;i++){
362                         
363                         if(errorSeq[i] == 'm')          {       qualErrorMap['m'][qScores[qIndex]] += weight;   }
364                         else if(errorSeq[i] == 's')     {       qualErrorMap['s'][qScores[qIndex]] += weight;   }
365                         else if(errorSeq[i] == 'i')     {       qualErrorMap['i'][qScores[qIndex]] += weight;   }
366                         else if(errorSeq[i] == 'a')     {       qualErrorMap['a'][qScores[qIndex]] += weight;   }
367                         else if(errorSeq[i] == 'd')     {       /*      there are no qScores for deletions      */              }
368
369                         if(errorSeq[i] != 'd')          {       qIndex++;       }
370                         if(qIndex > stop){      break;  }
371                 }       
372         }
373         catch(exception& e) {
374                 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
375                 exit(1);
376         }
377 }
378
379 /**************************************************************************************************/
380
381 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
382         try {
383                 
384                 int index = 0;
385                 for(int i=start-1;i<stop;i++){
386                         forwardMap[index++][qScores[i]] += weight;
387                 }
388                 
389         }
390         catch(exception& e) {
391                 m->errorOut(e, "QualityScores", "updateForwardMap");
392                 exit(1);
393         }
394 }
395
396 /**************************************************************************************************/
397
398 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
399         try {
400                 
401                 int index = 0;
402                 for(int i=stop-1;i>=start;i--){
403                         reverseMap[index++][qScores[i]] += weight;
404                 }
405                 
406         }       
407         catch(exception& e) {
408                 m->errorOut(e, "QualityScores", "updateForwardMap");
409                 exit(1);
410         }
411 }
412
413 /**************************************************************************************************/