]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
fixes while testing 1.33.0
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19                 
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "QualityScores", "QualityScores");
23                 exit(1);
24         }                                                       
25 }
26
27 /**************************************************************************************************/
28
29 QualityScores::QualityScores(ifstream& qFile){
30         try {
31                 
32                 m = MothurOut::getInstance();
33
34                 int score;
35                 seqName = getSequenceName(qFile); m->gobble(qFile);
36                 
37         if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "'\n.");  }
38         
39                 if (!m->control_pressed) {
40             string qScoreString = m->getline(qFile); m->gobble(qFile);
41             
42             if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + qScoreString + "'\n.");  }
43             
44             while(qFile.peek() != '>' && qFile.peek() != EOF){
45                 if (m->control_pressed) { break; }
46                 string temp = m->getline(qFile); m->gobble(qFile);
47                 //if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + temp + "'\n.");  }
48                 qScoreString +=  ' ' + temp;
49             }
50             //cout << "done reading " << endl;
51             istringstream qScoreStringStream(qScoreString);
52             int count = 0;
53             while(!qScoreStringStream.eof()){
54                 if (m->control_pressed) { break; }
55                 string temp;
56                 qScoreStringStream >> temp;  m->gobble(qScoreStringStream);
57                 
58                 //if (m->debug) { m->mothurOut("[DEBUG]: score " + toString(qScores.size()) + " = '" + temp + "'\n.");  }
59                 
60                 //check temp to make sure its a number
61                 if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
62                 convert(temp, score);
63                 
64                 //cout << count << '\t' << score << endl;
65                 qScores.push_back(score);
66                 count++;
67             }
68         }
69                 
70                 seqLength = qScores.size();
71                 //cout << "seqlength = " << seqLength  << endl;
72                 
73         }
74         catch(exception& e) {
75                 m->errorOut(e, "QualityScores", "QualityScores");
76                 exit(1);
77         }                                                       
78         
79 }
80 //********************************************************************************************************************
81 string QualityScores::getSequenceName(ifstream& qFile) {
82         try {
83                 string name = "";
84                 
85         qFile >> name;
86         m->getline(qFile);
87                 
88                 if (name.length() != 0) { 
89             
90                         name = name.substr(1); 
91             
92             m->checkName(name);
93             
94         }else{ m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;  }
95         
96                 return name;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "QualityScores", "getSequenceName");
100                 exit(1);
101         }
102 }
103 //********************************************************************************************************************
104 void QualityScores::setName(string name) {
105         try {
106       
107         m->checkName(name);   
108         seqName = name;
109         }
110         catch(exception& e) {
111                 m->errorOut(e, "QualityScores", "setName");
112                 exit(1);
113         }
114 }
115 /**************************************************************************************************/
116
117 string QualityScores::getName(){
118         
119         try {
120                 return seqName;
121         }
122         catch(exception& e) {
123                 m->errorOut(e, "QualityScores", "getName");
124                 exit(1);
125         }                                                       
126 }
127
128 /**************************************************************************************************/
129
130 void QualityScores::printQScores(ofstream& qFile){
131         try {
132                 
133                 double aveQScore = calculateAverage(false);
134                 
135                 qFile << '>' << seqName << '\t' << aveQScore << endl;
136                 
137                 for(int i=0;i<seqLength;i++){
138                         qFile << qScores[i] << ' ';
139                 }
140                 qFile << endl;
141                                 
142         }
143         catch(exception& e) {
144                 m->errorOut(e, "QualityScores", "printQScores");
145                 exit(1);
146         }                                                       
147 }
148
149 /**************************************************************************************************/
150
151 void QualityScores::trimQScores(int start, int end){
152         try {
153                 vector<int> hold;
154                 
155
156                 //cout << seqName << '\t' << start << '\t' << end << '\t' << qScores.size() << endl;
157                 //for (int i = 0; i < qScores.size(); i++) { cout << qScores[i] << end; }
158                 if(end == -1){          
159                         hold = vector<int>(qScores.begin()+start, qScores.end());
160                         qScores = hold;         
161                 }
162                 if(start == -1){
163                         if(qScores.size() > end){
164                                 hold = vector<int>(qScores.begin(), qScores.begin()+end);
165                                 qScores = hold;         
166                         }
167                 }
168
169                 seqLength = qScores.size();
170         }
171         catch(exception& e) {
172                 m->errorOut(e, "QualityScores", "trimQScores");
173                 exit(1);
174         }                                                       
175 }
176
177 /**************************************************************************************************/
178
179 void QualityScores::flipQScores(){
180         try {
181                 
182                 vector<int> temp = qScores;
183                 for(int i=0;i<seqLength;i++){
184                         qScores[seqLength - i - 1] = temp[i];
185                 }
186                 
187         }
188         catch(exception& e) {
189                 m->errorOut(e, "QualityScores", "flipQScores");
190                 exit(1);
191         }                                                       
192 }
193
194 /**************************************************************************************************/
195
196 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
197         try {
198                 string rawSequence = sequence.getUnaligned();
199                 int seqLength = sequence.getNumBases();
200                 
201                 if(seqName != sequence.getName()){
202                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
203                         m->mothurOutEndLine();  m->control_pressed = true;
204                 }
205                 
206                 int end;
207                 for(int i=0;i<seqLength;i++){
208                         end = i;
209                         if(qScores[i] < qThreshold){
210                                 break;
211                         }
212                 }
213                 
214                 //every score passed
215                 if (end == (seqLength-1)) { end = seqLength; }
216                 
217                 sequence.setUnaligned(rawSequence.substr(0,end));
218                 trimQScores(-1, end);
219                 
220                 return 1;
221         }
222         catch(exception& e) {
223                 m->errorOut(e, "QualityScores", "flipQScores");
224                 exit(1);
225         }                                                       
226         
227 }
228
229 /**************************************************************************************************/
230
231 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold, bool logTransform){
232         try {
233                 string rawSequence = sequence.getUnaligned();
234                 int seqLength = sequence.getNumBases();
235                 
236                 if(seqName != sequence.getName()){
237                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
238                         m->mothurOutEndLine();  
239                 }
240                 
241                 int end = -1;
242                 double rollingSum = 0.0000;
243         double value = 0.0;
244                 
245                 for(int i=0;i<seqLength;i++){
246             
247             if (logTransform)   {
248                 rollingSum += (double)pow(10.0, qScores[i]);
249                 value = log10(rollingSum / (double)(i+1));
250                 
251             } //Sum 10^Q
252             else                {
253                 rollingSum += (double)qScores[i];
254                 value = rollingSum / (double)(i+1);
255             }
256                         
257                         
258                         if(value < qThreshold){
259                                 end = i;
260                                 break;
261                         }
262                 }
263                 
264                 if(end == -1){  end = seqLength;        }
265                 
266                 
267                 sequence.setUnaligned(rawSequence.substr(0,end));
268                 trimQScores(-1, end);
269                 
270                 
271                 return 1;
272         }
273         catch(exception& e) {
274                 m->errorOut(e, "QualityScores", "flipQScores");
275                 exit(1);
276         }                                                       
277         
278 }
279
280 /**************************************************************************************************/
281
282 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold, bool logTransform){
283         try {
284                 string rawSequence = sequence.getUnaligned();
285                 int seqLength = sequence.getNumBases();
286                 
287                 if(seqName != sequence.getName()){
288                         m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
289                         m->mothurOutEndLine();
290                 }
291                 
292                 int end = windowSize;
293                 int start = 0;
294
295                 if(seqLength < windowSize) {    return 0;       }
296                         
297                 while((start+windowSize) < seqLength){
298                         double windowSum = 0.0000;
299
300                         for(int i=start;i<end;i++){
301                 if (logTransform)   {  windowSum += pow(10.0, qScores[i]);  }
302                 else                {  windowSum += qScores[i];             }
303                         }
304                         double windowAverage = 0.0;
305             if (logTransform)   { windowAverage = log10(windowSum / (double)(end-start)); }
306             else                { windowAverage = windowSum / (double)(end-start);      }
307                                 
308                         if(windowAverage < qThreshold){
309                                 end = end - stepSize;
310                                 break;
311                         }
312                         
313                         start += stepSize;
314                         end = start + windowSize;
315                                 
316                         if(end >= seqLength){   end = seqLength;        }
317                                 
318                 }
319         
320                 if(end == -1){  end = seqLength;        }
321                 
322                 //failed first window
323                 if (end < windowSize) { return 0; }
324                         
325                 sequence.setUnaligned(rawSequence.substr(0,end));
326                 trimQScores(-1, end);
327                 
328                 return 1;
329         }
330         catch(exception& e) {
331                 m->errorOut(e, "QualityScores", "stripQualWindowAverage");
332                 exit(1);
333         }                                                       
334         
335 }
336
337 /**************************************************************************************************/
338
339 double QualityScores::calculateAverage(bool logTransform){
340         
341         double aveQScore = 0.0000;
342         
343         for(int i=0;i<seqLength;i++){
344         if (logTransform)   {  aveQScore += pow(10.0, qScores[i]);  }
345         else                {  aveQScore += qScores[i];             }
346         }
347         aveQScore /= (double) seqLength;
348     
349     if (logTransform)   {  aveQScore = log10(aveQScore /(double) seqLength);  }
350     else                {  aveQScore /= (double) seqLength;                 }
351         
352         return aveQScore;
353 }
354
355 /**************************************************************************************************/
356
357 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage, bool logTransform){
358         try {
359                 string rawSequence = sequence.getUnaligned();
360                 bool success = 0;       //guilty until proven innocent
361                 
362                 if(seqName != sequence.getName())       {
363                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
364                         m->mothurOutEndLine();  
365                 } 
366                         
367                 double aveQScore = calculateAverage(logTransform);
368                 
369                 if(aveQScore >= qAverage)       {       success = 1;    }
370                 else                                            {       success = 0;    }
371                 
372                 return success;
373         }
374         catch(exception& e) {
375                 m->errorOut(e, "QualityScores", "cullQualAverage");
376                 exit(1);
377         }
378 }
379
380 /**************************************************************************************************/
381
382 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
383         try {
384
385                 int seqLength = errorSeq.size();
386                 
387                 int qIndex = start - 1;
388
389                 for(int i=0;i<seqLength;i++){
390                         
391                         if(errorSeq[i] == 'm')          {       qualErrorMap['m'][qScores[qIndex]] += weight;   }
392                         else if(errorSeq[i] == 's')     {       qualErrorMap['s'][qScores[qIndex]] += weight;   }
393                         else if(errorSeq[i] == 'i')     {       qualErrorMap['i'][qScores[qIndex]] += weight;   }
394                         else if(errorSeq[i] == 'a')     {       qualErrorMap['a'][qScores[qIndex]] += weight;   /*if(qScores[qIndex] != 0){     cout << qIndex << '\t';         }*/     }
395                         else if(errorSeq[i] == 'd')     {       /*      there are no qScores for deletions      */              }
396
397                         if(errorSeq[i] != 'd')          {       qIndex++;       }
398                                                 
399                         if(qIndex > stop){      break;  }
400                 }       
401         }
402         catch(exception& e) {
403                 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
404                 exit(1);
405         }
406 }
407
408 /**************************************************************************************************/
409
410 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
411         try {
412                 
413                 int index = 0;
414                 for(int i=start-1;i<stop;i++){
415                         forwardMap[index++][qScores[i]] += weight;
416                 }
417                 
418         }
419         catch(exception& e) {
420                 m->errorOut(e, "QualityScores", "updateForwardMap");
421                 exit(1);
422         }
423 }
424
425 /**************************************************************************************************/
426
427 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
428         try {
429                 
430                 int index = 0;
431                 for(int i=stop-1;i>=start-1;i--){
432                         reverseMap[index++][qScores[i]] += weight;
433                 }
434                 
435         }       
436         catch(exception& e) {
437                 m->errorOut(e, "QualityScores", "updateReverseMap");
438                 exit(1);
439         }
440 }
441
442 /**************************************************************************************************/