]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
removed some couts from qualityscores.cpp
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19                 
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "QualityScores", "QualityScores");
23                 exit(1);
24         }                                                       
25 }
26
27 /**************************************************************************************************/
28
29 QualityScores::QualityScores(ifstream& qFile){
30         try {
31                 
32                 m = MothurOut::getInstance();
33                 
34                 seqName = "";
35                 int score;
36                 
37                 qFile >> seqName; 
38                 m->getline(qFile);
39                 
40                 if (seqName == "")      {
41                         m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
42                         m->mothurOutEndLine(); 
43                 }
44                 else{
45                         seqName = seqName.substr(1);
46                 }
47                 string qScoreString = m->getline(qFile);
48                 while(qFile.peek() != '>' && qFile.peek() != EOF){
49                         qScoreString += ' ' + m->getline(qFile);
50                 }
51                 
52                 istringstream qScoreStringStream(qScoreString);
53                 int count = 0;
54                 while(!qScoreStringStream.eof()){
55                         if (m->control_pressed) { break; }
56                         qScoreStringStream >> score;
57                         qScores.push_back(score);
58                         count++;
59                 }
60                 qScores.pop_back();
61                 
62 //              string scores = "";
63 //              
64 //              while(!qFile.eof()){    
65 //                      
66 //                      qFile >> seqName; 
67 //                      
68 //                      //get name
69 //                      if (seqName.length() != 0) { 
70 //                              seqName = seqName.substr(1);
71 //                              while (!qFile.eof())    {       
72 //                                      char c = qFile.get(); 
73 //                                      //gobble junk on line
74 //                                      if (c == 10 || c == 13){        break;  }
75 //                              } 
76 //                              m->gobble(qFile);
77 //                      }
78 //                      
79 //                      //get scores
80 //                      while(qFile){
81 //                              char letter=qFile.get();
82 //                              if((letter == '>')){    qFile.putback(letter);  break;  }
83 //                              else if (isprint(letter)) { scores += letter; }
84 //                      }
85 //                      m->gobble(qFile);
86 //                      
87 //                      break;
88 //              }
89 //              
90 //              //convert scores string to qScores
91 //              istringstream qScoreStringStream(scores);
92 //              
93 //              int score;
94 //              while(!qScoreStringStream.eof()){
95 //                      
96 //                      if (m->control_pressed) { break; }
97 //                      
98 //                      qScoreStringStream >> score;
99 //                      qScores.push_back(score);
100 //              }
101 //              
102 //              qScores.pop_back();
103
104                 seqLength = qScores.size();
105                 
106                 
107         }
108         catch(exception& e) {
109                 m->errorOut(e, "QualityScores", "QualityScores");
110                 exit(1);
111         }                                                       
112         
113 }
114
115 /**************************************************************************************************/
116
117 string QualityScores::getName(){
118         
119         try {
120                 return seqName;
121         }
122         catch(exception& e) {
123                 m->errorOut(e, "QualityScores", "getName");
124                 exit(1);
125         }                                                       
126 }
127
128 /**************************************************************************************************/
129
130 void QualityScores::printQScores(ofstream& qFile){
131         try {
132                 
133                 double aveQScore = calculateAverage();
134                 
135                 qFile << '>' << seqName << '\t' << aveQScore << endl;
136                 
137                 for(int i=0;i<seqLength;i++){
138                         qFile << qScores[i] << ' ';
139                 }
140                 qFile << endl;
141                                 
142         }
143         catch(exception& e) {
144                 m->errorOut(e, "QualityScores", "printQScores");
145                 exit(1);
146         }                                                       
147 }
148
149 /**************************************************************************************************/
150
151 void QualityScores::trimQScores(int start, int end){
152         try {
153                 vector<int> hold;
154                         
155                 if(end == -1){          
156                         hold = vector<int>(qScores.begin()+start, qScores.end());
157                         qScores = hold;         
158                 }
159                 if(start == -1){
160                         if(qScores.size() > end){
161                                 hold = vector<int>(qScores.begin(), qScores.begin()+end);
162                                 qScores = hold;         
163                         }
164                 }
165
166                 seqLength = qScores.size();
167         }
168         catch(exception& e) {
169                 m->errorOut(e, "QualityScores", "trimQScores");
170                 exit(1);
171         }                                                       
172 }
173
174 /**************************************************************************************************/
175
176 void QualityScores::flipQScores(){
177         try {
178                 
179                 vector<int> temp = qScores;
180                 for(int i=0;i<seqLength;i++){
181                         qScores[seqLength - i - 1] = temp[i];
182                 }
183                 
184         }
185         catch(exception& e) {
186                 m->errorOut(e, "QualityScores", "flipQScores");
187                 exit(1);
188         }                                                       
189 }
190
191 /**************************************************************************************************/
192
193 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
194         try {
195                 string rawSequence = sequence.getUnaligned();
196                 int seqLength = sequence.getNumBases();
197                 
198                 if(seqName != sequence.getName()){
199                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
200                         m->mothurOutEndLine();  
201                 }
202                 
203                 int end;
204                 for(int i=0;i<seqLength;i++){
205                         end = i;
206                         if(qScores[i] < qThreshold){
207                                 break;
208                         }
209                 }
210                 
211                 //every score passed
212                 if (end == (seqLength-1)) { end = seqLength; }
213                 
214                 sequence.setUnaligned(rawSequence.substr(0,end));
215                 trimQScores(-1, end);
216                 
217                 return 1;
218         }
219         catch(exception& e) {
220                 m->errorOut(e, "QualityScores", "flipQScores");
221                 exit(1);
222         }                                                       
223         
224 }
225
226 /**************************************************************************************************/
227
228 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
229         try {
230                 string rawSequence = sequence.getUnaligned();
231                 int seqLength = sequence.getNumBases();
232                 
233                 if(seqName != sequence.getName()){
234                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
235                         m->mothurOutEndLine();  
236                 }
237                 
238                 int end = -1;
239                 double rollingSum = 0.0000;
240                 
241                 for(int i=0;i<seqLength;i++){
242
243                         rollingSum += (double)qScores[i];
244                         
245                         if(rollingSum / (double)(i+1) < qThreshold){
246                                 end = i;
247                                 break;
248                         }
249                 }
250                 
251                 if(end == -1){  end = seqLength;        }
252                 
253                 
254                 sequence.setUnaligned(rawSequence.substr(0,end));
255                 trimQScores(-1, end);
256                 
257                 
258                 return 1;
259         }
260         catch(exception& e) {
261                 m->errorOut(e, "QualityScores", "flipQScores");
262                 exit(1);
263         }                                                       
264         
265 }
266
267 /**************************************************************************************************/
268
269 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
270         try {
271                 string rawSequence = sequence.getUnaligned();
272                 int seqLength = sequence.getNumBases();
273                 
274                 if(seqName != sequence.getName()){
275                         m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
276                         m->mothurOutEndLine();
277                 }
278                 
279                 int end = windowSize;
280                 int start = 0;
281                 
282                 if(seqLength < windowSize) {    return 0;       }
283                         
284                 while(start < seqLength){
285                         double windowSum = 0.0000;
286
287                         for(int i=start;i<end;i++){
288                                 windowSum += qScores[i];
289                         }
290                         double windowAverage = windowSum / (double)(end-start);
291                         
292                         if(windowAverage < qThreshold){
293                                 end = end - stepSize;
294                                 break;
295                         }
296                         start += stepSize;
297                         end = start + windowSize;
298                         if(end >= seqLength){   end = seqLength - 1;    }
299                 }
300                 
301                 if(end == -1){  end = seqLength;        }
302                 
303                 sequence.setUnaligned(rawSequence.substr(0,end));
304                 trimQScores(-1, end);
305                 
306                 return 1;
307         }
308         catch(exception& e) {
309                 m->errorOut(e, "QualityScores", "flipQScores");
310                 exit(1);
311         }                                                       
312         
313 }
314
315 /**************************************************************************************************/
316
317 double QualityScores::calculateAverage(){
318         
319         double aveQScore = 0.0000;
320         
321         for(int i=0;i<seqLength;i++){
322                 aveQScore += (double) qScores[i];
323         }
324         aveQScore /= (double) seqLength;
325         
326         return aveQScore;
327 }
328
329 /**************************************************************************************************/
330
331 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
332         try {
333                 string rawSequence = sequence.getUnaligned();
334                 bool success = 0;       //guilty until proven innocent
335                 
336                 if(seqName != sequence.getName())       {
337                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
338                         m->mothurOutEndLine();  
339                 } 
340                         
341                 double aveQScore = calculateAverage();
342                 
343                 if(aveQScore >= qAverage)       {       success = 1;    }
344                 else                                            {       success = 0;    }
345                 
346                 return success;
347         }
348         catch(exception& e) {
349                 m->errorOut(e, "QualityScores", "cullQualAverage");
350                 exit(1);
351         }
352 }
353
354 /**************************************************************************************************/
355
356 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
357         try {
358
359                 int seqLength = errorSeq.size();
360                 
361                 int qIndex = start - 1;
362
363                 for(int i=0;i<seqLength;i++){
364                         
365                         if(errorSeq[i] == 'm')          {       qualErrorMap['m'][qScores[qIndex]] += weight;   }
366                         else if(errorSeq[i] == 's')     {       qualErrorMap['s'][qScores[qIndex]] += weight;   }
367                         else if(errorSeq[i] == 'i')     {       qualErrorMap['i'][qScores[qIndex]] += weight;   }
368                         else if(errorSeq[i] == 'a')     {       qualErrorMap['a'][qScores[qIndex]] += weight;   }
369                         else if(errorSeq[i] == 'd')     {       /*      there are no qScores for deletions      */              }
370
371                         if(errorSeq[i] != 'd')          {       qIndex++;       }
372                         if(qIndex > stop){      break;  }
373                 }       
374         }
375         catch(exception& e) {
376                 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
377                 exit(1);
378         }
379 }
380
381 /**************************************************************************************************/
382
383 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
384         try {
385                 
386                 int index = 0;
387                 for(int i=start-1;i<stop;i++){
388                         forwardMap[index++][qScores[i]] += weight;
389                 }
390                 
391         }
392         catch(exception& e) {
393                 m->errorOut(e, "QualityScores", "updateForwardMap");
394                 exit(1);
395         }
396 }
397
398 /**************************************************************************************************/
399
400 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
401         try {
402                 
403                 int index = 0;
404                 for(int i=stop-1;i>=start;i--){
405                         reverseMap[index++][qScores[i]] += weight;
406                 }
407                 
408         }       
409         catch(exception& e) {
410                 m->errorOut(e, "QualityScores", "updateForwardMap");
411                 exit(1);
412         }
413 }
414
415 /**************************************************************************************************/