]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
Changes to seq.error so that an alignment is not necessary
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19                 
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "QualityScores", "QualityScores");
23                 exit(1);
24         }                                                       
25 }
26
27 /**************************************************************************************************/
28
29 QualityScores::QualityScores(ifstream& qFile){
30         try {
31                 
32                 m = MothurOut::getInstance();
33                 
34                 seqName = "";
35                 int score;
36                 
37                 qFile >> seqName; 
38                 m->getline(qFile);
39                 //cout << seqName << endl;      
40                 if (seqName == "")      {
41                         m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
42                         m->mothurOutEndLine(); 
43                 }
44                 else{
45                         seqName = seqName.substr(1);
46                 }
47                 
48                 string qScoreString = m->getline(qFile);
49                 //cout << qScoreString << endl;
50                 while(qFile.peek() != '>' && qFile.peek() != EOF){
51                         if (m->control_pressed) { break; }
52                         string temp = m->getline(qFile);
53                         //cout << temp << endl;
54                         qScoreString +=  ' ' + temp;
55                 }
56                 //cout << "done reading " << endl;      
57                 istringstream qScoreStringStream(qScoreString);
58                 int count = 0;
59                 while(!qScoreStringStream.eof()){
60                         if (m->control_pressed) { break; }
61                         string temp;
62                         qScoreStringStream >> temp;  m->gobble(qScoreStringStream);
63                         
64                         //check temp to make sure its a number
65                         if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
66                         convert(temp, score);
67                         
68                         //cout << count << '\t' << score << endl;
69                         qScores.push_back(score);
70                         count++;
71                 }
72                 //qScores.pop_back();
73                 
74 //              string scores = "";
75 //              
76 //              while(!qFile.eof()){    
77 //                      
78 //                      qFile >> seqName; 
79 //                      
80 //                      //get name
81 //                      if (seqName.length() != 0) { 
82 //                              seqName = seqName.substr(1);
83 //                              while (!qFile.eof())    {       
84 //                                      char c = qFile.get(); 
85 //                                      //gobble junk on line
86 //                                      if (c == 10 || c == 13){        break;  }
87 //                              } 
88 //                              m->gobble(qFile);
89 //                      }
90 //                      
91 //                      //get scores
92 //                      while(qFile){
93 //                              char letter=qFile.get();
94 //                              if((letter == '>')){    qFile.putback(letter);  break;  }
95 //                              else if (isprint(letter)) { scores += letter; }
96 //                      }
97 //                      m->gobble(qFile);
98 //                      
99 //                      break;
100 //              }
101 //              
102 //              //convert scores string to qScores
103 //              istringstream qScoreStringStream(scores);
104 //              
105 //              int score;
106 //              while(!qScoreStringStream.eof()){
107 //                      
108 //                      if (m->control_pressed) { break; }
109 //                      
110 //                      qScoreStringStream >> score;
111 //                      qScores.push_back(score);
112 //              }
113 //              
114 //              qScores.pop_back();
115
116                 seqLength = qScores.size();
117                 //cout << "seqlength = " << seqLength << '\t' << count << endl;
118                 
119         }
120         catch(exception& e) {
121                 m->errorOut(e, "QualityScores", "QualityScores");
122                 exit(1);
123         }                                                       
124         
125 }
126
127 /**************************************************************************************************/
128
129 string QualityScores::getName(){
130         
131         try {
132                 return seqName;
133         }
134         catch(exception& e) {
135                 m->errorOut(e, "QualityScores", "getName");
136                 exit(1);
137         }                                                       
138 }
139
140 /**************************************************************************************************/
141
142 void QualityScores::printQScores(ofstream& qFile){
143         try {
144                 
145                 double aveQScore = calculateAverage();
146                 
147                 qFile << '>' << seqName << '\t' << aveQScore << endl;
148                 
149                 for(int i=0;i<seqLength;i++){
150                         qFile << qScores[i] << ' ';
151                 }
152                 qFile << endl;
153                                 
154         }
155         catch(exception& e) {
156                 m->errorOut(e, "QualityScores", "printQScores");
157                 exit(1);
158         }                                                       
159 }
160
161 /**************************************************************************************************/
162
163 void QualityScores::trimQScores(int start, int end){
164         try {
165                 vector<int> hold;
166                 
167
168                 //cout << seqName << '\t' << start << '\t' << end << '\t' << qScores.size() << endl;
169                 //for (int i = 0; i < qScores.size(); i++) { cout << qScores[i] << end; }
170                 if(end == -1){          
171                         hold = vector<int>(qScores.begin()+start, qScores.end());
172                         qScores = hold;         
173                 }
174                 if(start == -1){
175                         if(qScores.size() > end){
176                                 hold = vector<int>(qScores.begin(), qScores.begin()+end);
177                                 qScores = hold;         
178                         }
179                 }
180
181                 seqLength = qScores.size();
182         }
183         catch(exception& e) {
184                 m->errorOut(e, "QualityScores", "trimQScores");
185                 exit(1);
186         }                                                       
187 }
188
189 /**************************************************************************************************/
190
191 void QualityScores::flipQScores(){
192         try {
193                 
194                 vector<int> temp = qScores;
195                 for(int i=0;i<seqLength;i++){
196                         qScores[seqLength - i - 1] = temp[i];
197                 }
198                 
199         }
200         catch(exception& e) {
201                 m->errorOut(e, "QualityScores", "flipQScores");
202                 exit(1);
203         }                                                       
204 }
205
206 /**************************************************************************************************/
207
208 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
209         try {
210                 string rawSequence = sequence.getUnaligned();
211                 int seqLength = sequence.getNumBases();
212                 
213                 if(seqName != sequence.getName()){
214                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
215                         m->mothurOutEndLine();  
216                 }
217                 
218                 int end;
219                 for(int i=0;i<seqLength;i++){
220                         end = i;
221                         if(qScores[i] < qThreshold){
222                                 break;
223                         }
224                 }
225                 
226                 //every score passed
227                 if (end == (seqLength-1)) { end = seqLength; }
228                 
229                 sequence.setUnaligned(rawSequence.substr(0,end));
230                 trimQScores(-1, end);
231                 
232                 return 1;
233         }
234         catch(exception& e) {
235                 m->errorOut(e, "QualityScores", "flipQScores");
236                 exit(1);
237         }                                                       
238         
239 }
240
241 /**************************************************************************************************/
242
243 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
244         try {
245                 string rawSequence = sequence.getUnaligned();
246                 int seqLength = sequence.getNumBases();
247                 
248                 if(seqName != sequence.getName()){
249                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
250                         m->mothurOutEndLine();  
251                 }
252                 
253                 int end = -1;
254                 double rollingSum = 0.0000;
255                 
256                 for(int i=0;i<seqLength;i++){
257
258                         rollingSum += (double)qScores[i];
259                         
260                         if(rollingSum / (double)(i+1) < qThreshold){
261                                 end = i;
262                                 break;
263                         }
264                 }
265                 
266                 if(end == -1){  end = seqLength;        }
267                 
268                 
269                 sequence.setUnaligned(rawSequence.substr(0,end));
270                 trimQScores(-1, end);
271                 
272                 
273                 return 1;
274         }
275         catch(exception& e) {
276                 m->errorOut(e, "QualityScores", "flipQScores");
277                 exit(1);
278         }                                                       
279         
280 }
281
282 /**************************************************************************************************/
283
284 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
285         try {
286                 string rawSequence = sequence.getUnaligned();
287                 int seqLength = sequence.getNumBases();
288                 
289                 if(seqName != sequence.getName()){
290                         m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
291                         m->mothurOutEndLine();
292                 }
293                 
294                 int end = windowSize;
295                 int start = 0;
296
297                 if(seqLength < windowSize) {    return 0;       }
298                         
299                 while((start+windowSize) < seqLength){
300                         double windowSum = 0.0000;
301
302                         for(int i=start;i<end;i++){
303                                 windowSum += qScores[i];
304                         }
305                         double windowAverage = windowSum / (double)(end-start);
306                                 
307                         if(windowAverage < qThreshold){
308                                 end = end - stepSize;
309                                 break;
310                         }
311                         
312                         start += stepSize;
313                         end = start + windowSize;
314                                 
315                         if(end >= seqLength){   end = seqLength;        }
316                                 
317                 }
318         
319                 if(end == -1){  end = seqLength;        }
320                 
321                 //failed first window
322                 if (end < windowSize) { return 0; }
323                         
324                 sequence.setUnaligned(rawSequence.substr(0,end));
325                 trimQScores(-1, end);
326                 
327                 return 1;
328         }
329         catch(exception& e) {
330                 m->errorOut(e, "QualityScores", "stripQualWindowAverage");
331                 exit(1);
332         }                                                       
333         
334 }
335
336 /**************************************************************************************************/
337
338 double QualityScores::calculateAverage(){
339         
340         double aveQScore = 0.0000;
341         
342         for(int i=0;i<seqLength;i++){
343                 aveQScore += (double) qScores[i];
344         }
345         aveQScore /= (double) seqLength;
346         
347         return aveQScore;
348 }
349
350 /**************************************************************************************************/
351
352 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
353         try {
354                 string rawSequence = sequence.getUnaligned();
355                 bool success = 0;       //guilty until proven innocent
356                 
357                 if(seqName != sequence.getName())       {
358                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
359                         m->mothurOutEndLine();  
360                 } 
361                         
362                 double aveQScore = calculateAverage();
363                 
364                 if(aveQScore >= qAverage)       {       success = 1;    }
365                 else                                            {       success = 0;    }
366                 
367                 return success;
368         }
369         catch(exception& e) {
370                 m->errorOut(e, "QualityScores", "cullQualAverage");
371                 exit(1);
372         }
373 }
374
375 /**************************************************************************************************/
376
377 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
378         try {
379
380                 int seqLength = errorSeq.size();
381                 
382                 int qIndex = start - 1;
383
384                 for(int i=0;i<seqLength;i++){
385                         
386                         if(errorSeq[i] == 'm')          {       qualErrorMap['m'][qScores[qIndex]] += weight;   }
387                         else if(errorSeq[i] == 's')     {       qualErrorMap['s'][qScores[qIndex]] += weight;   }
388                         else if(errorSeq[i] == 'i')     {       qualErrorMap['i'][qScores[qIndex]] += weight;   }
389                         else if(errorSeq[i] == 'a')     {       qualErrorMap['a'][qScores[qIndex]] += weight;   /*if(qScores[qIndex] != 0){     cout << qIndex << '\t';         }*/     }
390                         else if(errorSeq[i] == 'd')     {       /*      there are no qScores for deletions      */              }
391
392                         if(errorSeq[i] != 'd')          {       qIndex++;       }
393                                                 
394                         if(qIndex > stop){      break;  }
395                 }       
396         }
397         catch(exception& e) {
398                 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
399                 exit(1);
400         }
401 }
402
403 /**************************************************************************************************/
404
405 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
406         try {
407                 
408                 int index = 0;
409                 for(int i=start-1;i<stop;i++){
410                         forwardMap[index++][qScores[i]] += weight;
411                 }
412                 
413         }
414         catch(exception& e) {
415                 m->errorOut(e, "QualityScores", "updateForwardMap");
416                 exit(1);
417         }
418 }
419
420 /**************************************************************************************************/
421
422 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
423         try {
424                 
425                 int index = 0;
426                 for(int i=stop-1;i>=start-1;i--){
427                         reverseMap[index++][qScores[i]] += weight;
428                 }
429                 
430         }       
431         catch(exception& e) {
432                 m->errorOut(e, "QualityScores", "updateReverseMap");
433                 exit(1);
434         }
435 }
436
437 /**************************************************************************************************/