]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19                 
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "QualityScores", "QualityScores");
23                 exit(1);
24         }                                                       
25 }
26
27 /**************************************************************************************************/
28
29 QualityScores::QualityScores(ifstream& qFile){
30         try {
31                 
32                 m = MothurOut::getInstance();
33
34                 int score;
35                 seqName = getSequenceName(qFile);
36                 
37         if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "'\n.");  }
38         
39                 if (!m->control_pressed) {
40             string qScoreString = m->getline(qFile);
41             
42             if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + qScoreString + "'\n.");  }
43             
44             while(qFile.peek() != '>' && qFile.peek() != EOF){
45                 if (m->control_pressed) { break; }
46                 string temp = m->getline(qFile);
47                 if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + temp + "'\n.");  }
48                 qScoreString +=  ' ' + temp;
49             }
50             //cout << "done reading " << endl;  
51             istringstream qScoreStringStream(qScoreString);
52             int count = 0;
53             while(!qScoreStringStream.eof()){
54                 if (m->control_pressed) { break; }
55                 string temp;
56                 qScoreStringStream >> temp;  m->gobble(qScoreStringStream);
57                 
58                 if (m->debug) { m->mothurOut("[DEBUG]: score " + toString(qScores.size()) + " = '" + temp + "'\n.");  }
59                 
60                 //check temp to make sure its a number
61                 if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
62                 convert(temp, score);
63                 
64                 //cout << count << '\t' << score << endl;
65                 qScores.push_back(score);
66                 count++;
67             }
68         }
69                 
70                 seqLength = qScores.size();
71                 //cout << "seqlength = " << seqLength << '\t' << count << endl;
72                 
73         }
74         catch(exception& e) {
75                 m->errorOut(e, "QualityScores", "QualityScores");
76                 exit(1);
77         }                                                       
78         
79 }
80 //********************************************************************************************************************
81 string QualityScores::getSequenceName(ifstream& qFile) {
82         try {
83                 string name = "";
84                 
85         qFile >> name;
86         m->getline(qFile);
87                 
88                 if (name.length() != 0) { 
89             
90                         name = name.substr(1); 
91             
92             for (int i = 0; i < name.length(); i++) {
93                 if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
94             }
95             
96         }else{ m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;  }
97         
98                 return name;
99         }
100         catch(exception& e) {
101                 m->errorOut(e, "QualityScores", "getSequenceName");
102                 exit(1);
103         }
104 }
105 //********************************************************************************************************************
106 void QualityScores::setName(string name) {
107         try {
108       
109         for (int i = 0; i < name.length(); i++) {
110             if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
111         }     
112     
113         seqName = name;
114         }
115         catch(exception& e) {
116                 m->errorOut(e, "QualityScores", "setName");
117                 exit(1);
118         }
119 }
120 /**************************************************************************************************/
121
122 string QualityScores::getName(){
123         
124         try {
125                 return seqName;
126         }
127         catch(exception& e) {
128                 m->errorOut(e, "QualityScores", "getName");
129                 exit(1);
130         }                                                       
131 }
132
133 /**************************************************************************************************/
134
135 void QualityScores::printQScores(ofstream& qFile){
136         try {
137                 
138                 double aveQScore = calculateAverage();
139                 
140                 qFile << '>' << seqName << '\t' << aveQScore << endl;
141                 
142                 for(int i=0;i<seqLength;i++){
143                         qFile << qScores[i] << ' ';
144                 }
145                 qFile << endl;
146                                 
147         }
148         catch(exception& e) {
149                 m->errorOut(e, "QualityScores", "printQScores");
150                 exit(1);
151         }                                                       
152 }
153
154 /**************************************************************************************************/
155
156 void QualityScores::trimQScores(int start, int end){
157         try {
158                 vector<int> hold;
159                 
160
161                 //cout << seqName << '\t' << start << '\t' << end << '\t' << qScores.size() << endl;
162                 //for (int i = 0; i < qScores.size(); i++) { cout << qScores[i] << end; }
163                 if(end == -1){          
164                         hold = vector<int>(qScores.begin()+start, qScores.end());
165                         qScores = hold;         
166                 }
167                 if(start == -1){
168                         if(qScores.size() > end){
169                                 hold = vector<int>(qScores.begin(), qScores.begin()+end);
170                                 qScores = hold;         
171                         }
172                 }
173
174                 seqLength = qScores.size();
175         }
176         catch(exception& e) {
177                 m->errorOut(e, "QualityScores", "trimQScores");
178                 exit(1);
179         }                                                       
180 }
181
182 /**************************************************************************************************/
183
184 void QualityScores::flipQScores(){
185         try {
186                 
187                 vector<int> temp = qScores;
188                 for(int i=0;i<seqLength;i++){
189                         qScores[seqLength - i - 1] = temp[i];
190                 }
191                 
192         }
193         catch(exception& e) {
194                 m->errorOut(e, "QualityScores", "flipQScores");
195                 exit(1);
196         }                                                       
197 }
198
199 /**************************************************************************************************/
200
201 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
202         try {
203                 string rawSequence = sequence.getUnaligned();
204                 int seqLength = sequence.getNumBases();
205                 
206                 if(seqName != sequence.getName()){
207                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
208                         m->mothurOutEndLine();  
209                 }
210                 
211                 int end;
212                 for(int i=0;i<seqLength;i++){
213                         end = i;
214                         if(qScores[i] < qThreshold){
215                                 break;
216                         }
217                 }
218                 
219                 //every score passed
220                 if (end == (seqLength-1)) { end = seqLength; }
221                 
222                 sequence.setUnaligned(rawSequence.substr(0,end));
223                 trimQScores(-1, end);
224                 
225                 return 1;
226         }
227         catch(exception& e) {
228                 m->errorOut(e, "QualityScores", "flipQScores");
229                 exit(1);
230         }                                                       
231         
232 }
233
234 /**************************************************************************************************/
235
236 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
237         try {
238                 string rawSequence = sequence.getUnaligned();
239                 int seqLength = sequence.getNumBases();
240                 
241                 if(seqName != sequence.getName()){
242                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
243                         m->mothurOutEndLine();  
244                 }
245                 
246                 int end = -1;
247                 double rollingSum = 0.0000;
248                 
249                 for(int i=0;i<seqLength;i++){
250
251                         rollingSum += (double)qScores[i];
252                         
253                         if(rollingSum / (double)(i+1) < qThreshold){
254                                 end = i;
255                                 break;
256                         }
257                 }
258                 
259                 if(end == -1){  end = seqLength;        }
260                 
261                 
262                 sequence.setUnaligned(rawSequence.substr(0,end));
263                 trimQScores(-1, end);
264                 
265                 
266                 return 1;
267         }
268         catch(exception& e) {
269                 m->errorOut(e, "QualityScores", "flipQScores");
270                 exit(1);
271         }                                                       
272         
273 }
274
275 /**************************************************************************************************/
276
277 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
278         try {
279                 string rawSequence = sequence.getUnaligned();
280                 int seqLength = sequence.getNumBases();
281                 
282                 if(seqName != sequence.getName()){
283                         m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
284                         m->mothurOutEndLine();
285                 }
286                 
287                 int end = windowSize;
288                 int start = 0;
289
290                 if(seqLength < windowSize) {    return 0;       }
291                         
292                 while((start+windowSize) < seqLength){
293                         double windowSum = 0.0000;
294
295                         for(int i=start;i<end;i++){
296                                 windowSum += qScores[i];
297                         }
298                         double windowAverage = windowSum / (double)(end-start);
299                                 
300                         if(windowAverage < qThreshold){
301                                 end = end - stepSize;
302                                 break;
303                         }
304                         
305                         start += stepSize;
306                         end = start + windowSize;
307                                 
308                         if(end >= seqLength){   end = seqLength;        }
309                                 
310                 }
311         
312                 if(end == -1){  end = seqLength;        }
313                 
314                 //failed first window
315                 if (end < windowSize) { return 0; }
316                         
317                 sequence.setUnaligned(rawSequence.substr(0,end));
318                 trimQScores(-1, end);
319                 
320                 return 1;
321         }
322         catch(exception& e) {
323                 m->errorOut(e, "QualityScores", "stripQualWindowAverage");
324                 exit(1);
325         }                                                       
326         
327 }
328
329 /**************************************************************************************************/
330
331 double QualityScores::calculateAverage(){
332         
333         double aveQScore = 0.0000;
334         
335         for(int i=0;i<seqLength;i++){
336                 aveQScore += (double) qScores[i];
337         }
338         aveQScore /= (double) seqLength;
339         
340         return aveQScore;
341 }
342
343 /**************************************************************************************************/
344
345 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
346         try {
347                 string rawSequence = sequence.getUnaligned();
348                 bool success = 0;       //guilty until proven innocent
349                 
350                 if(seqName != sequence.getName())       {
351                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
352                         m->mothurOutEndLine();  
353                 } 
354                         
355                 double aveQScore = calculateAverage();
356                 
357                 if(aveQScore >= qAverage)       {       success = 1;    }
358                 else                                            {       success = 0;    }
359                 
360                 return success;
361         }
362         catch(exception& e) {
363                 m->errorOut(e, "QualityScores", "cullQualAverage");
364                 exit(1);
365         }
366 }
367
368 /**************************************************************************************************/
369
370 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
371         try {
372
373                 int seqLength = errorSeq.size();
374                 
375                 int qIndex = start - 1;
376
377                 for(int i=0;i<seqLength;i++){
378                         
379                         if(errorSeq[i] == 'm')          {       qualErrorMap['m'][qScores[qIndex]] += weight;   }
380                         else if(errorSeq[i] == 's')     {       qualErrorMap['s'][qScores[qIndex]] += weight;   }
381                         else if(errorSeq[i] == 'i')     {       qualErrorMap['i'][qScores[qIndex]] += weight;   }
382                         else if(errorSeq[i] == 'a')     {       qualErrorMap['a'][qScores[qIndex]] += weight;   /*if(qScores[qIndex] != 0){     cout << qIndex << '\t';         }*/     }
383                         else if(errorSeq[i] == 'd')     {       /*      there are no qScores for deletions      */              }
384
385                         if(errorSeq[i] != 'd')          {       qIndex++;       }
386                                                 
387                         if(qIndex > stop){      break;  }
388                 }       
389         }
390         catch(exception& e) {
391                 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
392                 exit(1);
393         }
394 }
395
396 /**************************************************************************************************/
397
398 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
399         try {
400                 
401                 int index = 0;
402                 for(int i=start-1;i<stop;i++){
403                         forwardMap[index++][qScores[i]] += weight;
404                 }
405                 
406         }
407         catch(exception& e) {
408                 m->errorOut(e, "QualityScores", "updateForwardMap");
409                 exit(1);
410         }
411 }
412
413 /**************************************************************************************************/
414
415 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
416         try {
417                 
418                 int index = 0;
419                 for(int i=stop-1;i>=start-1;i--){
420                         reverseMap[index++][qScores[i]] += weight;
421                 }
422                 
423         }       
424         catch(exception& e) {
425                 m->errorOut(e, "QualityScores", "updateReverseMap");
426                 exit(1);
427         }
428 }
429
430 /**************************************************************************************************/