]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
added oligos class. added check orient parameter to trim.flows, sffinfo, fastq.info...
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19                 
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "QualityScores", "QualityScores");
23                 exit(1);
24         }                                                       
25 }
26 /**************************************************************************************************/
27
28 QualityScores::QualityScores(string n, vector<int> s){
29         try {
30                 m = MothurOut::getInstance();
31                 setName(n);
32         setScores(s);
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "QualityScores", "QualityScores");
36                 exit(1);
37         }
38 }
39 /**************************************************************************************************/
40
41 QualityScores::QualityScores(ifstream& qFile){
42         try {
43                 
44                 m = MothurOut::getInstance();
45
46                 int score;
47                 seqName = getSequenceName(qFile); m->gobble(qFile);
48                 
49         if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "'\n.");  }
50         
51                 if (!m->control_pressed) {
52             string qScoreString = m->getline(qFile); m->gobble(qFile);
53             
54             if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + qScoreString + "'\n.");  }
55             
56             while(qFile.peek() != '>' && qFile.peek() != EOF){
57                 if (m->control_pressed) { break; }
58                 string temp = m->getline(qFile); m->gobble(qFile);
59                 //if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + temp + "'\n.");  }
60                 qScoreString +=  ' ' + temp;
61             }
62             //cout << "done reading " << endl;
63             istringstream qScoreStringStream(qScoreString);
64             int count = 0;
65             while(!qScoreStringStream.eof()){
66                 if (m->control_pressed) { break; }
67                 string temp;
68                 qScoreStringStream >> temp;  m->gobble(qScoreStringStream);
69                 
70                 //if (m->debug) { m->mothurOut("[DEBUG]: score " + toString(qScores.size()) + " = '" + temp + "'\n.");  }
71                 
72                 //check temp to make sure its a number
73                 if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
74                 convert(temp, score);
75                 
76                 //cout << count << '\t' << score << endl;
77                 qScores.push_back(score);
78                 count++;
79             }
80         }
81                 
82                 seqLength = qScores.size();
83                 //cout << "seqlength = " << seqLength  << endl;
84                 
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "QualityScores", "QualityScores");
88                 exit(1);
89         }                                                       
90         
91 }
92 //********************************************************************************************************************
93 string QualityScores::getSequenceName(ifstream& qFile) {
94         try {
95                 string name = "";
96                 
97         qFile >> name;
98         m->getline(qFile);
99                 
100                 if (name.length() != 0) { 
101             
102                         name = name.substr(1); 
103             
104             m->checkName(name);
105             
106         }else{ m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;  }
107         
108                 return name;
109         }
110         catch(exception& e) {
111                 m->errorOut(e, "QualityScores", "getSequenceName");
112                 exit(1);
113         }
114 }
115 //********************************************************************************************************************
116 void QualityScores::setName(string name) {
117         try {
118       
119         m->checkName(name);   
120         seqName = name;
121         }
122         catch(exception& e) {
123                 m->errorOut(e, "QualityScores", "setName");
124                 exit(1);
125         }
126 }
127 /**************************************************************************************************/
128
129 string QualityScores::getName(){
130         
131         try {
132                 return seqName;
133         }
134         catch(exception& e) {
135                 m->errorOut(e, "QualityScores", "getName");
136                 exit(1);
137         }                                                       
138 }
139
140 /**************************************************************************************************/
141
142 void QualityScores::printQScores(ofstream& qFile){
143         try {
144                 
145                 double aveQScore = calculateAverage(false);
146                 
147                 qFile << '>' << seqName << '\t' << aveQScore << endl;
148                 
149                 for(int i=0;i<seqLength;i++){
150                         qFile << qScores[i] << ' ';
151                 }
152                 qFile << endl;
153                                 
154         }
155         catch(exception& e) {
156                 m->errorOut(e, "QualityScores", "printQScores");
157                 exit(1);
158         }                                                       
159 }
160
161 /**************************************************************************************************/
162
163 void QualityScores::trimQScores(int start, int end){
164         try {
165                 vector<int> hold;
166                 
167
168                 //cout << seqName << '\t' << start << '\t' << end << '\t' << qScores.size() << endl;
169                 //for (int i = 0; i < qScores.size(); i++) { cout << qScores[i] << end; }
170                 if(end == -1){          
171                         hold = vector<int>(qScores.begin()+start, qScores.end());
172                         qScores = hold;         
173                 }
174                 if(start == -1){
175                         if(qScores.size() > end){
176                                 hold = vector<int>(qScores.begin(), qScores.begin()+end);
177                                 qScores = hold;         
178                         }
179                 }
180
181                 seqLength = qScores.size();
182         }
183         catch(exception& e) {
184                 m->errorOut(e, "QualityScores", "trimQScores");
185                 exit(1);
186         }                                                       
187 }
188
189 /**************************************************************************************************/
190
191 void QualityScores::flipQScores(){
192         try {
193                 
194                 vector<int> temp = qScores;
195                 for(int i=0;i<seqLength;i++){
196                         qScores[seqLength - i - 1] = temp[i];
197                 }
198                 
199         }
200         catch(exception& e) {
201                 m->errorOut(e, "QualityScores", "flipQScores");
202                 exit(1);
203         }                                                       
204 }
205
206 /**************************************************************************************************/
207
208 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
209         try {
210                 string rawSequence = sequence.getUnaligned();
211                 int seqLength = sequence.getNumBases();
212                 
213                 if(seqName != sequence.getName()){
214                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
215                         m->mothurOutEndLine();  m->control_pressed = true;
216                 }
217                 
218                 int end;
219                 for(int i=0;i<seqLength;i++){
220                         end = i;
221                         if(qScores[i] < qThreshold){
222                                 break;
223                         }
224                 }
225                 
226                 //every score passed
227                 if (end == (seqLength-1)) { end = seqLength; }
228                 
229                 sequence.setUnaligned(rawSequence.substr(0,end));
230                 trimQScores(-1, end);
231                 
232                 return 1;
233         }
234         catch(exception& e) {
235                 m->errorOut(e, "QualityScores", "flipQScores");
236                 exit(1);
237         }                                                       
238         
239 }
240
241 /**************************************************************************************************/
242
243 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold, bool logTransform){
244         try {
245                 string rawSequence = sequence.getUnaligned();
246                 int seqLength = sequence.getNumBases();
247                 
248                 if(seqName != sequence.getName()){
249                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
250                         m->mothurOutEndLine();  
251                 }
252                 
253                 int end = -1;
254                 double rollingSum = 0.0000;
255         double value = 0.0;
256                 
257                 for(int i=0;i<seqLength;i++){
258             
259             if (logTransform)   {
260                 rollingSum += (double)pow(10.0, qScores[i]);
261                 value = log10(rollingSum / (double)(i+1));
262                 
263             } //Sum 10^Q
264             else                {
265                 rollingSum += (double)qScores[i];
266                 value = rollingSum / (double)(i+1);
267             }
268                         
269                         
270                         if(value < qThreshold){
271                                 end = i;
272                                 break;
273                         }
274                 }
275                 
276                 if(end == -1){  end = seqLength;        }
277                 
278                 
279                 sequence.setUnaligned(rawSequence.substr(0,end));
280                 trimQScores(-1, end);
281                 
282                 
283                 return 1;
284         }
285         catch(exception& e) {
286                 m->errorOut(e, "QualityScores", "flipQScores");
287                 exit(1);
288         }                                                       
289         
290 }
291
292 /**************************************************************************************************/
293
294 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold, bool logTransform){
295         try {
296                 string rawSequence = sequence.getUnaligned();
297                 int seqLength = sequence.getNumBases();
298                 
299                 if(seqName != sequence.getName()){
300                         m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
301                         m->mothurOutEndLine();
302                 }
303                 
304                 int end = windowSize;
305                 int start = 0;
306
307                 if(seqLength < windowSize) {    return 0;       }
308                         
309                 while((start+windowSize) < seqLength){
310                         double windowSum = 0.0000;
311
312                         for(int i=start;i<end;i++){
313                 if (logTransform)   {  windowSum += pow(10.0, qScores[i]);  }
314                 else                {  windowSum += qScores[i];             }
315                         }
316                         double windowAverage = 0.0;
317             if (logTransform)   { windowAverage = log10(windowSum / (double)(end-start)); }
318             else                { windowAverage = windowSum / (double)(end-start);      }
319                                 
320                         if(windowAverage < qThreshold){
321                                 end = end - stepSize;
322                                 break;
323                         }
324                         
325                         start += stepSize;
326                         end = start + windowSize;
327                                 
328                         if(end >= seqLength){   end = seqLength;        }
329                                 
330                 }
331         
332                 if(end == -1){  end = seqLength;        }
333                 
334                 //failed first window
335                 if (end < windowSize) { return 0; }
336                         
337                 sequence.setUnaligned(rawSequence.substr(0,end));
338                 trimQScores(-1, end);
339                 
340                 return 1;
341         }
342         catch(exception& e) {
343                 m->errorOut(e, "QualityScores", "stripQualWindowAverage");
344                 exit(1);
345         }                                                       
346         
347 }
348
349 /**************************************************************************************************/
350
351 double QualityScores::calculateAverage(bool logTransform){
352         
353         double aveQScore = 0.0000;
354         
355         for(int i=0;i<seqLength;i++){
356         if (logTransform)   {  aveQScore += pow(10.0, qScores[i]);  }
357         else                {  aveQScore += qScores[i];             }
358         }
359     
360     if (logTransform)   {  aveQScore = log10(aveQScore /(double) seqLength);    }
361     else                {  aveQScore /= (double) seqLength;                     }
362         
363         return aveQScore;
364 }
365
366 /**************************************************************************************************/
367
368 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage, bool logTransform){
369         try {
370                 string rawSequence = sequence.getUnaligned();
371                 bool success = 0;       //guilty until proven innocent
372                 
373                 if(seqName != sequence.getName())       {
374                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
375                         m->mothurOutEndLine();  
376                 } 
377                         
378                 double aveQScore = calculateAverage(logTransform);
379                 
380         if (m->debug) { m->mothurOut("[DEBUG]: " + sequence.getName() + " average = " + toString(aveQScore) + "\n"); }
381         
382                 if(aveQScore >= qAverage)       {       success = 1;    }
383                 else                                            {       success = 0;    }
384                 
385                 return success;
386         }
387         catch(exception& e) {
388                 m->errorOut(e, "QualityScores", "cullQualAverage");
389                 exit(1);
390         }
391 }
392
393 /**************************************************************************************************/
394
395 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
396         try {
397
398                 int seqLength = errorSeq.size();
399                 
400                 int qIndex = start - 1;
401
402                 for(int i=0;i<seqLength;i++){
403                         
404                         if(errorSeq[i] == 'm')          {       qualErrorMap['m'][qScores[qIndex]] += weight;   }
405                         else if(errorSeq[i] == 's')     {       qualErrorMap['s'][qScores[qIndex]] += weight;   }
406                         else if(errorSeq[i] == 'i')     {       qualErrorMap['i'][qScores[qIndex]] += weight;   }
407                         else if(errorSeq[i] == 'a')     {       qualErrorMap['a'][qScores[qIndex]] += weight;   /*if(qScores[qIndex] != 0){     cout << qIndex << '\t';         }*/     }
408                         else if(errorSeq[i] == 'd')     {       /*      there are no qScores for deletions      */              }
409
410                         if(errorSeq[i] != 'd')          {       qIndex++;       }
411                                                 
412                         if(qIndex > stop){      break;  }
413                 }       
414         }
415         catch(exception& e) {
416                 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
417                 exit(1);
418         }
419 }
420
421 /**************************************************************************************************/
422
423 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
424         try {
425                 
426                 int index = 0;
427                 for(int i=start-1;i<stop;i++){
428                         forwardMap[index++][qScores[i]] += weight;
429                 }
430                 
431         }
432         catch(exception& e) {
433                 m->errorOut(e, "QualityScores", "updateForwardMap");
434                 exit(1);
435         }
436 }
437
438 /**************************************************************************************************/
439
440 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
441         try {
442                 
443                 int index = 0;
444                 for(int i=stop-1;i>=start-1;i--){
445                         reverseMap[index++][qScores[i]] += weight;
446                 }
447                 
448         }       
449         catch(exception& e) {
450                 m->errorOut(e, "QualityScores", "updateReverseMap");
451                 exit(1);
452         }
453 }
454
455 /**************************************************************************************************/