]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
modified reportfile class
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19                 
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "QualityScores", "QualityScores");
23                 exit(1);
24         }                                                       
25 }
26
27 /**************************************************************************************************/
28
29 QualityScores::QualityScores(ifstream& qFile){
30         try {
31                 
32                 m = MothurOut::getInstance();
33                 
34                 seqName = "";
35                 int score;
36                 
37                 qFile >> seqName; 
38                 m->getline(qFile);
39                 
40                 if (seqName == "")      {
41                         m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
42                         m->mothurOutEndLine(); 
43                 }
44                 else{
45                         seqName = seqName.substr(1);
46                 }
47                 
48                 string qScoreString = m->getline(qFile);
49                 
50                 istringstream qScoreStringStream(qScoreString);
51                 while(!qScoreStringStream.eof()){
52                         qScoreStringStream >> score;
53                         qScores.push_back(score);
54                 }
55                 qScores.pop_back();
56                 seqLength = qScores.size();
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "QualityScores", "QualityScores");
60                 exit(1);
61         }                                                       
62         
63 }
64
65 /**************************************************************************************************/
66
67 string QualityScores::getName(){
68         
69         try {
70                 return seqName;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "QualityScores", "getName");
74                 exit(1);
75         }                                                       
76 }
77
78 /**************************************************************************************************/
79
80 void QualityScores::printQScores(ofstream& qFile){
81         try {
82                 
83                 double aveQScore = calculateAverage();
84                 
85                 qFile << '>' << seqName << '\t' << aveQScore << endl;
86                 
87                 for(int i=0;i<seqLength;i++){
88                         qFile << qScores[i] << ' ';
89                 }
90                 qFile << endl;
91                                 
92         }
93         catch(exception& e) {
94                 m->errorOut(e, "QualityScores", "printQScores");
95                 exit(1);
96         }                                                       
97 }
98
99 /**************************************************************************************************/
100
101 void QualityScores::trimQScores(int start, int end){
102         try {
103                 vector<int> hold;
104                 
105                 if(end == -1){          
106                         hold = vector<int>(qScores.begin()+start, qScores.end());
107                         qScores = hold;         
108                 }
109                 if(start == -1){
110                         if(qScores.size() > end){
111                                 hold = vector<int>(qScores.begin(), qScores.begin()+end);
112                                 qScores = hold;         
113                         }
114                 }
115
116                 seqLength = qScores.size();
117         }
118         catch(exception& e) {
119                 m->errorOut(e, "QualityScores", "trimQScores");
120                 exit(1);
121         }                                                       
122 }
123
124 /**************************************************************************************************/
125
126 void QualityScores::flipQScores(){
127         try {
128                 
129                 vector<int> temp = qScores;
130                 for(int i=0;i<seqLength;i++){
131                         qScores[seqLength - i - 1] = temp[i];
132                 }
133                 
134         }
135         catch(exception& e) {
136                 m->errorOut(e, "QualityScores", "flipQScores");
137                 exit(1);
138         }                                                       
139 }
140
141 /**************************************************************************************************/
142
143 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
144         try {
145                 string rawSequence = sequence.getUnaligned();
146                 int seqLength = sequence.getNumBases();
147                 
148                 if(seqName != sequence.getName()){
149                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
150                         m->mothurOutEndLine();  
151                 }
152                 
153                 int end;
154                 for(int i=0;i<seqLength;i++){
155                         end = i;
156                         if(qScores[i] < qThreshold){
157                                 break;
158                         }
159                 }
160                 
161                 //every score passed
162                 if (end == (seqLength-1)) { end = seqLength; }
163                 
164                 sequence.setUnaligned(rawSequence.substr(0,end));
165                 trimQScores(-1, end);
166                 
167                 return 1;
168         }
169         catch(exception& e) {
170                 m->errorOut(e, "QualityScores", "flipQScores");
171                 exit(1);
172         }                                                       
173         
174 }
175
176 /**************************************************************************************************/
177
178 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
179         try {
180                 string rawSequence = sequence.getUnaligned();
181                 int seqLength = sequence.getNumBases();
182                 
183                 if(seqName != sequence.getName()){
184                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
185                         m->mothurOutEndLine();  
186                 }
187                 
188                 int end = -1;
189                 double rollingSum = 0.0000;
190                 
191                 for(int i=0;i<seqLength;i++){
192
193                         rollingSum += (double)qScores[i];
194                         
195                         if(rollingSum / (double)(i+1) < qThreshold){
196                                 end = i;
197 //                              cout << i+1 << '\t' << seqName << '\t' << rollingSum / (double)(i+1) << endl;
198                                 break;
199                         }
200                 }
201                 
202                 if(end == -1){  end = seqLength;        }
203                 
204                 
205                 sequence.setUnaligned(rawSequence.substr(0,end));
206                 trimQScores(-1, end);
207                 
208                 
209                 return 1;
210         }
211         catch(exception& e) {
212                 m->errorOut(e, "QualityScores", "flipQScores");
213                 exit(1);
214         }                                                       
215         
216 }
217
218 /**************************************************************************************************/
219
220 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
221         try {
222                 string rawSequence = sequence.getUnaligned();
223                 int seqLength = sequence.getNumBases();
224                 
225                 if(seqName != sequence.getName()){
226                         m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
227                         m->mothurOutEndLine();
228                 }
229                 
230                 int end = windowSize;
231                 int start = 0;
232                 
233                 if(seqLength < windowSize) {    return 0;       }
234                         
235                 while(start < seqLength){
236                         double windowSum = 0.0000;
237
238                         for(int i=start;i<end;i++){
239                                 windowSum += qScores[i];
240                         }
241                         double windowAverage = windowSum / (double)(end-start);
242                         
243                         if(windowAverage < qThreshold){
244                                 end = end - stepSize;
245                                 break;
246                         }
247                         start += stepSize;
248                         end = start + windowSize;
249                         if(end >= seqLength){   end = seqLength - 1;    }
250                 }
251                 
252                 if(end == -1){  end = seqLength;        }
253                 
254                 sequence.setUnaligned(rawSequence.substr(0,end));
255                 trimQScores(-1, end);
256                 
257                 return 1;
258         }
259         catch(exception& e) {
260                 m->errorOut(e, "QualityScores", "flipQScores");
261                 exit(1);
262         }                                                       
263         
264 }
265
266 /**************************************************************************************************/
267
268 double QualityScores::calculateAverage(){
269         
270         double aveQScore = 0.0000;
271         
272         for(int i=0;i<seqLength;i++){
273                 aveQScore += (double) qScores[i];
274         }
275         aveQScore /= (double) seqLength;
276         
277         return aveQScore;
278 }
279
280 /**************************************************************************************************/
281
282 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
283         try {
284                 string rawSequence = sequence.getUnaligned();
285                 bool success = 0;       //guilty until proven innocent
286                 
287                 if(seqName != sequence.getName())       {
288                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
289                         m->mothurOutEndLine();  
290                 } 
291                         
292                 double aveQScore = calculateAverage();
293                 
294                 if(aveQScore >= qAverage)       {       success = 1;    }
295                 else                                            {       success = 0;    }
296                 
297                 return success;
298         }
299         catch(exception& e) {
300                 m->errorOut(e, "QualityScores", "cullQualAverage");
301                 exit(1);
302         }
303 }
304
305 /**************************************************************************************************/
306
307 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
308         try {
309
310                 int seqLength = errorSeq.size();
311                 
312                 int qIndex = start - 1;
313                 cout << start << '\t' << stop << '\t' << seqLength << endl;
314                 for(int i=0;i<seqLength;i++){
315                         
316                         if(errorSeq[i] == 'm')          {       qualErrorMap['m'][qScores[qIndex]] += weight;   }
317                         else if(errorSeq[i] == 's')     {       qualErrorMap['s'][qScores[qIndex]] += weight;   }
318                         else if(errorSeq[i] == 'i')     {       qualErrorMap['i'][qScores[qIndex]] += weight;   }
319                         else if(errorSeq[i] == 'a')     {       qualErrorMap['a'][qScores[qIndex]] += weight;   }
320                         else if(errorSeq[i] == 'd')     {       /*      there are no qScores for deletions      */              }
321
322                         if(errorSeq[i] != 'd')          {       qIndex++;       }
323                         if(qIndex > stop){      break;  }
324                 }       
325         }
326         catch(exception& e) {
327                 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
328                 exit(1);
329         }
330 }
331
332 /**************************************************************************************************/
333
334 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
335         try {
336                 
337                 int index = 0;
338                 for(int i=start-1;i<stop;i++){
339                         forwardMap[index++][qScores[i]] += weight;
340                 }
341                 
342         }
343         catch(exception& e) {
344                 m->errorOut(e, "QualityScores", "updateForwardMap");
345                 exit(1);
346         }
347 }
348
349 /**************************************************************************************************/
350
351 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
352         try {
353                 
354                 int index = 0;
355                 for(int i=stop-1;i>=start;i--){
356                         reverseMap[index++][qScores[i]] += weight;
357                 }
358                 
359         }       
360         catch(exception& e) {
361                 m->errorOut(e, "QualityScores", "updateForwardMap");
362                 exit(1);
363         }
364 }
365
366 /**************************************************************************************************/