]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
added homova command
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19                 
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "QualityScores", "QualityScores");
23                 exit(1);
24         }                                                       
25 }
26
27 /**************************************************************************************************/
28
29 QualityScores::QualityScores(ifstream& qFile, int l){
30         try {
31                 
32                 m = MothurOut::getInstance();
33
34                 seqName = "";
35                 seqLength = l;
36                 int score;
37                 
38                 qFile >> seqName; 
39
40                 while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13 || c == -1){       break;  }       } // get rest of line 
41                 m->gobble(qFile);
42                 if (seqName == "") { m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg())); m->mothurOutEndLine(); }
43                 else {
44                         seqName = seqName.substr(1); 
45                 }
46                 
47                 //m->getline(qFile, line);
48                 //istringstream qualStream(line);
49         
50                 //while(qualStream){
51                 //      qualStream >> score;
52                 //      qScores.push_back(score);
53                 //}
54                 //qScores.pop_back();
55                 
56                 //seqLength = qScores.size();   
57                 
58                 /*while(!in.eof()){     
59                         string saveName = "";
60                         string name = "";
61                         string scores = "";
62                         
63                         in >> name; 
64                         //cout << name << endl;         
65                         if (name.length() != 0) { 
66                                 saveName = name.substr(1);
67                                 while (!in.eof())       {       
68                                         char c = in.get(); 
69                                         if (c == 10 || c == 13){        break;  }
70                                         else { name += c; }     
71                                 } 
72                                 m->gobble(in);
73                         }
74                         
75                         while(in){
76                                 char letter= in.get();
77                                 if(letter == '>'){      in.putback(letter);     break;  }
78                                 else{ scores += letter; }
79                         }
80                         
81                  //istringstream iss (scores,istringstream::in);
82                  
83                  //int count = 0; int tempScore;
84                  //while (iss) { iss >> tempScore; count++; }
85                  //cout << saveName << '\t' << count << endl;   
86                         
87                         m->gobble(in);
88                 }*/             
89                 
90                 
91                 
92                 for(int i=0;i<seqLength;i++){
93                         qFile >> score;
94                         qScores.push_back(score);
95                 }
96                 m->gobble(qFile);
97
98         }
99         catch(exception& e) {
100                 m->errorOut(e, "QualityScores", "QualityScores");
101                 exit(1);
102         }                                                       
103
104 }
105 /**************************************************************************************************/
106
107 string QualityScores::getName(){
108         
109         try {
110                 return seqName;
111         }
112         catch(exception& e) {
113                 m->errorOut(e, "QualityScores", "getName");
114                 exit(1);
115         }                                                       
116 }
117
118 /**************************************************************************************************/
119
120 void QualityScores::printQScores(ofstream& qFile){
121         try {
122                 
123                 double aveQScore = calculateAverage();
124                 
125                 qFile << '>' << seqName << '\t' << aveQScore << endl;
126                 
127                 for(int i=0;i<seqLength;i++){
128                         qFile << qScores[i] << ' ';
129                 }
130                 qFile << endl;
131                                 
132         }
133         catch(exception& e) {
134                 m->errorOut(e, "QualityScores", "printQScores");
135                 exit(1);
136         }                                                       
137 }
138
139 /**************************************************************************************************/
140
141 void QualityScores::trimQScores(int start, int end){
142         try {
143                 vector<int> hold;
144                 
145                 if(end == -1){          
146                         hold = vector<int>(qScores.begin()+start, qScores.end());
147                         qScores = hold;         
148                 }
149                 if(start == -1){
150                         if(qScores.size() > end){
151                                 hold = vector<int>(qScores.begin(), qScores.begin()+end);
152                                 qScores = hold;         
153                         }
154                 }
155
156                 seqLength = qScores.size();
157         }
158         catch(exception& e) {
159                 m->errorOut(e, "QualityScores", "trimQScores");
160                 exit(1);
161         }                                                       
162 }
163
164 /**************************************************************************************************/
165
166 void QualityScores::flipQScores(){
167         try {
168                 
169                 vector<int> temp = qScores;
170                 for(int i=0;i<seqLength;i++){
171                         qScores[seqLength - i - 1] = temp[i];
172                 }
173                 
174         }
175         catch(exception& e) {
176                 m->errorOut(e, "QualityScores", "flipQScores");
177                 exit(1);
178         }                                                       
179 }
180
181 /**************************************************************************************************/
182
183 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
184         try {
185                 string rawSequence = sequence.getUnaligned();
186                 int seqLength = sequence.getNumBases();
187                 
188                 if(seqName != sequence.getName()){
189                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
190                         m->mothurOutEndLine();  
191                 }
192                 
193                 int end;
194                 for(int i=0;i<seqLength;i++){
195                         end = i;
196                         if(qScores[i] < qThreshold){
197                                 break;
198                         }
199                 }
200                 
201                 //every score passed
202                 if (end == (seqLength-1)) { end = seqLength; }
203                 
204                 sequence.setUnaligned(rawSequence.substr(0,end));
205                 trimQScores(-1, end);
206                 
207                 return 1;
208         }
209         catch(exception& e) {
210                 m->errorOut(e, "QualityScores", "flipQScores");
211                 exit(1);
212         }                                                       
213         
214 }
215
216 /**************************************************************************************************/
217
218 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
219         try {
220                 string rawSequence = sequence.getUnaligned();
221                 int seqLength = sequence.getNumBases();
222                 
223                 if(seqName != sequence.getName()){
224                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
225                         m->mothurOutEndLine();  
226                 }
227                 
228                 int end = -1;
229                 double rollingSum = 0.0000;
230                 
231                 for(int i=0;i<seqLength;i++){
232
233                         rollingSum += (double)qScores[i];
234                         
235                         if(rollingSum / (double)(i+1) < qThreshold){
236                                 end = i;
237 //                              cout << i+1 << '\t' << seqName << '\t' << rollingSum / (double)(i+1) << endl;
238                                 break;
239                         }
240                 }
241                 
242                 if(end == -1){  end = seqLength;        }
243                 
244                 
245                 sequence.setUnaligned(rawSequence.substr(0,end));
246                 trimQScores(-1, end);
247                 
248                 
249                 return 1;
250         }
251         catch(exception& e) {
252                 m->errorOut(e, "QualityScores", "flipQScores");
253                 exit(1);
254         }                                                       
255         
256 }
257
258 /**************************************************************************************************/
259
260 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
261         try {
262                 string rawSequence = sequence.getUnaligned();
263                 int seqLength = sequence.getNumBases();
264                 
265                 if(seqName != sequence.getName()){
266                         m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
267                         m->mothurOutEndLine();
268                 }
269                 
270                 int end = windowSize;
271                 int start = 0;
272                 
273                 if(seqLength < windowSize) {    return 0;       }
274                         
275                 while(start < seqLength){
276                         double windowSum = 0.0000;
277
278                         for(int i=start;i<end;i++){
279                                 windowSum += qScores[i];
280                         }
281                         double windowAverage = windowSum / (double)(end-start);
282                         
283                         if(windowAverage < qThreshold){
284                                 end = end - stepSize;
285                                 break;
286                         }
287                         start += stepSize;
288                         end = start + windowSize;
289                         if(end >= seqLength){   end = seqLength - 1;    }
290                 }
291                 
292                 if(end == -1){  end = seqLength;        }
293                 
294                 sequence.setUnaligned(rawSequence.substr(0,end));
295                 trimQScores(-1, end);
296                 
297                 return 1;
298         }
299         catch(exception& e) {
300                 m->errorOut(e, "QualityScores", "flipQScores");
301                 exit(1);
302         }                                                       
303         
304 }
305
306 /**************************************************************************************************/
307
308 double QualityScores::calculateAverage(){
309         
310         double aveQScore = 0.0000;
311         
312         for(int i=0;i<seqLength;i++){
313                 aveQScore += (double) qScores[i];
314         }
315         aveQScore /= (double) seqLength;
316         
317         return aveQScore;
318 }
319
320 /**************************************************************************************************/
321
322 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
323         try {
324                 string rawSequence = sequence.getUnaligned();
325                 bool success = 0;       //guilty until proven innocent
326                 
327                 if(seqName != sequence.getName())       {
328                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
329                         m->mothurOutEndLine();  
330                 } 
331                         
332                 double aveQScore = calculateAverage();
333                 
334                 if(aveQScore >= qAverage)       {       success = 1;    }
335                 else                                            {       success = 0;    }
336                 
337                 return success;
338         }
339         catch(exception& e) {
340                 m->errorOut(e, "QualityScores", "cullQualAverage");
341                 exit(1);
342         }
343 }
344
345 /**************************************************************************************************/
346
347 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
348         try {
349
350                 int seqLength = errorSeq.size();
351                 int qIndex = start - 1;
352                 for(int i=0;i<seqLength;i++){
353                         
354                         if(errorSeq[i] == 'm')          {       qualErrorMap['m'][qScores[qIndex]] += weight;   }
355                         else if(errorSeq[i] == 's')     {       qualErrorMap['s'][qScores[qIndex]] += weight;   }
356                         else if(errorSeq[i] == 'i')     {       qualErrorMap['i'][qScores[qIndex]] += weight;   }
357                         else if(errorSeq[i] == 'a')     {       qualErrorMap['a'][qScores[qIndex]] += weight;   }
358                         else if(errorSeq[i] == 'd')     {       /*      there are no qScores for deletions      */              }
359
360                         if(errorSeq[i] != 'd')          {       qIndex++;       }
361
362                 }       
363         }
364         catch(exception& e) {
365                 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
366                 exit(1);
367         }
368 }
369
370 /**************************************************************************************************/
371
372 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
373         try {
374                 
375                 int index = 0;
376                 for(int i=start-1;i<stop;i++){
377                         forwardMap[index++][qScores[i]] += weight;
378                 }
379                 
380         }
381         catch(exception& e) {
382                 m->errorOut(e, "QualityScores", "updateForwardMap");
383                 exit(1);
384         }
385 }
386
387 /**************************************************************************************************/
388
389 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
390         try {
391                 
392                 int index = 0;
393                 for(int i=stop-1;i>=start;i--){
394                         reverseMap[index++][qScores[i]] += weight;
395                 }
396                 
397         }       
398         catch(exception& e) {
399                 m->errorOut(e, "QualityScores", "updateForwardMap");
400                 exit(1);
401         }
402 }
403
404 /**************************************************************************************************/