]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
added primer.design command. fixed bug with linux unifrac subsampling, metastats...
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19                 
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "QualityScores", "QualityScores");
23                 exit(1);
24         }                                                       
25 }
26
27 /**************************************************************************************************/
28
29 QualityScores::QualityScores(ifstream& qFile){
30         try {
31                 
32                 m = MothurOut::getInstance();
33
34                 int score;
35                 seqName = getSequenceName(qFile);
36                 
37                 if (!m->control_pressed) {
38             string qScoreString = m->getline(qFile);
39             //cout << qScoreString << endl;
40             while(qFile.peek() != '>' && qFile.peek() != EOF){
41                 if (m->control_pressed) { break; }
42                 string temp = m->getline(qFile);
43                 //cout << temp << endl;
44                 qScoreString +=  ' ' + temp;
45             }
46             //cout << "done reading " << endl;  
47             istringstream qScoreStringStream(qScoreString);
48             int count = 0;
49             while(!qScoreStringStream.eof()){
50                 if (m->control_pressed) { break; }
51                 string temp;
52                 qScoreStringStream >> temp;  m->gobble(qScoreStringStream);
53                 
54                 //check temp to make sure its a number
55                 if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
56                 convert(temp, score);
57                 
58                 //cout << count << '\t' << score << endl;
59                 qScores.push_back(score);
60                 count++;
61             }
62         }
63                 
64                 seqLength = qScores.size();
65                 //cout << "seqlength = " << seqLength << '\t' << count << endl;
66                 
67         }
68         catch(exception& e) {
69                 m->errorOut(e, "QualityScores", "QualityScores");
70                 exit(1);
71         }                                                       
72         
73 }
74 //********************************************************************************************************************
75 string QualityScores::getSequenceName(ifstream& qFile) {
76         try {
77                 string name = "";
78                 
79         qFile >> name;
80         m->getline(qFile);
81                 
82                 if (name.length() != 0) { 
83             
84                         name = name.substr(1); 
85             
86             for (int i = 0; i < name.length(); i++) {
87                 if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
88             }
89             
90         }else{ m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true;  }
91         
92                 return name;
93         }
94         catch(exception& e) {
95                 m->errorOut(e, "QualityScores", "getSequenceName");
96                 exit(1);
97         }
98 }
99 //********************************************************************************************************************
100 void QualityScores::setName(string name) {
101         try {
102       
103         for (int i = 0; i < name.length(); i++) {
104             if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
105         }     
106     
107         seqName = name;
108         }
109         catch(exception& e) {
110                 m->errorOut(e, "QualityScores", "setName");
111                 exit(1);
112         }
113 }
114 /**************************************************************************************************/
115
116 string QualityScores::getName(){
117         
118         try {
119                 return seqName;
120         }
121         catch(exception& e) {
122                 m->errorOut(e, "QualityScores", "getName");
123                 exit(1);
124         }                                                       
125 }
126
127 /**************************************************************************************************/
128
129 void QualityScores::printQScores(ofstream& qFile){
130         try {
131                 
132                 double aveQScore = calculateAverage();
133                 
134                 qFile << '>' << seqName << '\t' << aveQScore << endl;
135                 
136                 for(int i=0;i<seqLength;i++){
137                         qFile << qScores[i] << ' ';
138                 }
139                 qFile << endl;
140                                 
141         }
142         catch(exception& e) {
143                 m->errorOut(e, "QualityScores", "printQScores");
144                 exit(1);
145         }                                                       
146 }
147
148 /**************************************************************************************************/
149
150 void QualityScores::trimQScores(int start, int end){
151         try {
152                 vector<int> hold;
153                 
154
155                 //cout << seqName << '\t' << start << '\t' << end << '\t' << qScores.size() << endl;
156                 //for (int i = 0; i < qScores.size(); i++) { cout << qScores[i] << end; }
157                 if(end == -1){          
158                         hold = vector<int>(qScores.begin()+start, qScores.end());
159                         qScores = hold;         
160                 }
161                 if(start == -1){
162                         if(qScores.size() > end){
163                                 hold = vector<int>(qScores.begin(), qScores.begin()+end);
164                                 qScores = hold;         
165                         }
166                 }
167
168                 seqLength = qScores.size();
169         }
170         catch(exception& e) {
171                 m->errorOut(e, "QualityScores", "trimQScores");
172                 exit(1);
173         }                                                       
174 }
175
176 /**************************************************************************************************/
177
178 void QualityScores::flipQScores(){
179         try {
180                 
181                 vector<int> temp = qScores;
182                 for(int i=0;i<seqLength;i++){
183                         qScores[seqLength - i - 1] = temp[i];
184                 }
185                 
186         }
187         catch(exception& e) {
188                 m->errorOut(e, "QualityScores", "flipQScores");
189                 exit(1);
190         }                                                       
191 }
192
193 /**************************************************************************************************/
194
195 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
196         try {
197                 string rawSequence = sequence.getUnaligned();
198                 int seqLength = sequence.getNumBases();
199                 
200                 if(seqName != sequence.getName()){
201                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
202                         m->mothurOutEndLine();  
203                 }
204                 
205                 int end;
206                 for(int i=0;i<seqLength;i++){
207                         end = i;
208                         if(qScores[i] < qThreshold){
209                                 break;
210                         }
211                 }
212                 
213                 //every score passed
214                 if (end == (seqLength-1)) { end = seqLength; }
215                 
216                 sequence.setUnaligned(rawSequence.substr(0,end));
217                 trimQScores(-1, end);
218                 
219                 return 1;
220         }
221         catch(exception& e) {
222                 m->errorOut(e, "QualityScores", "flipQScores");
223                 exit(1);
224         }                                                       
225         
226 }
227
228 /**************************************************************************************************/
229
230 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
231         try {
232                 string rawSequence = sequence.getUnaligned();
233                 int seqLength = sequence.getNumBases();
234                 
235                 if(seqName != sequence.getName()){
236                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
237                         m->mothurOutEndLine();  
238                 }
239                 
240                 int end = -1;
241                 double rollingSum = 0.0000;
242                 
243                 for(int i=0;i<seqLength;i++){
244
245                         rollingSum += (double)qScores[i];
246                         
247                         if(rollingSum / (double)(i+1) < qThreshold){
248                                 end = i;
249                                 break;
250                         }
251                 }
252                 
253                 if(end == -1){  end = seqLength;        }
254                 
255                 
256                 sequence.setUnaligned(rawSequence.substr(0,end));
257                 trimQScores(-1, end);
258                 
259                 
260                 return 1;
261         }
262         catch(exception& e) {
263                 m->errorOut(e, "QualityScores", "flipQScores");
264                 exit(1);
265         }                                                       
266         
267 }
268
269 /**************************************************************************************************/
270
271 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
272         try {
273                 string rawSequence = sequence.getUnaligned();
274                 int seqLength = sequence.getNumBases();
275                 
276                 if(seqName != sequence.getName()){
277                         m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
278                         m->mothurOutEndLine();
279                 }
280                 
281                 int end = windowSize;
282                 int start = 0;
283
284                 if(seqLength < windowSize) {    return 0;       }
285                         
286                 while((start+windowSize) < seqLength){
287                         double windowSum = 0.0000;
288
289                         for(int i=start;i<end;i++){
290                                 windowSum += qScores[i];
291                         }
292                         double windowAverage = windowSum / (double)(end-start);
293                                 
294                         if(windowAverage < qThreshold){
295                                 end = end - stepSize;
296                                 break;
297                         }
298                         
299                         start += stepSize;
300                         end = start + windowSize;
301                                 
302                         if(end >= seqLength){   end = seqLength;        }
303                                 
304                 }
305         
306                 if(end == -1){  end = seqLength;        }
307                 
308                 //failed first window
309                 if (end < windowSize) { return 0; }
310                         
311                 sequence.setUnaligned(rawSequence.substr(0,end));
312                 trimQScores(-1, end);
313                 
314                 return 1;
315         }
316         catch(exception& e) {
317                 m->errorOut(e, "QualityScores", "stripQualWindowAverage");
318                 exit(1);
319         }                                                       
320         
321 }
322
323 /**************************************************************************************************/
324
325 double QualityScores::calculateAverage(){
326         
327         double aveQScore = 0.0000;
328         
329         for(int i=0;i<seqLength;i++){
330                 aveQScore += (double) qScores[i];
331         }
332         aveQScore /= (double) seqLength;
333         
334         return aveQScore;
335 }
336
337 /**************************************************************************************************/
338
339 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
340         try {
341                 string rawSequence = sequence.getUnaligned();
342                 bool success = 0;       //guilty until proven innocent
343                 
344                 if(seqName != sequence.getName())       {
345                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
346                         m->mothurOutEndLine();  
347                 } 
348                         
349                 double aveQScore = calculateAverage();
350                 
351                 if(aveQScore >= qAverage)       {       success = 1;    }
352                 else                                            {       success = 0;    }
353                 
354                 return success;
355         }
356         catch(exception& e) {
357                 m->errorOut(e, "QualityScores", "cullQualAverage");
358                 exit(1);
359         }
360 }
361
362 /**************************************************************************************************/
363
364 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
365         try {
366
367                 int seqLength = errorSeq.size();
368                 
369                 int qIndex = start - 1;
370
371                 for(int i=0;i<seqLength;i++){
372                         
373                         if(errorSeq[i] == 'm')          {       qualErrorMap['m'][qScores[qIndex]] += weight;   }
374                         else if(errorSeq[i] == 's')     {       qualErrorMap['s'][qScores[qIndex]] += weight;   }
375                         else if(errorSeq[i] == 'i')     {       qualErrorMap['i'][qScores[qIndex]] += weight;   }
376                         else if(errorSeq[i] == 'a')     {       qualErrorMap['a'][qScores[qIndex]] += weight;   /*if(qScores[qIndex] != 0){     cout << qIndex << '\t';         }*/     }
377                         else if(errorSeq[i] == 'd')     {       /*      there are no qScores for deletions      */              }
378
379                         if(errorSeq[i] != 'd')          {       qIndex++;       }
380                                                 
381                         if(qIndex > stop){      break;  }
382                 }       
383         }
384         catch(exception& e) {
385                 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
386                 exit(1);
387         }
388 }
389
390 /**************************************************************************************************/
391
392 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
393         try {
394                 
395                 int index = 0;
396                 for(int i=start-1;i<stop;i++){
397                         forwardMap[index++][qScores[i]] += weight;
398                 }
399                 
400         }
401         catch(exception& e) {
402                 m->errorOut(e, "QualityScores", "updateForwardMap");
403                 exit(1);
404         }
405 }
406
407 /**************************************************************************************************/
408
409 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
410         try {
411                 
412                 int index = 0;
413                 for(int i=stop-1;i>=start-1;i--){
414                         reverseMap[index++][qScores[i]] += weight;
415                 }
416                 
417         }       
418         catch(exception& e) {
419                 m->errorOut(e, "QualityScores", "updateReverseMap");
420                 exit(1);
421         }
422 }
423
424 /**************************************************************************************************/