]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
more changes to error.seqs command and quality scores classses
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "QualityScores", "QualityScores");
22                 exit(1);
23         }                                                       
24 }
25
26 /**************************************************************************************************/
27
28 QualityScores::QualityScores(ifstream& qFile, int l){
29         try {
30                 
31                 m = MothurOut::getInstance();
32
33                 seqName = "";
34                 seqLength = l;
35                 int score;
36                 
37                 qFile >> seqName; 
38
39                 while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13 || c == -1){       break;  }       } // get rest of line 
40                 m->gobble(qFile);
41                 if (seqName == "") { m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg())); m->mothurOutEndLine(); }
42                 else {
43                         seqName = seqName.substr(1); 
44                 }
45                 
46                 //m->getline(qFile, line);
47                 //istringstream qualStream(line);
48         
49                 //while(qualStream){
50                 //      qualStream >> score;
51                 //      qScores.push_back(score);
52                 //}
53                 //qScores.pop_back();
54                 
55                 //seqLength = qScores.size();   
56                 
57                 /*while(!in.eof()){     
58                         string saveName = "";
59                         string name = "";
60                         string scores = "";
61                         
62                         in >> name; 
63                         //cout << name << endl;         
64                         if (name.length() != 0) { 
65                                 saveName = name.substr(1);
66                                 while (!in.eof())       {       
67                                         char c = in.get(); 
68                                         if (c == 10 || c == 13){        break;  }
69                                         else { name += c; }     
70                                 } 
71                                 m->gobble(in);
72                         }
73                         
74                         while(in){
75                                 char letter= in.get();
76                                 if(letter == '>'){      in.putback(letter);     break;  }
77                                 else{ scores += letter; }
78                         }
79                         
80                  //istringstream iss (scores,istringstream::in);
81                  
82                  //int count = 0; int tempScore;
83                  //while (iss) { iss >> tempScore; count++; }
84                  //cout << saveName << '\t' << count << endl;   
85                         
86                         m->gobble(in);
87                 }*/             
88                 
89                 
90                 
91                 for(int i=0;i<seqLength;i++){
92                         qFile >> score;
93                         qScores.push_back(score);
94                 }
95                 m->gobble(qFile);
96
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "QualityScores", "QualityScores");
100                 exit(1);
101         }                                                       
102
103 }
104 /**************************************************************************************************/
105
106 string QualityScores::getName(){
107         
108         try {
109                 return seqName;
110         }
111         catch(exception& e) {
112                 m->errorOut(e, "QualityScores", "getName");
113                 exit(1);
114         }                                                       
115 }
116
117 /**************************************************************************************************/
118
119 void QualityScores::printQScores(ofstream& qFile){
120         try {
121                 
122                 double aveQScore = calculateAverage();
123                 
124                 qFile << '>' << seqName << '\t' << aveQScore << endl;
125                 
126                 for(int i=0;i<seqLength;i++){
127                         qFile << qScores[i] << ' ';
128                 }
129                 qFile << endl;
130                                 
131         }
132         catch(exception& e) {
133                 m->errorOut(e, "QualityScores", "printQScores");
134                 exit(1);
135         }                                                       
136 }
137
138 /**************************************************************************************************/
139
140 void QualityScores::trimQScores(int start, int end){
141         try {
142                 vector<int> hold;
143                 
144                 if(end == -1){          
145                         hold = vector<int>(qScores.begin()+start, qScores.end());
146                         qScores = hold;         
147                 }
148                 if(start == -1){
149                         if(qScores.size() > end){
150                                 hold = vector<int>(qScores.begin(), qScores.begin()+end);
151                                 qScores = hold;         
152                         }
153                 }
154
155                 seqLength = qScores.size();
156         }
157         catch(exception& e) {
158                 m->errorOut(e, "QualityScores", "trimQScores");
159                 exit(1);
160         }                                                       
161 }
162
163 /**************************************************************************************************/
164
165 void QualityScores::flipQScores(){
166         try {
167                 
168                 vector<int> temp = qScores;
169                 for(int i=0;i<seqLength;i++){
170                         qScores[seqLength - i - 1] = temp[i];
171                 }
172                 
173         }
174         catch(exception& e) {
175                 m->errorOut(e, "QualityScores", "flipQScores");
176                 exit(1);
177         }                                                       
178 }
179
180 /**************************************************************************************************/
181
182 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
183         try {
184                 string rawSequence = sequence.getUnaligned();
185                 int seqLength = sequence.getNumBases();
186                 
187                 if(seqName != sequence.getName()){
188                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
189                         m->mothurOutEndLine();  
190                 }
191                 
192                 int end;
193                 for(int i=0;i<seqLength;i++){
194                         end = i;
195                         if(qScores[i] < qThreshold){
196                                 break;
197                         }
198                 }
199                 
200                 sequence.setUnaligned(rawSequence.substr(0,end));
201                 trimQScores(-1, end);
202         
203                 return 1;
204         }
205         catch(exception& e) {
206                 m->errorOut(e, "QualityScores", "flipQScores");
207                 exit(1);
208         }                                                       
209         
210 }
211
212 /**************************************************************************************************/
213
214 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
215         try {
216                 string rawSequence = sequence.getUnaligned();
217                 int seqLength = sequence.getNumBases();
218                 
219                 if(seqName != sequence.getName()){
220                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
221                         m->mothurOutEndLine();  
222                 }
223                 
224                 int end = -1;
225                 double rollingSum = 0.0000;
226                 
227                 for(int i=0;i<seqLength;i++){
228
229                         rollingSum += (double)qScores[i];
230                         
231                         if(rollingSum / (double)(i+1) < qThreshold){
232                                 end = i;
233 //                              cout << i+1 << '\t' << seqName << '\t' << rollingSum / (double)(i+1) << endl;
234                                 break;
235                         }
236                 }
237                 
238                 if(end == -1){  end = seqLength;        }
239                 
240                 sequence.setUnaligned(rawSequence.substr(0,end));
241                 trimQScores(-1, end);
242                 
243                 return 1;
244         }
245         catch(exception& e) {
246                 m->errorOut(e, "QualityScores", "flipQScores");
247                 exit(1);
248         }                                                       
249         
250 }
251
252 /**************************************************************************************************/
253
254 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
255         try {
256                 string rawSequence = sequence.getUnaligned();
257                 int seqLength = sequence.getNumBases();
258                 
259                 if(seqName != sequence.getName()){
260                         m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
261                         m->mothurOutEndLine();
262                 }
263                 
264                 int end = windowSize;
265                 int start = 0;
266                 
267                 if(seqLength < windowSize) {    return 0;       }
268                 
269                 while(start < seqLength){
270                         double windowSum = 0.0000;
271
272                         for(int i=start;i<end;i++){
273                                 windowSum += qScores[i];                                
274                         }
275                         double windowAverage = windowSum / (double)(end-start);
276                         
277                         if(windowAverage < qThreshold){
278                                 end = end - stepSize;
279                                 break;
280                         }
281                         start += stepSize;
282                         end = start + windowSize;
283                         if(end >= seqLength){   end = seqLength - 1;    }
284                 }
285                 
286                 
287                 if(end == -1){  end = seqLength;        }
288                 
289                 sequence.setUnaligned(rawSequence.substr(0,end));
290                 trimQScores(-1, end);
291                 
292                 return 1;
293         }
294         catch(exception& e) {
295                 m->errorOut(e, "QualityScores", "flipQScores");
296                 exit(1);
297         }                                                       
298         
299 }
300
301 /**************************************************************************************************/
302
303 double QualityScores::calculateAverage(){
304         
305         double aveQScore = 0.0000;
306         
307         for(int i=0;i<seqLength;i++){
308                 aveQScore += (double) qScores[i];
309         }
310         aveQScore /= (double) seqLength;
311         
312         return aveQScore;
313 }
314
315 /**************************************************************************************************/
316
317 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
318         try {
319                 string rawSequence = sequence.getUnaligned();
320                 bool success = 0;       //guilty until proven innocent
321                 
322                 if(seqName != sequence.getName())       {
323                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
324                         m->mothurOutEndLine();  
325                 } 
326                         
327                 double aveQScore = calculateAverage();
328                 
329                 if(aveQScore >= qAverage)       {       success = 1;    }
330                 else                                            {       success = 0;    }
331                 
332                 return success;
333         }
334         catch(exception& e) {
335                 m->errorOut(e, "QualityScores", "cullQualAverage");
336                 exit(1);
337         }
338 }
339
340 /**************************************************************************************************/
341
342 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
343         try {
344
345                 int seqLength = errorSeq.size();
346                 int qIndex = start - 1;
347                 for(int i=0;i<seqLength;i++){
348                         
349                         if(errorSeq[i] == 'm')          {       qualErrorMap['m'][qScores[qIndex]] += weight;   }
350                         else if(errorSeq[i] == 's')     {       qualErrorMap['s'][qScores[qIndex]] += weight;   }
351                         else if(errorSeq[i] == 'i')     {       qualErrorMap['i'][qScores[qIndex]] += weight;   }
352                         else if(errorSeq[i] == 'a')     {       qualErrorMap['a'][qScores[qIndex]] += weight;   }
353                         else if(errorSeq[i] == 'd')     {       /*      there are no qScores for deletions      */              }
354
355                         if(errorSeq[i] != 'd')          {       qIndex++;       }
356
357                 }       
358         }
359         catch(exception& e) {
360                 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
361                 exit(1);
362         }
363 }
364
365 /**************************************************************************************************/
366
367 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
368         try {
369                 
370                 int index = 0;
371                 for(int i=start-1;i<stop;i++){
372                         forwardMap[index++][qScores[i]] += weight;
373                 }
374                 
375         }
376         catch(exception& e) {
377                 m->errorOut(e, "QualityScores", "updateForwardMap");
378                 exit(1);
379         }
380 }
381
382 /**************************************************************************************************/
383
384 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
385         try {
386                 
387                 int index = 0;
388                 for(int i=stop-1;i>=start;i--){
389                         reverseMap[index++][qScores[i]] += weight;
390                 }
391                 
392         }       
393         catch(exception& e) {
394                 m->errorOut(e, "QualityScores", "updateForwardMap");
395                 exit(1);
396         }
397 }
398
399 /**************************************************************************************************/