]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
moved utilities out of mothur.h and into mothurOut class.
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "QualityScores", "QualityScores");
22                 exit(1);
23         }                                                       
24 }
25
26 /**************************************************************************************************/
27
28 QualityScores::QualityScores(ifstream& qFile, int l){
29         try {
30                 
31                 m = MothurOut::getInstance();
32
33                 seqName = "";
34                 seqLength = l;
35                 int score;
36                 
37                 //string line;
38                 //m->getline(qFile, line); 
39                 //istringstream nameStream(line);
40         
41                 qFile >> seqName; 
42                 while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13 || c == -1){       break;  }       } // get rest of line 
43                 m->gobble(qFile);
44                 if (seqName == "") { m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg())); m->mothurOutEndLine(); }
45                 else {
46                         seqName = seqName.substr(1); 
47                 }
48
49                 //m->getline(qFile, line);
50                 //istringstream qualStream(line);
51         
52                 //while(qualStream){
53                 //      qualStream >> score;
54                 //      qScores.push_back(score);
55                 //}
56                 //qScores.pop_back();
57                 
58                 //seqLength = qScores.size();   
59                 
60                 for(int i=0;i<seqLength;i++){
61                         qFile >> score;
62                         qScores.push_back(score);
63                 }
64                 m->gobble(qFile);
65
66         }
67         catch(exception& e) {
68                 m->errorOut(e, "QualityScores", "QualityScores");
69                 exit(1);
70         }                                                       
71
72 }
73
74 /**************************************************************************************************/
75
76 string QualityScores::getName(){
77         
78         try {
79                 return seqName;
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "QualityScores", "getName");
83                 exit(1);
84         }                                                       
85 }
86
87 /**************************************************************************************************/
88
89 void QualityScores::printQScores(ofstream& qFile){
90         try {
91                 
92                 double aveQScore = calculateAverage();
93                 
94                 qFile << '>' << seqName << '\t' << aveQScore << endl;
95                 
96                 for(int i=0;i<seqLength;i++){
97                         qFile << qScores[i] << ' ';
98                 }
99                 qFile << endl;
100                                 
101         }
102         catch(exception& e) {
103                 m->errorOut(e, "QualityScores", "printQScores");
104                 exit(1);
105         }                                                       
106 }
107
108 /**************************************************************************************************/
109
110 void QualityScores::trimQScores(int start, int end){
111         try {
112                 vector<int> hold;
113                 
114                 if(end == -1){          
115                         hold = vector<int>(qScores.begin()+start, qScores.end());
116                         qScores = hold;         
117                 }
118                 if(start == -1){
119                         hold = vector<int>(qScores.begin(), qScores.begin()+end);       //not sure if indexing is correct
120                         qScores = hold;         
121                 }
122
123                 seqLength = qScores.size();
124         }
125         catch(exception& e) {
126                 m->errorOut(e, "QualityScores", "trimQScores");
127                 exit(1);
128         }                                                       
129 }
130
131 /**************************************************************************************************/
132
133 void QualityScores::flipQScores(){
134         try {
135                 
136                 vector<int> temp = qScores;
137                 for(int i=0;i<seqLength;i++){
138                         qScores[seqLength - i - 1] = temp[i];
139                 }
140                 
141         }
142         catch(exception& e) {
143                 m->errorOut(e, "QualityScores", "flipQScores");
144                 exit(1);
145         }                                                       
146 }
147
148 /**************************************************************************************************/
149
150 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
151         try {
152                 string rawSequence = sequence.getUnaligned();
153                 int seqLength = sequence.getNumBases();
154                 
155                 if(seqName != sequence.getName()){
156                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
157                         m->mothurOutEndLine();  
158                 }
159                 
160                 int end;
161                 for(int i=0;i<seqLength;i++){
162                         end = i;
163                         if(qScores[i] < qThreshold){
164                                 break;
165                         }
166                 }
167                 
168                 sequence.setUnaligned(rawSequence.substr(0,end));
169                 trimQScores(-1, end);
170         
171                 return 1;
172         }
173         catch(exception& e) {
174                 m->errorOut(e, "QualityScores", "flipQScores");
175                 exit(1);
176         }                                                       
177         
178 }
179
180 /**************************************************************************************************/
181
182 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
183         try {
184                 string rawSequence = sequence.getUnaligned();
185                 int seqLength = sequence.getNumBases();
186                 
187                 if(seqName != sequence.getName()){
188                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
189                         m->mothurOutEndLine();  
190                 }
191                 
192                 int end = -1;
193                 double rollingSum = 0.0000;
194                 
195                 for(int i=0;i<seqLength;i++){
196
197                         rollingSum += (double)qScores[i];
198                         
199                         if(rollingSum / (double)(i+1) < qThreshold){
200                                 end = i;
201 //                              cout << i+1 << '\t' << seqName << '\t' << rollingSum / (double)(i+1) << endl;
202                                 break;
203                         }
204                 }
205                 
206                 if(end == -1){  end = seqLength;        }
207                 
208                 sequence.setUnaligned(rawSequence.substr(0,end));
209                 trimQScores(-1, end);
210                 
211                 return 1;
212         }
213         catch(exception& e) {
214                 m->errorOut(e, "QualityScores", "flipQScores");
215                 exit(1);
216         }                                                       
217         
218 }
219
220 /**************************************************************************************************/
221
222 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
223         try {
224                 string rawSequence = sequence.getUnaligned();
225                 int seqLength = sequence.getNumBases();
226                 
227                 if(seqName != sequence.getName()){
228                         m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
229                         m->mothurOutEndLine();
230                 }
231                 
232                 int end = windowSize;
233                 int start = 0;
234                 
235
236                 while(start < seqLength){
237                         double windowSum = 0.0000;
238
239                         for(int i=start;i<end;i++){
240                                 windowSum += qScores[i];                                
241                         }
242                         double windowAverage = windowSum / (double)(end-start);
243                         
244                         if(windowAverage < qThreshold){
245                                 end = end - stepSize;
246                                 break;
247                         }
248                         start += stepSize;
249                         end = start + windowSize;
250                         if(end >= seqLength){   end = seqLength - 1;    }
251                 }
252                 
253                 
254                 if(end == -1){  end = seqLength;        }
255                 
256                 sequence.setUnaligned(rawSequence.substr(0,end));
257                 trimQScores(-1, end);
258                 
259                 return 1;
260         }
261         catch(exception& e) {
262                 m->errorOut(e, "QualityScores", "flipQScores");
263                 exit(1);
264         }                                                       
265         
266 }
267
268 /**************************************************************************************************/
269
270 double QualityScores::calculateAverage(){
271         
272         double aveQScore = 0.0000;
273         
274         for(int i=0;i<seqLength;i++){
275                 aveQScore += (double) qScores[i];
276         }
277         aveQScore /= (double) seqLength;
278         
279         return aveQScore;
280 }
281
282 /**************************************************************************************************/
283
284 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
285         try {
286                 string rawSequence = sequence.getUnaligned();
287                 bool success = 0;       //guilty until proven innocent
288                 
289                 if(seqName != sequence.getName())       {
290                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
291                         m->mothurOutEndLine();  
292                 } 
293                         
294                 double aveQScore = calculateAverage();
295                 
296                 if(aveQScore >= qAverage)       {       success = 1;    }
297                 else                                            {       success = 0;    }
298                 
299                 return success;
300         }
301         catch(exception& e) {
302                 m->errorOut(e, "TrimSeqsCommand", "cullQualAverage");
303                 exit(1);
304         }
305 }
306
307 /**************************************************************************************************/