]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
pat's slight modifications to trim.seqs
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "QualityScores", "QualityScores");
22                 exit(1);
23         }                                                       
24 }
25
26 /**************************************************************************************************/
27
28 QualityScores::QualityScores(ifstream& qFile, int l){
29         try {
30                 
31                 m = MothurOut::getInstance();
32
33                 seqName = "";
34                 seqLength = l;
35                 int score;
36                 
37                 string line;
38                 getline(qFile, line); gobble(qFile);
39                 istringstream nameStream(line);
40         
41                 nameStream >> seqName;
42                 seqName = seqName.substr(1); 
43
44                 //getline(qFile, line);
45                 //istringstream qualStream(line);
46         
47                 //while(qualStream){
48                 //      qualStream >> score;
49                 //      qScores.push_back(score);
50                 //}
51                 //qScores.pop_back();
52                 
53                 //seqLength = qScores.size();   
54                 
55                 for(int i=0;i<seqLength;i++){
56                         qFile >> score;
57                         qScores.push_back(score);
58                 }
59                 gobble(qFile);
60
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "QualityScores", "QualityScores");
64                 exit(1);
65         }                                                       
66
67 }
68
69 /**************************************************************************************************/
70
71 string QualityScores::getName(){
72         
73         try {
74                 return seqName;
75         }
76         catch(exception& e) {
77                 m->errorOut(e, "QualityScores", "getName");
78                 exit(1);
79         }                                                       
80 }
81
82 /**************************************************************************************************/
83
84 void QualityScores::printQScores(ofstream& qFile){
85         try {
86                 
87                 double aveQScore = calculateAverage();
88                 
89                 qFile << '>' << seqName << '\t' << aveQScore << endl;
90                 
91                 for(int i=0;i<seqLength;i++){
92                         qFile << qScores[i] << ' ';
93                 }
94                 qFile << endl;
95                                 
96         }
97         catch(exception& e) {
98                 m->errorOut(e, "QualityScores", "printQScores");
99                 exit(1);
100         }                                                       
101 }
102
103 /**************************************************************************************************/
104
105 void QualityScores::trimQScores(int start, int end){
106         try {
107                 vector<int> hold;
108                 
109                 if(end == -1){          
110                         hold = vector<int>(qScores.begin()+start, qScores.end());
111                         qScores = hold;         
112                 }
113                 if(start == -1){
114                         hold = vector<int>(qScores.begin(), qScores.begin()+end);       //not sure if indexing is correct
115                         qScores = hold;         
116                 }
117
118                 seqLength = qScores.size();
119         }
120         catch(exception& e) {
121                 m->errorOut(e, "QualityScores", "trimQScores");
122                 exit(1);
123         }                                                       
124 }
125
126 /**************************************************************************************************/
127
128 void QualityScores::flipQScores(){
129         try {
130                 
131                 vector<int> temp = qScores;
132                 for(int i=0;i<seqLength;i++){
133                         qScores[seqLength - i - 1] = temp[i];
134                 }
135                 
136         }
137         catch(exception& e) {
138                 m->errorOut(e, "QualityScores", "flipQScores");
139                 exit(1);
140         }                                                       
141 }
142
143 /**************************************************************************************************/
144
145 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
146         try {
147                 string rawSequence = sequence.getUnaligned();
148                 int seqLength = sequence.getNumBases();
149                 
150                 if(seqName != sequence.getName()){
151                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
152                         m->mothurOutEndLine();  
153                 }
154                 
155                 int end;
156                 for(int i=0;i<seqLength;i++){
157                         end = i;
158                         if(qScores[i] < qThreshold){
159                                 break;
160                         }
161                 }
162                 
163                 sequence.setUnaligned(rawSequence.substr(0,end));
164                 trimQScores(-1, end);
165         
166                 return 1;
167         }
168         catch(exception& e) {
169                 m->errorOut(e, "QualityScores", "flipQScores");
170                 exit(1);
171         }                                                       
172         
173 }
174
175 /**************************************************************************************************/
176
177 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
178         try {
179                 string rawSequence = sequence.getUnaligned();
180                 int seqLength = sequence.getNumBases();
181                 
182                 if(seqName != sequence.getName()){
183                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
184                         m->mothurOutEndLine();  
185                 }
186                 
187                 int end = -1;
188                 double rollingSum = 0.0000;
189                 
190                 for(int i=0;i<seqLength;i++){
191
192                         rollingSum += (double)qScores[i];
193                         
194                         if(rollingSum / (double)(i+1) < qThreshold){
195                                 end = i;
196 //                              cout << i+1 << '\t' << seqName << '\t' << rollingSum / (double)(i+1) << endl;
197                                 break;
198                         }
199                 }
200                 
201                 if(end == -1){  end = seqLength;        }
202                 
203                 sequence.setUnaligned(rawSequence.substr(0,end));
204                 trimQScores(-1, end);
205                 
206                 return 1;
207         }
208         catch(exception& e) {
209                 m->errorOut(e, "QualityScores", "flipQScores");
210                 exit(1);
211         }                                                       
212         
213 }
214
215 /**************************************************************************************************/
216
217 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
218         try {
219                 string rawSequence = sequence.getUnaligned();
220                 int seqLength = sequence.getNumBases();
221                 
222                 if(seqName != sequence.getName()){
223                         m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
224                         m->mothurOutEndLine();
225                 }
226                 
227                 int end = windowSize;
228                 int start = 0;
229                 
230
231                 while(start < seqLength){
232                         double windowSum = 0.0000;
233
234                         for(int i=start;i<end;i++){
235                                 windowSum += qScores[i];                                
236                         }
237                         double windowAverage = windowSum / (double)(end-start);
238                         
239                         if(windowAverage < qThreshold){
240                                 end = end - stepSize;
241                                 break;
242                         }
243                         start += stepSize;
244                         end = start + windowSize;
245                         if(end >= seqLength){   end = seqLength - 1;    }
246                 }
247                 
248                 
249                 if(end == -1){  end = seqLength;        }
250                 
251                 sequence.setUnaligned(rawSequence.substr(0,end));
252                 trimQScores(-1, end);
253                 
254                 return 1;
255         }
256         catch(exception& e) {
257                 m->errorOut(e, "QualityScores", "flipQScores");
258                 exit(1);
259         }                                                       
260         
261 }
262
263 /**************************************************************************************************/
264
265 double QualityScores::calculateAverage(){
266         
267         double aveQScore = 0.0000;
268         
269         for(int i=0;i<seqLength;i++){
270                 aveQScore += (double) qScores[i];
271         }
272         aveQScore /= (double) seqLength;
273         
274         return aveQScore;
275 }
276
277 /**************************************************************************************************/
278
279 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
280         try {
281                 string rawSequence = sequence.getUnaligned();
282                 bool success = 0;       //guilty until proven innocent
283                 
284                 if(seqName != sequence.getName())       {
285                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
286                         m->mothurOutEndLine();  
287                 } 
288                         
289                 double aveQScore = calculateAverage();
290                 
291                 if(aveQScore >= qAverage)       {       success = 1;    }
292                 else                                            {       success = 0;    }
293                 
294                 return success;
295         }
296         catch(exception& e) {
297                 m->errorOut(e, "TrimSeqsCommand", "cullQualAverage");
298                 exit(1);
299         }
300 }
301
302 /**************************************************************************************************/