5 * Created by Pat Schloss on 7/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "qualityscores.h"
12 /**************************************************************************************************/
14 QualityScores::QualityScores(){
16 m = MothurOut::getInstance();
22 m->errorOut(e, "QualityScores", "QualityScores");
27 /**************************************************************************************************/
29 QualityScores::QualityScores(ifstream& qFile){
32 m = MothurOut::getInstance();
41 m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
42 m->mothurOutEndLine();
45 seqName = seqName.substr(1);
48 string qScoreString = m->getline(qFile);
50 istringstream qScoreStringStream(qScoreString);
51 while(!qScoreStringStream.eof()){
52 qScoreStringStream >> score;
53 qScores.push_back(score);
56 seqLength = qScores.size();
59 m->errorOut(e, "QualityScores", "QualityScores");
65 /**************************************************************************************************/
67 string QualityScores::getName(){
73 m->errorOut(e, "QualityScores", "getName");
78 /**************************************************************************************************/
80 void QualityScores::printQScores(ofstream& qFile){
83 double aveQScore = calculateAverage();
85 qFile << '>' << seqName << '\t' << aveQScore << endl;
87 for(int i=0;i<seqLength;i++){
88 qFile << qScores[i] << ' ';
94 m->errorOut(e, "QualityScores", "printQScores");
99 /**************************************************************************************************/
101 void QualityScores::trimQScores(int start, int end){
106 hold = vector<int>(qScores.begin()+start, qScores.end());
110 if(qScores.size() > end){
111 hold = vector<int>(qScores.begin(), qScores.begin()+end);
116 seqLength = qScores.size();
118 catch(exception& e) {
119 m->errorOut(e, "QualityScores", "trimQScores");
124 /**************************************************************************************************/
126 void QualityScores::flipQScores(){
129 vector<int> temp = qScores;
130 for(int i=0;i<seqLength;i++){
131 qScores[seqLength - i - 1] = temp[i];
135 catch(exception& e) {
136 m->errorOut(e, "QualityScores", "flipQScores");
141 /**************************************************************************************************/
143 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
145 string rawSequence = sequence.getUnaligned();
146 int seqLength = sequence.getNumBases();
148 if(seqName != sequence.getName()){
149 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
150 m->mothurOutEndLine();
154 for(int i=0;i<seqLength;i++){
156 if(qScores[i] < qThreshold){
162 if (end == (seqLength-1)) { end = seqLength; }
164 sequence.setUnaligned(rawSequence.substr(0,end));
165 trimQScores(-1, end);
169 catch(exception& e) {
170 m->errorOut(e, "QualityScores", "flipQScores");
176 /**************************************************************************************************/
178 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
180 string rawSequence = sequence.getUnaligned();
181 int seqLength = sequence.getNumBases();
183 if(seqName != sequence.getName()){
184 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
185 m->mothurOutEndLine();
189 double rollingSum = 0.0000;
191 for(int i=0;i<seqLength;i++){
193 rollingSum += (double)qScores[i];
195 if(rollingSum / (double)(i+1) < qThreshold){
197 // cout << i+1 << '\t' << seqName << '\t' << rollingSum / (double)(i+1) << endl;
202 if(end == -1){ end = seqLength; }
205 sequence.setUnaligned(rawSequence.substr(0,end));
206 trimQScores(-1, end);
211 catch(exception& e) {
212 m->errorOut(e, "QualityScores", "flipQScores");
218 /**************************************************************************************************/
220 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
222 string rawSequence = sequence.getUnaligned();
223 int seqLength = sequence.getNumBases();
225 if(seqName != sequence.getName()){
226 m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
227 m->mothurOutEndLine();
230 int end = windowSize;
233 if(seqLength < windowSize) { return 0; }
235 while(start < seqLength){
236 double windowSum = 0.0000;
238 for(int i=start;i<end;i++){
239 windowSum += qScores[i];
241 double windowAverage = windowSum / (double)(end-start);
243 if(windowAverage < qThreshold){
244 end = end - stepSize;
248 end = start + windowSize;
249 if(end >= seqLength){ end = seqLength - 1; }
252 if(end == -1){ end = seqLength; }
254 sequence.setUnaligned(rawSequence.substr(0,end));
255 trimQScores(-1, end);
259 catch(exception& e) {
260 m->errorOut(e, "QualityScores", "flipQScores");
266 /**************************************************************************************************/
268 double QualityScores::calculateAverage(){
270 double aveQScore = 0.0000;
272 for(int i=0;i<seqLength;i++){
273 aveQScore += (double) qScores[i];
275 aveQScore /= (double) seqLength;
280 /**************************************************************************************************/
282 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
284 string rawSequence = sequence.getUnaligned();
285 bool success = 0; //guilty until proven innocent
287 if(seqName != sequence.getName()) {
288 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
289 m->mothurOutEndLine();
292 double aveQScore = calculateAverage();
294 if(aveQScore >= qAverage) { success = 1; }
295 else { success = 0; }
299 catch(exception& e) {
300 m->errorOut(e, "QualityScores", "cullQualAverage");
305 /**************************************************************************************************/
307 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
310 int seqLength = errorSeq.size();
311 int qIndex = start - 1;
312 for(int i=0;i<seqLength;i++){
314 if(errorSeq[i] == 'm') { qualErrorMap['m'][qScores[qIndex]] += weight; }
315 else if(errorSeq[i] == 's') { qualErrorMap['s'][qScores[qIndex]] += weight; }
316 else if(errorSeq[i] == 'i') { qualErrorMap['i'][qScores[qIndex]] += weight; }
317 else if(errorSeq[i] == 'a') { qualErrorMap['a'][qScores[qIndex]] += weight; }
318 else if(errorSeq[i] == 'd') { /* there are no qScores for deletions */ }
320 if(errorSeq[i] != 'd') { qIndex++; }
324 catch(exception& e) {
325 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
330 /**************************************************************************************************/
332 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
336 for(int i=start-1;i<stop;i++){
337 forwardMap[index++][qScores[i]] += weight;
341 catch(exception& e) {
342 m->errorOut(e, "QualityScores", "updateForwardMap");
347 /**************************************************************************************************/
349 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
353 for(int i=stop-1;i>=start;i--){
354 reverseMap[index++][qScores[i]] += weight;
358 catch(exception& e) {
359 m->errorOut(e, "QualityScores", "updateForwardMap");
364 /**************************************************************************************************/