5 * Created by Pat Schloss on 7/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "qualityscores.h"
12 /**************************************************************************************************/
14 QualityScores::QualityScores(){
16 m = MothurOut::getInstance();
22 m->errorOut(e, "QualityScores", "QualityScores");
27 /**************************************************************************************************/
29 QualityScores::QualityScores(ifstream& qFile, int l){
32 m = MothurOut::getInstance();
40 while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13 || c == -1){ break; } } // get rest of line
42 if (seqName == "") { m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg())); m->mothurOutEndLine(); }
44 seqName = seqName.substr(1);
47 //m->getline(qFile, line);
48 //istringstream qualStream(line);
51 // qualStream >> score;
52 // qScores.push_back(score);
56 //seqLength = qScores.size();
64 //cout << name << endl;
65 if (name.length() != 0) {
66 saveName = name.substr(1);
69 if (c == 10 || c == 13){ break; }
76 char letter= in.get();
77 if(letter == '>'){ in.putback(letter); break; }
78 else{ scores += letter; }
81 //istringstream iss (scores,istringstream::in);
83 //int count = 0; int tempScore;
84 //while (iss) { iss >> tempScore; count++; }
85 //cout << saveName << '\t' << count << endl;
92 for(int i=0;i<seqLength;i++){
94 qScores.push_back(score);
100 m->errorOut(e, "QualityScores", "QualityScores");
105 /**************************************************************************************************/
107 string QualityScores::getName(){
112 catch(exception& e) {
113 m->errorOut(e, "QualityScores", "getName");
118 /**************************************************************************************************/
120 void QualityScores::printQScores(ofstream& qFile){
123 double aveQScore = calculateAverage();
125 qFile << '>' << seqName << '\t' << aveQScore << endl;
127 for(int i=0;i<seqLength;i++){
128 qFile << qScores[i] << ' ';
133 catch(exception& e) {
134 m->errorOut(e, "QualityScores", "printQScores");
139 /**************************************************************************************************/
141 void QualityScores::trimQScores(int start, int end){
146 hold = vector<int>(qScores.begin()+start, qScores.end());
150 if(qScores.size() > end){
151 hold = vector<int>(qScores.begin(), qScores.begin()+end);
156 seqLength = qScores.size();
158 catch(exception& e) {
159 m->errorOut(e, "QualityScores", "trimQScores");
164 /**************************************************************************************************/
166 void QualityScores::flipQScores(){
169 vector<int> temp = qScores;
170 for(int i=0;i<seqLength;i++){
171 qScores[seqLength - i - 1] = temp[i];
175 catch(exception& e) {
176 m->errorOut(e, "QualityScores", "flipQScores");
181 /**************************************************************************************************/
183 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
185 string rawSequence = sequence.getUnaligned();
186 int seqLength = sequence.getNumBases();
188 if(seqName != sequence.getName()){
189 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
190 m->mothurOutEndLine();
194 for(int i=0;i<seqLength;i++){
196 if(qScores[i] < qThreshold){
202 if (end == (seqLength-1)) { end = seqLength; }
204 sequence.setUnaligned(rawSequence.substr(0,end));
205 trimQScores(-1, end);
209 catch(exception& e) {
210 m->errorOut(e, "QualityScores", "flipQScores");
216 /**************************************************************************************************/
218 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
220 string rawSequence = sequence.getUnaligned();
221 int seqLength = sequence.getNumBases();
223 if(seqName != sequence.getName()){
224 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
225 m->mothurOutEndLine();
229 double rollingSum = 0.0000;
231 for(int i=0;i<seqLength;i++){
233 rollingSum += (double)qScores[i];
235 if(rollingSum / (double)(i+1) < qThreshold){
237 // cout << i+1 << '\t' << seqName << '\t' << rollingSum / (double)(i+1) << endl;
242 if(end == -1){ end = seqLength; }
245 sequence.setUnaligned(rawSequence.substr(0,end));
246 trimQScores(-1, end);
251 catch(exception& e) {
252 m->errorOut(e, "QualityScores", "flipQScores");
258 /**************************************************************************************************/
260 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
262 string rawSequence = sequence.getUnaligned();
263 int seqLength = sequence.getNumBases();
265 if(seqName != sequence.getName()){
266 m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
267 m->mothurOutEndLine();
270 int end = windowSize;
273 if(seqLength < windowSize) { return 0; }
275 while(start < seqLength){
276 double windowSum = 0.0000;
278 for(int i=start;i<end;i++){
279 windowSum += qScores[i];
281 double windowAverage = windowSum / (double)(end-start);
283 if(windowAverage < qThreshold){
284 end = end - stepSize;
288 end = start + windowSize;
289 if(end >= seqLength){ end = seqLength - 1; }
293 if(end == -1){ end = seqLength; }
296 sequence.setUnaligned(rawSequence.substr(0,end));
297 trimQScores(-1, end);
301 catch(exception& e) {
302 m->errorOut(e, "QualityScores", "flipQScores");
308 /**************************************************************************************************/
310 double QualityScores::calculateAverage(){
312 double aveQScore = 0.0000;
314 for(int i=0;i<seqLength;i++){
315 aveQScore += (double) qScores[i];
317 aveQScore /= (double) seqLength;
322 /**************************************************************************************************/
324 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
326 string rawSequence = sequence.getUnaligned();
327 bool success = 0; //guilty until proven innocent
329 if(seqName != sequence.getName()) {
330 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
331 m->mothurOutEndLine();
334 double aveQScore = calculateAverage();
336 if(aveQScore >= qAverage) { success = 1; }
337 else { success = 0; }
341 catch(exception& e) {
342 m->errorOut(e, "QualityScores", "cullQualAverage");
347 /**************************************************************************************************/
349 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
352 int seqLength = errorSeq.size();
353 int qIndex = start - 1;
354 for(int i=0;i<seqLength;i++){
356 if(errorSeq[i] == 'm') { qualErrorMap['m'][qScores[qIndex]] += weight; }
357 else if(errorSeq[i] == 's') { qualErrorMap['s'][qScores[qIndex]] += weight; }
358 else if(errorSeq[i] == 'i') { qualErrorMap['i'][qScores[qIndex]] += weight; }
359 else if(errorSeq[i] == 'a') { qualErrorMap['a'][qScores[qIndex]] += weight; }
360 else if(errorSeq[i] == 'd') { /* there are no qScores for deletions */ }
362 if(errorSeq[i] != 'd') { qIndex++; }
366 catch(exception& e) {
367 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
372 /**************************************************************************************************/
374 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
378 for(int i=start-1;i<stop;i++){
379 forwardMap[index++][qScores[i]] += weight;
383 catch(exception& e) {
384 m->errorOut(e, "QualityScores", "updateForwardMap");
389 /**************************************************************************************************/
391 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
395 for(int i=stop-1;i>=start;i--){
396 reverseMap[index++][qScores[i]] += weight;
400 catch(exception& e) {
401 m->errorOut(e, "QualityScores", "updateForwardMap");
406 /**************************************************************************************************/