5 * Created by Pat Schloss on 7/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "qualityscores.h"
12 /**************************************************************************************************/
14 QualityScores::QualityScores(){
16 m = MothurOut::getInstance();
22 m->errorOut(e, "QualityScores", "QualityScores");
27 /**************************************************************************************************/
29 QualityScores::QualityScores(ifstream& qFile){
32 m = MothurOut::getInstance();
41 m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
42 m->mothurOutEndLine();
45 seqName = seqName.substr(1);
48 string qScoreString = m->getline(qFile);
49 while(qFile.peek() != '>' && qFile.peek() != EOF){
50 qScoreString += ' ' + m->getline(qFile);
53 istringstream qScoreStringStream(qScoreString);
55 while(!qScoreStringStream.eof()){
56 if (m->control_pressed) { break; }
57 qScoreStringStream >> score; m->gobble(qScoreStringStream);
59 qScores.push_back(score);
64 // string scores = "";
66 // while(!qFile.eof()){
71 // if (seqName.length() != 0) {
72 // seqName = seqName.substr(1);
73 // while (!qFile.eof()) {
74 // char c = qFile.get();
75 // //gobble junk on line
76 // if (c == 10 || c == 13){ break; }
83 // char letter=qFile.get();
84 // if((letter == '>')){ qFile.putback(letter); break; }
85 // else if (isprint(letter)) { scores += letter; }
92 // //convert scores string to qScores
93 // istringstream qScoreStringStream(scores);
96 // while(!qScoreStringStream.eof()){
98 // if (m->control_pressed) { break; }
100 // qScoreStringStream >> score;
101 // qScores.push_back(score);
104 // qScores.pop_back();
106 seqLength = qScores.size();
107 //cout << "seqlenght = " << seqLength << '\t' << count << endl;
110 catch(exception& e) {
111 m->errorOut(e, "QualityScores", "QualityScores");
117 /**************************************************************************************************/
119 string QualityScores::getName(){
124 catch(exception& e) {
125 m->errorOut(e, "QualityScores", "getName");
130 /**************************************************************************************************/
132 void QualityScores::printQScores(ofstream& qFile){
135 double aveQScore = calculateAverage();
137 qFile << '>' << seqName << '\t' << aveQScore << endl;
139 for(int i=0;i<seqLength;i++){
140 qFile << qScores[i] << ' ';
145 catch(exception& e) {
146 m->errorOut(e, "QualityScores", "printQScores");
151 /**************************************************************************************************/
153 void QualityScores::trimQScores(int start, int end){
158 //cout << seqName << '\t' << start << '\t' << end << '\t' << qScores.size() << endl;
159 //for (int i = 0; i < qScores.size(); i++) { cout << qScores[i] << end; }
161 hold = vector<int>(qScores.begin()+start, qScores.end());
165 if(qScores.size() > end){
166 hold = vector<int>(qScores.begin(), qScores.begin()+end);
171 seqLength = qScores.size();
173 catch(exception& e) {
174 m->errorOut(e, "QualityScores", "trimQScores");
179 /**************************************************************************************************/
181 void QualityScores::flipQScores(){
184 vector<int> temp = qScores;
185 for(int i=0;i<seqLength;i++){
186 qScores[seqLength - i - 1] = temp[i];
190 catch(exception& e) {
191 m->errorOut(e, "QualityScores", "flipQScores");
196 /**************************************************************************************************/
198 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
200 string rawSequence = sequence.getUnaligned();
201 int seqLength = sequence.getNumBases();
203 if(seqName != sequence.getName()){
204 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
205 m->mothurOutEndLine();
209 for(int i=0;i<seqLength;i++){
211 if(qScores[i] < qThreshold){
217 if (end == (seqLength-1)) { end = seqLength; }
219 sequence.setUnaligned(rawSequence.substr(0,end));
220 trimQScores(-1, end);
224 catch(exception& e) {
225 m->errorOut(e, "QualityScores", "flipQScores");
231 /**************************************************************************************************/
233 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
235 string rawSequence = sequence.getUnaligned();
236 int seqLength = sequence.getNumBases();
238 if(seqName != sequence.getName()){
239 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
240 m->mothurOutEndLine();
244 double rollingSum = 0.0000;
246 for(int i=0;i<seqLength;i++){
248 rollingSum += (double)qScores[i];
250 if(rollingSum / (double)(i+1) < qThreshold){
256 if(end == -1){ end = seqLength; }
259 sequence.setUnaligned(rawSequence.substr(0,end));
260 trimQScores(-1, end);
265 catch(exception& e) {
266 m->errorOut(e, "QualityScores", "flipQScores");
272 /**************************************************************************************************/
274 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
276 string rawSequence = sequence.getUnaligned();
277 int seqLength = sequence.getNumBases();
279 if(seqName != sequence.getName()){
280 m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
281 m->mothurOutEndLine();
284 int end = windowSize;
287 if(seqLength < windowSize) { return 0; }
289 while(start < seqLength){
290 double windowSum = 0.0000;
292 for(int i=start;i<end;i++){
293 windowSum += qScores[i];
295 double windowAverage = windowSum / (double)(end-start);
297 if(windowAverage < qThreshold){
298 end = end - stepSize;
302 end = start + windowSize;
303 if(end >= seqLength){ end = seqLength - 1; }
306 if(end == -1){ end = seqLength; }
308 //failed first window
309 if (end < windowSize) { return 0; }
311 sequence.setUnaligned(rawSequence.substr(0,end));
312 trimQScores(-1, end);
316 catch(exception& e) {
317 m->errorOut(e, "QualityScores", "flipQScores");
323 /**************************************************************************************************/
325 double QualityScores::calculateAverage(){
327 double aveQScore = 0.0000;
329 for(int i=0;i<seqLength;i++){
330 aveQScore += (double) qScores[i];
332 aveQScore /= (double) seqLength;
337 /**************************************************************************************************/
339 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
341 string rawSequence = sequence.getUnaligned();
342 bool success = 0; //guilty until proven innocent
344 if(seqName != sequence.getName()) {
345 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
346 m->mothurOutEndLine();
349 double aveQScore = calculateAverage();
351 if(aveQScore >= qAverage) { success = 1; }
352 else { success = 0; }
356 catch(exception& e) {
357 m->errorOut(e, "QualityScores", "cullQualAverage");
362 /**************************************************************************************************/
364 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
367 int seqLength = errorSeq.size();
369 int qIndex = start - 1;
371 for(int i=0;i<seqLength;i++){
373 if(errorSeq[i] == 'm') { qualErrorMap['m'][qScores[qIndex]] += weight; }
374 else if(errorSeq[i] == 's') { qualErrorMap['s'][qScores[qIndex]] += weight; }
375 else if(errorSeq[i] == 'i') { qualErrorMap['i'][qScores[qIndex]] += weight; }
376 else if(errorSeq[i] == 'a') { qualErrorMap['a'][qScores[qIndex]] += weight; }
377 else if(errorSeq[i] == 'd') { /* there are no qScores for deletions */ }
379 if(errorSeq[i] != 'd') { qIndex++; }
380 if(qIndex > stop){ break; }
383 catch(exception& e) {
384 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
389 /**************************************************************************************************/
391 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
395 for(int i=start-1;i<stop;i++){
396 forwardMap[index++][qScores[i]] += weight;
400 catch(exception& e) {
401 m->errorOut(e, "QualityScores", "updateForwardMap");
406 /**************************************************************************************************/
408 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
412 for(int i=stop-1;i>=start;i--){
413 reverseMap[index++][qScores[i]] += weight;
417 catch(exception& e) {
418 m->errorOut(e, "QualityScores", "updateForwardMap");
423 /**************************************************************************************************/