5 * Created by Pat Schloss on 7/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "qualityscores.h"
12 /**************************************************************************************************/
14 QualityScores::QualityScores(){
16 m = MothurOut::getInstance();
22 m->errorOut(e, "QualityScores", "QualityScores");
27 /**************************************************************************************************/
29 QualityScores::QualityScores(ifstream& qFile){
32 m = MothurOut::getInstance();
41 m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
42 m->mothurOutEndLine();
45 seqName = seqName.substr(1);
47 cout << seqName << endl;
48 string qScoreString = m->getline(qFile);
49 while(qFile.peek() != '>' && qFile.peek() != EOF){
50 qScoreString += ' ' + m->getline(qFile);
53 istringstream qScoreStringStream(qScoreString);
55 while(!qScoreStringStream.eof()){
56 if (m->control_pressed) { break; }
57 qScoreStringStream >> score;
58 qScores.push_back(score);
59 cout << score << '\t' << count << endl;
64 // string scores = "";
66 // while(!qFile.eof()){
71 // if (seqName.length() != 0) {
72 // seqName = seqName.substr(1);
73 // while (!qFile.eof()) {
74 // char c = qFile.get();
75 // //gobble junk on line
76 // if (c == 10 || c == 13){ break; }
83 // char letter=qFile.get();
84 // if((letter == '>')){ qFile.putback(letter); break; }
85 // else if (isprint(letter)) { scores += letter; }
87 // cout << scores << endl;
93 // //convert scores string to qScores
94 // istringstream qScoreStringStream(scores);
97 // while(!qScoreStringStream.eof()){
99 // if (m->control_pressed) { break; }
101 // qScoreStringStream >> score;
102 // qScores.push_back(score);
105 // qScores.pop_back();
107 seqLength = qScores.size();
111 catch(exception& e) {
112 m->errorOut(e, "QualityScores", "QualityScores");
118 /**************************************************************************************************/
120 string QualityScores::getName(){
123 cout << qScores.size() << '\t';
126 catch(exception& e) {
127 m->errorOut(e, "QualityScores", "getName");
132 /**************************************************************************************************/
134 void QualityScores::printQScores(ofstream& qFile){
137 double aveQScore = calculateAverage();
139 qFile << '>' << seqName << '\t' << aveQScore << endl;
141 for(int i=0;i<seqLength;i++){
142 qFile << qScores[i] << ' ';
147 catch(exception& e) {
148 m->errorOut(e, "QualityScores", "printQScores");
153 /**************************************************************************************************/
155 void QualityScores::trimQScores(int start, int end){
159 cout << seqName << '\t' << qScores.size() << '\t' << start << '\t' << end << endl;
162 hold = vector<int>(qScores.begin()+start, qScores.end());
166 if(qScores.size() > end){
167 hold = vector<int>(qScores.begin(), qScores.begin()+end);
172 seqLength = qScores.size();
174 catch(exception& e) {
175 m->errorOut(e, "QualityScores", "trimQScores");
180 /**************************************************************************************************/
182 void QualityScores::flipQScores(){
185 vector<int> temp = qScores;
186 for(int i=0;i<seqLength;i++){
187 qScores[seqLength - i - 1] = temp[i];
191 catch(exception& e) {
192 m->errorOut(e, "QualityScores", "flipQScores");
197 /**************************************************************************************************/
199 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
201 string rawSequence = sequence.getUnaligned();
202 int seqLength = sequence.getNumBases();
204 if(seqName != sequence.getName()){
205 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
206 m->mothurOutEndLine();
210 for(int i=0;i<seqLength;i++){
212 if(qScores[i] < qThreshold){
218 if (end == (seqLength-1)) { end = seqLength; }
220 sequence.setUnaligned(rawSequence.substr(0,end));
221 trimQScores(-1, end);
225 catch(exception& e) {
226 m->errorOut(e, "QualityScores", "flipQScores");
232 /**************************************************************************************************/
234 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
236 string rawSequence = sequence.getUnaligned();
237 int seqLength = sequence.getNumBases();
239 if(seqName != sequence.getName()){
240 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
241 m->mothurOutEndLine();
245 double rollingSum = 0.0000;
247 for(int i=0;i<seqLength;i++){
249 rollingSum += (double)qScores[i];
251 if(rollingSum / (double)(i+1) < qThreshold){
257 if(end == -1){ end = seqLength; }
260 sequence.setUnaligned(rawSequence.substr(0,end));
261 trimQScores(-1, end);
266 catch(exception& e) {
267 m->errorOut(e, "QualityScores", "flipQScores");
273 /**************************************************************************************************/
275 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
277 string rawSequence = sequence.getUnaligned();
278 int seqLength = sequence.getNumBases();
280 if(seqName != sequence.getName()){
281 m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
282 m->mothurOutEndLine();
285 int end = windowSize;
288 if(seqLength < windowSize) { return 0; }
290 while(start < seqLength){
291 double windowSum = 0.0000;
293 for(int i=start;i<end;i++){
294 windowSum += qScores[i];
296 double windowAverage = windowSum / (double)(end-start);
298 if(windowAverage < qThreshold){
299 end = end - stepSize;
303 end = start + windowSize;
304 if(end >= seqLength){ end = seqLength - 1; }
307 if(end == -1){ end = seqLength; }
309 sequence.setUnaligned(rawSequence.substr(0,end));
310 trimQScores(-1, end);
314 catch(exception& e) {
315 m->errorOut(e, "QualityScores", "flipQScores");
321 /**************************************************************************************************/
323 double QualityScores::calculateAverage(){
325 double aveQScore = 0.0000;
327 for(int i=0;i<seqLength;i++){
328 aveQScore += (double) qScores[i];
330 aveQScore /= (double) seqLength;
335 /**************************************************************************************************/
337 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
339 string rawSequence = sequence.getUnaligned();
340 bool success = 0; //guilty until proven innocent
342 if(seqName != sequence.getName()) {
343 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
344 m->mothurOutEndLine();
347 double aveQScore = calculateAverage();
349 if(aveQScore >= qAverage) { success = 1; }
350 else { success = 0; }
354 catch(exception& e) {
355 m->errorOut(e, "QualityScores", "cullQualAverage");
360 /**************************************************************************************************/
362 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
365 int seqLength = errorSeq.size();
367 int qIndex = start - 1;
369 for(int i=0;i<seqLength;i++){
371 if(errorSeq[i] == 'm') { qualErrorMap['m'][qScores[qIndex]] += weight; }
372 else if(errorSeq[i] == 's') { qualErrorMap['s'][qScores[qIndex]] += weight; }
373 else if(errorSeq[i] == 'i') { qualErrorMap['i'][qScores[qIndex]] += weight; }
374 else if(errorSeq[i] == 'a') { qualErrorMap['a'][qScores[qIndex]] += weight; }
375 else if(errorSeq[i] == 'd') { /* there are no qScores for deletions */ }
377 if(errorSeq[i] != 'd') { qIndex++; }
378 if(qIndex > stop){ break; }
381 catch(exception& e) {
382 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
387 /**************************************************************************************************/
389 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
393 for(int i=start-1;i<stop;i++){
394 forwardMap[index++][qScores[i]] += weight;
398 catch(exception& e) {
399 m->errorOut(e, "QualityScores", "updateForwardMap");
404 /**************************************************************************************************/
406 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
410 for(int i=stop-1;i>=start;i--){
411 reverseMap[index++][qScores[i]] += weight;
415 catch(exception& e) {
416 m->errorOut(e, "QualityScores", "updateForwardMap");
421 /**************************************************************************************************/