5 * Created by Pat Schloss on 7/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "qualityscores.h"
12 /**************************************************************************************************/
14 QualityScores::QualityScores(){
16 m = MothurOut::getInstance();
22 m->errorOut(e, "QualityScores", "QualityScores");
27 /**************************************************************************************************/
29 QualityScores::QualityScores(ifstream& qFile){
32 m = MothurOut::getInstance();
41 m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
42 m->mothurOutEndLine();
45 seqName = seqName.substr(1);
47 string qScoreString = m->getline(qFile);
48 while(qFile.peek() != '>' && qFile.peek() != EOF){
49 qScoreString += ' ' + m->getline(qFile);
52 istringstream qScoreStringStream(qScoreString);
54 while(!qScoreStringStream.eof()){
55 if (m->control_pressed) { break; }
56 qScoreStringStream >> score;
57 qScores.push_back(score);
62 // string scores = "";
64 // while(!qFile.eof()){
69 // if (seqName.length() != 0) {
70 // seqName = seqName.substr(1);
71 // while (!qFile.eof()) {
72 // char c = qFile.get();
73 // //gobble junk on line
74 // if (c == 10 || c == 13){ break; }
81 // char letter=qFile.get();
82 // if((letter == '>')){ qFile.putback(letter); break; }
83 // else if (isprint(letter)) { scores += letter; }
85 // cout << scores << endl;
91 // //convert scores string to qScores
92 // istringstream qScoreStringStream(scores);
95 // while(!qScoreStringStream.eof()){
97 // if (m->control_pressed) { break; }
99 // qScoreStringStream >> score;
100 // qScores.push_back(score);
103 // qScores.pop_back();
105 seqLength = qScores.size();
109 catch(exception& e) {
110 m->errorOut(e, "QualityScores", "QualityScores");
116 /**************************************************************************************************/
118 string QualityScores::getName(){
121 cout << qScores.size() << '\t';
124 catch(exception& e) {
125 m->errorOut(e, "QualityScores", "getName");
130 /**************************************************************************************************/
132 void QualityScores::printQScores(ofstream& qFile){
135 double aveQScore = calculateAverage();
137 qFile << '>' << seqName << '\t' << aveQScore << endl;
139 for(int i=0;i<seqLength;i++){
140 qFile << qScores[i] << ' ';
145 catch(exception& e) {
146 m->errorOut(e, "QualityScores", "printQScores");
151 /**************************************************************************************************/
153 void QualityScores::trimQScores(int start, int end){
157 cout << seqName << '\t' << qScores.size() << '\t' << start << '\t' << end << endl;
160 hold = vector<int>(qScores.begin()+start, qScores.end());
164 if(qScores.size() > end){
165 hold = vector<int>(qScores.begin(), qScores.begin()+end);
170 seqLength = qScores.size();
172 catch(exception& e) {
173 m->errorOut(e, "QualityScores", "trimQScores");
178 /**************************************************************************************************/
180 void QualityScores::flipQScores(){
183 vector<int> temp = qScores;
184 for(int i=0;i<seqLength;i++){
185 qScores[seqLength - i - 1] = temp[i];
189 catch(exception& e) {
190 m->errorOut(e, "QualityScores", "flipQScores");
195 /**************************************************************************************************/
197 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
199 string rawSequence = sequence.getUnaligned();
200 int seqLength = sequence.getNumBases();
202 if(seqName != sequence.getName()){
203 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
204 m->mothurOutEndLine();
208 for(int i=0;i<seqLength;i++){
210 if(qScores[i] < qThreshold){
216 if (end == (seqLength-1)) { end = seqLength; }
218 sequence.setUnaligned(rawSequence.substr(0,end));
219 trimQScores(-1, end);
223 catch(exception& e) {
224 m->errorOut(e, "QualityScores", "flipQScores");
230 /**************************************************************************************************/
232 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
234 string rawSequence = sequence.getUnaligned();
235 int seqLength = sequence.getNumBases();
237 if(seqName != sequence.getName()){
238 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
239 m->mothurOutEndLine();
243 double rollingSum = 0.0000;
245 for(int i=0;i<seqLength;i++){
247 rollingSum += (double)qScores[i];
249 if(rollingSum / (double)(i+1) < qThreshold){
255 if(end == -1){ end = seqLength; }
258 sequence.setUnaligned(rawSequence.substr(0,end));
259 trimQScores(-1, end);
264 catch(exception& e) {
265 m->errorOut(e, "QualityScores", "flipQScores");
271 /**************************************************************************************************/
273 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
275 string rawSequence = sequence.getUnaligned();
276 int seqLength = sequence.getNumBases();
278 if(seqName != sequence.getName()){
279 m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
280 m->mothurOutEndLine();
283 int end = windowSize;
286 if(seqLength < windowSize) { return 0; }
288 while(start < seqLength){
289 double windowSum = 0.0000;
291 for(int i=start;i<end;i++){
292 windowSum += qScores[i];
294 double windowAverage = windowSum / (double)(end-start);
296 if(windowAverage < qThreshold){
297 end = end - stepSize;
301 end = start + windowSize;
302 if(end >= seqLength){ end = seqLength - 1; }
305 if(end == -1){ end = seqLength; }
307 sequence.setUnaligned(rawSequence.substr(0,end));
308 trimQScores(-1, end);
312 catch(exception& e) {
313 m->errorOut(e, "QualityScores", "flipQScores");
319 /**************************************************************************************************/
321 double QualityScores::calculateAverage(){
323 double aveQScore = 0.0000;
325 for(int i=0;i<seqLength;i++){
326 aveQScore += (double) qScores[i];
328 aveQScore /= (double) seqLength;
333 /**************************************************************************************************/
335 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
337 string rawSequence = sequence.getUnaligned();
338 bool success = 0; //guilty until proven innocent
340 if(seqName != sequence.getName()) {
341 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
342 m->mothurOutEndLine();
345 double aveQScore = calculateAverage();
347 if(aveQScore >= qAverage) { success = 1; }
348 else { success = 0; }
352 catch(exception& e) {
353 m->errorOut(e, "QualityScores", "cullQualAverage");
358 /**************************************************************************************************/
360 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
363 int seqLength = errorSeq.size();
365 int qIndex = start - 1;
367 for(int i=0;i<seqLength;i++){
369 if(errorSeq[i] == 'm') { qualErrorMap['m'][qScores[qIndex]] += weight; }
370 else if(errorSeq[i] == 's') { qualErrorMap['s'][qScores[qIndex]] += weight; }
371 else if(errorSeq[i] == 'i') { qualErrorMap['i'][qScores[qIndex]] += weight; }
372 else if(errorSeq[i] == 'a') { qualErrorMap['a'][qScores[qIndex]] += weight; }
373 else if(errorSeq[i] == 'd') { /* there are no qScores for deletions */ }
375 if(errorSeq[i] != 'd') { qIndex++; }
376 if(qIndex > stop){ break; }
379 catch(exception& e) {
380 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
385 /**************************************************************************************************/
387 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
391 for(int i=start-1;i<stop;i++){
392 forwardMap[index++][qScores[i]] += weight;
396 catch(exception& e) {
397 m->errorOut(e, "QualityScores", "updateForwardMap");
402 /**************************************************************************************************/
404 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
408 for(int i=stop-1;i>=start;i--){
409 reverseMap[index++][qScores[i]] += weight;
413 catch(exception& e) {
414 m->errorOut(e, "QualityScores", "updateForwardMap");
419 /**************************************************************************************************/