5 * Created by Pat Schloss on 7/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "qualityscores.h"
12 /**************************************************************************************************/
14 QualityScores::QualityScores(){
16 m = MothurOut::getInstance();
22 m->errorOut(e, "QualityScores", "QualityScores");
27 /**************************************************************************************************/
29 QualityScores::QualityScores(ifstream& qFile){
32 m = MothurOut::getInstance();
35 seqName = getSequenceName(qFile);
37 if (!m->control_pressed) {
38 string qScoreString = m->getline(qFile);
39 //cout << qScoreString << endl;
40 while(qFile.peek() != '>' && qFile.peek() != EOF){
41 if (m->control_pressed) { break; }
42 string temp = m->getline(qFile);
43 //cout << temp << endl;
44 qScoreString += ' ' + temp;
46 //cout << "done reading " << endl;
47 istringstream qScoreStringStream(qScoreString);
49 while(!qScoreStringStream.eof()){
50 if (m->control_pressed) { break; }
52 qScoreStringStream >> temp; m->gobble(qScoreStringStream);
54 //check temp to make sure its a number
55 if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
58 //cout << count << '\t' << score << endl;
59 qScores.push_back(score);
64 seqLength = qScores.size();
65 //cout << "seqlength = " << seqLength << '\t' << count << endl;
69 m->errorOut(e, "QualityScores", "QualityScores");
74 //********************************************************************************************************************
75 string QualityScores::getSequenceName(ifstream& qFile) {
82 if (name.length() != 0) {
84 name = name.substr(1);
86 for (int i = 0; i < name.length(); i++) {
87 if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
90 }else{ m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true; }
95 m->errorOut(e, "QualityScores", "getSequenceName");
99 //********************************************************************************************************************
100 void QualityScores::setName(string name) {
103 for (int i = 0; i < name.length(); i++) {
104 if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
109 catch(exception& e) {
110 m->errorOut(e, "QualityScores", "setName");
114 /**************************************************************************************************/
116 string QualityScores::getName(){
121 catch(exception& e) {
122 m->errorOut(e, "QualityScores", "getName");
127 /**************************************************************************************************/
129 void QualityScores::printQScores(ofstream& qFile){
132 double aveQScore = calculateAverage();
134 qFile << '>' << seqName << '\t' << aveQScore << endl;
136 for(int i=0;i<seqLength;i++){
137 qFile << qScores[i] << ' ';
142 catch(exception& e) {
143 m->errorOut(e, "QualityScores", "printQScores");
148 /**************************************************************************************************/
150 void QualityScores::trimQScores(int start, int end){
155 //cout << seqName << '\t' << start << '\t' << end << '\t' << qScores.size() << endl;
156 //for (int i = 0; i < qScores.size(); i++) { cout << qScores[i] << end; }
158 hold = vector<int>(qScores.begin()+start, qScores.end());
162 if(qScores.size() > end){
163 hold = vector<int>(qScores.begin(), qScores.begin()+end);
168 seqLength = qScores.size();
170 catch(exception& e) {
171 m->errorOut(e, "QualityScores", "trimQScores");
176 /**************************************************************************************************/
178 void QualityScores::flipQScores(){
181 vector<int> temp = qScores;
182 for(int i=0;i<seqLength;i++){
183 qScores[seqLength - i - 1] = temp[i];
187 catch(exception& e) {
188 m->errorOut(e, "QualityScores", "flipQScores");
193 /**************************************************************************************************/
195 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
197 string rawSequence = sequence.getUnaligned();
198 int seqLength = sequence.getNumBases();
200 if(seqName != sequence.getName()){
201 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
202 m->mothurOutEndLine();
206 for(int i=0;i<seqLength;i++){
208 if(qScores[i] < qThreshold){
214 if (end == (seqLength-1)) { end = seqLength; }
216 sequence.setUnaligned(rawSequence.substr(0,end));
217 trimQScores(-1, end);
221 catch(exception& e) {
222 m->errorOut(e, "QualityScores", "flipQScores");
228 /**************************************************************************************************/
230 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
232 string rawSequence = sequence.getUnaligned();
233 int seqLength = sequence.getNumBases();
235 if(seqName != sequence.getName()){
236 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
237 m->mothurOutEndLine();
241 double rollingSum = 0.0000;
243 for(int i=0;i<seqLength;i++){
245 rollingSum += (double)qScores[i];
247 if(rollingSum / (double)(i+1) < qThreshold){
253 if(end == -1){ end = seqLength; }
256 sequence.setUnaligned(rawSequence.substr(0,end));
257 trimQScores(-1, end);
262 catch(exception& e) {
263 m->errorOut(e, "QualityScores", "flipQScores");
269 /**************************************************************************************************/
271 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
273 string rawSequence = sequence.getUnaligned();
274 int seqLength = sequence.getNumBases();
276 if(seqName != sequence.getName()){
277 m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
278 m->mothurOutEndLine();
281 int end = windowSize;
284 if(seqLength < windowSize) { return 0; }
286 while((start+windowSize) < seqLength){
287 double windowSum = 0.0000;
289 for(int i=start;i<end;i++){
290 windowSum += qScores[i];
292 double windowAverage = windowSum / (double)(end-start);
294 if(windowAverage < qThreshold){
295 end = end - stepSize;
300 end = start + windowSize;
302 if(end >= seqLength){ end = seqLength; }
306 if(end == -1){ end = seqLength; }
308 //failed first window
309 if (end < windowSize) { return 0; }
311 sequence.setUnaligned(rawSequence.substr(0,end));
312 trimQScores(-1, end);
316 catch(exception& e) {
317 m->errorOut(e, "QualityScores", "stripQualWindowAverage");
323 /**************************************************************************************************/
325 double QualityScores::calculateAverage(){
327 double aveQScore = 0.0000;
329 for(int i=0;i<seqLength;i++){
330 aveQScore += (double) qScores[i];
332 aveQScore /= (double) seqLength;
337 /**************************************************************************************************/
339 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
341 string rawSequence = sequence.getUnaligned();
342 bool success = 0; //guilty until proven innocent
344 if(seqName != sequence.getName()) {
345 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
346 m->mothurOutEndLine();
349 double aveQScore = calculateAverage();
351 if(aveQScore >= qAverage) { success = 1; }
352 else { success = 0; }
356 catch(exception& e) {
357 m->errorOut(e, "QualityScores", "cullQualAverage");
362 /**************************************************************************************************/
364 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
367 int seqLength = errorSeq.size();
369 int qIndex = start - 1;
371 for(int i=0;i<seqLength;i++){
373 if(errorSeq[i] == 'm') { qualErrorMap['m'][qScores[qIndex]] += weight; }
374 else if(errorSeq[i] == 's') { qualErrorMap['s'][qScores[qIndex]] += weight; }
375 else if(errorSeq[i] == 'i') { qualErrorMap['i'][qScores[qIndex]] += weight; }
376 else if(errorSeq[i] == 'a') { qualErrorMap['a'][qScores[qIndex]] += weight; /*if(qScores[qIndex] != 0){ cout << qIndex << '\t'; }*/ }
377 else if(errorSeq[i] == 'd') { /* there are no qScores for deletions */ }
379 if(errorSeq[i] != 'd') { qIndex++; }
381 if(qIndex > stop){ break; }
384 catch(exception& e) {
385 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
390 /**************************************************************************************************/
392 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
396 for(int i=start-1;i<stop;i++){
397 forwardMap[index++][qScores[i]] += weight;
401 catch(exception& e) {
402 m->errorOut(e, "QualityScores", "updateForwardMap");
407 /**************************************************************************************************/
409 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
413 for(int i=stop-1;i>=start-1;i--){
414 reverseMap[index++][qScores[i]] += weight;
418 catch(exception& e) {
419 m->errorOut(e, "QualityScores", "updateReverseMap");
424 /**************************************************************************************************/