5 * Created by Pat Schloss on 7/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "qualityscores.h"
12 /**************************************************************************************************/
14 QualityScores::QualityScores(){
16 m = MothurOut::getInstance();
22 m->errorOut(e, "QualityScores", "QualityScores");
26 /**************************************************************************************************/
28 QualityScores::QualityScores(string n, vector<int> s){
30 m = MothurOut::getInstance();
35 m->errorOut(e, "QualityScores", "QualityScores");
39 /**************************************************************************************************/
41 QualityScores::QualityScores(ifstream& qFile){
44 m = MothurOut::getInstance();
47 seqName = getSequenceName(qFile); m->gobble(qFile);
49 if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "'\n."); }
51 if (!m->control_pressed) {
52 string qScoreString = m->getline(qFile); m->gobble(qFile);
54 if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + qScoreString + "'\n."); }
56 while(qFile.peek() != '>' && qFile.peek() != EOF){
57 if (m->control_pressed) { break; }
58 string temp = m->getline(qFile); m->gobble(qFile);
59 //if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + temp + "'\n."); }
60 qScoreString += ' ' + temp;
62 //cout << "done reading " << endl;
63 istringstream qScoreStringStream(qScoreString);
65 while(!qScoreStringStream.eof()){
66 if (m->control_pressed) { break; }
68 qScoreStringStream >> temp; m->gobble(qScoreStringStream);
70 //if (m->debug) { m->mothurOut("[DEBUG]: score " + toString(qScores.size()) + " = '" + temp + "'\n."); }
72 //check temp to make sure its a number
73 if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
76 //cout << count << '\t' << score << endl;
77 qScores.push_back(score);
82 seqLength = qScores.size();
83 //cout << "seqlength = " << seqLength << endl;
87 m->errorOut(e, "QualityScores", "QualityScores");
92 //********************************************************************************************************************
93 string QualityScores::getSequenceName(ifstream& qFile) {
100 if (name.length() != 0) {
102 name = name.substr(1);
106 }else{ m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true; }
110 catch(exception& e) {
111 m->errorOut(e, "QualityScores", "getSequenceName");
115 //********************************************************************************************************************
116 void QualityScores::setName(string name) {
122 catch(exception& e) {
123 m->errorOut(e, "QualityScores", "setName");
127 /**************************************************************************************************/
129 string QualityScores::getName(){
134 catch(exception& e) {
135 m->errorOut(e, "QualityScores", "getName");
140 /**************************************************************************************************/
142 void QualityScores::printQScores(ofstream& qFile){
145 double aveQScore = calculateAverage(false);
147 qFile << '>' << seqName << '\t' << aveQScore << endl;
149 for(int i=0;i<seqLength;i++){
150 qFile << qScores[i] << ' ';
155 catch(exception& e) {
156 m->errorOut(e, "QualityScores", "printQScores");
161 /**************************************************************************************************/
163 void QualityScores::trimQScores(int start, int end){
168 //cout << seqName << '\t' << start << '\t' << end << '\t' << qScores.size() << endl;
169 //for (int i = 0; i < qScores.size(); i++) { cout << qScores[i] << end; }
171 hold = vector<int>(qScores.begin()+start, qScores.end());
175 if(qScores.size() > end){
176 hold = vector<int>(qScores.begin(), qScores.begin()+end);
181 seqLength = qScores.size();
183 catch(exception& e) {
184 m->errorOut(e, "QualityScores", "trimQScores");
189 /**************************************************************************************************/
191 void QualityScores::flipQScores(){
194 vector<int> temp = qScores;
195 for(int i=0;i<seqLength;i++){
196 qScores[seqLength - i - 1] = temp[i];
200 catch(exception& e) {
201 m->errorOut(e, "QualityScores", "flipQScores");
206 /**************************************************************************************************/
208 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
210 string rawSequence = sequence.getUnaligned();
211 int seqLength = sequence.getNumBases();
213 if(seqName != sequence.getName()){
214 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
215 m->mothurOutEndLine(); m->control_pressed = true;
219 for(int i=0;i<seqLength;i++){
221 if(qScores[i] < qThreshold){
227 if (end == (seqLength-1)) { end = seqLength; }
229 sequence.setUnaligned(rawSequence.substr(0,end));
230 trimQScores(-1, end);
234 catch(exception& e) {
235 m->errorOut(e, "QualityScores", "flipQScores");
241 /**************************************************************************************************/
243 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold, bool logTransform){
245 string rawSequence = sequence.getUnaligned();
246 int seqLength = sequence.getNumBases();
248 if(seqName != sequence.getName()){
249 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
250 m->mothurOutEndLine();
254 double rollingSum = 0.0000;
257 for(int i=0;i<seqLength;i++){
260 rollingSum += (double)pow(10.0, qScores[i]);
261 value = log10(rollingSum / (double)(i+1));
265 rollingSum += (double)qScores[i];
266 value = rollingSum / (double)(i+1);
270 if(value < qThreshold){
276 if(end == -1){ end = seqLength; }
279 sequence.setUnaligned(rawSequence.substr(0,end));
280 trimQScores(-1, end);
285 catch(exception& e) {
286 m->errorOut(e, "QualityScores", "flipQScores");
292 /**************************************************************************************************/
294 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold, bool logTransform){
296 string rawSequence = sequence.getUnaligned();
297 int seqLength = sequence.getNumBases();
299 if(seqName != sequence.getName()){
300 m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
301 m->mothurOutEndLine();
304 int end = windowSize;
307 if(seqLength < windowSize) { return 0; }
309 while((start+windowSize) < seqLength){
310 double windowSum = 0.0000;
312 for(int i=start;i<end;i++){
313 if (logTransform) { windowSum += pow(10.0, qScores[i]); }
314 else { windowSum += qScores[i]; }
316 double windowAverage = 0.0;
317 if (logTransform) { windowAverage = log10(windowSum / (double)(end-start)); }
318 else { windowAverage = windowSum / (double)(end-start); }
320 if(windowAverage < qThreshold){
321 end = end - stepSize;
326 end = start + windowSize;
328 if(end >= seqLength){ end = seqLength; }
332 if(end == -1){ end = seqLength; }
334 //failed first window
335 if (end < windowSize) { return 0; }
337 sequence.setUnaligned(rawSequence.substr(0,end));
338 trimQScores(-1, end);
342 catch(exception& e) {
343 m->errorOut(e, "QualityScores", "stripQualWindowAverage");
349 /**************************************************************************************************/
351 double QualityScores::calculateAverage(bool logTransform){
353 double aveQScore = 0.0000;
355 for(int i=0;i<seqLength;i++){
356 if (logTransform) { aveQScore += pow(10.0, qScores[i]); }
357 else { aveQScore += qScores[i]; }
360 if (logTransform) { aveQScore = log10(aveQScore /(double) seqLength); }
361 else { aveQScore /= (double) seqLength; }
366 /**************************************************************************************************/
368 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage, bool logTransform){
370 string rawSequence = sequence.getUnaligned();
371 bool success = 0; //guilty until proven innocent
373 if(seqName != sequence.getName()) {
374 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
375 m->mothurOutEndLine();
378 double aveQScore = calculateAverage(logTransform);
380 if (m->debug) { m->mothurOut("[DEBUG]: " + sequence.getName() + " average = " + toString(aveQScore) + "\n"); }
382 if(aveQScore >= qAverage) { success = 1; }
383 else { success = 0; }
387 catch(exception& e) {
388 m->errorOut(e, "QualityScores", "cullQualAverage");
393 /**************************************************************************************************/
395 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
398 int seqLength = errorSeq.size();
400 int qIndex = start - 1;
402 for(int i=0;i<seqLength;i++){
404 if(errorSeq[i] == 'm') { qualErrorMap['m'][qScores[qIndex]] += weight; }
405 else if(errorSeq[i] == 's') { qualErrorMap['s'][qScores[qIndex]] += weight; }
406 else if(errorSeq[i] == 'i') { qualErrorMap['i'][qScores[qIndex]] += weight; }
407 else if(errorSeq[i] == 'a') { qualErrorMap['a'][qScores[qIndex]] += weight; /*if(qScores[qIndex] != 0){ cout << qIndex << '\t'; }*/ }
408 else if(errorSeq[i] == 'd') { /* there are no qScores for deletions */ }
410 if(errorSeq[i] != 'd') { qIndex++; }
412 if(qIndex > stop){ break; }
415 catch(exception& e) {
416 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
421 /**************************************************************************************************/
423 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
427 for(int i=start-1;i<stop;i++){
428 forwardMap[index++][qScores[i]] += weight;
432 catch(exception& e) {
433 m->errorOut(e, "QualityScores", "updateForwardMap");
438 /**************************************************************************************************/
440 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
444 for(int i=stop-1;i>=start-1;i--){
445 reverseMap[index++][qScores[i]] += weight;
449 catch(exception& e) {
450 m->errorOut(e, "QualityScores", "updateReverseMap");
455 /**************************************************************************************************/