5 * Created by Pat Schloss on 7/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "qualityscores.h"
12 /**************************************************************************************************/
14 QualityScores::QualityScores(){
16 m = MothurOut::getInstance();
22 m->errorOut(e, "QualityScores", "QualityScores");
27 /**************************************************************************************************/
29 QualityScores::QualityScores(ifstream& qFile){
32 m = MothurOut::getInstance();
35 seqName = getSequenceName(qFile); m->gobble(qFile);
37 if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "'\n."); }
39 if (!m->control_pressed) {
40 string qScoreString = m->getline(qFile); m->gobble(qFile);
42 if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + qScoreString + "'\n."); }
44 while(qFile.peek() != '>' && qFile.peek() != EOF){
45 if (m->control_pressed) { break; }
46 string temp = m->getline(qFile); m->gobble(qFile);
47 //if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + temp + "'\n."); }
48 qScoreString += ' ' + temp;
50 //cout << "done reading " << endl;
51 istringstream qScoreStringStream(qScoreString);
53 while(!qScoreStringStream.eof()){
54 if (m->control_pressed) { break; }
56 qScoreStringStream >> temp; m->gobble(qScoreStringStream);
58 //if (m->debug) { m->mothurOut("[DEBUG]: score " + toString(qScores.size()) + " = '" + temp + "'\n."); }
60 //check temp to make sure its a number
61 if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
64 //cout << count << '\t' << score << endl;
65 qScores.push_back(score);
70 seqLength = qScores.size();
71 //cout << "seqlength = " << seqLength << endl;
75 m->errorOut(e, "QualityScores", "QualityScores");
80 //********************************************************************************************************************
81 string QualityScores::getSequenceName(ifstream& qFile) {
88 if (name.length() != 0) {
90 name = name.substr(1);
94 }else{ m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true; }
99 m->errorOut(e, "QualityScores", "getSequenceName");
103 //********************************************************************************************************************
104 void QualityScores::setName(string name) {
110 catch(exception& e) {
111 m->errorOut(e, "QualityScores", "setName");
115 /**************************************************************************************************/
117 string QualityScores::getName(){
122 catch(exception& e) {
123 m->errorOut(e, "QualityScores", "getName");
128 /**************************************************************************************************/
130 void QualityScores::printQScores(ofstream& qFile){
133 double aveQScore = calculateAverage(false);
135 qFile << '>' << seqName << '\t' << aveQScore << endl;
137 for(int i=0;i<seqLength;i++){
138 qFile << qScores[i] << ' ';
143 catch(exception& e) {
144 m->errorOut(e, "QualityScores", "printQScores");
149 /**************************************************************************************************/
151 void QualityScores::trimQScores(int start, int end){
156 //cout << seqName << '\t' << start << '\t' << end << '\t' << qScores.size() << endl;
157 //for (int i = 0; i < qScores.size(); i++) { cout << qScores[i] << end; }
159 hold = vector<int>(qScores.begin()+start, qScores.end());
163 if(qScores.size() > end){
164 hold = vector<int>(qScores.begin(), qScores.begin()+end);
169 seqLength = qScores.size();
171 catch(exception& e) {
172 m->errorOut(e, "QualityScores", "trimQScores");
177 /**************************************************************************************************/
179 void QualityScores::flipQScores(){
182 vector<int> temp = qScores;
183 for(int i=0;i<seqLength;i++){
184 qScores[seqLength - i - 1] = temp[i];
188 catch(exception& e) {
189 m->errorOut(e, "QualityScores", "flipQScores");
194 /**************************************************************************************************/
196 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
198 string rawSequence = sequence.getUnaligned();
199 int seqLength = sequence.getNumBases();
201 if(seqName != sequence.getName()){
202 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
203 m->mothurOutEndLine(); m->control_pressed = true;
207 for(int i=0;i<seqLength;i++){
209 if(qScores[i] < qThreshold){
215 if (end == (seqLength-1)) { end = seqLength; }
217 sequence.setUnaligned(rawSequence.substr(0,end));
218 trimQScores(-1, end);
222 catch(exception& e) {
223 m->errorOut(e, "QualityScores", "flipQScores");
229 /**************************************************************************************************/
231 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold, bool logTransform){
233 string rawSequence = sequence.getUnaligned();
234 int seqLength = sequence.getNumBases();
236 if(seqName != sequence.getName()){
237 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
238 m->mothurOutEndLine();
242 double rollingSum = 0.0000;
245 for(int i=0;i<seqLength;i++){
248 rollingSum += (double)pow(10.0, qScores[i]);
249 value = log10(rollingSum / (double)(i+1));
253 rollingSum += (double)qScores[i];
254 value = rollingSum / (double)(i+1);
258 if(value < qThreshold){
264 if(end == -1){ end = seqLength; }
267 sequence.setUnaligned(rawSequence.substr(0,end));
268 trimQScores(-1, end);
273 catch(exception& e) {
274 m->errorOut(e, "QualityScores", "flipQScores");
280 /**************************************************************************************************/
282 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold, bool logTransform){
284 string rawSequence = sequence.getUnaligned();
285 int seqLength = sequence.getNumBases();
287 if(seqName != sequence.getName()){
288 m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
289 m->mothurOutEndLine();
292 int end = windowSize;
295 if(seqLength < windowSize) { return 0; }
297 while((start+windowSize) < seqLength){
298 double windowSum = 0.0000;
300 for(int i=start;i<end;i++){
301 if (logTransform) { windowSum += pow(10.0, qScores[i]); }
302 else { windowSum += qScores[i]; }
304 double windowAverage = 0.0;
305 if (logTransform) { windowAverage = log10(windowSum / (double)(end-start)); }
306 else { windowAverage = windowSum / (double)(end-start); }
308 if(windowAverage < qThreshold){
309 end = end - stepSize;
314 end = start + windowSize;
316 if(end >= seqLength){ end = seqLength; }
320 if(end == -1){ end = seqLength; }
322 //failed first window
323 if (end < windowSize) { return 0; }
325 sequence.setUnaligned(rawSequence.substr(0,end));
326 trimQScores(-1, end);
330 catch(exception& e) {
331 m->errorOut(e, "QualityScores", "stripQualWindowAverage");
337 /**************************************************************************************************/
339 double QualityScores::calculateAverage(bool logTransform){
341 double aveQScore = 0.0000;
343 for(int i=0;i<seqLength;i++){
344 if (logTransform) { aveQScore += pow(10.0, qScores[i]); }
345 else { aveQScore += qScores[i]; }
347 aveQScore /= (double) seqLength;
349 if (logTransform) { aveQScore = log10(aveQScore /(double) seqLength); }
350 else { aveQScore /= (double) seqLength; }
355 /**************************************************************************************************/
357 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage, bool logTransform){
359 string rawSequence = sequence.getUnaligned();
360 bool success = 0; //guilty until proven innocent
362 if(seqName != sequence.getName()) {
363 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
364 m->mothurOutEndLine();
367 double aveQScore = calculateAverage(logTransform);
369 if(aveQScore >= qAverage) { success = 1; }
370 else { success = 0; }
374 catch(exception& e) {
375 m->errorOut(e, "QualityScores", "cullQualAverage");
380 /**************************************************************************************************/
382 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
385 int seqLength = errorSeq.size();
387 int qIndex = start - 1;
389 for(int i=0;i<seqLength;i++){
391 if(errorSeq[i] == 'm') { qualErrorMap['m'][qScores[qIndex]] += weight; }
392 else if(errorSeq[i] == 's') { qualErrorMap['s'][qScores[qIndex]] += weight; }
393 else if(errorSeq[i] == 'i') { qualErrorMap['i'][qScores[qIndex]] += weight; }
394 else if(errorSeq[i] == 'a') { qualErrorMap['a'][qScores[qIndex]] += weight; /*if(qScores[qIndex] != 0){ cout << qIndex << '\t'; }*/ }
395 else if(errorSeq[i] == 'd') { /* there are no qScores for deletions */ }
397 if(errorSeq[i] != 'd') { qIndex++; }
399 if(qIndex > stop){ break; }
402 catch(exception& e) {
403 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
408 /**************************************************************************************************/
410 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
414 for(int i=start-1;i<stop;i++){
415 forwardMap[index++][qScores[i]] += weight;
419 catch(exception& e) {
420 m->errorOut(e, "QualityScores", "updateForwardMap");
425 /**************************************************************************************************/
427 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
431 for(int i=stop-1;i>=start-1;i--){
432 reverseMap[index++][qScores[i]] += weight;
436 catch(exception& e) {
437 m->errorOut(e, "QualityScores", "updateReverseMap");
442 /**************************************************************************************************/