5 * Created by Pat Schloss on 7/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "qualityscores.h"
12 /**************************************************************************************************/
14 QualityScores::QualityScores(){
16 m = MothurOut::getInstance();
22 m->errorOut(e, "QualityScores", "QualityScores");
27 /**************************************************************************************************/
29 QualityScores::QualityScores(ifstream& qFile){
32 m = MothurOut::getInstance();
35 seqName = getSequenceName(qFile);
37 if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "'\n."); }
39 if (!m->control_pressed) {
40 string qScoreString = m->getline(qFile);
42 if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + qScoreString + "'\n."); }
44 while(qFile.peek() != '>' && qFile.peek() != EOF){
45 if (m->control_pressed) { break; }
46 string temp = m->getline(qFile);
47 if (m->debug) { m->mothurOut("[DEBUG]: scores = '" + temp + "'\n."); }
48 qScoreString += ' ' + temp;
50 //cout << "done reading " << endl;
51 istringstream qScoreStringStream(qScoreString);
53 while(!qScoreStringStream.eof()){
54 if (m->control_pressed) { break; }
56 qScoreStringStream >> temp; m->gobble(qScoreStringStream);
58 if (m->debug) { m->mothurOut("[DEBUG]: score " + toString(qScores.size()) + " = '" + temp + "'\n."); }
60 //check temp to make sure its a number
61 if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
64 //cout << count << '\t' << score << endl;
65 qScores.push_back(score);
70 seqLength = qScores.size();
71 //cout << "seqlength = " << seqLength << '\t' << count << endl;
75 m->errorOut(e, "QualityScores", "QualityScores");
80 //********************************************************************************************************************
81 string QualityScores::getSequenceName(ifstream& qFile) {
88 if (name.length() != 0) {
90 name = name.substr(1);
92 for (int i = 0; i < name.length(); i++) {
93 if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
96 }else{ m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true; }
100 catch(exception& e) {
101 m->errorOut(e, "QualityScores", "getSequenceName");
105 //********************************************************************************************************************
106 void QualityScores::setName(string name) {
109 for (int i = 0; i < name.length(); i++) {
110 if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
115 catch(exception& e) {
116 m->errorOut(e, "QualityScores", "setName");
120 /**************************************************************************************************/
122 string QualityScores::getName(){
127 catch(exception& e) {
128 m->errorOut(e, "QualityScores", "getName");
133 /**************************************************************************************************/
135 void QualityScores::printQScores(ofstream& qFile){
138 double aveQScore = calculateAverage();
140 qFile << '>' << seqName << '\t' << aveQScore << endl;
142 for(int i=0;i<seqLength;i++){
143 qFile << qScores[i] << ' ';
148 catch(exception& e) {
149 m->errorOut(e, "QualityScores", "printQScores");
154 /**************************************************************************************************/
156 void QualityScores::trimQScores(int start, int end){
161 //cout << seqName << '\t' << start << '\t' << end << '\t' << qScores.size() << endl;
162 //for (int i = 0; i < qScores.size(); i++) { cout << qScores[i] << end; }
164 hold = vector<int>(qScores.begin()+start, qScores.end());
168 if(qScores.size() > end){
169 hold = vector<int>(qScores.begin(), qScores.begin()+end);
174 seqLength = qScores.size();
176 catch(exception& e) {
177 m->errorOut(e, "QualityScores", "trimQScores");
182 /**************************************************************************************************/
184 void QualityScores::flipQScores(){
187 vector<int> temp = qScores;
188 for(int i=0;i<seqLength;i++){
189 qScores[seqLength - i - 1] = temp[i];
193 catch(exception& e) {
194 m->errorOut(e, "QualityScores", "flipQScores");
199 /**************************************************************************************************/
201 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
203 string rawSequence = sequence.getUnaligned();
204 int seqLength = sequence.getNumBases();
206 if(seqName != sequence.getName()){
207 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
208 m->mothurOutEndLine();
212 for(int i=0;i<seqLength;i++){
214 if(qScores[i] < qThreshold){
220 if (end == (seqLength-1)) { end = seqLength; }
222 sequence.setUnaligned(rawSequence.substr(0,end));
223 trimQScores(-1, end);
227 catch(exception& e) {
228 m->errorOut(e, "QualityScores", "flipQScores");
234 /**************************************************************************************************/
236 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
238 string rawSequence = sequence.getUnaligned();
239 int seqLength = sequence.getNumBases();
241 if(seqName != sequence.getName()){
242 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
243 m->mothurOutEndLine();
247 double rollingSum = 0.0000;
249 for(int i=0;i<seqLength;i++){
251 rollingSum += (double)qScores[i];
253 if(rollingSum / (double)(i+1) < qThreshold){
259 if(end == -1){ end = seqLength; }
262 sequence.setUnaligned(rawSequence.substr(0,end));
263 trimQScores(-1, end);
268 catch(exception& e) {
269 m->errorOut(e, "QualityScores", "flipQScores");
275 /**************************************************************************************************/
277 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
279 string rawSequence = sequence.getUnaligned();
280 int seqLength = sequence.getNumBases();
282 if(seqName != sequence.getName()){
283 m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
284 m->mothurOutEndLine();
287 int end = windowSize;
290 if(seqLength < windowSize) { return 0; }
292 while((start+windowSize) < seqLength){
293 double windowSum = 0.0000;
295 for(int i=start;i<end;i++){
296 windowSum += qScores[i];
298 double windowAverage = windowSum / (double)(end-start);
300 if(windowAverage < qThreshold){
301 end = end - stepSize;
306 end = start + windowSize;
308 if(end >= seqLength){ end = seqLength; }
312 if(end == -1){ end = seqLength; }
314 //failed first window
315 if (end < windowSize) { return 0; }
317 sequence.setUnaligned(rawSequence.substr(0,end));
318 trimQScores(-1, end);
322 catch(exception& e) {
323 m->errorOut(e, "QualityScores", "stripQualWindowAverage");
329 /**************************************************************************************************/
331 double QualityScores::calculateAverage(){
333 double aveQScore = 0.0000;
335 for(int i=0;i<seqLength;i++){
336 aveQScore += (double) qScores[i];
338 aveQScore /= (double) seqLength;
343 /**************************************************************************************************/
345 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
347 string rawSequence = sequence.getUnaligned();
348 bool success = 0; //guilty until proven innocent
350 if(seqName != sequence.getName()) {
351 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
352 m->mothurOutEndLine();
355 double aveQScore = calculateAverage();
357 if(aveQScore >= qAverage) { success = 1; }
358 else { success = 0; }
362 catch(exception& e) {
363 m->errorOut(e, "QualityScores", "cullQualAverage");
368 /**************************************************************************************************/
370 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
373 int seqLength = errorSeq.size();
375 int qIndex = start - 1;
377 for(int i=0;i<seqLength;i++){
379 if(errorSeq[i] == 'm') { qualErrorMap['m'][qScores[qIndex]] += weight; }
380 else if(errorSeq[i] == 's') { qualErrorMap['s'][qScores[qIndex]] += weight; }
381 else if(errorSeq[i] == 'i') { qualErrorMap['i'][qScores[qIndex]] += weight; }
382 else if(errorSeq[i] == 'a') { qualErrorMap['a'][qScores[qIndex]] += weight; /*if(qScores[qIndex] != 0){ cout << qIndex << '\t'; }*/ }
383 else if(errorSeq[i] == 'd') { /* there are no qScores for deletions */ }
385 if(errorSeq[i] != 'd') { qIndex++; }
387 if(qIndex > stop){ break; }
390 catch(exception& e) {
391 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
396 /**************************************************************************************************/
398 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
402 for(int i=start-1;i<stop;i++){
403 forwardMap[index++][qScores[i]] += weight;
407 catch(exception& e) {
408 m->errorOut(e, "QualityScores", "updateForwardMap");
413 /**************************************************************************************************/
415 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
419 for(int i=stop-1;i>=start-1;i--){
420 reverseMap[index++][qScores[i]] += weight;
424 catch(exception& e) {
425 m->errorOut(e, "QualityScores", "updateReverseMap");
430 /**************************************************************************************************/