5 * Created by Pat Schloss on 7/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "qualityscores.h"
12 /**************************************************************************************************/
14 QualityScores::QualityScores(){
16 m = MothurOut::getInstance();
22 m->errorOut(e, "QualityScores", "QualityScores");
27 /**************************************************************************************************/
29 QualityScores::QualityScores(ifstream& qFile){
32 m = MothurOut::getInstance();
39 //cout << seqName << endl;
41 m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
42 m->mothurOutEndLine();
45 seqName = seqName.substr(1);
48 string qScoreString = m->getline(qFile);
49 //cout << qScoreString << endl;
50 while(qFile.peek() != '>' && qFile.peek() != EOF){
51 if (m->control_pressed) { break; }
52 string temp = m->getline(qFile);
53 //cout << temp << endl;
54 qScoreString += ' ' + temp;
56 //cout << "done reading " << endl;
57 istringstream qScoreStringStream(qScoreString);
59 while(!qScoreStringStream.eof()){
60 if (m->control_pressed) { break; }
62 qScoreStringStream >> temp; m->gobble(qScoreStringStream);
64 //check temp to make sure its a number
65 if (!m->isContainingOnlyDigits(temp)) { m->mothurOut("[ERROR]: In sequence " + seqName + "'s quality scores, expected a number and got " + temp + ", setting score to 0."); m->mothurOutEndLine(); temp = "0"; }
68 //cout << count << '\t' << score << endl;
69 qScores.push_back(score);
74 // string scores = "";
76 // while(!qFile.eof()){
81 // if (seqName.length() != 0) {
82 // seqName = seqName.substr(1);
83 // while (!qFile.eof()) {
84 // char c = qFile.get();
85 // //gobble junk on line
86 // if (c == 10 || c == 13){ break; }
93 // char letter=qFile.get();
94 // if((letter == '>')){ qFile.putback(letter); break; }
95 // else if (isprint(letter)) { scores += letter; }
102 // //convert scores string to qScores
103 // istringstream qScoreStringStream(scores);
106 // while(!qScoreStringStream.eof()){
108 // if (m->control_pressed) { break; }
110 // qScoreStringStream >> score;
111 // qScores.push_back(score);
114 // qScores.pop_back();
116 seqLength = qScores.size();
117 //cout << "seqlength = " << seqLength << '\t' << count << endl;
120 catch(exception& e) {
121 m->errorOut(e, "QualityScores", "QualityScores");
127 /**************************************************************************************************/
129 string QualityScores::getName(){
134 catch(exception& e) {
135 m->errorOut(e, "QualityScores", "getName");
140 /**************************************************************************************************/
142 void QualityScores::printQScores(ofstream& qFile){
145 double aveQScore = calculateAverage();
147 qFile << '>' << seqName << '\t' << aveQScore << endl;
149 for(int i=0;i<seqLength;i++){
150 qFile << qScores[i] << ' ';
155 catch(exception& e) {
156 m->errorOut(e, "QualityScores", "printQScores");
161 /**************************************************************************************************/
163 void QualityScores::trimQScores(int start, int end){
168 //cout << seqName << '\t' << start << '\t' << end << '\t' << qScores.size() << endl;
169 //for (int i = 0; i < qScores.size(); i++) { cout << qScores[i] << end; }
171 hold = vector<int>(qScores.begin()+start, qScores.end());
175 if(qScores.size() > end){
176 hold = vector<int>(qScores.begin(), qScores.begin()+end);
181 seqLength = qScores.size();
183 catch(exception& e) {
184 m->errorOut(e, "QualityScores", "trimQScores");
189 /**************************************************************************************************/
191 void QualityScores::flipQScores(){
194 vector<int> temp = qScores;
195 for(int i=0;i<seqLength;i++){
196 qScores[seqLength - i - 1] = temp[i];
200 catch(exception& e) {
201 m->errorOut(e, "QualityScores", "flipQScores");
206 /**************************************************************************************************/
208 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
210 string rawSequence = sequence.getUnaligned();
211 int seqLength = sequence.getNumBases();
213 if(seqName != sequence.getName()){
214 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
215 m->mothurOutEndLine();
219 for(int i=0;i<seqLength;i++){
221 if(qScores[i] < qThreshold){
227 if (end == (seqLength-1)) { end = seqLength; }
229 sequence.setUnaligned(rawSequence.substr(0,end));
230 trimQScores(-1, end);
234 catch(exception& e) {
235 m->errorOut(e, "QualityScores", "flipQScores");
241 /**************************************************************************************************/
243 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
245 string rawSequence = sequence.getUnaligned();
246 int seqLength = sequence.getNumBases();
248 if(seqName != sequence.getName()){
249 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
250 m->mothurOutEndLine();
254 double rollingSum = 0.0000;
256 for(int i=0;i<seqLength;i++){
258 rollingSum += (double)qScores[i];
260 if(rollingSum / (double)(i+1) < qThreshold){
266 if(end == -1){ end = seqLength; }
269 sequence.setUnaligned(rawSequence.substr(0,end));
270 trimQScores(-1, end);
275 catch(exception& e) {
276 m->errorOut(e, "QualityScores", "flipQScores");
282 /**************************************************************************************************/
284 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
286 string rawSequence = sequence.getUnaligned();
287 int seqLength = sequence.getNumBases();
289 if(seqName != sequence.getName()){
290 m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
291 m->mothurOutEndLine();
294 int end = windowSize;
297 if(seqLength < windowSize) { return 0; }
299 while((start+windowSize) < seqLength){
300 double windowSum = 0.0000;
302 for(int i=start;i<end;i++){
303 windowSum += qScores[i];
305 double windowAverage = windowSum / (double)(end-start);
307 if(windowAverage < qThreshold){
308 end = end - stepSize;
313 end = start + windowSize;
315 if(end >= seqLength){ end = seqLength; }
319 if(end == -1){ end = seqLength; }
321 //failed first window
322 if (end < windowSize) { return 0; }
324 sequence.setUnaligned(rawSequence.substr(0,end));
325 trimQScores(-1, end);
329 catch(exception& e) {
330 m->errorOut(e, "QualityScores", "stripQualWindowAverage");
336 /**************************************************************************************************/
338 double QualityScores::calculateAverage(){
340 double aveQScore = 0.0000;
342 for(int i=0;i<seqLength;i++){
343 aveQScore += (double) qScores[i];
345 aveQScore /= (double) seqLength;
350 /**************************************************************************************************/
352 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
354 string rawSequence = sequence.getUnaligned();
355 bool success = 0; //guilty until proven innocent
357 if(seqName != sequence.getName()) {
358 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
359 m->mothurOutEndLine();
362 double aveQScore = calculateAverage();
364 if(aveQScore >= qAverage) { success = 1; }
365 else { success = 0; }
369 catch(exception& e) {
370 m->errorOut(e, "QualityScores", "cullQualAverage");
375 /**************************************************************************************************/
377 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
380 int seqLength = errorSeq.size();
382 int qIndex = start - 1;
384 for(int i=0;i<seqLength;i++){
386 if(errorSeq[i] == 'm') { qualErrorMap['m'][qScores[qIndex]] += weight; }
387 else if(errorSeq[i] == 's') { qualErrorMap['s'][qScores[qIndex]] += weight; }
388 else if(errorSeq[i] == 'i') { qualErrorMap['i'][qScores[qIndex]] += weight; }
389 else if(errorSeq[i] == 'a') { qualErrorMap['a'][qScores[qIndex]] += weight; /*if(qScores[qIndex] != 0){ cout << qIndex << '\t'; }*/ }
390 else if(errorSeq[i] == 'd') { /* there are no qScores for deletions */ }
392 if(errorSeq[i] != 'd') { qIndex++; }
394 if(qIndex > stop){ break; }
397 catch(exception& e) {
398 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
403 /**************************************************************************************************/
405 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
409 for(int i=start-1;i<stop;i++){
410 forwardMap[index++][qScores[i]] += weight;
414 catch(exception& e) {
415 m->errorOut(e, "QualityScores", "updateForwardMap");
420 /**************************************************************************************************/
422 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
426 for(int i=stop-1;i>=start;i--){
427 reverseMap[index++][qScores[i]] += weight;
431 catch(exception& e) {
432 m->errorOut(e, "QualityScores", "updateForwardMap");
437 /**************************************************************************************************/