5 * Created by Pat Schloss on 7/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "qualityscores.h"
12 /**************************************************************************************************/
14 QualityScores::QualityScores(){
16 m = MothurOut::getInstance();
21 m->errorOut(e, "QualityScores", "QualityScores");
26 /**************************************************************************************************/
28 QualityScores::QualityScores(ifstream& qFile, int l){
31 m = MothurOut::getInstance();
39 while (!qFile.eof()) { char c = qFile.get(); if (c == 10 || c == 13 || c == -1){ break; } } // get rest of line
41 if (seqName == "") { m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg())); m->mothurOutEndLine(); }
43 seqName = seqName.substr(1);
46 //m->getline(qFile, line);
47 //istringstream qualStream(line);
50 // qualStream >> score;
51 // qScores.push_back(score);
55 //seqLength = qScores.size();
63 //cout << name << endl;
64 if (name.length() != 0) {
65 saveName = name.substr(1);
68 if (c == 10 || c == 13){ break; }
75 char letter= in.get();
76 if(letter == '>'){ in.putback(letter); break; }
77 else{ scores += letter; }
80 //istringstream iss (scores,istringstream::in);
82 //int count = 0; int tempScore;
83 //while (iss) { iss >> tempScore; count++; }
84 //cout << saveName << '\t' << count << endl;
91 for(int i=0;i<seqLength;i++){
93 qScores.push_back(score);
99 m->errorOut(e, "QualityScores", "QualityScores");
104 /**************************************************************************************************/
106 string QualityScores::getName(){
111 catch(exception& e) {
112 m->errorOut(e, "QualityScores", "getName");
117 /**************************************************************************************************/
119 void QualityScores::printQScores(ofstream& qFile){
122 double aveQScore = calculateAverage();
124 qFile << '>' << seqName << '\t' << aveQScore << endl;
126 for(int i=0;i<seqLength;i++){
127 qFile << qScores[i] << ' ';
132 catch(exception& e) {
133 m->errorOut(e, "QualityScores", "printQScores");
138 /**************************************************************************************************/
140 void QualityScores::trimQScores(int start, int end){
145 hold = vector<int>(qScores.begin()+start, qScores.end());
149 if(qScores.size() > end){
150 hold = vector<int>(qScores.begin(), qScores.begin()+end);
155 seqLength = qScores.size();
157 catch(exception& e) {
158 m->errorOut(e, "QualityScores", "trimQScores");
163 /**************************************************************************************************/
165 void QualityScores::flipQScores(){
168 vector<int> temp = qScores;
169 for(int i=0;i<seqLength;i++){
170 qScores[seqLength - i - 1] = temp[i];
174 catch(exception& e) {
175 m->errorOut(e, "QualityScores", "flipQScores");
180 /**************************************************************************************************/
182 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
184 string rawSequence = sequence.getUnaligned();
185 int seqLength = sequence.getNumBases();
187 if(seqName != sequence.getName()){
188 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
189 m->mothurOutEndLine();
193 for(int i=0;i<seqLength;i++){
195 if(qScores[i] < qThreshold){
200 sequence.setUnaligned(rawSequence.substr(0,end));
201 trimQScores(-1, end);
205 catch(exception& e) {
206 m->errorOut(e, "QualityScores", "flipQScores");
212 /**************************************************************************************************/
214 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
216 string rawSequence = sequence.getUnaligned();
217 int seqLength = sequence.getNumBases();
219 if(seqName != sequence.getName()){
220 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
221 m->mothurOutEndLine();
225 double rollingSum = 0.0000;
227 for(int i=0;i<seqLength;i++){
229 rollingSum += (double)qScores[i];
231 if(rollingSum / (double)(i+1) < qThreshold){
233 // cout << i+1 << '\t' << seqName << '\t' << rollingSum / (double)(i+1) << endl;
238 if(end == -1){ end = seqLength; }
240 sequence.setUnaligned(rawSequence.substr(0,end));
241 trimQScores(-1, end);
245 catch(exception& e) {
246 m->errorOut(e, "QualityScores", "flipQScores");
252 /**************************************************************************************************/
254 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
256 string rawSequence = sequence.getUnaligned();
257 int seqLength = sequence.getNumBases();
259 if(seqName != sequence.getName()){
260 m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
261 m->mothurOutEndLine();
264 int end = windowSize;
267 if(seqLength < windowSize) { return 0; }
269 while(start < seqLength){
270 double windowSum = 0.0000;
272 for(int i=start;i<end;i++){
273 windowSum += qScores[i];
275 double windowAverage = windowSum / (double)(end-start);
277 if(windowAverage < qThreshold){
278 end = end - stepSize;
282 end = start + windowSize;
283 if(end >= seqLength){ end = seqLength - 1; }
287 if(end == -1){ end = seqLength; }
289 sequence.setUnaligned(rawSequence.substr(0,end));
290 trimQScores(-1, end);
294 catch(exception& e) {
295 m->errorOut(e, "QualityScores", "flipQScores");
301 /**************************************************************************************************/
303 double QualityScores::calculateAverage(){
305 double aveQScore = 0.0000;
307 for(int i=0;i<seqLength;i++){
308 aveQScore += (double) qScores[i];
310 aveQScore /= (double) seqLength;
315 /**************************************************************************************************/
317 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
319 string rawSequence = sequence.getUnaligned();
320 bool success = 0; //guilty until proven innocent
322 if(seqName != sequence.getName()) {
323 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
324 m->mothurOutEndLine();
327 double aveQScore = calculateAverage();
329 if(aveQScore >= qAverage) { success = 1; }
330 else { success = 0; }
334 catch(exception& e) {
335 m->errorOut(e, "QualityScores", "cullQualAverage");
340 /**************************************************************************************************/
342 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
345 int seqLength = errorSeq.size();
346 int qIndex = start - 1;
347 for(int i=0;i<seqLength;i++){
349 if(errorSeq[i] == 'm') { qualErrorMap['m'][qScores[qIndex]] += weight; }
350 else if(errorSeq[i] == 's') { qualErrorMap['s'][qScores[qIndex]] += weight; }
351 else if(errorSeq[i] == 'i') { qualErrorMap['i'][qScores[qIndex]] += weight; }
352 else if(errorSeq[i] == 'a') { qualErrorMap['a'][qScores[qIndex]] += weight; }
353 else if(errorSeq[i] == 'd') { /* there are no qScores for deletions */ }
355 if(errorSeq[i] != 'd') { qIndex++; }
359 catch(exception& e) {
360 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
365 /**************************************************************************************************/
367 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
371 for(int i=start-1;i<stop;i++){
372 forwardMap[index++][qScores[i]] += weight;
376 catch(exception& e) {
377 m->errorOut(e, "QualityScores", "updateForwardMap");
382 /**************************************************************************************************/
384 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
388 for(int i=stop-1;i>=start;i--){
389 reverseMap[index++][qScores[i]] += weight;
393 catch(exception& e) {
394 m->errorOut(e, "QualityScores", "updateForwardMap");
399 /**************************************************************************************************/