5 * Created by Pat Schloss on 7/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "qualityscores.h"
12 /**************************************************************************************************/
14 QualityScores::QualityScores(){
16 m = MothurOut::getInstance();
22 m->errorOut(e, "QualityScores", "QualityScores");
27 /**************************************************************************************************/
29 QualityScores::QualityScores(ifstream& qFile){
32 m = MothurOut::getInstance();
41 m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
42 m->mothurOutEndLine();
45 seqName = seqName.substr(1);
48 string qScoreString = m->getline(qFile);
50 istringstream qScoreStringStream(qScoreString);
51 while(!qScoreStringStream.eof()){
52 qScoreStringStream >> score;
53 qScores.push_back(score);
56 seqLength = qScores.size();
59 m->errorOut(e, "QualityScores", "QualityScores");
65 /**************************************************************************************************/
67 string QualityScores::getName(){
73 m->errorOut(e, "QualityScores", "getName");
78 /**************************************************************************************************/
80 void QualityScores::printQScores(ofstream& qFile){
83 double aveQScore = calculateAverage();
85 qFile << '>' << seqName << '\t' << aveQScore << endl;
87 for(int i=0;i<seqLength;i++){
88 qFile << qScores[i] << ' ';
94 m->errorOut(e, "QualityScores", "printQScores");
99 /**************************************************************************************************/
101 void QualityScores::trimQScores(int start, int end){
106 hold = vector<int>(qScores.begin()+start, qScores.end());
110 if(qScores.size() > end){
111 hold = vector<int>(qScores.begin(), qScores.begin()+end);
116 seqLength = qScores.size();
118 catch(exception& e) {
119 m->errorOut(e, "QualityScores", "trimQScores");
124 /**************************************************************************************************/
126 void QualityScores::flipQScores(){
129 vector<int> temp = qScores;
130 for(int i=0;i<seqLength;i++){
131 qScores[seqLength - i - 1] = temp[i];
135 catch(exception& e) {
136 m->errorOut(e, "QualityScores", "flipQScores");
141 /**************************************************************************************************/
143 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
145 string rawSequence = sequence.getUnaligned();
146 int seqLength = sequence.getNumBases();
148 if(seqName != sequence.getName()){
149 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
150 m->mothurOutEndLine();
154 for(int i=0;i<seqLength;i++){
156 if(qScores[i] < qThreshold){
162 if (end == (seqLength-1)) { end = seqLength; }
164 sequence.setUnaligned(rawSequence.substr(0,end));
165 trimQScores(-1, end);
169 catch(exception& e) {
170 m->errorOut(e, "QualityScores", "flipQScores");
176 /**************************************************************************************************/
178 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
180 string rawSequence = sequence.getUnaligned();
181 int seqLength = sequence.getNumBases();
183 if(seqName != sequence.getName()){
184 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
185 m->mothurOutEndLine();
189 double rollingSum = 0.0000;
191 for(int i=0;i<seqLength;i++){
193 rollingSum += (double)qScores[i];
195 if(rollingSum / (double)(i+1) < qThreshold){
201 if(end == -1){ end = seqLength; }
204 sequence.setUnaligned(rawSequence.substr(0,end));
205 trimQScores(-1, end);
210 catch(exception& e) {
211 m->errorOut(e, "QualityScores", "flipQScores");
217 /**************************************************************************************************/
219 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
221 string rawSequence = sequence.getUnaligned();
222 int seqLength = sequence.getNumBases();
224 if(seqName != sequence.getName()){
225 m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
226 m->mothurOutEndLine();
229 int end = windowSize;
232 if(seqLength < windowSize) { return 0; }
234 while(start < seqLength){
235 double windowSum = 0.0000;
237 for(int i=start;i<end;i++){
238 windowSum += qScores[i];
240 double windowAverage = windowSum / (double)(end-start);
242 if(windowAverage < qThreshold){
243 end = end - stepSize;
247 end = start + windowSize;
248 if(end >= seqLength){ end = seqLength - 1; }
251 if(end == -1){ end = seqLength; }
253 sequence.setUnaligned(rawSequence.substr(0,end));
254 trimQScores(-1, end);
258 catch(exception& e) {
259 m->errorOut(e, "QualityScores", "flipQScores");
265 /**************************************************************************************************/
267 double QualityScores::calculateAverage(){
269 double aveQScore = 0.0000;
271 for(int i=0;i<seqLength;i++){
272 aveQScore += (double) qScores[i];
274 aveQScore /= (double) seqLength;
279 /**************************************************************************************************/
281 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
283 string rawSequence = sequence.getUnaligned();
284 bool success = 0; //guilty until proven innocent
286 if(seqName != sequence.getName()) {
287 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
288 m->mothurOutEndLine();
291 double aveQScore = calculateAverage();
293 if(aveQScore >= qAverage) { success = 1; }
294 else { success = 0; }
298 catch(exception& e) {
299 m->errorOut(e, "QualityScores", "cullQualAverage");
304 /**************************************************************************************************/
306 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
309 int seqLength = errorSeq.size();
311 int qIndex = start - 1;
313 for(int i=0;i<seqLength;i++){
315 if(errorSeq[i] == 'm') { qualErrorMap['m'][qScores[qIndex]] += weight; }
316 else if(errorSeq[i] == 's') { qualErrorMap['s'][qScores[qIndex]] += weight; }
317 else if(errorSeq[i] == 'i') { qualErrorMap['i'][qScores[qIndex]] += weight; }
318 else if(errorSeq[i] == 'a') { qualErrorMap['a'][qScores[qIndex]] += weight; }
319 else if(errorSeq[i] == 'd') { /* there are no qScores for deletions */ }
321 if(errorSeq[i] != 'd') { qIndex++; }
322 if(qIndex > stop){ break; }
325 catch(exception& e) {
326 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
331 /**************************************************************************************************/
333 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
337 for(int i=start-1;i<stop;i++){
338 forwardMap[index++][qScores[i]] += weight;
342 catch(exception& e) {
343 m->errorOut(e, "QualityScores", "updateForwardMap");
348 /**************************************************************************************************/
350 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
354 for(int i=stop-1;i>=start;i--){
355 reverseMap[index++][qScores[i]] += weight;
359 catch(exception& e) {
360 m->errorOut(e, "QualityScores", "updateForwardMap");
365 /**************************************************************************************************/