5 * Created by Pat Schloss on 7/12/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "qualityscores.h"
12 /**************************************************************************************************/
14 QualityScores::QualityScores(){
16 m = MothurOut::getInstance();
22 m->errorOut(e, "QualityScores", "QualityScores");
27 /**************************************************************************************************/
29 QualityScores::QualityScores(ifstream& qFile){
32 m = MothurOut::getInstance();
41 m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg()));
42 m->mothurOutEndLine();
45 seqName = seqName.substr(1);
47 string qScoreString = m->getline(qFile);
48 while(qFile.peek() != '>' && qFile.peek() != EOF){
49 qScoreString += ' ' + m->getline(qFile);
52 istringstream qScoreStringStream(qScoreString);
54 while(!qScoreStringStream.eof()){
55 if (m->control_pressed) { break; }
56 qScoreStringStream >> score;
57 qScores.push_back(score);
62 // string scores = "";
64 // while(!qFile.eof()){
69 // if (seqName.length() != 0) {
70 // seqName = seqName.substr(1);
71 // while (!qFile.eof()) {
72 // char c = qFile.get();
73 // //gobble junk on line
74 // if (c == 10 || c == 13){ break; }
81 // char letter=qFile.get();
82 // if((letter == '>')){ qFile.putback(letter); break; }
83 // else if (isprint(letter)) { scores += letter; }
90 // //convert scores string to qScores
91 // istringstream qScoreStringStream(scores);
94 // while(!qScoreStringStream.eof()){
96 // if (m->control_pressed) { break; }
98 // qScoreStringStream >> score;
99 // qScores.push_back(score);
102 // qScores.pop_back();
104 seqLength = qScores.size();
108 catch(exception& e) {
109 m->errorOut(e, "QualityScores", "QualityScores");
115 /**************************************************************************************************/
117 string QualityScores::getName(){
122 catch(exception& e) {
123 m->errorOut(e, "QualityScores", "getName");
128 /**************************************************************************************************/
130 void QualityScores::printQScores(ofstream& qFile){
133 double aveQScore = calculateAverage();
135 qFile << '>' << seqName << '\t' << aveQScore << endl;
137 for(int i=0;i<seqLength;i++){
138 qFile << qScores[i] << ' ';
143 catch(exception& e) {
144 m->errorOut(e, "QualityScores", "printQScores");
149 /**************************************************************************************************/
151 void QualityScores::trimQScores(int start, int end){
156 hold = vector<int>(qScores.begin()+start, qScores.end());
160 if(qScores.size() > end){
161 hold = vector<int>(qScores.begin(), qScores.begin()+end);
166 seqLength = qScores.size();
168 catch(exception& e) {
169 m->errorOut(e, "QualityScores", "trimQScores");
174 /**************************************************************************************************/
176 void QualityScores::flipQScores(){
179 vector<int> temp = qScores;
180 for(int i=0;i<seqLength;i++){
181 qScores[seqLength - i - 1] = temp[i];
185 catch(exception& e) {
186 m->errorOut(e, "QualityScores", "flipQScores");
191 /**************************************************************************************************/
193 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
195 string rawSequence = sequence.getUnaligned();
196 int seqLength = sequence.getNumBases();
198 if(seqName != sequence.getName()){
199 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
200 m->mothurOutEndLine();
204 for(int i=0;i<seqLength;i++){
206 if(qScores[i] < qThreshold){
212 if (end == (seqLength-1)) { end = seqLength; }
214 sequence.setUnaligned(rawSequence.substr(0,end));
215 trimQScores(-1, end);
219 catch(exception& e) {
220 m->errorOut(e, "QualityScores", "flipQScores");
226 /**************************************************************************************************/
228 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
230 string rawSequence = sequence.getUnaligned();
231 int seqLength = sequence.getNumBases();
233 if(seqName != sequence.getName()){
234 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
235 m->mothurOutEndLine();
239 double rollingSum = 0.0000;
241 for(int i=0;i<seqLength;i++){
243 rollingSum += (double)qScores[i];
245 if(rollingSum / (double)(i+1) < qThreshold){
251 if(end == -1){ end = seqLength; }
254 sequence.setUnaligned(rawSequence.substr(0,end));
255 trimQScores(-1, end);
260 catch(exception& e) {
261 m->errorOut(e, "QualityScores", "flipQScores");
267 /**************************************************************************************************/
269 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
271 string rawSequence = sequence.getUnaligned();
272 int seqLength = sequence.getNumBases();
274 if(seqName != sequence.getName()){
275 m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
276 m->mothurOutEndLine();
279 int end = windowSize;
282 if(seqLength < windowSize) { return 0; }
284 while(start < seqLength){
285 double windowSum = 0.0000;
287 for(int i=start;i<end;i++){
288 windowSum += qScores[i];
290 double windowAverage = windowSum / (double)(end-start);
292 if(windowAverage < qThreshold){
293 end = end - stepSize;
297 end = start + windowSize;
298 if(end >= seqLength){ end = seqLength - 1; }
301 if(end == -1){ end = seqLength; }
303 //failed first window
304 if (end < windowSize) { return 0; }
306 sequence.setUnaligned(rawSequence.substr(0,end));
307 trimQScores(-1, end);
311 catch(exception& e) {
312 m->errorOut(e, "QualityScores", "flipQScores");
318 /**************************************************************************************************/
320 double QualityScores::calculateAverage(){
322 double aveQScore = 0.0000;
324 for(int i=0;i<seqLength;i++){
325 aveQScore += (double) qScores[i];
327 aveQScore /= (double) seqLength;
332 /**************************************************************************************************/
334 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
336 string rawSequence = sequence.getUnaligned();
337 bool success = 0; //guilty until proven innocent
339 if(seqName != sequence.getName()) {
340 m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
341 m->mothurOutEndLine();
344 double aveQScore = calculateAverage();
346 if(aveQScore >= qAverage) { success = 1; }
347 else { success = 0; }
351 catch(exception& e) {
352 m->errorOut(e, "QualityScores", "cullQualAverage");
357 /**************************************************************************************************/
359 void QualityScores::updateQScoreErrorMap(map<char, vector<int> >& qualErrorMap, string errorSeq, int start, int stop, int weight){
362 int seqLength = errorSeq.size();
364 int qIndex = start - 1;
366 for(int i=0;i<seqLength;i++){
368 if(errorSeq[i] == 'm') { qualErrorMap['m'][qScores[qIndex]] += weight; }
369 else if(errorSeq[i] == 's') { qualErrorMap['s'][qScores[qIndex]] += weight; }
370 else if(errorSeq[i] == 'i') { qualErrorMap['i'][qScores[qIndex]] += weight; }
371 else if(errorSeq[i] == 'a') { qualErrorMap['a'][qScores[qIndex]] += weight; }
372 else if(errorSeq[i] == 'd') { /* there are no qScores for deletions */ }
374 if(errorSeq[i] != 'd') { qIndex++; }
375 if(qIndex > stop){ break; }
378 catch(exception& e) {
379 m->errorOut(e, "QualityScores", "updateQScoreErrorMap");
384 /**************************************************************************************************/
386 void QualityScores::updateForwardMap(vector<vector<int> >& forwardMap, int start, int stop, int weight){
390 for(int i=start-1;i<stop;i++){
391 forwardMap[index++][qScores[i]] += weight;
395 catch(exception& e) {
396 m->errorOut(e, "QualityScores", "updateForwardMap");
401 /**************************************************************************************************/
403 void QualityScores::updateReverseMap(vector<vector<int> >& reverseMap, int start, int stop, int weight){
407 for(int i=stop-1;i>=start;i--){
408 reverseMap[index++][qScores[i]] += weight;
412 catch(exception& e) {
413 m->errorOut(e, "QualityScores", "updateForwardMap");
418 /**************************************************************************************************/