]> git.donarmstrong.com Git - mothur.git/blob - qualityscores.cpp
fixed metastats, added resize to cluster.classic, added code to kill children if...
[mothur.git] / qualityscores.cpp
1 /*
2  *  qualityscores.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "qualityscores.h"
11
12 /**************************************************************************************************/
13
14 QualityScores::QualityScores(){
15         try {
16                 m = MothurOut::getInstance();
17                 seqName = "";
18                 seqLength = -1;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "QualityScores", "QualityScores");
22                 exit(1);
23         }                                                       
24 }
25
26 /**************************************************************************************************/
27
28 QualityScores::QualityScores(ifstream& qFile, int l){
29         try {
30                 
31                 m = MothurOut::getInstance();
32
33                 seqName = "";
34                 seqLength = l;
35                 int score;
36                 
37                 //string line;
38                 //m->getline(qFile, line); 
39                 //istringstream nameStream(line);
40         
41                 qFile >> seqName; 
42                 while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13 || c == -1){       break;  }       } // get rest of line 
43                 m->gobble(qFile);
44                 if (seqName == "") { m->mothurOut("Error reading quality file, name blank at position, " + toString(qFile.tellg())); m->mothurOutEndLine(); }
45                 else {
46                         seqName = seqName.substr(1); 
47                 }
48                 
49                 //m->getline(qFile, line);
50                 //istringstream qualStream(line);
51         
52                 //while(qualStream){
53                 //      qualStream >> score;
54                 //      qScores.push_back(score);
55                 //}
56                 //qScores.pop_back();
57                 
58                 //seqLength = qScores.size();   
59                 
60                 /*while(!in.eof()){     
61                         string saveName = "";
62                         string name = "";
63                         string scores = "";
64                         
65                         in >> name; 
66                         //cout << name << endl;         
67                         if (name.length() != 0) { 
68                                 saveName = name.substr(1);
69                                 while (!in.eof())       {       
70                                         char c = in.get(); 
71                                         if (c == 10 || c == 13){        break;  }
72                                         else { name += c; }     
73                                 } 
74                                 m->gobble(in);
75                         }
76                         
77                         while(in){
78                                 char letter= in.get();
79                                 if(letter == '>'){      in.putback(letter);     break;  }
80                                 else{ scores += letter; }
81                         }
82                         
83                  //istringstream iss (scores,istringstream::in);
84                  
85                  //int count = 0; int tempScore;
86                  //while (iss) { iss >> tempScore; count++; }
87                  //cout << saveName << '\t' << count << endl;   
88                         
89                         m->gobble(in);
90                 }*/             
91                 
92                 
93                 
94                 for(int i=0;i<seqLength;i++){
95                         qFile >> score;
96                         qScores.push_back(score);
97                 }
98                 m->gobble(qFile);
99
100         }
101         catch(exception& e) {
102                 m->errorOut(e, "QualityScores", "QualityScores");
103                 exit(1);
104         }                                                       
105
106 }
107 /**************************************************************************************************/
108
109 /**************************************************************************************************/
110
111 string QualityScores::getName(){
112         
113         try {
114                 return seqName;
115         }
116         catch(exception& e) {
117                 m->errorOut(e, "QualityScores", "getName");
118                 exit(1);
119         }                                                       
120 }
121
122 /**************************************************************************************************/
123
124 void QualityScores::printQScores(ofstream& qFile){
125         try {
126                 
127                 double aveQScore = calculateAverage();
128                 
129                 qFile << '>' << seqName << '\t' << aveQScore << endl;
130                 
131                 for(int i=0;i<seqLength;i++){
132                         qFile << qScores[i] << ' ';
133                 }
134                 qFile << endl;
135                                 
136         }
137         catch(exception& e) {
138                 m->errorOut(e, "QualityScores", "printQScores");
139                 exit(1);
140         }                                                       
141 }
142
143 /**************************************************************************************************/
144
145 void QualityScores::trimQScores(int start, int end){
146         try {
147                 vector<int> hold;
148                 
149                 if(end == -1){          
150                         hold = vector<int>(qScores.begin()+start, qScores.end());
151                         qScores = hold;         
152                 }
153                 if(start == -1){
154                         hold = vector<int>(qScores.begin(), qScores.begin()+end);       //not sure if indexing is correct
155                         qScores = hold;         
156                 }
157
158                 seqLength = qScores.size();
159         }
160         catch(exception& e) {
161                 m->errorOut(e, "QualityScores", "trimQScores");
162                 exit(1);
163         }                                                       
164 }
165
166 /**************************************************************************************************/
167
168 void QualityScores::flipQScores(){
169         try {
170                 
171                 vector<int> temp = qScores;
172                 for(int i=0;i<seqLength;i++){
173                         qScores[seqLength - i - 1] = temp[i];
174                 }
175                 
176         }
177         catch(exception& e) {
178                 m->errorOut(e, "QualityScores", "flipQScores");
179                 exit(1);
180         }                                                       
181 }
182
183 /**************************************************************************************************/
184
185 bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
186         try {
187                 string rawSequence = sequence.getUnaligned();
188                 int seqLength = sequence.getNumBases();
189                 
190                 if(seqName != sequence.getName()){
191                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
192                         m->mothurOutEndLine();  
193                 }
194                 
195                 int end;
196                 for(int i=0;i<seqLength;i++){
197                         end = i;
198                         if(qScores[i] < qThreshold){
199                                 break;
200                         }
201                 }
202                 
203                 sequence.setUnaligned(rawSequence.substr(0,end));
204                 trimQScores(-1, end);
205         
206                 return 1;
207         }
208         catch(exception& e) {
209                 m->errorOut(e, "QualityScores", "flipQScores");
210                 exit(1);
211         }                                                       
212         
213 }
214
215 /**************************************************************************************************/
216
217 bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
218         try {
219                 string rawSequence = sequence.getUnaligned();
220                 int seqLength = sequence.getNumBases();
221                 
222                 if(seqName != sequence.getName()){
223                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
224                         m->mothurOutEndLine();  
225                 }
226                 
227                 int end = -1;
228                 double rollingSum = 0.0000;
229                 
230                 for(int i=0;i<seqLength;i++){
231
232                         rollingSum += (double)qScores[i];
233                         
234                         if(rollingSum / (double)(i+1) < qThreshold){
235                                 end = i;
236 //                              cout << i+1 << '\t' << seqName << '\t' << rollingSum / (double)(i+1) << endl;
237                                 break;
238                         }
239                 }
240                 
241                 if(end == -1){  end = seqLength;        }
242                 
243                 sequence.setUnaligned(rawSequence.substr(0,end));
244                 trimQScores(-1, end);
245                 
246                 return 1;
247         }
248         catch(exception& e) {
249                 m->errorOut(e, "QualityScores", "flipQScores");
250                 exit(1);
251         }                                                       
252         
253 }
254
255 /**************************************************************************************************/
256
257 bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
258         try {
259                 string rawSequence = sequence.getUnaligned();
260                 int seqLength = sequence.getNumBases();
261                 
262                 if(seqName != sequence.getName()){
263                         m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
264                         m->mothurOutEndLine();
265                 }
266                 
267                 int end = windowSize;
268                 int start = 0;
269                 
270
271                 while(start < seqLength){
272                         double windowSum = 0.0000;
273
274                         for(int i=start;i<end;i++){
275                                 windowSum += qScores[i];                                
276                         }
277                         double windowAverage = windowSum / (double)(end-start);
278                         
279                         if(windowAverage < qThreshold){
280                                 end = end - stepSize;
281                                 break;
282                         }
283                         start += stepSize;
284                         end = start + windowSize;
285                         if(end >= seqLength){   end = seqLength - 1;    }
286                 }
287                 
288                 
289                 if(end == -1){  end = seqLength;        }
290                 
291                 sequence.setUnaligned(rawSequence.substr(0,end));
292                 trimQScores(-1, end);
293                 
294                 return 1;
295         }
296         catch(exception& e) {
297                 m->errorOut(e, "QualityScores", "flipQScores");
298                 exit(1);
299         }                                                       
300         
301 }
302
303 /**************************************************************************************************/
304
305 double QualityScores::calculateAverage(){
306         
307         double aveQScore = 0.0000;
308         
309         for(int i=0;i<seqLength;i++){
310                 aveQScore += (double) qScores[i];
311         }
312         aveQScore /= (double) seqLength;
313         
314         return aveQScore;
315 }
316
317 /**************************************************************************************************/
318
319 bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
320         try {
321                 string rawSequence = sequence.getUnaligned();
322                 bool success = 0;       //guilty until proven innocent
323                 
324                 if(seqName != sequence.getName())       {
325                         m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
326                         m->mothurOutEndLine();  
327                 } 
328                         
329                 double aveQScore = calculateAverage();
330                 
331                 if(aveQScore >= qAverage)       {       success = 1;    }
332                 else                                            {       success = 0;    }
333                 
334                 return success;
335         }
336         catch(exception& e) {
337                 m->errorOut(e, "TrimSeqsCommand", "cullQualAverage");
338                 exit(1);
339         }
340 }
341
342 /**************************************************************************************************/