]> git.donarmstrong.com Git - mothur.git/blob - seqerrorcommand.cpp
more changes to error.seqs command and quality scores classses
[mothur.git] / seqerrorcommand.cpp
1 /*
2  *  seqerrorcommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/15/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "seqerrorcommand.h"
11 #include "reportfile.h"
12 #include "qualityscores.h"
13
14 //**********************************************************************************************************************
15 vector<string> SeqErrorCommand::getValidParameters(){   
16         try {
17                 string Array[] =  {"query", "reference", "name", "qfile", "report", "threshold", "inputdir", "outputdir"};
18                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
19                 return myArray;
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "SeqErrorCommand", "getValidParameters");
23                 exit(1);
24         }
25 }
26 //**********************************************************************************************************************
27 SeqErrorCommand::SeqErrorCommand(){     
28         try {
29                 abort = true;
30                 //initialize outputTypes
31                 vector<string> tempOutNames;
32                 outputTypes["error"] = tempOutNames;
33                 outputTypes["count"] = tempOutNames;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "SeqErrorCommand", "SeqErrorCommand");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 vector<string> SeqErrorCommand::getRequiredParameters(){        
42         try {
43                 string Array[] =  {"query","reference"};
44                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "SeqErrorCommand", "getRequiredParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 vector<string> SeqErrorCommand::getRequiredFiles(){     
54         try {
55                 vector<string> myArray;
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "SeqErrorCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //***************************************************************************************************************
64
65 SeqErrorCommand::SeqErrorCommand(string option)  {
66         try {
67                 
68                 abort = false;
69                 
70                 //allow user to run help
71                 if(option == "help") { help(); abort = true; }
72                 
73                 else {
74                         string temp;
75                         
76                         //valid paramters for this command
77                         string AlignArray[] =  {"query", "reference", "name", "qfile", "report", "threshold", "inputdir", "outputdir"};
78                         
79                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
80                         
81                         OptionParser parser(option);
82                         map<string,string> parameters = parser.getParameters();
83                         
84                         ValidParameters validParameter;
85                         map<string,string>::iterator it;
86                         
87                         //check to make sure all parameters are valid for command
88                         for (it = parameters.begin(); it != parameters.end(); it++) { 
89                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
90                         }
91                         
92                         //initialize outputTypes
93                         vector<string> tempOutNames;
94                         outputTypes["error"] = tempOutNames;
95                         outputTypes["count"] = tempOutNames;
96                         
97                         //if the user changes the input directory command factory will send this info to us in the output parameter 
98                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
99                         if (inputDir == "not found"){   inputDir = "";          }
100                         else {
101                                 string path;
102                                 it = parameters.find("query");
103                                 //user has given a template file
104                                 if(it != parameters.end()){ 
105                                         path = m->hasPath(it->second);
106                                         //if the user has not given a path then, add inputdir. else leave path alone.
107                                         if (path == "") {       parameters["query"] = inputDir + it->second;            }
108                                 }
109                                 
110                                 it = parameters.find("reference");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
116                                 }
117                                 
118                                 it = parameters.find("name");
119                                 //user has given a names file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
124                                 }
125
126                                 it = parameters.find("qfile");
127                                 //user has given a quality score file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
132                                 }
133                                 
134                                 it = parameters.find("report");
135                                 //user has given a alignment report file
136                                 if(it != parameters.end()){ 
137                                         path = m->hasPath(it->second);
138                                         //if the user has not given a path then, add inputdir. else leave path alone.
139                                         if (path == "") {       parameters["report"] = inputDir + it->second;           }
140                                 }
141                                 
142                         }
143                         //check for required parameters
144                         queryFileName = validParameter.validFile(parameters, "query", true);
145                         if (queryFileName == "not found") { m->mothurOut("query is a required parameter for the seq.error command."); m->mothurOutEndLine(); abort = true; }
146                         else if (queryFileName == "not open") { abort = true; } 
147                         
148                         referenceFileName = validParameter.validFile(parameters, "reference", true);
149                         if (referenceFileName == "not found") { m->mothurOut("reference is a required parameter for the seq.error command."); m->mothurOutEndLine(); abort = true; }
150                         else if (referenceFileName == "not open") { abort = true; }     
151                         
152
153                         //check for optional parameters
154                         namesFileName = validParameter.validFile(parameters, "name", true);
155                         if(namesFileName == "not found"){       namesFileName = "";     }
156                         
157                         qualFileName = validParameter.validFile(parameters, "qfile", true);
158                         if(qualFileName == "not found"){        qualFileName = "";      }
159
160                         reportFileName = validParameter.validFile(parameters, "report", true);
161                         if(reportFileName == "not found"){      reportFileName = "";    }
162                         
163                         if((reportFileName != "" && qualFileName == "") || (reportFileName == "" && qualFileName != "")){
164                                 m->mothurOut("if you use either a qual file or a report file, you have to have both.");
165                                 m->mothurOutEndLine();
166                                 abort = true; 
167                         }
168                         
169                         outputDir = validParameter.validFile(parameters, "outputdir", false);
170                         if (outputDir == "not found"){  
171                                 outputDir = ""; 
172                                 outputDir += m->hasPath(queryFileName); //if user entered a file with a path then preserve it   
173                         }
174                         
175                         //check for optional parameter and set defaults
176                         // ...at some point should added some additional type checking...
177                         temp = validParameter.validFile(parameters, "threshold", false);        if (temp == "not found") { temp = "1.00"; }
178                         convert(temp, threshold);  
179                                                 
180                         errorSummaryFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.summary";
181                         m->openOutputFile(errorSummaryFileName, errorSummaryFile);
182                         outputNames.push_back(errorSummaryFileName); outputTypes["error.summary"].push_back(errorSummaryFileName);
183                         printErrorHeader();
184
185                         errorSeqFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq";
186                         m->openOutputFile(errorSeqFileName, errorSeqFile);
187                         outputNames.push_back(errorSeqFileName); outputTypes["error.seq"].push_back(errorSeqFileName);
188                         
189                         substitutionMatrix.resize(6);
190                         for(int i=0;i<6;i++){   substitutionMatrix[i].assign(6,0);      }
191                 }
192         }
193         catch(exception& e) {
194                 m->errorOut(e, "SeqErrorCommand", "SeqErrorCommand");
195                 exit(1);
196         }
197 }
198
199 //**********************************************************************************************************************
200
201 void SeqErrorCommand::help(){
202         try {
203                 m->mothurOut("The seq.error command reads a query alignment file and a reference alignment file and creates .....\n");
204                 
205                 
206                 
207                 m->mothurOut("Example seq.error(...).\n");
208                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
209                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/seq.error .\n\n");
210         }
211         catch(exception& e) {
212                 m->errorOut(e, "SeqErrorCommand", "help");
213                 exit(1);
214         }
215 }
216
217 //***************************************************************************************************************
218
219 SeqErrorCommand::~SeqErrorCommand(){
220         errorSummaryFile.close();       
221         errorSeqFile.close();
222 }
223
224 //***************************************************************************************************************
225
226 int SeqErrorCommand::execute(){
227         try{
228                 if (abort == true) { return 0; }
229
230                 getReferences();        //read in reference sequences - make sure there's no ambiguous bases
231
232                 map<string, int> weights;
233                 if(namesFileName != ""){        weights = getWeights(); }
234                 
235                 ifstream queryFile;
236                 m->openInputFile(queryFileName, queryFile);
237                 
238                 ifstream reportFile;
239                 ifstream qualFile;
240
241                 ReportFile report;
242                 QualityScores quality;
243                 vector<vector<int> > qualForwardMap;
244                 vector<vector<int> > qualReverseMap;
245                 
246                 if(qualFileName != "" && reportFileName != ""){
247                         m->openInputFile(qualFileName, qualFile);
248                         report = ReportFile(reportFile, reportFileName);
249                         
250                         qualForwardMap.resize(1000);
251                         qualReverseMap.resize(1000);
252                         for(int i=0;i<1000;i++){
253                                 qualForwardMap[i].assign(100,0);
254                                 qualReverseMap[i].assign(100,0);
255                         }                               
256                 }
257                 
258                 int totalBases = 0;
259                 int totalMatches = 0;
260                 
261                 vector<int> misMatchCounts(11, 0);
262                 int maxMismatch = 0;
263                 int numSeqs = 0;
264                 
265                 map<string, int>::iterator it;
266                 map<char, vector<int> > qScoreErrorMap;
267                 qScoreErrorMap['m'].assign(41, 0);
268                 qScoreErrorMap['s'].assign(41, 0);
269                 qScoreErrorMap['i'].assign(41, 0);
270                 qScoreErrorMap['a'].assign(41, 0);
271                 
272                 
273                 
274                 map<char, vector<int> > errorForward;
275                 errorForward['m'].assign(1000,0);
276                 errorForward['s'].assign(1000,0);
277                 errorForward['i'].assign(1000,0);
278                 errorForward['d'].assign(1000,0);
279                 errorForward['a'].assign(1000,0);
280                 
281                 map<char, vector<int> > errorReverse;
282                 errorReverse['m'].assign(1000,0);
283                 errorReverse['s'].assign(1000,0);
284                 errorReverse['i'].assign(1000,0);
285                 errorReverse['d'].assign(1000,0);
286                 errorReverse['a'].assign(1000,0);
287                 
288                 
289                 
290                 while(queryFile){
291                         Compare minCompare;
292                         Sequence query(queryFile);
293                         
294                         for(int i=0;i<numRefs;i++){
295                                 Compare currCompare = getErrors(query, referenceSeqs[i]);
296                                 
297                                 if(currCompare.errorRate < minCompare.errorRate){
298                                         minCompare = currCompare;
299                                 }
300                         }
301
302                         if(namesFileName != ""){
303                                 it = weights.find(query.getName());
304                                 minCompare.weight = it->second;
305                         }
306                         else    {       minCompare.weight = 1;  }
307
308                         printErrorData(minCompare);
309
310                         for(int i=0;i<minCompare.total;i++){
311                                 char letter = minCompare.sequence[i];
312                                 errorForward[letter][i] += minCompare.weight;
313                                 errorReverse[letter][minCompare.total-i-1] += minCompare.weight;                                
314                         }
315                                                 
316                         if(qualFileName != "" && reportFileName != ""){
317                                 report = ReportFile(reportFile);
318                                 
319                                 int origLength = report.getQueryLength();
320                                 int startBase = report.getQueryStart();
321                                 int endBase = report.getQueryEnd();
322
323                                 quality = QualityScores(qualFile, origLength);
324                                 quality.updateQScoreErrorMap(qScoreErrorMap, minCompare.sequence, startBase, endBase, minCompare.weight);
325                                 quality.updateForwardMap(qualForwardMap, startBase, endBase, minCompare.weight);
326                                 quality.updateReverseMap(qualReverseMap, startBase, endBase, minCompare.weight);
327                         }                       
328                         
329                         if(minCompare.errorRate < threshold){
330                                 totalBases += (minCompare.total * minCompare.weight);
331                                 totalMatches += minCompare.matches * minCompare.weight;
332                                 if(minCompare.mismatches > maxMismatch){
333                                         maxMismatch = minCompare.mismatches;
334                                         misMatchCounts.resize(maxMismatch + 1, 0);
335                                 }                               
336                                 misMatchCounts[minCompare.mismatches] += minCompare.weight;
337                                 numSeqs++;
338                         }
339                         
340                         
341                 }
342                 queryFile.close();
343                 
344                 int total = 0;
345                 
346                 if(qualFileName != "" && reportFileName != ""){
347                         string errorQualityFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.quality";
348                         ofstream errorQualityFile;
349                         m->openOutputFile(errorQualityFileName, errorQualityFile);
350                         outputNames.push_back(errorQualityFileName);  outputTypes["error.quality"].push_back(errorQualityFileName);
351                         
352                         errorQualityFile << "qscore\tmatches\tsubstitutions\tinsertions\tambiguous" << endl;
353                         for(int i=0;i<41;i++){
354                                 errorQualityFile << i << '\t' << qScoreErrorMap['m'][i] << '\t' << qScoreErrorMap['s'][i] << '\t' << qScoreErrorMap['i'][i] << '\t'<< qScoreErrorMap['a'][i] << endl;
355                         }
356                         errorQualityFile.close();
357                         
358
359                         
360                         int lastRow = 0;
361                         int lastColumn = 0;
362                         
363                         for(int i=0;i<qualForwardMap.size();i++){
364                                 for(int j=0;j<qualForwardMap[i].size();j++){
365                                         if(qualForwardMap[i][j] != 0){
366                                                 if(lastRow < i)         {       lastRow = i+2;  }
367                                                 if(lastColumn < j)      {       lastColumn = j+2;       }
368                                         }
369                                         
370                                 }
371                         }
372
373                         
374                         string qualityForwardFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.qual.forward";
375                         ofstream qualityForwardFile;
376                         m->openOutputFile(qualityForwardFileName, qualityForwardFile);
377                         outputNames.push_back(errorQualityFileName);  outputTypes["error.qual.forward"].push_back(qualityForwardFileName);
378                         
379                         for(int i=0;i<lastColumn;i++){  qualityForwardFile << '\t' << i;        }       qualityForwardFile << endl;
380                         for(int i=0;i<lastRow;i++){
381                                 qualityForwardFile << i+1;
382                                 for(int j=0;j<lastColumn;j++){
383                                         qualityForwardFile << '\t' << qualForwardMap[i][j];
384                                 }
385                                 qualityForwardFile << endl;
386                         }
387                         qualityForwardFile.close();
388                         
389                         
390                         string qualityReverseFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.qual.reverse";
391                         ofstream qualityReverseFile;
392                         m->openOutputFile(qualityReverseFileName, qualityReverseFile);
393                         outputNames.push_back(errorQualityFileName);  outputTypes["error.qual.reverse"].push_back(qualityReverseFileName);
394
395                         for(int i=0;i<lastColumn;i++){  qualityReverseFile << '\t' << i;        }       qualityReverseFile << endl;
396                         for(int i=0;i<lastRow;i++){
397                                 qualityReverseFile << i+1;
398                                 for(int j=0;j<lastColumn;j++){
399                                         qualityReverseFile << '\t' << qualReverseMap[i][j];
400                                 }
401                                 qualityReverseFile << endl;
402                         }
403                         
404                 }
405                 
406                 
407                 string errorForwardFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq.forward";
408                 ofstream errorForwardFile;
409                 m->openOutputFile(errorForwardFileName, errorForwardFile);
410                 outputNames.push_back(errorForwardFileName);  outputTypes["error.forward"].push_back(errorForwardFileName);
411                 
412                 errorForwardFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl;
413                 for(int i=0;i<1000;i++){
414                         float match = (float)errorForward['m'][i];
415                         float subst = (float)errorForward['s'][i];
416                         float insert = (float)errorForward['i'][i];
417                         float del = (float)errorForward['d'][i];
418                         float amb = (float)errorForward['a'][i];
419                         float total = match + subst + insert + del + amb;
420                         if(total == 0){ break;  }
421                         errorForwardFile << i+1 << '\t' << total << '\t' << match/total  << '\t' << subst/total  << '\t' << insert/total  << '\t' << del/total  << '\t' << amb/total << endl;
422                 }
423                 errorForwardFile.close();
424
425                 
426                 string errorReverseFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq.reverse";
427                 ofstream errorReverseFile;
428                 m->openOutputFile(errorReverseFileName, errorReverseFile);
429                 outputNames.push_back(errorReverseFileName);  outputTypes["error.reverse"].push_back(errorReverseFileName);
430                 
431                 errorReverseFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl;
432                 for(int i=0;i<1000;i++){
433                         float match = (float)errorReverse['m'][i];
434                         float subst = (float)errorReverse['s'][i];
435                         float insert = (float)errorReverse['i'][i];
436                         float del = (float)errorReverse['d'][i];
437                         float amb = (float)errorReverse['a'][i];
438                         float total = match + subst + insert + del + amb;
439                         if(total == 0){ break;  }
440                         errorReverseFile << i+1 << '\t' << total << '\t' << match/total  << '\t' << subst/total  << '\t' << insert/total  << '\t' << del/total  << '\t' << amb/total << endl;
441                 }
442                 errorReverseFile.close();
443
444
445                 
446                 string errorCountFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.count";
447                 ofstream errorCountFile;
448                 m->openOutputFile(errorCountFileName, errorCountFile);
449                 outputNames.push_back(errorCountFileName);  outputTypes["error.count"].push_back(errorCountFileName);
450                 m->mothurOut("Overall error rate:\t" + toString((double)(totalBases - totalMatches) / (double)totalBases) + "\n\n");
451                 m->mothurOut("Errors\tSequences\n");
452                 errorCountFile << "Errors\tSequences\n";                
453                 for(int i=0;i<misMatchCounts.size();i++){
454                         m->mothurOut(toString(i) + '\t' + toString(misMatchCounts[i]) + '\n');
455                         errorCountFile << i << '\t' << misMatchCounts[i] << endl;
456                 }
457                 errorCountFile.close();
458
459
460                 
461                 
462                 
463                 string subMatrixFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.matrix";
464                 ofstream subMatrixFile;
465                 m->openOutputFile(subMatrixFileName, subMatrixFile);
466                 outputNames.push_back(subMatrixFileName);  outputTypes["error.matrix"].push_back(subMatrixFileName);
467                 vector<string> bases(6);
468                 bases[0] = "A";
469                 bases[1] = "T";
470                 bases[2] = "G";
471                 bases[3] = "C";
472                 bases[4] = "Gap";
473                 bases[5] = "N";
474                 vector<int> refSums(5,1);
475                 
476                 for(int i=0;i<5;i++){
477                         subMatrixFile << "\tr" << bases[i];
478                         
479                         for(int j=0;j<6;j++){
480                                 refSums[i] += substitutionMatrix[i][j];                         
481                         }
482                         
483                 }
484                 subMatrixFile << endl;
485                 
486                 for(int i=0;i<6;i++){
487                         subMatrixFile << 'q' << bases[i];
488                         for(int j=0;j<5;j++){
489                                 subMatrixFile << '\t' << substitutionMatrix[j][i];                              
490                         }
491                         subMatrixFile << endl;
492                 }
493                 subMatrixFile << "total";
494                 for(int i=0;i<5;i++){
495                         subMatrixFile << '\t' << refSums[i];
496                 }
497                 subMatrixFile << endl;
498                 subMatrixFile.close();
499                 
500                 
501                 
502                 return 0;       
503         }
504         catch(exception& e) {
505                 m->errorOut(e, "SeqErrorCommand", "execute");
506                 exit(1);
507         }
508 }
509
510 //***************************************************************************************************************
511
512 void SeqErrorCommand::getReferences(){
513         try {
514                 
515                 ifstream referenceFile;
516                 m->openInputFile(referenceFileName, referenceFile);
517                 
518                 while(referenceFile){
519                         Sequence currentSeq(referenceFile);
520                         int numAmbigs = currentSeq.getAmbigBases();
521                         
522                         if(numAmbigs != 0){
523                                 m->mothurOut("Warning: " + toString(currentSeq.getName()) + " has " + toString(numAmbigs) + " ambiguous bases, these bases will be removed\n");
524                                 currentSeq.removeAmbigBases();
525                         }
526                         referenceSeqs.push_back(currentSeq);
527                         m->gobble(referenceFile);
528                 }
529                 numRefs = referenceSeqs.size();
530                 
531                 referenceFile.close();
532         }
533         catch(exception& e) {
534                 m->errorOut(e, "SeqErrorCommand", "getReferences");
535                 exit(1);
536         }
537 }
538
539 //***************************************************************************************************************
540
541 Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){
542         try {
543                 if(query.getAlignLength() != reference.getAlignLength()){
544                         m->mothurOut("Warning: " + toString(query.getName()) + " and " + toString(reference.getName()) + " are different lengths\n");
545                 }
546                 int alignLength = query.getAlignLength();
547         
548                 string q = query.getAligned();
549                 string r = reference.getAligned();
550
551                 int started = 0;
552                 Compare errors;
553
554                 for(int i=0;i<alignLength;i++){
555                         if(q[i] != '.' && r[i] != '.' && (q[i] != '-' || r[i] != '-')){                 //      no missing data and no double gaps
556                                 started = 1;
557                                 
558                                 if(q[i] == 'A'){
559                                         if(r[i] == 'A'){        errors.AA++;    errors.matches++;       errors.sequence += 'm'; }
560                                         if(r[i] == 'T'){        errors.AT++;    errors.sequence += 's'; }
561                                         if(r[i] == 'G'){        errors.AG++;    errors.sequence += 's'; }
562                                         if(r[i] == 'C'){        errors.AC++;    errors.sequence += 's'; }
563                                         if(r[i] == '-'){        errors.Ai++;    errors.sequence += 'i'; }
564                                 }
565                                 else if(q[i] == 'T'){
566                                         if(r[i] == 'A'){        errors.TA++;    errors.sequence += 's'; }
567                                         if(r[i] == 'T'){        errors.TT++;    errors.matches++;       errors.sequence += 'm'; }
568                                         if(r[i] == 'G'){        errors.TG++;    errors.sequence += 's'; }
569                                         if(r[i] == 'C'){        errors.TC++;    errors.sequence += 's'; }
570                                         if(r[i] == '-'){        errors.Ti++;    errors.sequence += 'i'; }
571                                 }
572                                 else if(q[i] == 'G'){
573                                         if(r[i] == 'A'){        errors.GA++;    errors.sequence += 's'; }
574                                         if(r[i] == 'T'){        errors.GT++;    errors.sequence += 's'; }
575                                         if(r[i] == 'G'){        errors.GG++;    errors.matches++;       errors.sequence += 'm'; }
576                                         if(r[i] == 'C'){        errors.GC++;    errors.sequence += 's'; }
577                                         if(r[i] == '-'){        errors.Gi++;    errors.sequence += 'i'; }
578                                 }
579                                 else if(q[i] == 'C'){
580                                         if(r[i] == 'A'){        errors.CA++;    errors.sequence += 's'; }
581                                         if(r[i] == 'T'){        errors.CT++;    errors.sequence += 's'; }
582                                         if(r[i] == 'G'){        errors.CG++;    errors.sequence += 's'; }
583                                         if(r[i] == 'C'){        errors.CC++;    errors.matches++;       errors.sequence += 'm'; }
584                                         if(r[i] == '-'){        errors.Ci++;    errors.sequence += 'i'; }
585                                 }
586                                 else if(q[i] == 'N'){
587                                         if(r[i] == 'A'){        errors.NA++;    errors.sequence += 'a'; }
588                                         if(r[i] == 'T'){        errors.NT++;    errors.sequence += 'a'; }
589                                         if(r[i] == 'G'){        errors.NG++;    errors.sequence += 'a'; }
590                                         if(r[i] == 'C'){        errors.NC++;    errors.sequence += 'a'; }
591                                         if(r[i] == '-'){        errors.Ni++;    errors.sequence += 'a'; }
592                                 }
593                                 else if(q[i] == '-' && r[i] != '-'){
594                                         if(r[i] == 'A'){        errors.dA++;    errors.sequence += 'd'; }
595                                         if(r[i] == 'T'){        errors.dT++;    errors.sequence += 'd'; }
596                                         if(r[i] == 'G'){        errors.dG++;    errors.sequence += 'd'; }
597                                         if(r[i] == 'C'){        errors.dC++;    errors.sequence += 'd'; }
598                                 }
599                                 errors.total++; 
600                                 
601                         }
602                         else if(q[i] == '.' && r[i] != '.'){            //      reference extends beyond query
603                                 if(started == 1){       break;  }
604                         }
605                         else if(q[i] != '.' && r[i] == '.'){            //      query extends beyond reference
606                                 m->mothurOut("Warning: " + toString(query.getName()) + " extend beyond " + toString(reference.getName()) + ".  Ignoring the extra bases in the query\n");
607                                 if(started == 1){       break;  }
608                         }
609                         else if(q[i] == '.' && r[i] == '.'){            //      both are missing data
610                                 if(started == 1){       break;  }                       
611                         }
612                         
613                 }
614                 errors.mismatches = errors.total-errors.matches;
615                 errors.errorRate = (double)(errors.total-errors.matches) / (double)errors.total;
616                 errors.queryName = query.getName();
617                 errors.refName = reference.getName();
618                 
619                 return errors;
620         }
621         catch(exception& e) {
622                 m->errorOut(e, "SeqErrorCommand", "getErrors");
623                 exit(1);
624         }
625 }
626
627 //***************************************************************************************************************
628
629 map<string, int> SeqErrorCommand::getWeights(){
630         ifstream nameFile;
631         m->openInputFile(namesFileName, nameFile);
632         
633         string seqName;
634         string redundantSeqs;
635         map<string, int> nameCountMap;
636         
637         while(nameFile){
638                 nameFile >> seqName >> redundantSeqs;
639                 nameCountMap[seqName] = m->getNumNames(redundantSeqs); 
640                 m->gobble(nameFile);
641         }
642         return nameCountMap;
643 }
644
645
646 //***************************************************************************************************************
647
648 void SeqErrorCommand::printErrorHeader(){
649         try {
650                 errorSummaryFile << "query\treference\tweight\t";
651                 errorSummaryFile << "AA\tAT\tAG\tAC\tTA\tTT\tTG\tTC\tGA\tGT\tGG\tGC\tCA\tCT\tCG\tCC\tNA\tNT\tNG\tNC\tAi\tTi\tGi\tCi\tNi\tdA\tdT\tdG\tdC\t";
652                 errorSummaryFile << "insertions\tdeletions\tsubstitutions\tambig\tmatches\tmismatches\ttotal\terror\n";
653                 
654                 errorSummaryFile << setprecision(6);
655                 errorSummaryFile.setf(ios::fixed);
656         }
657         catch(exception& e) {
658                 m->errorOut(e, "SeqErrorCommand", "printErrorHeader");
659                 exit(1);
660         }
661 }
662
663 //***************************************************************************************************************
664
665 void SeqErrorCommand::printErrorData(Compare error){
666         try {
667                 errorSummaryFile << error.queryName << '\t' << error.refName << '\t' << error.weight << '\t';
668                 errorSummaryFile << error.AA << '\t' << error.AT << '\t' << error.AG << '\t' << error.AC << '\t';
669                 errorSummaryFile << error.TA << '\t' << error.TT << '\t' << error.TG << '\t' << error.TC << '\t';
670                 errorSummaryFile << error.GA << '\t' << error.GT << '\t' << error.GG << '\t' << error.GC << '\t';
671                 errorSummaryFile << error.CA << '\t' << error.CT << '\t' << error.CG << '\t' << error.CC << '\t';
672                 errorSummaryFile << error.NA << '\t' << error.NT << '\t' << error.NG << '\t' << error.NC << '\t';
673                 errorSummaryFile << error.Ai << '\t' << error.Ti << '\t' << error.Gi << '\t' << error.Ci << '\t' << error.Ni << '\t' ;
674                 errorSummaryFile << error.dA << '\t' << error.dT << '\t' << error.dG << '\t' << error.dC << '\t';
675                 
676                 errorSummaryFile << error.Ai + error.Ti + error.Gi + error.Ci << '\t';                  //insertions
677                 errorSummaryFile << error.dA + error.dT + error.dG + error.dC << '\t';                  //deletions
678                 errorSummaryFile << error.mismatches - (error.Ai + error.Ti + error.Gi + error.Ci) - (error.dA + error.dT + error.dG + error.dC) - (error.NA + error.NT + error.NG + error.NC + error.Ni) << '\t';      //substitutions
679                 errorSummaryFile << error.NA + error.NT + error.NG + error.NC + error.Ni << '\t';       //ambiguities
680                 errorSummaryFile << error.matches << '\t' << error.mismatches << '\t' << error.total << '\t' << error.errorRate << endl;
681                 
682                 errorSeqFile << '>' << error.queryName << "\tref:" << error.refName << '\n' << error.sequence << endl;
683                 
684                 
685                 int a=0;                int t=1;                int g=2;                int c=3;
686                 int gap=4;              int n=5;
687                 
688                 substitutionMatrix[a][a] += error.weight * error.AA;
689                 substitutionMatrix[a][t] += error.weight * error.TA;
690                 substitutionMatrix[a][g] += error.weight * error.GA;
691                 substitutionMatrix[a][c] += error.weight * error.CA;
692                 substitutionMatrix[a][gap] += error.weight * error.dA;
693                 substitutionMatrix[a][n] += error.weight * error.NA;
694
695                 substitutionMatrix[t][a] += error.weight * error.AT;
696                 substitutionMatrix[t][t] += error.weight * error.TT;
697                 substitutionMatrix[t][g] += error.weight * error.GT;
698                 substitutionMatrix[t][c] += error.weight * error.CT;
699                 substitutionMatrix[t][gap] += error.weight * error.dT;
700                 substitutionMatrix[t][n] += error.weight * error.NT;
701
702                 substitutionMatrix[g][a] += error.weight * error.AG;
703                 substitutionMatrix[g][t] += error.weight * error.TG;
704                 substitutionMatrix[g][g] += error.weight * error.GG;
705                 substitutionMatrix[g][c] += error.weight * error.CG;
706                 substitutionMatrix[g][gap] += error.weight * error.dG;
707                 substitutionMatrix[g][n] += error.weight * error.NG;
708
709                 substitutionMatrix[c][a] += error.weight * error.AC;
710                 substitutionMatrix[c][t] += error.weight * error.TC;
711                 substitutionMatrix[c][g] += error.weight * error.GC;
712                 substitutionMatrix[c][c] += error.weight * error.CC;
713                 substitutionMatrix[c][gap] += error.weight * error.dC;
714                 substitutionMatrix[c][n] += error.weight * error.NC;
715
716                 substitutionMatrix[gap][a] += error.weight * error.Ai;
717                 substitutionMatrix[gap][t] += error.weight * error.Ti;
718                 substitutionMatrix[gap][g] += error.weight * error.Gi;
719                 substitutionMatrix[gap][c] += error.weight * error.Ci;
720                 substitutionMatrix[gap][n] += error.weight * error.Ni;
721                 
722         }
723         catch(exception& e) {
724                 m->errorOut(e, "SeqErrorCommand", "printErrorData");
725                 exit(1);
726         }
727 }
728
729 //***************************************************************************************************************
730
731
732
733
734
735
736