]> git.donarmstrong.com Git - mothur.git/blob - seqerrorcommand.cpp
*** empty log message ***
[mothur.git] / seqerrorcommand.cpp
1 /*
2  *  seqerrorcommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 7/15/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "seqerrorcommand.h"
11 #include "reportfile.h"
12 #include "qualityscores.h"
13 #include "refchimeratest.h"
14
15 //**********************************************************************************************************************
16 vector<string> SeqErrorCommand::getValidParameters(){   
17         try {
18                 string Array[] =  {"query", "reference", "name", "qfile", "report", "threshold", "inputdir", "outputdir"};
19                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
20                 return myArray;
21         }
22         catch(exception& e) {
23                 m->errorOut(e, "SeqErrorCommand", "getValidParameters");
24                 exit(1);
25         }
26 }
27 //**********************************************************************************************************************
28 SeqErrorCommand::SeqErrorCommand(){     
29         try {
30                 abort = true;
31                 //initialize outputTypes
32                 vector<string> tempOutNames;
33                 outputTypes["error.summary"] = tempOutNames;
34                 outputTypes["error.seq"] = tempOutNames;
35                 outputTypes["error.quality"] = tempOutNames;
36                 outputTypes["error.qual.forward"] = tempOutNames;
37                 outputTypes["error.qual.reverse"] = tempOutNames;
38                 outputTypes["error.forward"] = tempOutNames;
39                 outputTypes["error.reverse"] = tempOutNames;
40                 outputTypes["error.count"] = tempOutNames;
41                 outputTypes["error.matrix"] = tempOutNames;
42         }
43         catch(exception& e) {
44                 m->errorOut(e, "SeqErrorCommand", "SeqErrorCommand");
45                 exit(1);
46         }
47 }
48 //**********************************************************************************************************************
49 vector<string> SeqErrorCommand::getRequiredParameters(){        
50         try {
51                 string Array[] =  {"query","reference"};
52                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
53                 return myArray;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "SeqErrorCommand", "getRequiredParameters");
57                 exit(1);
58         }
59 }
60 //**********************************************************************************************************************
61 vector<string> SeqErrorCommand::getRequiredFiles(){     
62         try {
63                 vector<string> myArray;
64                 return myArray;
65         }
66         catch(exception& e) {
67                 m->errorOut(e, "SeqErrorCommand", "getRequiredFiles");
68                 exit(1);
69         }
70 }
71 //***************************************************************************************************************
72
73 SeqErrorCommand::SeqErrorCommand(string option)  {
74         try {
75                 
76                 abort = false;
77                 
78                 //allow user to run help
79                 if(option == "help") { help(); abort = true; }
80                 
81                 else {
82                         string temp;
83                         
84                         //valid paramters for this command
85                         string AlignArray[] =  {"query", "reference", "name", "qfile", "report", "threshold", "inputdir", "ignorechimeras", "outputdir"};
86                         
87                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
88                         
89                         OptionParser parser(option);
90                         map<string,string> parameters = parser.getParameters();
91                         
92                         ValidParameters validParameter;
93                         map<string,string>::iterator it;
94                         
95                         //check to make sure all parameters are valid for command
96                         for (it = parameters.begin(); it != parameters.end(); it++) { 
97                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
98                         }
99                         
100                         //initialize outputTypes
101                         vector<string> tempOutNames;
102                         outputTypes["error.summary"] = tempOutNames;
103                         outputTypes["error.seq"] = tempOutNames;
104                         outputTypes["error.quality"] = tempOutNames;
105                         outputTypes["error.qual.forward"] = tempOutNames;
106                         outputTypes["error.qual.reverse"] = tempOutNames;
107                         outputTypes["error.forward"] = tempOutNames;
108                         outputTypes["error.reverse"] = tempOutNames;
109                         outputTypes["error.count"] = tempOutNames;
110                         outputTypes["error.matrix"] = tempOutNames;
111
112                         
113                         //if the user changes the input directory command factory will send this info to us in the output parameter 
114                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
115                         if (inputDir == "not found"){   inputDir = "";          }
116                         else {
117                                 string path;
118                                 it = parameters.find("query");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["query"] = inputDir + it->second;            }
124                                 }
125                                 
126                                 it = parameters.find("reference");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
132                                 }
133                                 
134                                 it = parameters.find("name");
135                                 //user has given a names file
136                                 if(it != parameters.end()){ 
137                                         path = m->hasPath(it->second);
138                                         //if the user has not given a path then, add inputdir. else leave path alone.
139                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
140                                 }
141
142                                 it = parameters.find("qfile");
143                                 //user has given a quality score file
144                                 if(it != parameters.end()){ 
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
148                                 }
149                                 
150                                 it = parameters.find("report");
151                                 //user has given a alignment report file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["report"] = inputDir + it->second;           }
156                                 }
157                                 
158                         }
159                         //check for required parameters
160                         queryFileName = validParameter.validFile(parameters, "query", true);
161                         if (queryFileName == "not found") { m->mothurOut("query is a required parameter for the seq.error command."); m->mothurOutEndLine(); abort = true; }
162                         else if (queryFileName == "not open") { abort = true; } 
163                         
164                         referenceFileName = validParameter.validFile(parameters, "reference", true);
165                         if (referenceFileName == "not found") { m->mothurOut("reference is a required parameter for the seq.error command."); m->mothurOutEndLine(); abort = true; }
166                         else if (referenceFileName == "not open") { abort = true; }     
167                         
168
169                         //check for optional parameters
170                         namesFileName = validParameter.validFile(parameters, "name", true);
171                         if(namesFileName == "not found"){       namesFileName = "";     }
172                         
173                         qualFileName = validParameter.validFile(parameters, "qfile", true);
174                         if(qualFileName == "not found"){        qualFileName = "";      }
175
176                         reportFileName = validParameter.validFile(parameters, "report", true);
177                         if(reportFileName == "not found"){      reportFileName = "";    }
178                         
179                         if((reportFileName != "" && qualFileName == "") || (reportFileName == "" && qualFileName != "")){
180                                 m->mothurOut("if you use either a qual file or a report file, you have to have both.");
181                                 m->mothurOutEndLine();
182                                 abort = true; 
183                         }
184                         
185                         outputDir = validParameter.validFile(parameters, "outputdir", false);
186                         if (outputDir == "not found"){  
187                                 outputDir = ""; 
188                                 outputDir += m->hasPath(queryFileName); //if user entered a file with a path then preserve it   
189                         }
190                         
191                         //check for optional parameter and set defaults
192                         // ...at some point should added some additional type checking...
193                         temp = validParameter.validFile(parameters, "threshold", false);        if (temp == "not found") { temp = "1.00"; }
194                         convert(temp, threshold);  
195                         
196                         temp = validParameter.validFile(parameters, "ignorechimeras", false);   if (temp == "not found") { temp = "1"; }
197                         convert(temp, ignoreChimeras);  
198
199                         substitutionMatrix.resize(6);
200                         for(int i=0;i<6;i++){   substitutionMatrix[i].assign(6,0);      }
201                 }
202         }
203         catch(exception& e) {
204                 m->errorOut(e, "SeqErrorCommand", "SeqErrorCommand");
205                 exit(1);
206         }
207 }
208
209 //**********************************************************************************************************************
210
211 void SeqErrorCommand::help(){
212         try {
213                 m->mothurOut("The seq.error command reads a query alignment file and a reference alignment file and creates .....\n");
214                 m->mothurOut("Example seq.error(...).\n");
215                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
216                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/seq.error .\n\n");
217         }
218         catch(exception& e) {
219                 m->errorOut(e, "SeqErrorCommand", "help");
220                 exit(1);
221         }
222 }
223
224 //***************************************************************************************************************
225
226 SeqErrorCommand::~SeqErrorCommand(){
227
228 }
229
230 //***************************************************************************************************************
231
232 int SeqErrorCommand::execute(){
233         try{
234                 if (abort == true) { return 0; }
235
236                 string errorSummaryFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.summary";
237                 m->openOutputFile(errorSummaryFileName, errorSummaryFile);
238                 outputNames.push_back(errorSummaryFileName); outputTypes["error.summary"].push_back(errorSummaryFileName);
239                 printErrorHeader();
240                 
241                 string errorSeqFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq";
242                 m->openOutputFile(errorSeqFileName, errorSeqFile);
243                 outputNames.push_back(errorSeqFileName); outputTypes["error.seq"].push_back(errorSeqFileName);
244
245                 getReferences();        //read in reference sequences - make sure there's no ambiguous bases
246
247                 map<string, int> weights;
248                 if(namesFileName != ""){        weights = getWeights(); }
249                 
250                 ifstream queryFile;
251                 m->openInputFile(queryFileName, queryFile);
252                 
253                 ifstream reportFile;
254                 ifstream qualFile;
255
256                 ReportFile report;
257                 QualityScores quality;
258                 vector<vector<int> > qualForwardMap;
259                 vector<vector<int> > qualReverseMap;
260                 
261                 if(qualFileName != "" && reportFileName != ""){
262                         m->openInputFile(qualFileName, qualFile);
263                         report = ReportFile(reportFile, reportFileName);
264                         
265                         qualForwardMap.resize(1000);
266                         qualReverseMap.resize(1000);
267                         for(int i=0;i<1000;i++){
268                                 qualForwardMap[i].assign(100,0);
269                                 qualReverseMap[i].assign(100,0);
270                         }                               
271                 }
272                 
273                 int totalBases = 0;
274                 int totalMatches = 0;
275                 
276                 vector<int> misMatchCounts(11, 0);
277                 int maxMismatch = 0;
278                 int numSeqs = 0;
279                 
280                 map<string, int>::iterator it;
281                 map<char, vector<int> > qScoreErrorMap;
282                 qScoreErrorMap['m'].assign(41, 0);
283                 qScoreErrorMap['s'].assign(41, 0);
284                 qScoreErrorMap['i'].assign(41, 0);
285                 qScoreErrorMap['a'].assign(41, 0);
286                 
287                 map<char, vector<int> > errorForward;
288                 errorForward['m'].assign(1000,0);
289                 errorForward['s'].assign(1000,0);
290                 errorForward['i'].assign(1000,0);
291                 errorForward['d'].assign(1000,0);
292                 errorForward['a'].assign(1000,0);
293                 
294                 map<char, vector<int> > errorReverse;
295                 errorReverse['m'].assign(1000,0);
296                 errorReverse['s'].assign(1000,0);
297                 errorReverse['i'].assign(1000,0);
298                 errorReverse['d'].assign(1000,0);
299                 errorReverse['a'].assign(1000,0);       
300                 
301
302                 string errorChimeraFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.chimera";
303                 RefChimeraTest chimeraTest(referenceSeqs, errorChimeraFileName);
304                 outputNames.push_back(errorChimeraFileName); outputTypes["error.chimera"].push_back(errorChimeraFileName);
305                 
306                 int index = 0;
307                 bool ignoreSeq = 0;
308                 
309                 while(queryFile){
310                         
311                         if (m->control_pressed) { errorSummaryFile.close();     errorSeqFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
312                 
313                         Sequence query(queryFile);
314                                                 
315                         int numParentSeqs = chimeraTest.analyzeQuery(query.getName(), query.getAligned());
316                         int closestRefIndex = chimeraTest.getClosestRefIndex();
317
318                         if(numParentSeqs > 1 && ignoreChimeras == 1)    {       ignoreSeq = 1;  }
319                         else                                                                                    {       ignoreSeq = 0;  }
320
321
322                         Compare minCompare = getErrors(query, referenceSeqs[closestRefIndex]);
323                         
324                         if(namesFileName != ""){
325                                 it = weights.find(query.getName());
326                                 minCompare.weight = it->second;
327                         }
328                         else    {       minCompare.weight = 1;  }
329
330                         printErrorData(minCompare, numParentSeqs);
331
332                         if(!ignoreSeq){
333                                 for(int i=0;i<minCompare.total;i++){
334                                         char letter = minCompare.sequence[i];
335                                         errorForward[letter][i] += minCompare.weight;
336                                         errorReverse[letter][minCompare.total-i-1] += minCompare.weight;                                
337                                 }
338                         }
339                         
340                         if(qualFileName != "" && reportFileName != ""){
341                                 report = ReportFile(reportFile);
342                                 
343                                 int origLength = report.getQueryLength();
344                                 int startBase = report.getQueryStart();
345                                 int endBase = report.getQueryEnd();
346
347                                 quality = QualityScores(qualFile, origLength);
348
349                                 if(!ignoreSeq){
350                                         quality.updateQScoreErrorMap(qScoreErrorMap, minCompare.sequence, startBase, endBase, minCompare.weight);
351                                         quality.updateForwardMap(qualForwardMap, startBase, endBase, minCompare.weight);
352                                         quality.updateReverseMap(qualReverseMap, startBase, endBase, minCompare.weight);
353                                 }
354                         }                       
355                         
356                         if(minCompare.errorRate < threshold && !ignoreSeq){
357                                 totalBases += (minCompare.total * minCompare.weight);
358                                 totalMatches += minCompare.matches * minCompare.weight;
359                                 if(minCompare.mismatches > maxMismatch){
360                                         maxMismatch = minCompare.mismatches;
361                                         misMatchCounts.resize(maxMismatch + 1, 0);
362                                 }                               
363                                 misMatchCounts[minCompare.mismatches] += minCompare.weight;
364                                 numSeqs++;
365                         }
366                         
367                         index++;
368                         if(index % 1000 == 0){  cout << index << endl;  }
369                 }
370                 queryFile.close();
371                 errorSummaryFile.close();       
372                 errorSeqFile.close();
373
374                 if(qualFileName != "" && reportFileName != ""){         
375                         printErrorQuality(qScoreErrorMap);
376                         printQualityFR(qualForwardMap, qualReverseMap);
377                 }
378                 
379                 printErrorFRFile(errorForward, errorReverse);
380                 
381                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
382
383                 string errorCountFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.count";
384                 ofstream errorCountFile;
385                 m->openOutputFile(errorCountFileName, errorCountFile);
386                 outputNames.push_back(errorCountFileName);  outputTypes["error.count"].push_back(errorCountFileName);
387                 m->mothurOut("Overall error rate:\t" + toString((double)(totalBases - totalMatches) / (double)totalBases) + "\n\n");
388                 m->mothurOut("Errors\tSequences\n");
389                 errorCountFile << "Errors\tSequences\n";                
390                 for(int i=0;i<misMatchCounts.size();i++){
391                         m->mothurOut(toString(i) + '\t' + toString(misMatchCounts[i]) + '\n');
392                         errorCountFile << i << '\t' << misMatchCounts[i] << endl;
393                 }
394                 errorCountFile.close();
395                 
396                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
397
398                 printSubMatrix();
399                                 
400                 m->mothurOutEndLine();
401                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
402                 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
403                 m->mothurOutEndLine();
404                 
405                 return 0;       
406         }
407         catch(exception& e) {
408                 m->errorOut(e, "SeqErrorCommand", "execute");
409                 exit(1);
410         }
411 }
412
413 //***************************************************************************************************************
414
415 void SeqErrorCommand::getReferences(){
416         try {
417                 
418                 ifstream referenceFile;
419                 m->openInputFile(referenceFileName, referenceFile);
420                 
421                 while(referenceFile){
422                         Sequence currentSeq(referenceFile);
423                         int numAmbigs = currentSeq.getAmbigBases();
424                         
425                         if(numAmbigs != 0){
426                                 m->mothurOut("Warning: " + toString(currentSeq.getName()) + " has " + toString(numAmbigs) + " ambiguous bases, these bases will be removed\n");
427                                 currentSeq.removeAmbigBases();
428                         }
429                         referenceSeqs.push_back(currentSeq);
430                         m->gobble(referenceFile);
431                 }
432                 numRefs = referenceSeqs.size();
433                 
434                 referenceFile.close();
435         }
436         catch(exception& e) {
437                 m->errorOut(e, "SeqErrorCommand", "getReferences");
438                 exit(1);
439         }
440 }
441
442 //***************************************************************************************************************
443
444 Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){
445         try {
446                 if(query.getAlignLength() != reference.getAlignLength()){
447                         m->mothurOut("Warning: " + toString(query.getName()) + " and " + toString(reference.getName()) + " are different lengths\n");
448                 }
449                 int alignLength = query.getAlignLength();
450         
451                 string q = query.getAligned();
452                 string r = reference.getAligned();
453
454                 int started = 0;
455                 Compare errors;
456
457                 for(int i=0;i<alignLength;i++){
458                         if(q[i] != '.' && r[i] != '.' && (q[i] != '-' || r[i] != '-')){                 //      no missing data and no double gaps
459                                 started = 1;
460                                 
461                                 if(q[i] == 'A'){
462                                         if(r[i] == 'A'){        errors.AA++;    errors.matches++;       errors.sequence += 'm'; }
463                                         if(r[i] == 'T'){        errors.AT++;    errors.sequence += 's'; }
464                                         if(r[i] == 'G'){        errors.AG++;    errors.sequence += 's'; }
465                                         if(r[i] == 'C'){        errors.AC++;    errors.sequence += 's'; }
466                                         if(r[i] == '-'){        errors.Ai++;    errors.sequence += 'i'; }
467                                 }
468                                 else if(q[i] == 'T'){
469                                         if(r[i] == 'A'){        errors.TA++;    errors.sequence += 's'; }
470                                         if(r[i] == 'T'){        errors.TT++;    errors.matches++;       errors.sequence += 'm'; }
471                                         if(r[i] == 'G'){        errors.TG++;    errors.sequence += 's'; }
472                                         if(r[i] == 'C'){        errors.TC++;    errors.sequence += 's'; }
473                                         if(r[i] == '-'){        errors.Ti++;    errors.sequence += 'i'; }
474                                 }
475                                 else if(q[i] == 'G'){
476                                         if(r[i] == 'A'){        errors.GA++;    errors.sequence += 's'; }
477                                         if(r[i] == 'T'){        errors.GT++;    errors.sequence += 's'; }
478                                         if(r[i] == 'G'){        errors.GG++;    errors.matches++;       errors.sequence += 'm'; }
479                                         if(r[i] == 'C'){        errors.GC++;    errors.sequence += 's'; }
480                                         if(r[i] == '-'){        errors.Gi++;    errors.sequence += 'i'; }
481                                 }
482                                 else if(q[i] == 'C'){
483                                         if(r[i] == 'A'){        errors.CA++;    errors.sequence += 's'; }
484                                         if(r[i] == 'T'){        errors.CT++;    errors.sequence += 's'; }
485                                         if(r[i] == 'G'){        errors.CG++;    errors.sequence += 's'; }
486                                         if(r[i] == 'C'){        errors.CC++;    errors.matches++;       errors.sequence += 'm'; }
487                                         if(r[i] == '-'){        errors.Ci++;    errors.sequence += 'i'; }
488                                 }
489                                 else if(q[i] == 'N'){
490                                         if(r[i] == 'A'){        errors.NA++;    errors.sequence += 'a'; }
491                                         if(r[i] == 'T'){        errors.NT++;    errors.sequence += 'a'; }
492                                         if(r[i] == 'G'){        errors.NG++;    errors.sequence += 'a'; }
493                                         if(r[i] == 'C'){        errors.NC++;    errors.sequence += 'a'; }
494                                         if(r[i] == '-'){        errors.Ni++;    errors.sequence += 'a'; }
495                                 }
496                                 else if(q[i] == '-' && r[i] != '-'){
497                                         if(r[i] == 'A'){        errors.dA++;    errors.sequence += 'd'; }
498                                         if(r[i] == 'T'){        errors.dT++;    errors.sequence += 'd'; }
499                                         if(r[i] == 'G'){        errors.dG++;    errors.sequence += 'd'; }
500                                         if(r[i] == 'C'){        errors.dC++;    errors.sequence += 'd'; }
501                                 }
502                                 errors.total++; 
503                                 
504                         }
505                         else if(q[i] == '.' && r[i] != '.'){            //      reference extends beyond query
506                                 if(started == 1){       break;  }
507                         }
508                         else if(q[i] != '.' && r[i] == '.'){            //      query extends beyond reference
509                                 m->mothurOut("Warning: " + toString(query.getName()) + " extend beyond " + toString(reference.getName()) + ".  Ignoring the extra bases in the query\n");
510                                 if(started == 1){       break;  }
511                         }
512                         else if(q[i] == '.' && r[i] == '.'){            //      both are missing data
513                                 if(started == 1){       break;  }                       
514                         }
515                         
516                 }
517                 errors.mismatches = errors.total-errors.matches;
518                 errors.errorRate = (double)(errors.total-errors.matches) / (double)errors.total;
519                 errors.queryName = query.getName();
520                 errors.refName = reference.getName();
521                 
522                 return errors;
523         }
524         catch(exception& e) {
525                 m->errorOut(e, "SeqErrorCommand", "getErrors");
526                 exit(1);
527         }
528 }
529
530 //***************************************************************************************************************
531
532 map<string, int> SeqErrorCommand::getWeights(){
533         ifstream nameFile;
534         m->openInputFile(namesFileName, nameFile);
535         
536         string seqName;
537         string redundantSeqs;
538         map<string, int> nameCountMap;
539         
540         while(nameFile){
541                 nameFile >> seqName >> redundantSeqs;
542                 nameCountMap[seqName] = m->getNumNames(redundantSeqs); 
543                 m->gobble(nameFile);
544         }
545         return nameCountMap;
546 }
547
548
549 //***************************************************************************************************************
550
551 void SeqErrorCommand::printErrorHeader(){
552         try {
553                 errorSummaryFile << "query\treference\tweight\t";
554                 errorSummaryFile << "AA\tAT\tAG\tAC\tTA\tTT\tTG\tTC\tGA\tGT\tGG\tGC\tCA\tCT\tCG\tCC\tNA\tNT\tNG\tNC\tAi\tTi\tGi\tCi\tNi\tdA\tdT\tdG\tdC\t";
555                 errorSummaryFile << "insertions\tdeletions\tsubstitutions\tambig\tmatches\tmismatches\ttotal\terror\tnumparents\n";
556                 
557                 errorSummaryFile << setprecision(6);
558                 errorSummaryFile.setf(ios::fixed);
559         }
560         catch(exception& e) {
561                 m->errorOut(e, "SeqErrorCommand", "printErrorHeader");
562                 exit(1);
563         }
564 }
565
566 //***************************************************************************************************************
567
568 void SeqErrorCommand::printErrorData(Compare error, int numParentSeqs){
569         try {
570                 errorSummaryFile << error.queryName << '\t' << error.refName << '\t' << error.weight << '\t';
571                 errorSummaryFile << error.AA << '\t' << error.AT << '\t' << error.AG << '\t' << error.AC << '\t';
572                 errorSummaryFile << error.TA << '\t' << error.TT << '\t' << error.TG << '\t' << error.TC << '\t';
573                 errorSummaryFile << error.GA << '\t' << error.GT << '\t' << error.GG << '\t' << error.GC << '\t';
574                 errorSummaryFile << error.CA << '\t' << error.CT << '\t' << error.CG << '\t' << error.CC << '\t';
575                 errorSummaryFile << error.NA << '\t' << error.NT << '\t' << error.NG << '\t' << error.NC << '\t';
576                 errorSummaryFile << error.Ai << '\t' << error.Ti << '\t' << error.Gi << '\t' << error.Ci << '\t' << error.Ni << '\t' ;
577                 errorSummaryFile << error.dA << '\t' << error.dT << '\t' << error.dG << '\t' << error.dC << '\t';
578                 
579                 errorSummaryFile << error.Ai + error.Ti + error.Gi + error.Ci << '\t';                  //insertions
580                 errorSummaryFile << error.dA + error.dT + error.dG + error.dC << '\t';                  //deletions
581                 errorSummaryFile << error.mismatches - (error.Ai + error.Ti + error.Gi + error.Ci) - (error.dA + error.dT + error.dG + error.dC) - (error.NA + error.NT + error.NG + error.NC + error.Ni) << '\t';      //substitutions
582                 errorSummaryFile << error.NA + error.NT + error.NG + error.NC + error.Ni << '\t';       //ambiguities
583                 errorSummaryFile << error.matches << '\t' << error.mismatches << '\t' << error.total << '\t' << error.errorRate << '\t' << numParentSeqs << endl;
584                 
585                 errorSeqFile << '>' << error.queryName << "\tref:" << error.refName << '\n' << error.sequence << endl;
586                 
587                 
588                 int a=0;                int t=1;                int g=2;                int c=3;
589                 int gap=4;              int n=5;
590                 if(numParentSeqs == 1 || ignoreChimeras == 0){
591                         substitutionMatrix[a][a] += error.weight * error.AA;
592                         substitutionMatrix[a][t] += error.weight * error.TA;
593                         substitutionMatrix[a][g] += error.weight * error.GA;
594                         substitutionMatrix[a][c] += error.weight * error.CA;
595                         substitutionMatrix[a][gap] += error.weight * error.dA;
596                         substitutionMatrix[a][n] += error.weight * error.NA;
597
598                         substitutionMatrix[t][a] += error.weight * error.AT;
599                         substitutionMatrix[t][t] += error.weight * error.TT;
600                         substitutionMatrix[t][g] += error.weight * error.GT;
601                         substitutionMatrix[t][c] += error.weight * error.CT;
602                         substitutionMatrix[t][gap] += error.weight * error.dT;
603                         substitutionMatrix[t][n] += error.weight * error.NT;
604
605                         substitutionMatrix[g][a] += error.weight * error.AG;
606                         substitutionMatrix[g][t] += error.weight * error.TG;
607                         substitutionMatrix[g][g] += error.weight * error.GG;
608                         substitutionMatrix[g][c] += error.weight * error.CG;
609                         substitutionMatrix[g][gap] += error.weight * error.dG;
610                         substitutionMatrix[g][n] += error.weight * error.NG;
611
612                         substitutionMatrix[c][a] += error.weight * error.AC;
613                         substitutionMatrix[c][t] += error.weight * error.TC;
614                         substitutionMatrix[c][g] += error.weight * error.GC;
615                         substitutionMatrix[c][c] += error.weight * error.CC;
616                         substitutionMatrix[c][gap] += error.weight * error.dC;
617                         substitutionMatrix[c][n] += error.weight * error.NC;
618
619                         substitutionMatrix[gap][a] += error.weight * error.Ai;
620                         substitutionMatrix[gap][t] += error.weight * error.Ti;
621                         substitutionMatrix[gap][g] += error.weight * error.Gi;
622                         substitutionMatrix[gap][c] += error.weight * error.Ci;
623                         substitutionMatrix[gap][n] += error.weight * error.Ni;
624                 }
625         }
626         catch(exception& e) {
627                 m->errorOut(e, "SeqErrorCommand", "printErrorData");
628                 exit(1);
629         }
630 }
631
632 //***************************************************************************************************************
633
634 void SeqErrorCommand::printSubMatrix(){
635         try {
636                 string subMatrixFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.matrix";
637                 ofstream subMatrixFile;
638                 m->openOutputFile(subMatrixFileName, subMatrixFile);
639                 outputNames.push_back(subMatrixFileName);  outputTypes["error.matrix"].push_back(subMatrixFileName);
640                 vector<string> bases(6);
641                 bases[0] = "A";
642                 bases[1] = "T";
643                 bases[2] = "G";
644                 bases[3] = "C";
645                 bases[4] = "Gap";
646                 bases[5] = "N";
647                 vector<int> refSums(5,1);
648
649                 for(int i=0;i<5;i++){
650                         subMatrixFile << "\tr" << bases[i];
651                         
652                         for(int j=0;j<6;j++){
653                                 refSums[i] += substitutionMatrix[i][j];                         
654                         }
655                 }
656                 subMatrixFile << endl;
657                 
658                 for(int i=0;i<6;i++){
659                         subMatrixFile << 'q' << bases[i];
660                         for(int j=0;j<5;j++){
661                                 subMatrixFile << '\t' << substitutionMatrix[j][i];                              
662                         }
663                         subMatrixFile << endl;
664                 }
665
666                 subMatrixFile << "total";
667                 for(int i=0;i<5;i++){
668                         subMatrixFile << '\t' << refSums[i];
669                 }
670                 subMatrixFile << endl;
671                 subMatrixFile.close();
672         }
673         catch(exception& e) {
674                 m->errorOut(e, "SeqErrorCommand", "printSubMatrix");
675                 exit(1);
676         }
677 }
678 //***************************************************************************************************************
679
680 void SeqErrorCommand::printErrorFRFile(map<char, vector<int> > errorForward, map<char, vector<int> > errorReverse){
681         try{
682                 string errorForwardFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq.forward";
683                 ofstream errorForwardFile;
684                 m->openOutputFile(errorForwardFileName, errorForwardFile);
685                 outputNames.push_back(errorForwardFileName);  outputTypes["error.forward"].push_back(errorForwardFileName);
686
687                 errorForwardFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl;
688                 for(int i=0;i<1000;i++){
689                         float match = (float)errorForward['m'][i];
690                         float subst = (float)errorForward['s'][i];
691                         float insert = (float)errorForward['i'][i];
692                         float del = (float)errorForward['d'][i];
693                         float amb = (float)errorForward['a'][i];
694                         float total = match + subst + insert + del + amb;
695                         if(total == 0){ break;  }
696                         errorForwardFile << i+1 << '\t' << total << '\t' << match/total  << '\t' << subst/total  << '\t' << insert/total  << '\t' << del/total  << '\t' << amb/total << endl;
697                 }
698                 errorForwardFile.close();
699
700                 string errorReverseFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq.reverse";
701                 ofstream errorReverseFile;
702                 m->openOutputFile(errorReverseFileName, errorReverseFile);
703                 outputNames.push_back(errorReverseFileName);  outputTypes["error.reverse"].push_back(errorReverseFileName);
704
705                 errorReverseFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl;
706                 for(int i=0;i<1000;i++){
707                         float match = (float)errorReverse['m'][i];
708                         float subst = (float)errorReverse['s'][i];
709                         float insert = (float)errorReverse['i'][i];
710                         float del = (float)errorReverse['d'][i];
711                         float amb = (float)errorReverse['a'][i];
712                         float total = match + subst + insert + del + amb;
713                         if(total == 0){ break;  }
714                         errorReverseFile << i+1 << '\t' << total << '\t' << match/total  << '\t' << subst/total  << '\t' << insert/total  << '\t' << del/total  << '\t' << amb/total << endl;
715                 }
716                 errorReverseFile.close();
717         }
718         catch(exception& e) {
719                 m->errorOut(e, "SeqErrorCommand", "printErrorFRFile");
720                 exit(1);
721         }
722 }
723
724 //***************************************************************************************************************
725
726 void SeqErrorCommand::printErrorQuality(map<char, vector<int> > qScoreErrorMap){
727         try{
728
729                 string errorQualityFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.quality";
730                 ofstream errorQualityFile;
731                 m->openOutputFile(errorQualityFileName, errorQualityFile);
732                 outputNames.push_back(errorQualityFileName);  outputTypes["error.quality"].push_back(errorQualityFileName);
733
734                 errorQualityFile << "qscore\tmatches\tsubstitutions\tinsertions\tambiguous" << endl;
735                 for(int i=0;i<41;i++){
736                         errorQualityFile << i << '\t' << qScoreErrorMap['m'][i] << '\t' << qScoreErrorMap['s'][i] << '\t' << qScoreErrorMap['i'][i] << '\t'<< qScoreErrorMap['a'][i] << endl;
737                 }
738                 errorQualityFile.close();
739         }
740         catch(exception& e) {
741                 m->errorOut(e, "SeqErrorCommand", "printErrorFRFile");
742                 exit(1);
743         }
744 }
745
746
747 //***************************************************************************************************************
748
749 void SeqErrorCommand::printQualityFR(vector<vector<int> > qualForwardMap, vector<vector<int> > qualReverseMap){
750         try{
751
752
753                 int lastRow = 0;
754                 int lastColumn = 0;
755
756                 for(int i=0;i<qualForwardMap.size();i++){
757                         for(int j=0;j<qualForwardMap[i].size();j++){
758                                 if(qualForwardMap[i][j] != 0){
759                                         if(lastRow < i)         {       lastRow = i+2;          }
760                                         if(lastColumn < j)      {       lastColumn = j+2;       }
761                                 }
762                         }
763                 }
764
765                 string qualityForwardFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.qual.forward";
766                 ofstream qualityForwardFile;
767                 m->openOutputFile(qualityForwardFileName, qualityForwardFile);
768                 outputNames.push_back(qualityForwardFileName);  outputTypes["error.qual.forward"].push_back(qualityForwardFileName);
769
770                 for(int i=0;i<lastColumn;i++){  qualityForwardFile << '\t' << i;        }       qualityForwardFile << endl;
771
772                 for(int i=0;i<lastRow;i++){
773                         qualityForwardFile << i+1;
774                         for(int j=0;j<lastColumn;j++){
775                                 qualityForwardFile << '\t' << qualForwardMap[i][j];
776                         }
777
778                         qualityForwardFile << endl;
779                 }
780                 qualityForwardFile.close();
781
782                 
783                 string qualityReverseFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.qual.reverse";
784                 ofstream qualityReverseFile;
785                 m->openOutputFile(qualityReverseFileName, qualityReverseFile);
786                 outputNames.push_back(qualityReverseFileName);  outputTypes["error.qual.reverse"].push_back(qualityReverseFileName);
787                 
788                 for(int i=0;i<lastColumn;i++){  qualityReverseFile << '\t' << i;        }       qualityReverseFile << endl;
789                 for(int i=0;i<lastRow;i++){
790                         
791                         qualityReverseFile << i+1;
792                         for(int j=0;j<lastColumn;j++){
793                                 qualityReverseFile << '\t' << qualReverseMap[i][j];
794                         }
795                         qualityReverseFile << endl;
796                 }
797                 qualityReverseFile.close();
798         }
799         catch(exception& e) {
800                 m->errorOut(e, "SeqErrorCommand", "printErrorFRFile");
801                 exit(1);
802         }
803 }
804
805
806 //***************************************************************************************************************