5 * Created by Pat Schloss on 7/15/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "seqerrorcommand.h"
11 #include "reportfile.h"
12 #include "qualityscores.h"
13 #include "refchimeratest.h"
15 //**********************************************************************************************************************
16 vector<string> SeqErrorCommand::getValidParameters(){
18 string Array[] = {"query", "reference", "name", "qfile", "report", "threshold", "inputdir", "outputdir"};
19 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
23 m->errorOut(e, "SeqErrorCommand", "getValidParameters");
27 //**********************************************************************************************************************
28 SeqErrorCommand::SeqErrorCommand(){
31 //initialize outputTypes
32 vector<string> tempOutNames;
33 outputTypes["error.summary"] = tempOutNames;
34 outputTypes["error.seq"] = tempOutNames;
35 outputTypes["error.quality"] = tempOutNames;
36 outputTypes["error.qual.forward"] = tempOutNames;
37 outputTypes["error.qual.reverse"] = tempOutNames;
38 outputTypes["error.forward"] = tempOutNames;
39 outputTypes["error.reverse"] = tempOutNames;
40 outputTypes["error.count"] = tempOutNames;
41 outputTypes["error.matrix"] = tempOutNames;
44 m->errorOut(e, "SeqErrorCommand", "SeqErrorCommand");
48 //**********************************************************************************************************************
49 vector<string> SeqErrorCommand::getRequiredParameters(){
51 string Array[] = {"query","reference"};
52 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
56 m->errorOut(e, "SeqErrorCommand", "getRequiredParameters");
60 //**********************************************************************************************************************
61 vector<string> SeqErrorCommand::getRequiredFiles(){
63 vector<string> myArray;
67 m->errorOut(e, "SeqErrorCommand", "getRequiredFiles");
71 //***************************************************************************************************************
73 SeqErrorCommand::SeqErrorCommand(string option) {
78 //allow user to run help
79 if(option == "help") { help(); abort = true; }
84 //valid paramters for this command
85 string AlignArray[] = {"query", "reference", "name", "qfile", "report", "threshold", "inputdir", "ignorechimeras", "outputdir"};
87 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
89 OptionParser parser(option);
90 map<string,string> parameters = parser.getParameters();
92 ValidParameters validParameter;
93 map<string,string>::iterator it;
95 //check to make sure all parameters are valid for command
96 for (it = parameters.begin(); it != parameters.end(); it++) {
97 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
100 //initialize outputTypes
101 vector<string> tempOutNames;
102 outputTypes["error.summary"] = tempOutNames;
103 outputTypes["error.seq"] = tempOutNames;
104 outputTypes["error.quality"] = tempOutNames;
105 outputTypes["error.qual.forward"] = tempOutNames;
106 outputTypes["error.qual.reverse"] = tempOutNames;
107 outputTypes["error.forward"] = tempOutNames;
108 outputTypes["error.reverse"] = tempOutNames;
109 outputTypes["error.count"] = tempOutNames;
110 outputTypes["error.matrix"] = tempOutNames;
113 //if the user changes the input directory command factory will send this info to us in the output parameter
114 string inputDir = validParameter.validFile(parameters, "inputdir", false);
115 if (inputDir == "not found"){ inputDir = ""; }
118 it = parameters.find("query");
119 //user has given a template file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["query"] = inputDir + it->second; }
126 it = parameters.find("reference");
127 //user has given a template file
128 if(it != parameters.end()){
129 path = m->hasPath(it->second);
130 //if the user has not given a path then, add inputdir. else leave path alone.
131 if (path == "") { parameters["reference"] = inputDir + it->second; }
134 it = parameters.find("name");
135 //user has given a names file
136 if(it != parameters.end()){
137 path = m->hasPath(it->second);
138 //if the user has not given a path then, add inputdir. else leave path alone.
139 if (path == "") { parameters["name"] = inputDir + it->second; }
142 it = parameters.find("qfile");
143 //user has given a quality score file
144 if(it != parameters.end()){
145 path = m->hasPath(it->second);
146 //if the user has not given a path then, add inputdir. else leave path alone.
147 if (path == "") { parameters["qfile"] = inputDir + it->second; }
150 it = parameters.find("report");
151 //user has given a alignment report file
152 if(it != parameters.end()){
153 path = m->hasPath(it->second);
154 //if the user has not given a path then, add inputdir. else leave path alone.
155 if (path == "") { parameters["report"] = inputDir + it->second; }
159 //check for required parameters
160 queryFileName = validParameter.validFile(parameters, "query", true);
161 if (queryFileName == "not found") { m->mothurOut("query is a required parameter for the seq.error command."); m->mothurOutEndLine(); abort = true; }
162 else if (queryFileName == "not open") { abort = true; }
164 referenceFileName = validParameter.validFile(parameters, "reference", true);
165 if (referenceFileName == "not found") { m->mothurOut("reference is a required parameter for the seq.error command."); m->mothurOutEndLine(); abort = true; }
166 else if (referenceFileName == "not open") { abort = true; }
169 //check for optional parameters
170 namesFileName = validParameter.validFile(parameters, "name", true);
171 if(namesFileName == "not found"){ namesFileName = ""; }
173 qualFileName = validParameter.validFile(parameters, "qfile", true);
174 if(qualFileName == "not found"){ qualFileName = ""; }
176 reportFileName = validParameter.validFile(parameters, "report", true);
177 if(reportFileName == "not found"){ reportFileName = ""; }
179 if((reportFileName != "" && qualFileName == "") || (reportFileName == "" && qualFileName != "")){
180 m->mothurOut("if you use either a qual file or a report file, you have to have both.");
181 m->mothurOutEndLine();
185 outputDir = validParameter.validFile(parameters, "outputdir", false);
186 if (outputDir == "not found"){
188 outputDir += m->hasPath(queryFileName); //if user entered a file with a path then preserve it
191 //check for optional parameter and set defaults
192 // ...at some point should added some additional type checking...
193 temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found") { temp = "1.00"; }
194 convert(temp, threshold);
196 temp = validParameter.validFile(parameters, "ignorechimeras", false); if (temp == "not found") { temp = "1"; }
197 convert(temp, ignoreChimeras);
199 substitutionMatrix.resize(6);
200 for(int i=0;i<6;i++){ substitutionMatrix[i].assign(6,0); }
203 catch(exception& e) {
204 m->errorOut(e, "SeqErrorCommand", "SeqErrorCommand");
209 //**********************************************************************************************************************
211 void SeqErrorCommand::help(){
213 m->mothurOut("The seq.error command reads a query alignment file and a reference alignment file and creates .....\n");
214 m->mothurOut("Example seq.error(...).\n");
215 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
216 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/seq.error .\n\n");
218 catch(exception& e) {
219 m->errorOut(e, "SeqErrorCommand", "help");
224 //***************************************************************************************************************
226 SeqErrorCommand::~SeqErrorCommand(){
230 //***************************************************************************************************************
232 int SeqErrorCommand::execute(){
234 if (abort == true) { return 0; }
236 string errorSummaryFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.summary";
237 m->openOutputFile(errorSummaryFileName, errorSummaryFile);
238 outputNames.push_back(errorSummaryFileName); outputTypes["error.summary"].push_back(errorSummaryFileName);
241 string errorSeqFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq";
242 m->openOutputFile(errorSeqFileName, errorSeqFile);
243 outputNames.push_back(errorSeqFileName); outputTypes["error.seq"].push_back(errorSeqFileName);
245 getReferences(); //read in reference sequences - make sure there's no ambiguous bases
247 map<string, int> weights;
248 if(namesFileName != ""){ weights = getWeights(); }
251 m->openInputFile(queryFileName, queryFile);
257 QualityScores quality;
258 vector<vector<int> > qualForwardMap;
259 vector<vector<int> > qualReverseMap;
261 if(qualFileName != "" && reportFileName != ""){
262 m->openInputFile(qualFileName, qualFile);
263 report = ReportFile(reportFile, reportFileName);
265 qualForwardMap.resize(1000);
266 qualReverseMap.resize(1000);
267 for(int i=0;i<1000;i++){
268 qualForwardMap[i].assign(100,0);
269 qualReverseMap[i].assign(100,0);
274 int totalMatches = 0;
276 vector<int> misMatchCounts(11, 0);
280 map<string, int>::iterator it;
281 map<char, vector<int> > qScoreErrorMap;
282 qScoreErrorMap['m'].assign(41, 0);
283 qScoreErrorMap['s'].assign(41, 0);
284 qScoreErrorMap['i'].assign(41, 0);
285 qScoreErrorMap['a'].assign(41, 0);
287 map<char, vector<int> > errorForward;
288 errorForward['m'].assign(1000,0);
289 errorForward['s'].assign(1000,0);
290 errorForward['i'].assign(1000,0);
291 errorForward['d'].assign(1000,0);
292 errorForward['a'].assign(1000,0);
294 map<char, vector<int> > errorReverse;
295 errorReverse['m'].assign(1000,0);
296 errorReverse['s'].assign(1000,0);
297 errorReverse['i'].assign(1000,0);
298 errorReverse['d'].assign(1000,0);
299 errorReverse['a'].assign(1000,0);
302 string errorChimeraFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.chimera";
303 RefChimeraTest chimeraTest(referenceSeqs, errorChimeraFileName);
304 outputNames.push_back(errorChimeraFileName); outputTypes["error.chimera"].push_back(errorChimeraFileName);
311 if (m->control_pressed) { errorSummaryFile.close(); errorSeqFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
313 Sequence query(queryFile);
315 int numParentSeqs = chimeraTest.analyzeQuery(query.getName(), query.getAligned());
316 int closestRefIndex = chimeraTest.getClosestRefIndex();
318 if(numParentSeqs > 1 && ignoreChimeras == 1) { ignoreSeq = 1; }
319 else { ignoreSeq = 0; }
322 Compare minCompare = getErrors(query, referenceSeqs[closestRefIndex]);
324 if(namesFileName != ""){
325 it = weights.find(query.getName());
326 minCompare.weight = it->second;
328 else { minCompare.weight = 1; }
330 printErrorData(minCompare, numParentSeqs);
333 for(int i=0;i<minCompare.total;i++){
334 char letter = minCompare.sequence[i];
335 errorForward[letter][i] += minCompare.weight;
336 errorReverse[letter][minCompare.total-i-1] += minCompare.weight;
340 if(qualFileName != "" && reportFileName != ""){
341 report = ReportFile(reportFile);
343 int origLength = report.getQueryLength();
344 int startBase = report.getQueryStart();
345 int endBase = report.getQueryEnd();
347 quality = QualityScores(qualFile, origLength);
350 quality.updateQScoreErrorMap(qScoreErrorMap, minCompare.sequence, startBase, endBase, minCompare.weight);
351 quality.updateForwardMap(qualForwardMap, startBase, endBase, minCompare.weight);
352 quality.updateReverseMap(qualReverseMap, startBase, endBase, minCompare.weight);
356 if(minCompare.errorRate < threshold && !ignoreSeq){
357 totalBases += (minCompare.total * minCompare.weight);
358 totalMatches += minCompare.matches * minCompare.weight;
359 if(minCompare.mismatches > maxMismatch){
360 maxMismatch = minCompare.mismatches;
361 misMatchCounts.resize(maxMismatch + 1, 0);
363 misMatchCounts[minCompare.mismatches] += minCompare.weight;
368 if(index % 1000 == 0){ cout << index << endl; }
371 errorSummaryFile.close();
372 errorSeqFile.close();
374 if(qualFileName != "" && reportFileName != ""){
375 printErrorQuality(qScoreErrorMap);
376 printQualityFR(qualForwardMap, qualReverseMap);
379 printErrorFRFile(errorForward, errorReverse);
381 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
383 string errorCountFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.count";
384 ofstream errorCountFile;
385 m->openOutputFile(errorCountFileName, errorCountFile);
386 outputNames.push_back(errorCountFileName); outputTypes["error.count"].push_back(errorCountFileName);
387 m->mothurOut("Overall error rate:\t" + toString((double)(totalBases - totalMatches) / (double)totalBases) + "\n\n");
388 m->mothurOut("Errors\tSequences\n");
389 errorCountFile << "Errors\tSequences\n";
390 for(int i=0;i<misMatchCounts.size();i++){
391 m->mothurOut(toString(i) + '\t' + toString(misMatchCounts[i]) + '\n');
392 errorCountFile << i << '\t' << misMatchCounts[i] << endl;
394 errorCountFile.close();
396 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
400 m->mothurOutEndLine();
401 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
402 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
403 m->mothurOutEndLine();
407 catch(exception& e) {
408 m->errorOut(e, "SeqErrorCommand", "execute");
413 //***************************************************************************************************************
415 void SeqErrorCommand::getReferences(){
418 ifstream referenceFile;
419 m->openInputFile(referenceFileName, referenceFile);
421 while(referenceFile){
422 Sequence currentSeq(referenceFile);
423 int numAmbigs = currentSeq.getAmbigBases();
426 m->mothurOut("Warning: " + toString(currentSeq.getName()) + " has " + toString(numAmbigs) + " ambiguous bases, these bases will be removed\n");
427 currentSeq.removeAmbigBases();
429 referenceSeqs.push_back(currentSeq);
430 m->gobble(referenceFile);
432 numRefs = referenceSeqs.size();
434 referenceFile.close();
436 catch(exception& e) {
437 m->errorOut(e, "SeqErrorCommand", "getReferences");
442 //***************************************************************************************************************
444 Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){
446 if(query.getAlignLength() != reference.getAlignLength()){
447 m->mothurOut("Warning: " + toString(query.getName()) + " and " + toString(reference.getName()) + " are different lengths\n");
449 int alignLength = query.getAlignLength();
451 string q = query.getAligned();
452 string r = reference.getAligned();
457 for(int i=0;i<alignLength;i++){
458 if(q[i] != '.' && r[i] != '.' && (q[i] != '-' || r[i] != '-')){ // no missing data and no double gaps
462 if(r[i] == 'A'){ errors.AA++; errors.matches++; errors.sequence += 'm'; }
463 if(r[i] == 'T'){ errors.AT++; errors.sequence += 's'; }
464 if(r[i] == 'G'){ errors.AG++; errors.sequence += 's'; }
465 if(r[i] == 'C'){ errors.AC++; errors.sequence += 's'; }
466 if(r[i] == '-'){ errors.Ai++; errors.sequence += 'i'; }
468 else if(q[i] == 'T'){
469 if(r[i] == 'A'){ errors.TA++; errors.sequence += 's'; }
470 if(r[i] == 'T'){ errors.TT++; errors.matches++; errors.sequence += 'm'; }
471 if(r[i] == 'G'){ errors.TG++; errors.sequence += 's'; }
472 if(r[i] == 'C'){ errors.TC++; errors.sequence += 's'; }
473 if(r[i] == '-'){ errors.Ti++; errors.sequence += 'i'; }
475 else if(q[i] == 'G'){
476 if(r[i] == 'A'){ errors.GA++; errors.sequence += 's'; }
477 if(r[i] == 'T'){ errors.GT++; errors.sequence += 's'; }
478 if(r[i] == 'G'){ errors.GG++; errors.matches++; errors.sequence += 'm'; }
479 if(r[i] == 'C'){ errors.GC++; errors.sequence += 's'; }
480 if(r[i] == '-'){ errors.Gi++; errors.sequence += 'i'; }
482 else if(q[i] == 'C'){
483 if(r[i] == 'A'){ errors.CA++; errors.sequence += 's'; }
484 if(r[i] == 'T'){ errors.CT++; errors.sequence += 's'; }
485 if(r[i] == 'G'){ errors.CG++; errors.sequence += 's'; }
486 if(r[i] == 'C'){ errors.CC++; errors.matches++; errors.sequence += 'm'; }
487 if(r[i] == '-'){ errors.Ci++; errors.sequence += 'i'; }
489 else if(q[i] == 'N'){
490 if(r[i] == 'A'){ errors.NA++; errors.sequence += 'a'; }
491 if(r[i] == 'T'){ errors.NT++; errors.sequence += 'a'; }
492 if(r[i] == 'G'){ errors.NG++; errors.sequence += 'a'; }
493 if(r[i] == 'C'){ errors.NC++; errors.sequence += 'a'; }
494 if(r[i] == '-'){ errors.Ni++; errors.sequence += 'a'; }
496 else if(q[i] == '-' && r[i] != '-'){
497 if(r[i] == 'A'){ errors.dA++; errors.sequence += 'd'; }
498 if(r[i] == 'T'){ errors.dT++; errors.sequence += 'd'; }
499 if(r[i] == 'G'){ errors.dG++; errors.sequence += 'd'; }
500 if(r[i] == 'C'){ errors.dC++; errors.sequence += 'd'; }
505 else if(q[i] == '.' && r[i] != '.'){ // reference extends beyond query
506 if(started == 1){ break; }
508 else if(q[i] != '.' && r[i] == '.'){ // query extends beyond reference
509 m->mothurOut("Warning: " + toString(query.getName()) + " extend beyond " + toString(reference.getName()) + ". Ignoring the extra bases in the query\n");
510 if(started == 1){ break; }
512 else if(q[i] == '.' && r[i] == '.'){ // both are missing data
513 if(started == 1){ break; }
517 errors.mismatches = errors.total-errors.matches;
518 errors.errorRate = (double)(errors.total-errors.matches) / (double)errors.total;
519 errors.queryName = query.getName();
520 errors.refName = reference.getName();
524 catch(exception& e) {
525 m->errorOut(e, "SeqErrorCommand", "getErrors");
530 //***************************************************************************************************************
532 map<string, int> SeqErrorCommand::getWeights(){
534 m->openInputFile(namesFileName, nameFile);
537 string redundantSeqs;
538 map<string, int> nameCountMap;
541 nameFile >> seqName >> redundantSeqs;
542 nameCountMap[seqName] = m->getNumNames(redundantSeqs);
549 //***************************************************************************************************************
551 void SeqErrorCommand::printErrorHeader(){
553 errorSummaryFile << "query\treference\tweight\t";
554 errorSummaryFile << "AA\tAT\tAG\tAC\tTA\tTT\tTG\tTC\tGA\tGT\tGG\tGC\tCA\tCT\tCG\tCC\tNA\tNT\tNG\tNC\tAi\tTi\tGi\tCi\tNi\tdA\tdT\tdG\tdC\t";
555 errorSummaryFile << "insertions\tdeletions\tsubstitutions\tambig\tmatches\tmismatches\ttotal\terror\tnumparents\n";
557 errorSummaryFile << setprecision(6);
558 errorSummaryFile.setf(ios::fixed);
560 catch(exception& e) {
561 m->errorOut(e, "SeqErrorCommand", "printErrorHeader");
566 //***************************************************************************************************************
568 void SeqErrorCommand::printErrorData(Compare error, int numParentSeqs){
570 errorSummaryFile << error.queryName << '\t' << error.refName << '\t' << error.weight << '\t';
571 errorSummaryFile << error.AA << '\t' << error.AT << '\t' << error.AG << '\t' << error.AC << '\t';
572 errorSummaryFile << error.TA << '\t' << error.TT << '\t' << error.TG << '\t' << error.TC << '\t';
573 errorSummaryFile << error.GA << '\t' << error.GT << '\t' << error.GG << '\t' << error.GC << '\t';
574 errorSummaryFile << error.CA << '\t' << error.CT << '\t' << error.CG << '\t' << error.CC << '\t';
575 errorSummaryFile << error.NA << '\t' << error.NT << '\t' << error.NG << '\t' << error.NC << '\t';
576 errorSummaryFile << error.Ai << '\t' << error.Ti << '\t' << error.Gi << '\t' << error.Ci << '\t' << error.Ni << '\t' ;
577 errorSummaryFile << error.dA << '\t' << error.dT << '\t' << error.dG << '\t' << error.dC << '\t';
579 errorSummaryFile << error.Ai + error.Ti + error.Gi + error.Ci << '\t'; //insertions
580 errorSummaryFile << error.dA + error.dT + error.dG + error.dC << '\t'; //deletions
581 errorSummaryFile << error.mismatches - (error.Ai + error.Ti + error.Gi + error.Ci) - (error.dA + error.dT + error.dG + error.dC) - (error.NA + error.NT + error.NG + error.NC + error.Ni) << '\t'; //substitutions
582 errorSummaryFile << error.NA + error.NT + error.NG + error.NC + error.Ni << '\t'; //ambiguities
583 errorSummaryFile << error.matches << '\t' << error.mismatches << '\t' << error.total << '\t' << error.errorRate << '\t' << numParentSeqs << endl;
585 errorSeqFile << '>' << error.queryName << "\tref:" << error.refName << '\n' << error.sequence << endl;
588 int a=0; int t=1; int g=2; int c=3;
590 if(numParentSeqs == 1 || ignoreChimeras == 0){
591 substitutionMatrix[a][a] += error.weight * error.AA;
592 substitutionMatrix[a][t] += error.weight * error.TA;
593 substitutionMatrix[a][g] += error.weight * error.GA;
594 substitutionMatrix[a][c] += error.weight * error.CA;
595 substitutionMatrix[a][gap] += error.weight * error.dA;
596 substitutionMatrix[a][n] += error.weight * error.NA;
598 substitutionMatrix[t][a] += error.weight * error.AT;
599 substitutionMatrix[t][t] += error.weight * error.TT;
600 substitutionMatrix[t][g] += error.weight * error.GT;
601 substitutionMatrix[t][c] += error.weight * error.CT;
602 substitutionMatrix[t][gap] += error.weight * error.dT;
603 substitutionMatrix[t][n] += error.weight * error.NT;
605 substitutionMatrix[g][a] += error.weight * error.AG;
606 substitutionMatrix[g][t] += error.weight * error.TG;
607 substitutionMatrix[g][g] += error.weight * error.GG;
608 substitutionMatrix[g][c] += error.weight * error.CG;
609 substitutionMatrix[g][gap] += error.weight * error.dG;
610 substitutionMatrix[g][n] += error.weight * error.NG;
612 substitutionMatrix[c][a] += error.weight * error.AC;
613 substitutionMatrix[c][t] += error.weight * error.TC;
614 substitutionMatrix[c][g] += error.weight * error.GC;
615 substitutionMatrix[c][c] += error.weight * error.CC;
616 substitutionMatrix[c][gap] += error.weight * error.dC;
617 substitutionMatrix[c][n] += error.weight * error.NC;
619 substitutionMatrix[gap][a] += error.weight * error.Ai;
620 substitutionMatrix[gap][t] += error.weight * error.Ti;
621 substitutionMatrix[gap][g] += error.weight * error.Gi;
622 substitutionMatrix[gap][c] += error.weight * error.Ci;
623 substitutionMatrix[gap][n] += error.weight * error.Ni;
626 catch(exception& e) {
627 m->errorOut(e, "SeqErrorCommand", "printErrorData");
632 //***************************************************************************************************************
634 void SeqErrorCommand::printSubMatrix(){
636 string subMatrixFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.matrix";
637 ofstream subMatrixFile;
638 m->openOutputFile(subMatrixFileName, subMatrixFile);
639 outputNames.push_back(subMatrixFileName); outputTypes["error.matrix"].push_back(subMatrixFileName);
640 vector<string> bases(6);
647 vector<int> refSums(5,1);
649 for(int i=0;i<5;i++){
650 subMatrixFile << "\tr" << bases[i];
652 for(int j=0;j<6;j++){
653 refSums[i] += substitutionMatrix[i][j];
656 subMatrixFile << endl;
658 for(int i=0;i<6;i++){
659 subMatrixFile << 'q' << bases[i];
660 for(int j=0;j<5;j++){
661 subMatrixFile << '\t' << substitutionMatrix[j][i];
663 subMatrixFile << endl;
666 subMatrixFile << "total";
667 for(int i=0;i<5;i++){
668 subMatrixFile << '\t' << refSums[i];
670 subMatrixFile << endl;
671 subMatrixFile.close();
673 catch(exception& e) {
674 m->errorOut(e, "SeqErrorCommand", "printSubMatrix");
678 //***************************************************************************************************************
680 void SeqErrorCommand::printErrorFRFile(map<char, vector<int> > errorForward, map<char, vector<int> > errorReverse){
682 string errorForwardFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq.forward";
683 ofstream errorForwardFile;
684 m->openOutputFile(errorForwardFileName, errorForwardFile);
685 outputNames.push_back(errorForwardFileName); outputTypes["error.forward"].push_back(errorForwardFileName);
687 errorForwardFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl;
688 for(int i=0;i<1000;i++){
689 float match = (float)errorForward['m'][i];
690 float subst = (float)errorForward['s'][i];
691 float insert = (float)errorForward['i'][i];
692 float del = (float)errorForward['d'][i];
693 float amb = (float)errorForward['a'][i];
694 float total = match + subst + insert + del + amb;
695 if(total == 0){ break; }
696 errorForwardFile << i+1 << '\t' << total << '\t' << match/total << '\t' << subst/total << '\t' << insert/total << '\t' << del/total << '\t' << amb/total << endl;
698 errorForwardFile.close();
700 string errorReverseFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq.reverse";
701 ofstream errorReverseFile;
702 m->openOutputFile(errorReverseFileName, errorReverseFile);
703 outputNames.push_back(errorReverseFileName); outputTypes["error.reverse"].push_back(errorReverseFileName);
705 errorReverseFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl;
706 for(int i=0;i<1000;i++){
707 float match = (float)errorReverse['m'][i];
708 float subst = (float)errorReverse['s'][i];
709 float insert = (float)errorReverse['i'][i];
710 float del = (float)errorReverse['d'][i];
711 float amb = (float)errorReverse['a'][i];
712 float total = match + subst + insert + del + amb;
713 if(total == 0){ break; }
714 errorReverseFile << i+1 << '\t' << total << '\t' << match/total << '\t' << subst/total << '\t' << insert/total << '\t' << del/total << '\t' << amb/total << endl;
716 errorReverseFile.close();
718 catch(exception& e) {
719 m->errorOut(e, "SeqErrorCommand", "printErrorFRFile");
724 //***************************************************************************************************************
726 void SeqErrorCommand::printErrorQuality(map<char, vector<int> > qScoreErrorMap){
729 string errorQualityFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.quality";
730 ofstream errorQualityFile;
731 m->openOutputFile(errorQualityFileName, errorQualityFile);
732 outputNames.push_back(errorQualityFileName); outputTypes["error.quality"].push_back(errorQualityFileName);
734 errorQualityFile << "qscore\tmatches\tsubstitutions\tinsertions\tambiguous" << endl;
735 for(int i=0;i<41;i++){
736 errorQualityFile << i << '\t' << qScoreErrorMap['m'][i] << '\t' << qScoreErrorMap['s'][i] << '\t' << qScoreErrorMap['i'][i] << '\t'<< qScoreErrorMap['a'][i] << endl;
738 errorQualityFile.close();
740 catch(exception& e) {
741 m->errorOut(e, "SeqErrorCommand", "printErrorFRFile");
747 //***************************************************************************************************************
749 void SeqErrorCommand::printQualityFR(vector<vector<int> > qualForwardMap, vector<vector<int> > qualReverseMap){
756 for(int i=0;i<qualForwardMap.size();i++){
757 for(int j=0;j<qualForwardMap[i].size();j++){
758 if(qualForwardMap[i][j] != 0){
759 if(lastRow < i) { lastRow = i+2; }
760 if(lastColumn < j) { lastColumn = j+2; }
765 string qualityForwardFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.qual.forward";
766 ofstream qualityForwardFile;
767 m->openOutputFile(qualityForwardFileName, qualityForwardFile);
768 outputNames.push_back(qualityForwardFileName); outputTypes["error.qual.forward"].push_back(qualityForwardFileName);
770 for(int i=0;i<lastColumn;i++){ qualityForwardFile << '\t' << i; } qualityForwardFile << endl;
772 for(int i=0;i<lastRow;i++){
773 qualityForwardFile << i+1;
774 for(int j=0;j<lastColumn;j++){
775 qualityForwardFile << '\t' << qualForwardMap[i][j];
778 qualityForwardFile << endl;
780 qualityForwardFile.close();
783 string qualityReverseFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.qual.reverse";
784 ofstream qualityReverseFile;
785 m->openOutputFile(qualityReverseFileName, qualityReverseFile);
786 outputNames.push_back(qualityReverseFileName); outputTypes["error.qual.reverse"].push_back(qualityReverseFileName);
788 for(int i=0;i<lastColumn;i++){ qualityReverseFile << '\t' << i; } qualityReverseFile << endl;
789 for(int i=0;i<lastRow;i++){
791 qualityReverseFile << i+1;
792 for(int j=0;j<lastColumn;j++){
793 qualityReverseFile << '\t' << qualReverseMap[i][j];
795 qualityReverseFile << endl;
797 qualityReverseFile.close();
799 catch(exception& e) {
800 m->errorOut(e, "SeqErrorCommand", "printErrorFRFile");
806 //***************************************************************************************************************