1 #ifndef SEQERRORCOMMAND
2 #define SEQERRORCOMMAND
8 * Created by Pat Schloss on 7/15/10.
9 * Copyright 2010 Schloss Lab. All rights reserved.
13 #include "command.hpp"
14 #include "sequence.hpp"
15 #include "referencedb.h"
16 #include "counttable.h"
19 class SeqErrorCommand : public Command {
// Constructor taking a single, unnamed string argument.
// NOTE(review): presumably the command's option string - confirm against the definition.
21 SeqErrorCommand(string);
// Declared to return a vector<string>; the definition is not part of this header excerpt.
25 vector<string> setParameters();
// Returns the fixed command name string, "seq.error".
string getCommandName()
{
    return string("seq.error");
}
// Returns the fixed category label, "Sequence Processing".
string getCommandCategory()
{
    const string category = "Sequence Processing";
    return category;
}
// Declared to return a string; help() in this class passes the result to m->mothurOut().
29 string getHelpString();
// Takes one unnamed string argument and returns a string.
// NOTE(review): presumably maps an output type to a file-name pattern - confirm against the definition.
30 string getOutputPattern(string);
// Returns the citation text for this command: the reference followed by a
// newline and the wiki URL, all in one string.
string getCitation()
{
    const string citation = "Schloss PD, Gevers D, Westcott SL (2011). Reducing the effects of PCR amplification and sequencing artifacts on 16S rRNA-based studies. PLoS ONE. 6:e27310.\nhttp://www.mothur.org/wiki/Seq.error";
    return citation;
}
// Returns the description string, which is the literal "seq.error".
string getDescription()
{
    const string description("seq.error");
    return description;
}
36 void help() { m->mothurOut(getHelpString()); }
// Fields and constructor of linePair (named by the constructor below); the
// enclosing struct/class header is not visible in this excerpt.
// NOTE(review): presumably start/end delimit a range within an input file - confirm against callers.
43 unsigned long long start;
44 unsigned long long end;
// Stores i in start and j in end; the constructor body itself is empty.
45 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
// Counter and bookkeeping fields followed by statements that zero every counter.
// The enclosing type's header and the signature of the function holding the
// assignments are not visible in this excerpt.
// NOTE(review): presumably these belong to the Compare type used by getErrors()/printErrorData() - confirm.
// NOTE(review): the two-letter counters look like base-pair substitution tallies, the
// "i" suffix like insertions and the "d" prefix like deletions - confirm against the implementation.
50 int AA, AT, AG, AC, TA, TT, TG, TC, GA, GT, GG, GC, CA, CT, CG, CC, NA, NT, NG, NC, Ai, Ti, Gi, Ci, Ni, dA, dT, dG, dC;
51 string refName, queryName, sequence;
// These four ints are not assigned by any statement visible below.
53 int weight, matches, mismatches, total;
// Reset all 29 counters declared above to zero, one group per line.
56 AA=0; AT=0; AG=0; AC=0;
57 TA=0; TT=0; TG=0; TC=0;
58 GA=0; GT=0; GG=0; GC=0;
59 CA=0; CT=0; CG=0; CC=0;
60 NA=0; NT=0; NG=0; NC=0;
61 Ai=0; Ti=0; Gi=0; Ci=0; Ni=0;
62 dA=0; dT=0; dG=0; dC=0;
// Integer ids, one per entry; the original trailing comment labels them process ids.
75 vector<int> processIDS; //processid
// Three lists of linePair ranges.
// NOTE(review): presumably one range per worker; the q/r prefixes may refer to the
// quality and report files - confirm against setLines()/createProcesses().
76 vector<linePair> lines;
77 vector<linePair> qLines;
78 vector<linePair> rLines;
// Member function declarations only; none of the definitions are visible in this
// excerpt, so the notes below describe signatures rather than behavior.

// Returns a map from string to int.
// NOTE(review): presumably the source of the `weights` member - confirm.
81 map<string,int> getWeights();
// Takes two Sequence objects by value and a Compare by reference (so it can be
// modified by the callee); returns an int.
82 int getErrors(Sequence, Sequence, Compare&);
// Takes the output stream by reference.
83 void printErrorHeader(ofstream&);
// Takes a Compare by value, an int, and two output streams by reference.
84 void printErrorData(Compare, int, ofstream&, ofstream&);
85 void printSubMatrix();
// The map and nested-vector arguments below are all passed by value (copied per call).
86 void printErrorFRFile(map<char, vector<int> >, map<char, vector<int> >);
87 void printErrorQuality(map<char, vector<int> >);
88 void printQualityFR(vector<vector<int> >, vector<vector<int> >);
// Three strings plus three vectors of unsigned long long passed by non-const reference.
// NOTE(review): the vectors look like output parameters for file positions - confirm.
90 int setLines(string, string, string, vector<unsigned long long>&, vector<unsigned long long>&, vector<unsigned long long>&);
// Six strings followed by three linePair values.
91 int driver(string, string, string, string, string, string, linePair, linePair, linePair);
// Six strings; returns an int.
92 int createProcesses(string, string, string, string, string, string);
// Data members. Their meanings are not established by this excerpt; the notes
// below are limited to types and to hedged readings of the names.

// File names and output directory, all stored as strings.
94 string queryFileName, referenceFileName, qualFileName, reportFileName, namesFileName, outputDir, countfile;
// Boolean flags.
// NOTE(review): presumably set from command options - confirm in the constructor.
96 bool ignoreChimeras, save, aligned;
97 int numRefs, processors;
98 int maxLength, totalBases, totalMatches;
99 //ofstream errorSummaryFile, errorSeqFile;
100 vector<string> outputNames;
102 vector<Sequence> referenceSeqs;
// Two-dimensional integer tables (vector of vector<int>).
103 vector<vector<int> > substitutionMatrix;
104 vector<vector<int> > qualForwardMap;
105 vector<vector<int> > qualReverseMap;
106 vector<int> misMatchCounts;
// Maps keyed by a single char, each holding a vector<int>.
// NOTE(review): the key is presumably an error-type code - confirm against the printing functions.
107 map<char, vector<int> > qScoreErrorMap;
108 map<char, vector<int> > errorForward;
109 map<char, vector<int> > errorReverse;
// Same type as the return value of getWeights().
110 map<string, int> weights;
111 vector<string> megaAlignVector;