1 #ifndef SEQERRORCOMMAND
2 #define SEQERRORCOMMAND
8 * Created by Pat Schloss on 7/15/10.
9 * Copyright 2010 Schloss Lab. All rights reserved.
13 #include "command.hpp"
14 #include "sequence.hpp"
15 #include "referencedb.h"
// SeqErrorCommand: a Command subclass whose getCommandName() reports
// "seq.error" and whose getCommandCategory() reports "Sequence Processing".
8 class SeqErrorCommand : public Command {
// Constructor taking one string by value.
// NOTE(review): presumably the user-supplied option string — confirm in the .cpp.
20 SeqErrorCommand(string);
// Returns a vector of strings; defined out of line.
// NOTE(review): presumably the names of the parameters this command accepts — confirm.
24 vector<string> setParameters();
// Returns the command's name, the fixed string "seq.error".
25 string getCommandName() { return "seq.error"; }
// Returns the category label for this command, the fixed string "Sequence Processing".
26 string getCommandCategory() { return "Sequence Processing"; }
// Returns the help text for this command; defined out of line.
// help() passes this string to m->mothurOut().
28 string getHelpString();
// Returns a string computed from the given string argument; defined out of line.
// NOTE(review): presumably maps an output type to a file-name pattern — confirm in the .cpp.
29 string getOutputPattern(string);
// Returns the citation text for this command: the Schloss, Gevers & Westcott (2011)
// PLoS ONE reference, then a newline, then the mothur wiki URL for Seq.error.
30 string getCitation() { return "Schloss PD, Gevers D, Westcott SL (2011). Reducing the effects of PCR amplification and sequencing artifacts on 16S rRNA-based studies. PLoS ONE. 6:e27310.\nhttp://www.mothur.org/wiki/Seq.error"; }
// Returns the description string. It is currently the same literal as the
// command name ("seq.error"), not a longer explanatory sentence.
31 string getDescription() { return "seq.error"; }
// Emits the help text by passing getHelpString() to m->mothurOut().
// NOTE(review): `m` is not declared in this header's visible lines — presumably
// a member inherited from Command; confirm.
35 void help() { m->mothurOut(getHelpString()); }
// linePair: a (start, end) pair of unsigned long long values.
// NOTE(review): presumably file positions delimiting the portion of an input
// file handled by one process — confirm against setLines()/driver().
42 unsigned long long start;
43 unsigned long long end;
// Initializes start from i and end from j.
44 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
// Per-comparison tallies (all int):
//   two-letter counters over {A,T,G,C,N} x {A,T,G,C}: AA..CC, NA..NC
//   Ai, Ti, Gi, Ci, Ni
//   dA, dT, dG, dC
// NOTE(review): the naming suggests substitution-pair counts, insertions (Xi)
// and deletions (dX); which letter is the reference and which the query is not
// visible here — confirm in getErrors().
49 int AA, AT, AG, AC, TA, TT, TG, TC, GA, GT, GG, GC, CA, CT, CG, CC, NA, NT, NG, NC, Ai, Ti, Gi, Ci, Ni, dA, dT, dG, dC;
// Names of the reference and query, plus a string named `sequence`.
// NOTE(review): contents of `sequence` are not shown here — confirm in getErrors().
50 string refName, queryName, sequence;
// Integer summary fields; how they are computed is defined elsewhere.
52 int weight, matches, mismatches, total;
// Reset every counter declared in the first declaration above to zero,
// grouped row by row in the same order as that declaration.
55 AA=0; AT=0; AG=0; AC=0;
56 TA=0; TT=0; TG=0; TC=0;
57 GA=0; GT=0; GG=0; GC=0;
58 CA=0; CT=0; CG=0; CC=0;
59 NA=0; NT=0; NG=0; NC=0;
60 Ai=0; Ti=0; Gi=0; Ci=0; Ni=0;
61 dA=0; dT=0; dG=0; dC=0;
// Integer ids of processes (see the trailing comment on the declaration).
// NOTE(review): presumably filled by createProcesses() — confirm in the .cpp.
74 vector<int> processIDS; //processid
// Three parallel-looking lists of linePair ranges.
// NOTE(review): presumably `lines` is for the query file, `qLines` for the
// quality file and `rLines` for the report file, matching the three linePair
// arguments of driver() — confirm against setLines().
75 vector<linePair> lines;
76 vector<linePair> qLines;
77 vector<linePair> rLines;
// Out-of-line helper declarations. Only the signatures are visible here;
// the descriptions below state what the signatures show and hedge the rest.

// Returns a map from string to int.
// NOTE(review): presumably sequence name -> weight, stored in `weights` — confirm.
80 map<string,int> getWeights();
// Takes two Sequence objects by value and a Compare by reference (so the
// Compare can be modified by the callee); returns int.
81 int getErrors(Sequence, Sequence, Compare&);
// Takes an output file stream by reference.
82 void printErrorHeader(ofstream&);
// Takes a Compare by value, an int, and two output file streams by reference.
83 void printErrorData(Compare, int, ofstream&, ofstream&);
// No arguments. NOTE(review): presumably writes substitutionMatrix — confirm.
84 void printSubMatrix();
// Takes two char -> vector<int> maps by value.
// NOTE(review): presumably errorForward and errorReverse — confirm at the call site.
85 void printErrorFRFile(map<char, vector<int> >, map<char, vector<int> >);
// Takes one char -> vector<int> map by value.
// NOTE(review): presumably qScoreErrorMap — confirm at the call site.
86 void printErrorQuality(map<char, vector<int> >);
// Takes two 2-D int tables by value.
// NOTE(review): presumably qualForwardMap and qualReverseMap — confirm at the call site.
87 void printQualityFR(vector<vector<int> >, vector<vector<int> >);
// Takes three strings and three vectors of unsigned long long by reference
// (so the callee can fill or modify them); returns int.
// NOTE(review): presumably three file names and the positions that split each
// file — confirm in the .cpp.
89 int setLines(string, string, string, vector<unsigned long long>&, vector<unsigned long long>&, vector<unsigned long long>&);
// Takes six strings and three linePair ranges; returns int.
// NOTE(review): meaning of each string and of the return value is not visible here — confirm.
90 int driver(string, string, string, string, string, string, linePair, linePair, linePair);
// Takes the same six-string argument shape as driver(), without the ranges; returns int.
// NOTE(review): presumably runs driver() over the stored ranges using `processors` workers — confirm.
91 int createProcesses(string, string, string, string, string, string);
// File names for the query, reference, quality, report and names inputs,
// plus the output directory (per the member names).
93 string queryFileName, referenceFileName, qualFileName, reportFileName, namesFileName, outputDir;
// Boolean options. NOTE(review): exact effect of each flag is defined in the .cpp — confirm.
95 bool ignoreChimeras, save;
// Integer counts, per the member names: number of references and number of processors.
96 int numRefs, processors;
// Integer running values. NOTE(review): units and update points are not visible here — confirm.
97 int maxLength, totalBases, totalMatches;
98 //ofstream errorSummaryFile, errorSeqFile;
// Names of output files. NOTE(review): presumably every file this command creates — confirm.
99 vector<string> outputNames;
// Reference sequences held as Sequence objects.
101 vector<Sequence> referenceSeqs;
// 2-D int tables. NOTE(review): dimensions and index meaning are not visible
// in this header — confirm where they are resized in the .cpp.
102 vector<vector<int> > substitutionMatrix;
103 vector<vector<int> > qualForwardMap;
104 vector<vector<int> > qualReverseMap;
// 1-D int table. NOTE(review): presumably indexed by number of mismatches — confirm.
105 vector<int> misMatchCounts;
// char-keyed int tables. NOTE(review): the key is presumably an error-type
// code; meaning of the vector index is not visible here — confirm.
106 map<char, vector<int> > qScoreErrorMap;
107 map<char, vector<int> > errorForward;
108 map<char, vector<int> > errorReverse;
// String-keyed ints; same type as the value returned by getWeights().
109 map<string, int> weights;
// List of strings. NOTE(review): contents not visible in this header — confirm.
110 vector<string> megaAlignVector;