//**********************************************************************************************************************
vector<string> SeqErrorCommand::getValidParameters(){
try {
- string Array[] = {"query", "reference", "name", "qfile", "report", "threshold", "inputdir", "outputdir"};
+ string Array[] = {"query", "reference", "name", "qfile", "report", "threshold", "ignorechimeras", "inputdir", "outputdir"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
return myArray;
}
convert(temp, ignoreChimeras);
substitutionMatrix.resize(6);
- for(int i=0;i<6;i++){ substitutionMatrix[i].assign(6,0); }
+ for(int i=0;i<6;i++){ substitutionMatrix[i].resize(6,0); }
}
}
catch(exception& e) {
try{
if (abort == true) { if (calledHelp) { return 0; } return 2; }
+ maxLength = 2000;
+
string errorSummaryFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.summary";
m->openOutputFile(errorSummaryFileName, errorSummaryFile);
outputNames.push_back(errorSummaryFileName); outputTypes["error.summary"].push_back(errorSummaryFileName);
m->openInputFile(qualFileName, qualFile);
report = ReportFile(reportFile, reportFileName);
- qualForwardMap.resize(1000);
- qualReverseMap.resize(1000);
- for(int i=0;i<1000;i++){
- qualForwardMap[i].assign(100,0);
- qualReverseMap[i].assign(100,0);
+ qualForwardMap.resize(maxLength);
+ qualReverseMap.resize(maxLength);
+ for(int i=0;i<maxLength;i++){
+ qualForwardMap[i].assign(41,0);
+ qualReverseMap[i].assign(41,0);
}
}
qScoreErrorMap['a'].assign(41, 0);
map<char, vector<int> > errorForward;
- errorForward['m'].assign(1000,0);
- errorForward['s'].assign(1000,0);
- errorForward['i'].assign(1000,0);
- errorForward['d'].assign(1000,0);
- errorForward['a'].assign(1000,0);
+ errorForward['m'].assign(maxLength,0);
+ errorForward['s'].assign(maxLength,0);
+ errorForward['i'].assign(maxLength,0);
+ errorForward['d'].assign(maxLength,0);
+ errorForward['a'].assign(maxLength,0);
map<char, vector<int> > errorReverse;
- errorReverse['m'].assign(1000,0);
- errorReverse['s'].assign(1000,0);
- errorReverse['i'].assign(1000,0);
- errorReverse['d'].assign(1000,0);
- errorReverse['a'].assign(1000,0);
+ errorReverse['m'].assign(maxLength,0);
+ errorReverse['s'].assign(maxLength,0);
+ errorReverse['i'].assign(maxLength,0);
+ errorReverse['d'].assign(maxLength,0);
+ errorReverse['a'].assign(maxLength,0);
string errorChimeraFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.chimera";
outputNames.push_back(errorChimeraFileName); outputTypes["error.chimera"].push_back(errorChimeraFileName);
vector<string> megaAlignVector(numRefs, "");
-
+
int index = 0;
bool ignoreSeq = 0;
while(queryFile){
-
+
if (m->control_pressed) { errorSummaryFile.close(); errorSeqFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
Sequence query(queryFile);
-
+
int numParentSeqs = chimeraTest.analyzeQuery(query.getName(), query.getAligned());
int closestRefIndex = chimeraTest.getClosestRefIndex();
if(numParentSeqs > 1 && ignoreChimeras == 1) { ignoreSeq = 1; }
else { ignoreSeq = 0; }
-
Compare minCompare = getErrors(query, referenceSeqs[closestRefIndex]);
if(namesFileName != ""){
it = weights.find(query.getName());
minCompare.weight = it->second;
}
- else { minCompare.weight = 1; }
+ else{ minCompare.weight = 1; }
printErrorData(minCompare, numParentSeqs);
-
-
+
if(!ignoreSeq){
+
for(int i=0;i<minCompare.total;i++){
char letter = minCompare.sequence[i];
+
errorForward[letter][i] += minCompare.weight;
errorReverse[letter][minCompare.total-i-1] += minCompare.weight;
}
}
-
+
if(qualFileName != "" && reportFileName != ""){
report = ReportFile(reportFile);
quality.updateReverseMap(qualReverseMap, startBase, endBase, minCompare.weight);
}
}
-
+
if(minCompare.errorRate < threshold && !ignoreSeq){
totalBases += (minCompare.total * minCompare.weight);
totalMatches += minCompare.matches * minCompare.weight;
misMatchCounts[minCompare.mismatches] += minCompare.weight;
numSeqs++;
-
megaAlignVector[closestRefIndex] += query.getInlineSeq() + '\n';
}
-
+
index++;
- if(index % 1000 == 0){ cout << index << endl; }
+
+ if(index % 1000 == 0){ m->mothurOut(toString(index) + '\n'); }
}
queryFile.close();
errorSummaryFile.close();
printSubMatrix();
- string megAlignmentFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".ref-query";
+ string megAlignmentFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.ref-query";
ofstream megAlignmentFile;
m->openOutputFile(megAlignmentFileName, megAlignmentFile);
+ outputNames.push_back(megAlignmentFileName); outputTypes["error.ref-query"].push_back(megAlignmentFileName);
for(int i=0;i<numRefs;i++){
megAlignmentFile << referenceSeqs[i].getInlineSeq() << endl;
referenceFile.close();
numRefs = referenceSeqs.size();
+
for(int i=0;i<numRefs;i++){
referenceSeqs[i].padToPos(maxStartPos);
referenceSeqs[i].padFromPos(minEndPos);
Compare errors;
for(int i=0;i<alignLength;i++){
- if(q[i] != 'N' && q[i] != '.' && r[i] != '.' && (q[i] != '-' || r[i] != '-')){ // no missing data and no double gaps
+ if(r[i] != 'N' && q[i] != '.' && r[i] != '.' && (q[i] != '-' || r[i] != '-')){ // no missing data and no double gaps
started = 1;
if(q[i] == 'A'){
void SeqErrorCommand::printErrorData(Compare error, int numParentSeqs){
try {
+
errorSummaryFile << error.queryName << '\t' << error.refName << '\t' << error.weight << '\t';
errorSummaryFile << error.AA << '\t' << error.AT << '\t' << error.AG << '\t' << error.AC << '\t';
errorSummaryFile << error.TA << '\t' << error.TT << '\t' << error.TG << '\t' << error.TC << '\t';
errorSummaryFile << error.GA << '\t' << error.GT << '\t' << error.GG << '\t' << error.GC << '\t';
errorSummaryFile << error.CA << '\t' << error.CT << '\t' << error.CG << '\t' << error.CC << '\t';
errorSummaryFile << error.NA << '\t' << error.NT << '\t' << error.NG << '\t' << error.NC << '\t';
- errorSummaryFile << error.Ai << '\t' << error.Ti << '\t' << error.Gi << '\t' << error.Ci << '\t' << error.Ni << '\t' ;
+ errorSummaryFile << error.Ai << '\t' << error.Ti << '\t' << error.Gi << '\t' << error.Ci << '\t' << error.Ni << '\t';
errorSummaryFile << error.dA << '\t' << error.dT << '\t' << error.dG << '\t' << error.dC << '\t';
errorSummaryFile << error.Ai + error.Ti + error.Gi + error.Ci << '\t'; //insertions
errorSummaryFile << error.mismatches - (error.Ai + error.Ti + error.Gi + error.Ci) - (error.dA + error.dT + error.dG + error.dC) - (error.NA + error.NT + error.NG + error.NC + error.Ni) << '\t'; //substitutions
errorSummaryFile << error.NA + error.NT + error.NG + error.NC + error.Ni << '\t'; //ambiguities
errorSummaryFile << error.matches << '\t' << error.mismatches << '\t' << error.total << '\t' << error.errorRate << '\t' << numParentSeqs << endl;
-
+
errorSeqFile << '>' << error.queryName << "\tref:" << error.refName << '\n' << error.sequence << endl;
-
int a=0; int t=1; int g=2; int c=3;
int gap=4; int n=5;
+
if(numParentSeqs == 1 || ignoreChimeras == 0){
substitutionMatrix[a][a] += error.weight * error.AA;
substitutionMatrix[a][t] += error.weight * error.TA;
substitutionMatrix[a][c] += error.weight * error.CA;
substitutionMatrix[a][gap] += error.weight * error.dA;
substitutionMatrix[a][n] += error.weight * error.NA;
-
+
substitutionMatrix[t][a] += error.weight * error.AT;
substitutionMatrix[t][t] += error.weight * error.TT;
substitutionMatrix[t][g] += error.weight * error.GT;
outputNames.push_back(errorForwardFileName); outputTypes["error.forward"].push_back(errorForwardFileName);
errorForwardFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl;
- for(int i=0;i<1000;i++){
+ for(int i=0;i<maxLength;i++){
float match = (float)errorForward['m'][i];
float subst = (float)errorForward['s'][i];
float insert = (float)errorForward['i'][i];
outputNames.push_back(errorReverseFileName); outputTypes["error.reverse"].push_back(errorReverseFileName);
errorReverseFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl;
- for(int i=0;i<1000;i++){
+ for(int i=0;i<maxLength;i++){
float match = (float)errorReverse['m'][i];
float subst = (float)errorReverse['s'][i];
float insert = (float)errorReverse['i'][i];
//***************************************************************************************************************
void SeqErrorCommand::printQualityFR(vector<vector<int> > qualForwardMap, vector<vector<int> > qualReverseMap){
- try{
-
- int lastRow = 0;
- int lastColumn = 0;
+ try{
+ int numRows = 0;
+ int numColumns = qualForwardMap[0].size();
for(int i=0;i<qualForwardMap.size();i++){
- for(int j=0;j<qualForwardMap[i].size();j++){
+ for(int j=0;j<numColumns;j++){
if(qualForwardMap[i][j] != 0){
- if(lastRow < i) { lastRow = i+2; }
- if(lastColumn < j) { lastColumn = j+2; }
+ if(numRows < i) { numRows = i+20; }
}
}
}
m->openOutputFile(qualityForwardFileName, qualityForwardFile);
outputNames.push_back(qualityForwardFileName); outputTypes["error.qual.forward"].push_back(qualityForwardFileName);
- for(int i=0;i<lastColumn;i++){ qualityForwardFile << '\t' << i; } qualityForwardFile << endl;
+ for(int i=0;i<numColumns;i++){ qualityForwardFile << '\t' << i; } qualityForwardFile << endl;
- for(int i=0;i<lastRow;i++){
+ for(int i=0;i<numRows;i++){
qualityForwardFile << i+1;
- for(int j=0;j<lastColumn;j++){
+ for(int j=0;j<numColumns;j++){
qualityForwardFile << '\t' << qualForwardMap[i][j];
}
m->openOutputFile(qualityReverseFileName, qualityReverseFile);
outputNames.push_back(qualityReverseFileName); outputTypes["error.qual.reverse"].push_back(qualityReverseFileName);
- for(int i=0;i<lastColumn;i++){ qualityReverseFile << '\t' << i; } qualityReverseFile << endl;
- for(int i=0;i<lastRow;i++){
+ for(int i=0;i<numColumns;i++){ qualityReverseFile << '\t' << i; } qualityReverseFile << endl;
+ for(int i=0;i<numRows;i++){
qualityReverseFile << i+1;
- for(int j=0;j<lastColumn;j++){
+ for(int j=0;j<numColumns;j++){
qualityReverseFile << '\t' << qualReverseMap[i][j];
}
qualityReverseFile << endl;
m->errorOut(e, "SeqErrorCommand", "printErrorFRFile");
exit(1);
}
+
}
-
//***************************************************************************************************************