From: pschloss Date: Tue, 21 Dec 2010 20:35:44 +0000 (+0000) Subject: more changes to error.seqs command and quality scores classses X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=bdb5d82e2a73829b4e1fa42656ad9bcb57e3e948 more changes to error.seqs command and quality scores classses --- diff --git a/mothur b/mothur index 1389411..c6482bc 100755 Binary files a/mothur and b/mothur differ diff --git a/qualityscores.cpp b/qualityscores.cpp index 2ee71e0..aafa579 100644 --- a/qualityscores.cpp +++ b/qualityscores.cpp @@ -332,7 +332,7 @@ bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){ return success; } catch(exception& e) { - m->errorOut(e, "TrimSeqsCommand", "cullQualAverage"); + m->errorOut(e, "QualityScores", "cullQualAverage"); exit(1); } } @@ -341,19 +341,57 @@ bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){ void QualityScores::updateQScoreErrorMap(map >& qualErrorMap, string errorSeq, int start, int stop, int weight){ try { - - for(int i=start-1;ierrorOut(e, "TrimSeqsCommand", "updateQScoreErrorMap"); + m->errorOut(e, "QualityScores", "updateQScoreErrorMap"); + exit(1); + } +} + +/**************************************************************************************************/ + +void QualityScores::updateForwardMap(vector >& forwardMap, int start, int stop, int weight){ + try { + + int index = 0; + for(int i=start-1;ierrorOut(e, "QualityScores", "updateForwardMap"); + exit(1); + } +} + +/**************************************************************************************************/ + +void QualityScores::updateReverseMap(vector >& reverseMap, int start, int stop, int weight){ + try { + + int index = 0; + for(int i=stop-1;i>=start;i--){ + reverseMap[index++][qScores[i]] += weight; + } + + } + catch(exception& e) { + m->errorOut(e, "QualityScores", "updateForwardMap"); exit(1); } } diff --git a/qualityscores.h b/qualityscores.h index dee5917..8f2105a 100644 --- a/qualityscores.h +++ b/qualityscores.h @@ -30,6 +30,9 @@ public: bool stripQualWindowAverage(Sequence&, int, int, double); bool cullQualAverage(Sequence&, double); void updateQScoreErrorMap(map >&, string, int, int, int); + void updateForwardMap(vector >&, int, int, int); + void updateReverseMap(vector >&, int, int, int); + private: double calculateAverage(); diff --git a/seqerrorcommand.cpp b/seqerrorcommand.cpp index 77b12ed..615206b 100644 --- a/seqerrorcommand.cpp +++ b/seqerrorcommand.cpp @@ -76,8 +76,6 @@ SeqErrorCommand::SeqErrorCommand(string option) { //valid paramters for this command string AlignArray[] = {"query", "reference", "name", "qfile", "report", "threshold", "inputdir", "outputdir"}; -//need to implement name file option - vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); OptionParser parser(option); @@ -168,8 +166,6 @@ SeqErrorCommand::SeqErrorCommand(string option) { abort = true; } - - outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; @@ -189,8 +185,9 @@ SeqErrorCommand::SeqErrorCommand(string option) { errorSeqFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq"; m->openOutputFile(errorSeqFileName, errorSeqFile); outputNames.push_back(errorSeqFileName); outputTypes["error.seq"].push_back(errorSeqFileName); - printErrorHeader(); + substitutionMatrix.resize(6); + for(int i=0;i<6;i++){ substitutionMatrix[i].assign(6,0); } } } catch(exception& e) { @@ -221,7 +218,7 @@ void SeqErrorCommand::help(){ SeqErrorCommand::~SeqErrorCommand(){ errorSummaryFile.close(); - errorSeqFile.close(); + errorSeqFile.close(); } //*************************************************************************************************************** @@ -243,10 +240,19 @@ int SeqErrorCommand::execute(){ ReportFile report; QualityScores quality; - + vector > qualForwardMap; + vector > qualReverseMap; + if(qualFileName != "" && reportFileName != ""){ m->openInputFile(qualFileName, qualFile); report = ReportFile(reportFile, reportFileName); + + qualForwardMap.resize(1000); + qualReverseMap.resize(1000); + for(int i=0;i<1000;i++){ + qualForwardMap[i].assign(100,0); + qualReverseMap[i].assign(100,0); + } } int totalBases = 0; @@ -263,6 +269,24 @@ int SeqErrorCommand::execute(){ qScoreErrorMap['i'].assign(41, 0); qScoreErrorMap['a'].assign(41, 0); + + + map > errorForward; + errorForward['m'].assign(1000,0); + errorForward['s'].assign(1000,0); + errorForward['i'].assign(1000,0); + errorForward['d'].assign(1000,0); + errorForward['a'].assign(1000,0); + + map > errorReverse; + errorReverse['m'].assign(1000,0); + errorReverse['s'].assign(1000,0); + errorReverse['i'].assign(1000,0); + errorReverse['d'].assign(1000,0); + errorReverse['a'].assign(1000,0); + + + while(queryFile){ Compare minCompare; Sequence query(queryFile); @@ -283,6 +307,12 @@ int SeqErrorCommand::execute(){ printErrorData(minCompare); + for(int i=0;iopenOutputFile(qualityForwardFileName, qualityForwardFile); + outputNames.push_back(errorQualityFileName); outputTypes["error.qual.forward"].push_back(qualityForwardFileName); + + for(int i=0;iopenOutputFile(qualityReverseFileName, qualityReverseFile); + outputNames.push_back(errorQualityFileName); outputTypes["error.qual.reverse"].push_back(qualityReverseFileName); + + for(int i=0;iopenOutputFile(errorForwardFileName, errorForwardFile); + outputNames.push_back(errorForwardFileName); outputTypes["error.forward"].push_back(errorForwardFileName); + + errorForwardFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl; + for(int i=0;i<1000;i++){ + float match = (float)errorForward['m'][i]; + float subst = (float)errorForward['s'][i]; + float insert = (float)errorForward['i'][i]; + float del = (float)errorForward['d'][i]; + float amb = (float)errorForward['a'][i]; + float total = match + subst + insert + del + amb; + if(total == 0){ break; } + errorForwardFile << i+1 << '\t' << total << '\t' << match/total << '\t' << subst/total << '\t' << insert/total << '\t' << del/total << '\t' << amb/total << endl; + } + errorForwardFile.close(); + + + string errorReverseFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq.reverse"; + ofstream errorReverseFile; + m->openOutputFile(errorReverseFileName, errorReverseFile); + outputNames.push_back(errorReverseFileName); outputTypes["error.reverse"].push_back(errorReverseFileName); + + errorReverseFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl; + for(int i=0;i<1000;i++){ + float match = (float)errorReverse['m'][i]; + float subst = (float)errorReverse['s'][i]; + float insert = (float)errorReverse['i'][i]; + float del = (float)errorReverse['d'][i]; + float amb = (float)errorReverse['a'][i]; + float total = match + subst + insert + del + amb; + if(total == 0){ break; } + errorReverseFile << i+1 << '\t' << total << '\t' << match/total << '\t' << subst/total << '\t' << insert/total << '\t' << del/total << '\t' << amb/total << endl; + } + errorReverseFile.close(); + + + string errorCountFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.count"; ofstream errorCountFile; m->openOutputFile(errorCountFileName, errorCountFile); outputNames.push_back(errorCountFileName); outputTypes["error.count"].push_back(errorCountFileName); - m->mothurOut("Overall error rate:\t" + toString((double)(totalBases - totalMatches) / (double)totalBases) + "\n\n"); m->mothurOut("Errors\tSequences\n"); - - errorCountFile << "Errors\tSequences\n"; - + errorCountFile << "Errors\tSequences\n"; for(int i=0;imothurOut(toString(i) + '\t' + toString(misMatchCounts[i]) + '\n'); errorCountFile << i << '\t' << misMatchCounts[i] << endl; } + errorCountFile.close(); + + + + + + string subMatrixFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.matrix"; + ofstream subMatrixFile; + m->openOutputFile(subMatrixFileName, subMatrixFile); + outputNames.push_back(subMatrixFileName); outputTypes["error.matrix"].push_back(subMatrixFileName); + vector bases(6); + bases[0] = "A"; + bases[1] = "T"; + bases[2] = "G"; + bases[3] = "C"; + bases[4] = "Gap"; + bases[5] = "N"; + vector refSums(5,1); + + for(int i=0;i<5;i++){ + subMatrixFile << "\tr" << bases[i]; + + for(int j=0;j<6;j++){ + refSums[i] += substitutionMatrix[i][j]; + } + + } + subMatrixFile << endl; + + for(int i=0;i<6;i++){ + subMatrixFile << 'q' << bases[i]; + for(int j=0;j<5;j++){ + subMatrixFile << '\t' << substitutionMatrix[j][i]; + } + subMatrixFile << endl; + } + subMatrixFile << "total"; + for(int i=0;i<5;i++){ + subMatrixFile << '\t' << refSums[i]; + } + subMatrixFile << endl; + subMatrixFile.close(); + + return 0; } @@ -387,7 +548,6 @@ Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){ string q = query.getAligned(); string r = reference.getAligned(); - int started = 0; Compare errors; @@ -520,6 +680,45 @@ void SeqErrorCommand::printErrorData(Compare error){ errorSummaryFile << error.matches << '\t' << error.mismatches << '\t' << error.total << '\t' << error.errorRate << endl; errorSeqFile << '>' << error.queryName << "\tref:" << error.refName << '\n' << error.sequence << endl; + + + int a=0; int t=1; int g=2; int c=3; + int gap=4; int n=5; + + substitutionMatrix[a][a] += error.weight * error.AA; + substitutionMatrix[a][t] += error.weight * error.TA; + substitutionMatrix[a][g] += error.weight * error.GA; + substitutionMatrix[a][c] += error.weight * error.CA; + substitutionMatrix[a][gap] += error.weight * error.dA; + substitutionMatrix[a][n] += error.weight * error.NA; + + substitutionMatrix[t][a] += error.weight * error.AT; + substitutionMatrix[t][t] += error.weight * error.TT; + substitutionMatrix[t][g] += error.weight * error.GT; + substitutionMatrix[t][c] += error.weight * error.CT; + substitutionMatrix[t][gap] += error.weight * error.dT; + substitutionMatrix[t][n] += error.weight * error.NT; + + substitutionMatrix[g][a] += error.weight * error.AG; + substitutionMatrix[g][t] += error.weight * error.TG; + substitutionMatrix[g][g] += error.weight * error.GG; + substitutionMatrix[g][c] += error.weight * error.CG; + substitutionMatrix[g][gap] += error.weight * error.dG; + substitutionMatrix[g][n] += error.weight * error.NG; + + substitutionMatrix[c][a] += error.weight * error.AC; + substitutionMatrix[c][t] += error.weight * error.TC; + substitutionMatrix[c][g] += error.weight * error.GC; + substitutionMatrix[c][c] += error.weight * error.CC; + substitutionMatrix[c][gap] += error.weight * error.dC; + substitutionMatrix[c][n] += error.weight * error.NC; + + substitutionMatrix[gap][a] += error.weight * error.Ai; + substitutionMatrix[gap][t] += error.weight * error.Ti; + substitutionMatrix[gap][g] += error.weight * error.Gi; + substitutionMatrix[gap][c] += error.weight * error.Ci; + substitutionMatrix[gap][n] += error.weight * error.Ni; + } catch(exception& e) { m->errorOut(e, "SeqErrorCommand", "printErrorData"); diff --git a/seqerrorcommand.h b/seqerrorcommand.h index a012d68..e8ca96a 100644 --- a/seqerrorcommand.h +++ b/seqerrorcommand.h @@ -68,6 +68,8 @@ private: map > outputTypes; vector referenceSeqs; + vector > substitutionMatrix; + int a,t,g,c,gap,n; }; #endif