From fa08e82ec2dd48f73d051c210dad54a403308949 Mon Sep 17 00:00:00 2001 From: pschloss Date: Thu, 3 Feb 2011 12:29:09 +0000 Subject: [PATCH] *** empty log message *** --- Mothur.xcodeproj/project.pbxproj | 17 +- corraxescommand.cpp | 59 ++-- distancedb.cpp | 20 +- eachgapdist.h | 2 - getseqscommand.cpp | 2 - seqerrorcommand.cpp | 470 +++++++++++++++++-------------- seqerrorcommand.h | 11 +- 7 files changed, 332 insertions(+), 249 deletions(-) diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 2f3cc55..112cf3c 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -7,6 +7,7 @@ objects = { /* Begin PBXBuildFile section */ + 7E6BE10A12F710D8007ADDBE /* refchimeratest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */; }; 8DD76FB00486AB0100D96B5E /* mothur.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6A0FF2C0290799A04C91782 /* mothur.1 */; }; A70332B712D3A13400761E33 /* makefile in Sources */ = {isa = PBXBuildFile; fileRef = A70332B512D3A13400761E33 /* makefile */; }; A713EBAC12DC7613000092AC /* readphylipvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBAB12DC7613000092AC /* readphylipvector.cpp */; }; @@ -302,6 +303,8 @@ /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ + 7E6BE10812F710D8007ADDBE /* refchimeratest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = refchimeratest.h; sourceTree = ""; }; + 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = refchimeratest.cpp; sourceTree = ""; }; 8DD76FB20486AB0100D96B5E /* Mothur */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = Mothur; sourceTree = BUILT_PRODUCTS_DIR; }; A70332B512D3A13400761E33 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = ""; }; A713EBAA12DC7613000092AC /* readphylipvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readphylipvector.h; sourceTree = ""; }; @@ -1011,6 +1014,8 @@ A7E9BA3812D3956100DA6239 /* commands */ = { isa = PBXGroup; children = ( + 7E6BE10812F710D8007ADDBE /* refchimeratest.h */, + 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */, A7E9B6AE12D37EC400DA6239 /* command.hpp */, A7E9B65112D37EC300DA6239 /* aligncommand.cpp */, A7E9B65212D37EC300DA6239 /* aligncommand.h */, @@ -1593,7 +1598,7 @@ attributes = { ORGANIZATIONNAME = "Schloss Lab"; }; - buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "mothur" */; + buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */; compatibilityVersion = "Xcode 3.1"; developmentRegion = English; hasScannedForEncodings = 1; @@ -1895,6 +1900,7 @@ A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */, A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */, A71FE12C12EDF72400963CA7 /* mergegroupscommand.cpp in Sources */, + 7E6BE10A12F710D8007ADDBE /* refchimeratest.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -1934,7 +1940,7 @@ ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; DEPLOYMENT_LOCATION = NO; GCC_C_LANGUAGE_STANDARD = gnu99; - GCC_OPTIMIZATION_LEVEL = 0; + GCC_OPTIMIZATION_LEVEL = 3; GCC_PREPROCESSOR_DEFINITIONS = ( "MOTHUR_FILES=\"\\\"../release\\\"\"", "VERSION=\"\\\"1.16.0\\\"\"", @@ -1944,6 +1950,7 @@ GCC_WARN_ABOUT_RETURN_TYPE = YES; GCC_WARN_UNUSED_VARIABLE = YES; INSTALL_PATH = ""; + MACH_O_TYPE = mh_execute; ONLY_ACTIVE_ARCH = YES; OTHER_CPLUSPLUSFLAGS = ( "-DUSE_READLINE", @@ -1965,6 +1972,9 @@ ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; DEPLOYMENT_LOCATION = NO; GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_GENERATE_DEBUGGING_SYMBOLS = NO; + GCC_MODEL_TUNING = ""; + GCC_OPTIMIZATION_LEVEL = 3; GCC_PREPROCESSOR_DEFINITIONS = ( "MOTHUR_FILES=\"\\\"../release\\\"\"", "VERSION=\"\\\"1.15.0\\\"\"", @@ -1979,6 +1989,7 @@ GCC_WARN_UNUSED_VALUE = YES; GCC_WARN_UNUSED_VARIABLE = YES; INSTALL_PATH = ""; + MACH_O_TYPE = mh_execute; OTHER_CPLUSPLUSFLAGS = ( "-DUSE_READLINE", "-DBIT_VERSION", @@ -2005,7 +2016,7 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; - 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "mothur" */ = { + 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */ = { isa = XCConfigurationList; buildConfigurations = ( 1DEB928A08733DD80010E9CD /* Debug */, diff --git a/corraxescommand.cpp b/corraxescommand.cpp index 8ebd22c..db2e989 100644 --- a/corraxescommand.cpp +++ b/corraxescommand.cpp @@ -283,11 +283,11 @@ int CorrAxesCommand::execute(){ out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); //output headings - if (metadatafile == "") { out << "OTU\t"; } - else { out << "Feature\t"; } + if (metadatafile == "") { out << "OTU"; } + else { out << "Feature"; } - for (int i = 0; i < numaxes; i++) { out << "axis" << (i+1) << '\t'; } - out << endl; + for (int i = 0; i < numaxes; i++) { out << '\t' << "axis" << (i+1); } + out << "\tlength" << endl; if (method == "pearson") { calcPearson(axes, out); } else if (method == "spearman") { calcSpearman(axes, out); } @@ -329,9 +329,9 @@ int CorrAxesCommand::calcPearson(map >& axes, ofstream& ou //for each otu for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) { - if (metadatafile == "") { out << i+1 << '\t'; } - else { out << metadataLabels[i] << '\t'; } - + if (metadatafile == "") { out << i+1; } + else { out << metadataLabels[i]; } + //find the averages this otu - Y float sumOtu = 0.0; for (int j = 0; j < lookupFloat.size(); j++) { @@ -339,6 +339,8 @@ int CorrAxesCommand::calcPearson(map >& axes, ofstream& ou } float Ybar = sumOtu / (float) lookupFloat.size(); + vector rValues(averageAxes.size()); + //find r value for each axis for (int k = 0; k < averageAxes.size(); k++) { @@ -358,11 +360,15 @@ int CorrAxesCommand::calcPearson(map >& axes, ofstream& ou double denom = (sqrt(denomTerm1) * sqrt(denomTerm2)); r = numerator / denom; - - out << r << '\t'; + rValues[k] = r; + out << '\t' << r; } - out << endl; + double sum = 0; + for(int k=0;k >& axes, ofstream& o //for each otu for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) { - if (metadatafile == "") { out << i+1 << '\t'; } - else { out << metadataLabels[i] << '\t'; } + if (metadatafile == "") { out << i+1; } + else { out << metadataLabels[i]; } //find the ranks of this otu - Y vector otuScores; @@ -458,6 +464,7 @@ int CorrAxesCommand::calcSpearman(map >& axes, ofstream& o } } + vector pValues(numaxes); //calc spearman ranks for each axis for this otu for (int j = 0; j < numaxes; j++) { @@ -473,11 +480,16 @@ int CorrAxesCommand::calcSpearman(map >& axes, ofstream& o int n = lookupFloat.size(); double p = 1.0 - ((6 * di) / (float) (n * ((n*n) - 1))); - out << p << '\t'; + out << '\t' << p; + pValues[j] = p; + } - - - out << endl; + + double sum = 0; + for(int k=0;k >& axes, ofstream& ou //for each otu for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) { - if (metadatafile == "") { out << i+1 << '\t'; } - else { out << metadataLabels[i] << '\t'; } + if (metadatafile == "") { out << i+1; } + else { out << metadataLabels[i]; } //find the ranks of this otu - Y vector otuScores; @@ -569,6 +581,7 @@ int CorrAxesCommand::calcKendall(map >& axes, ofstream& ou } } } + vector pValues(numaxes); //calc spearman ranks for each axis for this otu for (int j = 0; j < numaxes; j++) { @@ -597,10 +610,16 @@ int CorrAxesCommand::calcKendall(map >& axes, ofstream& ou double p = ( (4 * P) / (float) (n * (n - 1)) ) - 1.0; - out << p << '\t'; + out << '\t' << p; + pValues[j] = p; + } - out << endl; + double sum = 0; + for(int k=0;kerrorOut(e, "DistanceDB", "DistanceDB"); @@ -29,7 +29,11 @@ DistanceDB::DistanceDB() { void DistanceDB::addSequence(Sequence seq) { try { //are the template sequences aligned - if (!isAligned(seq.getAligned())) { templateAligned = false; m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method."); m->mothurOutEndLine(); } + if (!isAligned(seq.getAligned())) { + templateAligned = false; + m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method."); + m->mothurOutEndLine(); + } if (templateSeqsLength == 0) { templateSeqsLength = seq.getAligned().length(); } @@ -51,7 +55,11 @@ vector DistanceDB::findClosestSequences(Sequence* query, int numWanted){ searchScore = -1.0; - if (numWanted > data.size()) { m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + "."); m->mothurOutEndLine(); numWanted = data.size(); } + if (numWanted > data.size()){ + m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + "."); + m->mothurOutEndLine(); + numWanted = data.size(); + } if (sequence.length() != templateSeqsLength) { templateSameLength = false; } @@ -93,13 +101,13 @@ vector DistanceDB::findClosestSequences(Sequence* query, int numWanted){ smallDist = dist; } } - searchScore = smallDist; topMatches.push_back(bestIndex); } }else{ - m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length."); m->mothurOutEndLine(); + m->mothurOut("cannot find closest matches using distance method for " + query->getName() + " without aligned template sequences of the same length."); + m->mothurOutEndLine(); exit(1); } diff --git a/eachgapdist.h b/eachgapdist.h index 9034dbe..d66b40c 100644 --- a/eachgapdist.h +++ b/eachgapdist.h @@ -49,8 +49,6 @@ public: if(length == 0) { dist = 1.0000; } else { dist = ((double)diff / (double)length); } - - } }; diff --git a/getseqscommand.cpp b/getseqscommand.cpp index a191515..9f2b506 100644 --- a/getseqscommand.cpp +++ b/getseqscommand.cpp @@ -188,8 +188,6 @@ GetSeqsCommand::GetSeqsCommand(string option) { if (accnosfile == "not open") { abort = true; } else if (accnosfile == "not found") { accnosfile = ""; m->mothurOut("You must provide an accnos file."); m->mothurOutEndLine(); abort = true; } - accnosfile2 = validParameter.validFile(parameters, "accnos2", true); - if (accnosfile2 == "not open") { abort = true; } if (accnosfile2 == "not found") { accnosfile2 = ""; } fastafile = validParameter.validFile(parameters, "fasta", true); diff --git a/seqerrorcommand.cpp b/seqerrorcommand.cpp index fe6d4dd..d2c0fcb 100644 --- a/seqerrorcommand.cpp +++ b/seqerrorcommand.cpp @@ -10,6 +10,7 @@ #include "seqerrorcommand.h" #include "reportfile.h" #include "qualityscores.h" +#include "refchimeratest.h" //********************************************************************************************************************** vector SeqErrorCommand::getValidParameters(){ @@ -81,7 +82,7 @@ SeqErrorCommand::SeqErrorCommand(string option) { string temp; //valid paramters for this command - string AlignArray[] = {"query", "reference", "name", "qfile", "report", "threshold", "inputdir", "outputdir"}; + string AlignArray[] = {"query", "reference", "name", "qfile", "report", "threshold", "inputdir", "ignorechimeras", "outputdir"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); @@ -192,6 +193,9 @@ SeqErrorCommand::SeqErrorCommand(string option) { temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found") { temp = "1.00"; } convert(temp, threshold); + temp = validParameter.validFile(parameters, "ignorechimeras", false); if (temp == "not found") { temp = "1"; } + convert(temp, ignoreChimeras); + substitutionMatrix.resize(6); for(int i=0;i<6;i++){ substitutionMatrix[i].assign(6,0); } } @@ -228,16 +232,15 @@ SeqErrorCommand::~SeqErrorCommand(){ int SeqErrorCommand::execute(){ try{ if (abort == true) { return 0; } - - errorSummaryFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.summary"; + + string errorSummaryFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.summary"; m->openOutputFile(errorSummaryFileName, errorSummaryFile); outputNames.push_back(errorSummaryFileName); outputTypes["error.summary"].push_back(errorSummaryFileName); printErrorHeader(); - errorSeqFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq"; + string errorSeqFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq"; m->openOutputFile(errorSeqFileName, errorSeqFile); outputNames.push_back(errorSeqFileName); outputTypes["error.seq"].push_back(errorSeqFileName); - printErrorHeader(); getReferences(); //read in reference sequences - make sure there's no ambiguous bases @@ -281,8 +284,6 @@ int SeqErrorCommand::execute(){ qScoreErrorMap['i'].assign(41, 0); qScoreErrorMap['a'].assign(41, 0); - - map > errorForward; errorForward['m'].assign(1000,0); errorForward['s'].assign(1000,0); @@ -295,39 +296,47 @@ int SeqErrorCommand::execute(){ errorReverse['s'].assign(1000,0); errorReverse['i'].assign(1000,0); errorReverse['d'].assign(1000,0); - errorReverse['a'].assign(1000,0); + errorReverse['a'].assign(1000,0); + + string errorChimeraFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.chimera"; + RefChimeraTest chimeraTest(referenceSeqs, errorChimeraFileName); + outputNames.push_back(errorChimeraFileName); outputTypes["error.chimera"].push_back(errorChimeraFileName); + int index = 0; + bool ignoreSeq = 0; while(queryFile){ if (m->control_pressed) { errorSummaryFile.close(); errorSeqFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } - - Compare minCompare; + Sequence query(queryFile); - - for(int i=0;i 1 && ignoreChimeras == 1) { ignoreSeq = 1; } + else { ignoreSeq = 0; } + + Compare minCompare = getErrors(query, referenceSeqs[closestRefIndex]); + if(namesFileName != ""){ it = weights.find(query.getName()); minCompare.weight = it->second; } else { minCompare.weight = 1; } - printErrorData(minCompare); + printErrorData(minCompare, numParentSeqs); - for(int i=0;i maxMismatch){ @@ -352,116 +364,21 @@ int SeqErrorCommand::execute(){ numSeqs++; } - + index++; + if(index % 1000 == 0){ cout << index << endl; } } queryFile.close(); - - if(qualFileName != "" && reportFileName != ""){ - string errorQualityFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.quality"; - ofstream errorQualityFile; - m->openOutputFile(errorQualityFileName, errorQualityFile); - outputNames.push_back(errorQualityFileName); outputTypes["error.quality"].push_back(errorQualityFileName); - - errorQualityFile << "qscore\tmatches\tsubstitutions\tinsertions\tambiguous" << endl; - for(int i=0;i<41;i++){ - errorQualityFile << i << '\t' << qScoreErrorMap['m'][i] << '\t' << qScoreErrorMap['s'][i] << '\t' << qScoreErrorMap['i'][i] << '\t'<< qScoreErrorMap['a'][i] << endl; - } - errorQualityFile.close(); - - - - int lastRow = 0; - int lastColumn = 0; - - for(int i=0;iopenOutputFile(qualityForwardFileName, qualityForwardFile); - outputNames.push_back(errorQualityFileName); outputTypes["error.qual.forward"].push_back(qualityForwardFileName); - - for(int i=0;icontrol_pressed) { qualityForwardFile.close(); errorSummaryFile.close(); errorSeqFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } - - qualityForwardFile << i+1; - for(int j=0;jopenOutputFile(qualityReverseFileName, qualityReverseFile); - outputNames.push_back(errorQualityFileName); outputTypes["error.qual.reverse"].push_back(qualityReverseFileName); + errorSummaryFile.close(); + errorSeqFile.close(); - for(int i=0;icontrol_pressed) { qualityReverseFile.close(); errorSummaryFile.close(); errorSeqFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } - - qualityReverseFile << i+1; - for(int j=0;jopenOutputFile(errorForwardFileName, errorForwardFile); - outputNames.push_back(errorForwardFileName); outputTypes["error.forward"].push_back(errorForwardFileName); - - errorForwardFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl; - for(int i=0;i<1000;i++){ - float match = (float)errorForward['m'][i]; - float subst = (float)errorForward['s'][i]; - float insert = (float)errorForward['i'][i]; - float del = (float)errorForward['d'][i]; - float amb = (float)errorForward['a'][i]; - float total = match + subst + insert + del + amb; - if(total == 0){ break; } - errorForwardFile << i+1 << '\t' << total << '\t' << match/total << '\t' << subst/total << '\t' << insert/total << '\t' << del/total << '\t' << amb/total << endl; + if(qualFileName != "" && reportFileName != ""){ + printErrorQuality(qScoreErrorMap); + printQualityFR(qualForwardMap, qualReverseMap); } - errorForwardFile.close(); - - if (m->control_pressed) { errorSummaryFile.close(); errorSeqFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } - - string errorReverseFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq.reverse"; - ofstream errorReverseFile; - m->openOutputFile(errorReverseFileName, errorReverseFile); - outputNames.push_back(errorReverseFileName); outputTypes["error.reverse"].push_back(errorReverseFileName); - errorReverseFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl; - for(int i=0;i<1000;i++){ - float match = (float)errorReverse['m'][i]; - float subst = (float)errorReverse['s'][i]; - float insert = (float)errorReverse['i'][i]; - float del = (float)errorReverse['d'][i]; - float amb = (float)errorReverse['a'][i]; - float total = match + subst + insert + del + amb; - if(total == 0){ break; } - errorReverseFile << i+1 << '\t' << total << '\t' << match/total << '\t' << subst/total << '\t' << insert/total << '\t' << del/total << '\t' << amb/total << endl; - } - errorReverseFile.close(); + printErrorFRFile(errorForward, errorReverse); - if (m->control_pressed) { errorSummaryFile.close(); errorSeqFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } string errorCountFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.count"; ofstream errorCountFile; @@ -476,50 +393,10 @@ int SeqErrorCommand::execute(){ } errorCountFile.close(); - if (m->control_pressed) { errorSummaryFile.close(); errorSeqFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } - string subMatrixFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.matrix"; - ofstream subMatrixFile; - m->openOutputFile(subMatrixFileName, subMatrixFile); - outputNames.push_back(subMatrixFileName); outputTypes["error.matrix"].push_back(subMatrixFileName); - vector bases(6); - bases[0] = "A"; - bases[1] = "T"; - bases[2] = "G"; - bases[3] = "C"; - bases[4] = "Gap"; - bases[5] = "N"; - vector refSums(5,1); - - for(int i=0;i<5;i++){ - subMatrixFile << "\tr" << bases[i]; - - for(int j=0;j<6;j++){ - refSums[i] += substitutionMatrix[i][j]; - } - - } - subMatrixFile << endl; - - for(int i=0;i<6;i++){ - subMatrixFile << 'q' << bases[i]; - for(int j=0;j<5;j++){ - subMatrixFile << '\t' << substitutionMatrix[j][i]; - } - subMatrixFile << endl; - } - subMatrixFile << "total"; - for(int i=0;i<5;i++){ - subMatrixFile << '\t' << refSums[i]; - } - subMatrixFile << endl; - subMatrixFile.close(); - - errorSummaryFile.close(); - errorSeqFile.close(); - - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } - + printSubMatrix(); + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -675,7 +552,7 @@ void SeqErrorCommand::printErrorHeader(){ try { errorSummaryFile << "query\treference\tweight\t"; errorSummaryFile << "AA\tAT\tAG\tAC\tTA\tTT\tTG\tTC\tGA\tGT\tGG\tGC\tCA\tCT\tCG\tCC\tNA\tNT\tNG\tNC\tAi\tTi\tGi\tCi\tNi\tdA\tdT\tdG\tdC\t"; - errorSummaryFile << "insertions\tdeletions\tsubstitutions\tambig\tmatches\tmismatches\ttotal\terror\n"; + errorSummaryFile << "insertions\tdeletions\tsubstitutions\tambig\tmatches\tmismatches\ttotal\terror\tnumparents\n"; errorSummaryFile << setprecision(6); errorSummaryFile.setf(ios::fixed); @@ -688,7 +565,7 @@ void SeqErrorCommand::printErrorHeader(){ //*************************************************************************************************************** -void SeqErrorCommand::printErrorData(Compare error){ +void SeqErrorCommand::printErrorData(Compare error, int numParentSeqs){ try { errorSummaryFile << error.queryName << '\t' << error.refName << '\t' << error.weight << '\t'; errorSummaryFile << error.AA << '\t' << error.AT << '\t' << error.AG << '\t' << error.AC << '\t'; @@ -703,48 +580,48 @@ void SeqErrorCommand::printErrorData(Compare error){ errorSummaryFile << error.dA + error.dT + error.dG + error.dC << '\t'; //deletions errorSummaryFile << error.mismatches - (error.Ai + error.Ti + error.Gi + error.Ci) - (error.dA + error.dT + error.dG + error.dC) - (error.NA + error.NT + error.NG + error.NC + error.Ni) << '\t'; //substitutions errorSummaryFile << error.NA + error.NT + error.NG + error.NC + error.Ni << '\t'; //ambiguities - errorSummaryFile << error.matches << '\t' << error.mismatches << '\t' << error.total << '\t' << error.errorRate << endl; + errorSummaryFile << error.matches << '\t' << error.mismatches << '\t' << error.total << '\t' << error.errorRate << '\t' << numParentSeqs << endl; errorSeqFile << '>' << error.queryName << "\tref:" << error.refName << '\n' << error.sequence << endl; int a=0; int t=1; int g=2; int c=3; int gap=4; int n=5; - - substitutionMatrix[a][a] += error.weight * error.AA; - substitutionMatrix[a][t] += error.weight * error.TA; - substitutionMatrix[a][g] += error.weight * error.GA; - substitutionMatrix[a][c] += error.weight * error.CA; - substitutionMatrix[a][gap] += error.weight * error.dA; - substitutionMatrix[a][n] += error.weight * error.NA; - - substitutionMatrix[t][a] += error.weight * error.AT; - substitutionMatrix[t][t] += error.weight * error.TT; - substitutionMatrix[t][g] += error.weight * error.GT; - substitutionMatrix[t][c] += error.weight * error.CT; - substitutionMatrix[t][gap] += error.weight * error.dT; - substitutionMatrix[t][n] += error.weight * error.NT; - - substitutionMatrix[g][a] += error.weight * error.AG; - substitutionMatrix[g][t] += error.weight * error.TG; - substitutionMatrix[g][g] += error.weight * error.GG; - substitutionMatrix[g][c] += error.weight * error.CG; - substitutionMatrix[g][gap] += error.weight * error.dG; - substitutionMatrix[g][n] += error.weight * error.NG; - - substitutionMatrix[c][a] += error.weight * error.AC; - substitutionMatrix[c][t] += error.weight * error.TC; - substitutionMatrix[c][g] += error.weight * error.GC; - substitutionMatrix[c][c] += error.weight * error.CC; - substitutionMatrix[c][gap] += error.weight * error.dC; - substitutionMatrix[c][n] += error.weight * error.NC; - - substitutionMatrix[gap][a] += error.weight * error.Ai; - substitutionMatrix[gap][t] += error.weight * error.Ti; - substitutionMatrix[gap][g] += error.weight * error.Gi; - substitutionMatrix[gap][c] += error.weight * error.Ci; - substitutionMatrix[gap][n] += error.weight * error.Ni; - + if(numParentSeqs == 1 || ignoreChimeras == 0){ + substitutionMatrix[a][a] += error.weight * error.AA; + substitutionMatrix[a][t] += error.weight * error.TA; + substitutionMatrix[a][g] += error.weight * error.GA; + substitutionMatrix[a][c] += error.weight * error.CA; + substitutionMatrix[a][gap] += error.weight * error.dA; + substitutionMatrix[a][n] += error.weight * error.NA; + + substitutionMatrix[t][a] += error.weight * error.AT; + substitutionMatrix[t][t] += error.weight * error.TT; + substitutionMatrix[t][g] += error.weight * error.GT; + substitutionMatrix[t][c] += error.weight * error.CT; + substitutionMatrix[t][gap] += error.weight * error.dT; + substitutionMatrix[t][n] += error.weight * error.NT; + + substitutionMatrix[g][a] += error.weight * error.AG; + substitutionMatrix[g][t] += error.weight * error.TG; + substitutionMatrix[g][g] += error.weight * error.GG; + substitutionMatrix[g][c] += error.weight * error.CG; + substitutionMatrix[g][gap] += error.weight * error.dG; + substitutionMatrix[g][n] += error.weight * error.NG; + + substitutionMatrix[c][a] += error.weight * error.AC; + substitutionMatrix[c][t] += error.weight * error.TC; + substitutionMatrix[c][g] += error.weight * error.GC; + substitutionMatrix[c][c] += error.weight * error.CC; + substitutionMatrix[c][gap] += error.weight * error.dC; + substitutionMatrix[c][n] += error.weight * error.NC; + + substitutionMatrix[gap][a] += error.weight * error.Ai; + substitutionMatrix[gap][t] += error.weight * error.Ti; + substitutionMatrix[gap][g] += error.weight * error.Gi; + substitutionMatrix[gap][c] += error.weight * error.Ci; + substitutionMatrix[gap][n] += error.weight * error.Ni; + } } catch(exception& e) { m->errorOut(e, "SeqErrorCommand", "printErrorData"); @@ -754,9 +631,176 @@ void SeqErrorCommand::printErrorData(Compare error){ //*************************************************************************************************************** +void SeqErrorCommand::printSubMatrix(){ + try { + string subMatrixFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.matrix"; + ofstream subMatrixFile; + m->openOutputFile(subMatrixFileName, subMatrixFile); + outputNames.push_back(subMatrixFileName); outputTypes["error.matrix"].push_back(subMatrixFileName); + vector bases(6); + bases[0] = "A"; + bases[1] = "T"; + bases[2] = "G"; + bases[3] = "C"; + bases[4] = "Gap"; + bases[5] = "N"; + vector refSums(5,1); + + for(int i=0;i<5;i++){ + subMatrixFile << "\tr" << bases[i]; + + for(int j=0;j<6;j++){ + refSums[i] += substitutionMatrix[i][j]; + } + } + subMatrixFile << endl; + + for(int i=0;i<6;i++){ + subMatrixFile << 'q' << bases[i]; + for(int j=0;j<5;j++){ + subMatrixFile << '\t' << substitutionMatrix[j][i]; + } + subMatrixFile << endl; + } + + subMatrixFile << "total"; + for(int i=0;i<5;i++){ + subMatrixFile << '\t' << refSums[i]; + } + subMatrixFile << endl; + subMatrixFile.close(); + } + catch(exception& e) { + m->errorOut(e, "SeqErrorCommand", "printSubMatrix"); + exit(1); + } +} +//*************************************************************************************************************** + +void SeqErrorCommand::printErrorFRFile(map > errorForward, map > errorReverse){ + try{ + string errorForwardFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq.forward"; + ofstream errorForwardFile; + m->openOutputFile(errorForwardFileName, errorForwardFile); + outputNames.push_back(errorForwardFileName); outputTypes["error.forward"].push_back(errorForwardFileName); + + errorForwardFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl; + for(int i=0;i<1000;i++){ + float match = (float)errorForward['m'][i]; + float subst = (float)errorForward['s'][i]; + float insert = (float)errorForward['i'][i]; + float del = (float)errorForward['d'][i]; + float amb = (float)errorForward['a'][i]; + float total = match + subst + insert + del + amb; + if(total == 0){ break; } + errorForwardFile << i+1 << '\t' << total << '\t' << match/total << '\t' << subst/total << '\t' << insert/total << '\t' << del/total << '\t' << amb/total << endl; + } + errorForwardFile.close(); + + string errorReverseFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.seq.reverse"; + ofstream errorReverseFile; + m->openOutputFile(errorReverseFileName, errorReverseFile); + outputNames.push_back(errorReverseFileName); outputTypes["error.reverse"].push_back(errorReverseFileName); + + errorReverseFile << "position\ttotalseqs\tmatch\tsubstitution\tinsertion\tdeletion\tambiguous" << endl; + for(int i=0;i<1000;i++){ + float match = (float)errorReverse['m'][i]; + float subst = (float)errorReverse['s'][i]; + float insert = (float)errorReverse['i'][i]; + float del = (float)errorReverse['d'][i]; + float amb = (float)errorReverse['a'][i]; + float total = match + subst + insert + del + amb; + if(total == 0){ break; } + errorReverseFile << i+1 << '\t' << total << '\t' << match/total << '\t' << subst/total << '\t' << insert/total << '\t' << del/total << '\t' << amb/total << endl; + } + errorReverseFile.close(); + } + catch(exception& e) { + m->errorOut(e, "SeqErrorCommand", "printErrorFRFile"); + exit(1); + } +} + +//*************************************************************************************************************** + +void SeqErrorCommand::printErrorQuality(map > qScoreErrorMap){ + try{ + string errorQualityFileName = queryFileName.substr(0,queryFileName.find_last_of('.')) + ".error.quality"; + ofstream errorQualityFile; + m->openOutputFile(errorQualityFileName, errorQualityFile); + outputNames.push_back(errorQualityFileName); outputTypes["error.quality"].push_back(errorQualityFileName); + errorQualityFile << "qscore\tmatches\tsubstitutions\tinsertions\tambiguous" << endl; + for(int i=0;i<41;i++){ + errorQualityFile << i << '\t' << qScoreErrorMap['m'][i] << '\t' << qScoreErrorMap['s'][i] << '\t' << qScoreErrorMap['i'][i] << '\t'<< qScoreErrorMap['a'][i] << endl; + } + errorQualityFile.close(); + } + catch(exception& e) { + m->errorOut(e, "SeqErrorCommand", "printErrorFRFile"); + exit(1); + } +} + + +//*************************************************************************************************************** + +void SeqErrorCommand::printQualityFR(vector > qualForwardMap, vector > qualReverseMap){ + try{ + + + int lastRow = 0; + int lastColumn = 0; + + for(int i=0;iopenOutputFile(qualityForwardFileName, qualityForwardFile); + outputNames.push_back(qualityForwardFileName); outputTypes["error.qual.forward"].push_back(qualityForwardFileName); + for(int i=0;iopenOutputFile(qualityReverseFileName, qualityReverseFile); + outputNames.push_back(qualityReverseFileName); outputTypes["error.qual.reverse"].push_back(qualityReverseFileName); + + for(int i=0;ierrorOut(e, "SeqErrorCommand", "printErrorFRFile"); + exit(1); + } +} + + +//*************************************************************************************************************** diff --git a/seqerrorcommand.h b/seqerrorcommand.h index e8ca96a..00b5367 100644 --- a/seqerrorcommand.h +++ b/seqerrorcommand.h @@ -58,10 +58,15 @@ private: map getWeights(); Compare getErrors(Sequence, Sequence); void printErrorHeader(); - void printErrorData(Compare); - - string queryFileName, referenceFileName, qualFileName, reportFileName, namesFileName, errorSummaryFileName, errorSeqFileName, outputDir; + void printErrorData(Compare, int); + void printSubMatrix(); + void printErrorFRFile(map >, map >); + void printErrorQuality(map >); + void printQualityFR(vector >, vector >); + + string queryFileName, referenceFileName, qualFileName, reportFileName, namesFileName, outputDir; double threshold; + bool ignoreChimeras; int numRefs; ofstream errorSummaryFile, errorSeqFile; vector outputNames; -- 2.39.2