From: pschloss Date: Tue, 15 Feb 2011 11:17:04 +0000 (+0000) Subject: mods to seq.errror X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=0bcfddf7bc721a334bdae42d86a580019303537d mods to seq.errror --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 5b4e076..b95f673 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -1620,7 +1620,7 @@ attributes = { ORGANIZATIONNAME = "Schloss Lab"; }; - buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "mothur" */; + buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */; compatibilityVersion = "Xcode 3.1"; developmentRegion = English; hasScannedForEncodings = 1; @@ -2042,7 +2042,7 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; - 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "mothur" */ = { + 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */ = { isa = XCConfigurationList; buildConfigurations = ( 1DEB928A08733DD80010E9CD /* Debug */, diff --git a/makefile b/makefile index 6ac7e31..7cc71f0 100644 --- a/makefile +++ b/makefile @@ -9,7 +9,7 @@ # Macros # -USEMPI ?= yes +USEMPI ?= no 64BIT_VERSION ?= yes USEREADLINE ?= yes CYGWIN_BUILD ?= no diff --git a/mothur.h b/mothur.h index 6e5833f..97a1043 100644 --- a/mothur.h +++ b/mothur.h @@ -57,6 +57,7 @@ #include #include #include + #include #include #include @@ -70,7 +71,7 @@ #include //get cwd #include #include - + #include #endif using namespace std; diff --git a/refchimeratest.cpp b/refchimeratest.cpp index e540d8a..4d4a658 100644 --- a/refchimeratest.cpp +++ b/refchimeratest.cpp @@ -30,7 +30,7 @@ RefChimeraTest::RefChimeraTest(vector& refs, string chimeraReportFileN alignLength = referenceSeqs[0].length(); - chimeraReportFile << "queryName\tbestRef\tbestSequenceMismatch\tleftParentChi,rightParentChi\tbreakPointChi\tminMismatchToChimera\tdistToBestMera\tnumParents"; + chimeraReportFile << "queryName\tbestRef\tbestSequenceMismatch\tleftParentChi,rightParentChi\tbreakPointChi\tminMismatchToChimera\tdistToBestMera\tnumParents" << endl; // chimeraReportFile << "leftParentTri,middleParentTri,rightParentTri\tbreakPointTriA,breakPointTriB\tminMismatchToTrimera\tdistToBestMera\tnMera" << endl; } @@ -111,7 +111,8 @@ int RefChimeraTest::getMismatches(string& querySeq, vector >& left, int lDiffs = 0; for(int l=0;l >& left, int rDiffs = 0; int index = 0; for(int l=alignLength-1;l>=0;l--){ - if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){ +// if(querySeq[l] != '.' && querySeq[l] != referenceSeqs[i][l]){ + if(querySeq[l] != '.' && referenceSeqs[i][l] != '.' && querySeq[l] != referenceSeqs[i][l]){ rDiffs++; } right[i][index++] = rDiffs; @@ -254,7 +256,8 @@ double RefChimeraTest::calcDistToChimera(string& querySeq, string& chimeraRefSeq int mismatch = 0; for(int i=0;i megaAlignVector(numRefs, ""); + int index = 0; bool ignoreSeq = 0; @@ -327,7 +327,8 @@ int SeqErrorCommand::execute(){ else { minCompare.weight = 1; } printErrorData(minCompare, numParentSeqs); - + + if(!ignoreSeq){ for(int i=0;iopenOutputFile(megAlignmentFileName, megAlignmentFile); + + for(int i=0;imothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -419,20 +433,34 @@ void SeqErrorCommand::getReferences(){ int numAmbigSeqs = 0; + int maxStartPos = 0; + int minEndPos = 100000; + while(referenceFile){ Sequence currentSeq(referenceFile); int numAmbigs = currentSeq.getAmbigBases(); if(numAmbigs > 0){ numAmbigSeqs++; } + + int startPos = currentSeq.getStartPos(); + if(startPos > maxStartPos) { maxStartPos = startPos; } + + int endPos = currentSeq.getEndPos(); + if(endPos < minEndPos) { minEndPos = endPos; } referenceSeqs.push_back(currentSeq); m->gobble(referenceFile); } referenceFile.close(); + numRefs = referenceSeqs.size(); + + for(int i=0;imothurOut("Warning: " + toString(numAmbigSeqs) + " reference sequences have ambiguous bases, these bases will be ignored\n"); - } + } - numRefs = referenceSeqs.size(); } catch(exception& e) { m->errorOut(e, "SeqErrorCommand", "getReferences"); @@ -507,7 +535,6 @@ Compare SeqErrorCommand::getErrors(Sequence query, Sequence reference){ if(started == 1){ break; } } else if(q[i] != '.' && r[i] == '.'){ // query extends beyond reference -// m->mothurOut("Warning: " + toString(query.getName()) + " extend beyond " + toString(reference.getName()) + ". Ignoring the extra bases in the query\n"); if(started == 1){ break; } } else if(q[i] == '.' && r[i] == '.'){ // both are missing data diff --git a/sequence.cpp b/sequence.cpp index 872a0f0..6aa0c0f 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -427,6 +427,13 @@ string Sequence::getAligned(){ else { return aligned; } } +//******************************************************************************************************************** + +string Sequence::getInlineSeq(){ + return name + '\t' + aligned; +} + + //******************************************************************************************************************** string Sequence::getPairwise(){ @@ -514,7 +521,7 @@ int Sequence::getLongHomoPolymer(){ //******************************************************************************************************************** int Sequence::getStartPos(){ - if(endPos == -1){ + if(startPos == -1){ for(int j = 0; j < alignmentLength; j++) { if(aligned[j] != '.'){ startPos = j + 1; @@ -529,6 +536,17 @@ int Sequence::getStartPos(){ //******************************************************************************************************************** +void Sequence::padToPos(int start){ + + for(int j = startPos-1; j < start-1; j++) { + aligned[j] = '.'; + } + startPos = start; + +} + +//******************************************************************************************************************** + int Sequence::getEndPos(){ if(endPos == -1){ for(int j=alignmentLength-1;j>=0;j--){ @@ -545,6 +563,17 @@ int Sequence::getEndPos(){ //******************************************************************************************************************** +void Sequence::padFromPos(int end){ + + for(int j = end; j < endPos; j++) { + aligned[j] = '.'; + } + endPos = end; + +} + +//******************************************************************************************************************** + bool Sequence::getIsAligned(){ return isAligned; } diff --git a/sequence.hpp b/sequence.hpp index 48287af..94d7d29 100644 --- a/sequence.hpp +++ b/sequence.hpp @@ -44,9 +44,12 @@ public: string getAligned(); string getPairwise(); string getUnaligned(); + string getInlineSeq(); int getNumBases(); int getStartPos(); int getEndPos(); + void padToPos(int); + void padFromPos(int); int getAlignLength(); int getAmbigBases(); void removeAmbigBases();