From: westcott Date: Tue, 3 May 2011 16:29:07 +0000 (+0000) Subject: fixed id match X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=4c16a1dac0538d5ba2ac925674747ab174612ab8 fixed id match --- diff --git a/chimerarealigner.h b/chimerarealigner.h index 7ad7418..cf71158 100644 --- a/chimerarealigner.h +++ b/chimerarealigner.h @@ -25,8 +25,6 @@ class ChimeraReAligner { private: Sequence* querySeq; - - //Sequence* getSequence(string); //find sequence from name MothurOut* m; }; /***********************************************************/ diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index e62c2f0..1f7160a 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -765,10 +765,20 @@ int ChimeraSlayer::getChimeras(Sequence* query) { if (chimeraFlag == "yes") { if (realign) { + vector parents; + for (int i = 0; i < Results.size(); i++) { +cout << Results[i].parent << '\t' << Results[i].nastRegionStart << '\t' << Results[i].nastRegionEnd << endl; + Sequence* parent = new Sequence(Results[i].parent, Results[i].parentAligned); + + parents.push_back(parent); + } + ChimeraReAligner realigner; - realigner.reAlign(query, Results); + //realigner.reAlign(query, parents); + + for (int i = 0; i < parents.size(); i++) { delete parents[i]; } } - + //query->printSequence(cout); //get sequence that were given from maligner results vector seqs; map removeDups; @@ -1037,7 +1047,7 @@ vector ChimeraSlayer::getBlastSeqs(Sequence* q, vector& db vector mergedResults; for (int i = 0; i < smaller.size(); i++) { if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; } - + //add left if you havent already it = seen.find(smaller[i]); if (it == seen.end()) { @@ -1065,7 +1075,7 @@ vector ChimeraSlayer::getBlastSeqs(Sequence* q, vector& db } for (int i = 0; i < mergedResults.size(); i++) { - + //cout << mergedResults[i] << '\t' << db[mergedResults[i]]->getName() << endl; if (db[mergedResults[i]]->getName() != q->getName()) { Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned()); refResults.push_back(temp); diff --git a/maligner.cpp b/maligner.cpp index 2c2cb4b..009f1e9 100644 --- a/maligner.cpp +++ b/maligner.cpp @@ -98,7 +98,7 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { int traceStart = trace[0].col; int traceEnd = trace[trace.size()-1].oldCol; string queryInRange = query->getAligned(); - queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart+1)); + queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart)); if (m->control_pressed) { return chimera; } @@ -118,20 +118,20 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) { temp.regionEnd = regionEnd; string parentInRange = refSeqs[seqIndex]->getAligned(); - parentInRange = parentInRange.substr(traceStart, (traceEnd-traceStart+1)); + parentInRange = parentInRange.substr(traceStart, (traceEnd-traceStart)); temp.queryToParent = computePercentID(queryInRange, parentInRange); temp.divR = (percentIdenticalQueryChimera / temp.queryToParent); string queryInRegion = query->getAligned(); - queryInRegion = queryInRegion.substr(regionStart, (regionEnd-regionStart+1)); + queryInRegion = queryInRegion.substr(regionStart, (regionEnd-regionStart)); string parentInRegion = refSeqs[seqIndex]->getAligned(); - parentInRegion = parentInRegion.substr(regionStart, (regionEnd-regionStart+1)); + parentInRegion = parentInRegion.substr(regionStart, (regionEnd-regionStart)); temp.queryToParentLocal = computePercentID(queryInRegion, parentInRegion); - //cout << temp.parent << '\t' << "NAST:" << temp.nastRegionStart << '-' << temp.nastRegionEnd << " G:" << temp.queryToParent << " L:" << temp.queryToParentLocal << endl; + cout << temp.parent << '\t' << "NAST:" << temp.nastRegionStart << '-' << temp.nastRegionEnd << " G:" << temp.queryToParent << " L:" << temp.queryToParentLocal << endl; outputResults.push_back(temp); } @@ -585,19 +585,32 @@ float Maligner::computePercentID(string queryAlign, string chimera) { return -1.0; } - - int numBases = 0; int numIdentical = 0; - + int countA = 0; + int countB = 0; for (int i = 0; i < queryAlign.length(); i++) { - if ((isalpha(queryAlign[i])) || (isalpha(chimera[i]))) { - numBases++; - if (queryAlign[i] == chimera[i]) { - numIdentical++; + if (((queryAlign[i] != 'G') && (queryAlign[i] != 'T') && (queryAlign[i] != 'A') && (queryAlign[i] != 'C')&& (queryAlign[i] != '.') && (queryAlign[i] != '-')) || + ((chimera[i] != 'G') && (chimera[i] != 'T') && (chimera[i] != 'A') && (chimera[i] != 'C')&& (chimera[i] != '.') && (chimera[i] != '-'))) {} + else { + + bool charA = false; bool charB = false; + if ((queryAlign[i] == 'G') || (queryAlign[i] == 'T') || (queryAlign[i] == 'A') || (queryAlign[i] == 'C')) { charA = true; } + if ((chimera[i] == 'G') || (chimera[i] == 'T') || (chimera[i] == 'A') || (chimera[i] == 'C')) { charB = true; } + + if (charA || charB) { + + if (charA) { countA++; } + if (charB) { countB++; } + + if (queryAlign[i] == chimera[i]) { + numIdentical++; + } } } } - + + float numBases = (countA + countB) /(float) 2; + if (numBases == 0) { return 0; } float percentIdentical = (numIdentical/(float)numBases) * 100; diff --git a/subsamplecommand.cpp b/subsamplecommand.cpp index a055e81..73c1e5c 100644 --- a/subsamplecommand.cpp +++ b/subsamplecommand.cpp @@ -1146,7 +1146,7 @@ int SubSampleCommand::processList(ListVector*& list, ofstream& out, set& individual += binnames[j]; } } - if (subset.count(individual) != 0) { newNames += individual; } + if (subset.count(individual) != 0) { newNames += individual + ","; } //if there are names in this bin add to new list