X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=decalc.cpp;h=f0b1ecc4fb1d31cbb79837579604cffaf84863e9;hb=d8ed98e71c2da5b39b8a778e2c694a4ddff677eb;hp=857bcf2e09c5e20f1f6a03cc95be674898411e6e;hpb=aa9238c0a9e6e7aa0ed8b8b606b08ad4fd7dcfe3;p=mothur.git diff --git a/decalc.cpp b/decalc.cpp index 857bcf2..f0b1ecc 100644 --- a/decalc.cpp +++ b/decalc.cpp @@ -150,7 +150,7 @@ vector DeCalculator::findWindows(Sequence* query, int front, int back, int */ //this follows wigeon, but we may want to consider that it chops off the end values if the sequence cannot be evenly divided into steps - for (int m = front; m < (back - size) ; m+=increment) { win.push_back(m); } + for (int i = front; i < (back - size) ; i+=increment) { win.push_back(i); } @@ -169,10 +169,10 @@ vector DeCalculator::calcObserved(Sequence* query, Sequence* subject, vec vector temp; //int gaps = 0; - for (int m = 0; m < window.size(); m++) { + for (int i = 0; i < window.size(); i++) { - string seqFrag = query->getAligned().substr(window[m], size); - string seqFragsub = subject->getAligned().substr(window[m], size); + string seqFrag = query->getAligned().substr(window[i], size); + string seqFragsub = subject->getAligned().substr(window[i], size); int diff = 0; for (int b = 0; b < seqFrag.length(); b++) { @@ -244,9 +244,9 @@ vector DeCalculator::calcExpected(vector qav, float coef) { //for each window vector queryExpected; - for (int m = 0; m < qav.size(); m++) { + for (int j = 0; j < qav.size(); j++) { - float expected = qav[m] * coef; + float expected = qav[j] * coef; queryExpected.push_back(expected); } @@ -264,12 +264,12 @@ float DeCalculator::calcDE(vector obs, vector exp) { try { //for each window - float sum = 0.0; //sum = sum from 1 to m of (oi-ei)^2 + float sum = 0.0; //sum = sum from 1 to i of (oi-ei)^2 int numZeros = 0; - for (int m = 0; m < obs.size(); m++) { + for (int j = 0; j < obs.size(); j++) { - //if (obs[m] != 0.0) { - sum += ((obs[m] - exp[m]) * (obs[m] - exp[m])); + //if (obs[j] != 0.0) { + sum += ((obs[j] - exp[j]) * (obs[j] - exp[j])); //}else { numZeros++; } } @@ -290,10 +290,12 @@ vector DeCalculator::calcFreq(vector seqs, string filename) { try { vector prob; - string freqfile = getRootName(filename) + "freq"; + string freqfile = m->getRootName(filename) + "freq"; ofstream outFreq; - openOutputFile(freqfile, outFreq); + m->openOutputFile(freqfile, outFreq); + + outFreq << "#" << m->getVersion() << endl; string length = toString(seqs.size()); //if there are 5000 seqs in the template then set precision to 3 int precision = length.length() - 1; @@ -321,7 +323,7 @@ vector DeCalculator::calcFreq(vector seqs, string filename) { //find base with highest frequency int highest = 0; - for (int m = 0; m < freq.size(); m++) { if (freq[m] > highest) { highest = freq[m]; } } + for (int j = 0; j < freq.size(); j++) { if (freq[j] > highest) { highest = freq[j]; } } float highFreq = highest / (float) (seqs.size()); @@ -355,13 +357,13 @@ vector DeCalculator::findQav(vector window, int size, vector vector averages; //for each window find average - for (int m = 0; m < window.size(); m++) { + for (int i = 0; i < window.size(); i++) { float average = 0.0; //while you are in the window for this sequence int count = 0; - for (int j = window[m]; j < (window[m]+size); j++) { + for (int j = window[i]; j < (window[i]+size); j++) { average += probabilityProfile[j]; count++; } @@ -381,16 +383,13 @@ vector DeCalculator::findQav(vector window, int size, vector } //*************************************************************************************************************** //seqs have already been masked -vector< vector > DeCalculator::getQuantiles(vector seqs, vector windowSizesTemplate, int window, vector probProfile, int increment, int start, int end) { +vector< vector > DeCalculator::getQuantiles(vector seqs, vector windowSizesTemplate, int window, vector probProfile, int increment, int start, int end) { try { - vector< vector > quan; + vector< vector > quan; //percentage of mismatched pairs 1 to 100 quan.resize(100); -//ofstream o; -//string out = "getQuantiles.out"; -//openOutputFile(out, o); - + //for each sequence for(int i = start; i < end; i++){ @@ -428,18 +427,19 @@ vector< vector > DeCalculator::getQuantiles(vector seqs, float de = calcDE(obsi, exp); float dist = calcDist(query, subject, front, back); - //o << i << '\t' << j << '\t' << dist << '\t' << de << endl; + //cout << i << '\t' << j << '\t' << dist << '\t' << de << endl; dist = ceil(dist); - quanMember newScore(de, i, j); - - quan[dist].push_back(newScore); + //quanMember newScore(de, i, j); + quan[dist].push_back(de); + delete subject; } delete query; } + return quan; @@ -456,27 +456,27 @@ inline bool compareQuanMembers(quanMember left, quanMember right){ } //*************************************************************************************************************** //this was going to be used by pintail to increase the sensitivity of the chimera detection, but it wasn't quite right. may want to revisit in the future... -void DeCalculator::removeObviousOutliers(vector< vector >& quantiles, int num) { +void DeCalculator::removeObviousOutliers(vector< vector >& quantiles, int num) { try { for (int i = 0; i < quantiles.size(); i++) { - - //find mean of this quantile score - sort(quantiles[i].begin(), quantiles[i].end(), compareQuanMembers); - - float high = quantiles[i][int(quantiles[i].size() * 0.99)].score; - float low = quantiles[i][int(quantiles[i].size() * 0.01)].score; - vector temp; + //find mean of this quantile score + sort(quantiles[i].begin(), quantiles[i].end()); - //look at each value in quantiles to see if it is an outlier - for (int j = 0; j < quantiles[i].size(); j++) { - //is this score between 1 and 99% - if ((quantiles[i][j].score > low) && (quantiles[i][j].score < high)) { - temp.push_back(quantiles[i][j]); + vector temp; + if (quantiles[i].size() != 0) { + float high = quantiles[i][int(quantiles[i].size() * 0.99)]; + float low = quantiles[i][int(quantiles[i].size() * 0.01)]; + + //look at each value in quantiles to see if it is an outlier + for (int j = 0; j < quantiles[i].size(); j++) { + //is this score between 1 and 99% + if ((quantiles[i][j] > low) && (quantiles[i][j] < high)) { + temp.push_back(quantiles[i][j]); + } } } - quantiles[i] = temp; } @@ -688,6 +688,7 @@ vector DeCalculator::findClosest(Sequence* querySeq, vector seqsMatches; + vector distsLeft; vector distsRight; @@ -759,14 +760,14 @@ vector DeCalculator::findClosest(Sequence* querySeq, vectorgetDist(); SeqDist subjectLeft; - subjectLeft.seq = db[j]; + subjectLeft.seq = NULL; subjectLeft.dist = distLeft; subjectLeft.index = j; distsLeft.push_back(subjectLeft); SeqDist subjectRight; - subjectRight.seq = db[j]; + subjectRight.seq = NULL; subjectRight.dist = distRight; subjectRight.index = j; @@ -779,7 +780,7 @@ vector DeCalculator::findClosest(Sequence* querySeq, vector seen; map::iterator it; @@ -790,18 +791,18 @@ vector DeCalculator::findClosest(Sequence* querySeq, vectorgetName()); + it = seen.find(db[distsLeft[i].index]->getName()); if (it == seen.end()) { dists.push_back(distsLeft[i]); - seen[distsLeft[i].seq->getName()] = distsLeft[i].seq->getName(); + seen[db[distsLeft[i].index]->getName()] = db[distsLeft[i].index]->getName(); lastLeft = distsLeft[i].dist; } //add right if you havent already - it = seen.find(distsRight[i].seq->getName()); + it = seen.find(db[distsRight[i].index]->getName()); if (it == seen.end()) { dists.push_back(distsRight[i]); - seen[distsRight[i].seq->getName()] = distsRight[i].seq->getName(); + seen[db[distsRight[i].index]->getName()] = db[distsRight[i].index]->getName(); lastRight = distsRight[i].dist; } @@ -824,10 +825,17 @@ vector DeCalculator::findClosest(Sequence* querySeq, vector dists.size()) { + //m->mothurOut("numwanted is larger than the number of template sequences, adjusting numwanted."); m->mothurOutEndLine(); + numWanted = dists.size(); + } + //cout << numWanted << endl; for (int i = 0; i < numWanted; i++) { //cout << dists[i].seq->getName() << '\t' << dists[i].dist << endl; - Sequence* temp = new Sequence(dists[i].seq->getName(), dists[i].seq->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother. + + Sequence* temp = new Sequence(db[dists[i].index]->getName(), db[dists[i].index]->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother. + seqsMatches.push_back(temp); indexes.push_back(dists[i].index); } @@ -945,28 +953,34 @@ map DeCalculator::trimSeqs(Sequence* query, vector topMatch //save this spot if it is the farthest if (pos < rearPos) { rearPos = pos; } - - //check to make sure that is not whole seq - if ((rearPos - frontPos - 1) <= 0) { m->mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); m->mothurOutEndLine(); exit(1); } -//cout << query->getName() << " front = " << frontPos << " rear = " << rearPos << endl; - //trim query - string newAligned = query->getAligned(); - newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1)); - query->setAligned(newAligned); - - //trim topMatches - for (int i = 0; i < topMatches.size(); i++) { - newAligned = topMatches[i]->getAligned(); - newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1)); - topMatches[i]->setAligned(newAligned); - } map trimmedPos; - - for (int i = 0; i < newAligned.length(); i++) { - trimmedPos[i] = i+frontPos; + //check to make sure that is not whole seq + if ((rearPos - frontPos - 1) <= 0) { + query->setAligned(""); + //trim topMatches + for (int i = 0; i < topMatches.size(); i++) { + topMatches[i]->setAligned(""); + } + + }else { + + //trim query + string newAligned = query->getAligned(); + newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1)); + query->setAligned(newAligned); + + //trim topMatches + for (int i = 0; i < topMatches.size(); i++) { + newAligned = topMatches[i]->getAligned(); + newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1)); + topMatches[i]->setAligned(newAligned); + } + + for (int i = 0; i < newAligned.length(); i++) { + trimmedPos[i] = i+frontPos; + } } - return trimmedPos; } catch(exception& e) {