X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=decalc.cpp;h=c95191074942f12e60a802fab8b667b7fea57ff9;hb=bbe4d799c02b23891ff4845fe3aad9376f739f86;hp=5aed358b5fe35d5ff20047fa4d857f72f5986ea3;hpb=8c3489da5da7fb7274f34bfa091c54aa496e75bd;p=mothur.git diff --git a/decalc.cpp b/decalc.cpp index 5aed358..c951910 100644 --- a/decalc.cpp +++ b/decalc.cpp @@ -154,7 +154,7 @@ vector DeCalculator::calcObserved(Sequence* query, Sequence* subject, vec try { vector temp; - + //int gaps = 0; for (int m = 0; m < window.size(); m++) { string seqFrag = query->getAligned().substr(window[m], size); @@ -164,15 +164,22 @@ vector DeCalculator::calcObserved(Sequence* query, Sequence* subject, vec for (int b = 0; b < seqFrag.length(); b++) { //if at least one is a base and they are not equal if( (isalpha(seqFrag[b]) || isalpha(seqFragsub[b])) && (seqFrag[b] != seqFragsub[b]) ) { diff++; } + + //ignore gaps + //if((!isalpha(seqFrag[b])) && (!isalpha(seqFragsub[b]))) { gaps++; } } //percentage of mismatched bases float dist; //if the whole fragment is 0 distance = 0 + //if ((seqFrag.length()-gaps) == 0) { dist = 0.0; } + + //percentage of mismatched bases + //else { dist = diff / (float) (seqFrag.length()-gaps) * 100; } + + dist = diff / (float) (seqFrag.length()) * 100; - dist = diff / (float) (seqFrag.length()) * 100; - temp.push_back(dist); } @@ -368,7 +375,7 @@ vector< vector > DeCalculator::getQuantiles(vector seqs, //for each sequence for(int i = start; i < end; i++){ - mothurOut("Processing template sequence " + toString(i)); mothurOutEndLine(); + mothurOut("Processing sequence " + toString(i)); mothurOutEndLine(); Sequence* query = new Sequence(seqs[i]->getName(), seqs[i]->getAligned()); //compare to every other sequence in template @@ -409,27 +416,26 @@ vector< vector > DeCalculator::getQuantiles(vector seqs, quan[dist-1].push_back(newScore); //save highestDE - if(de > highestDE[i]) { highestDE[i] = de; } - if(de > highestDE[j]) { highestDE[j] = de; } + if (de > highestDE[i]) { highestDE[i] = de; } + if(de > highestDE[j]) { highestDE[j] = de; } delete subject; - } delete query; } - return quan; } catch(exception& e) { - errorOut(e, "DeCalculator", "findQav"); + errorOut(e, "DeCalculator", "getQuantiles"); exit(1); } } //*************************************************************************************************************** +//this was going to be used by pintail to increase the sensitivity of the chimera detection, but it wasn't quite right. may want to revisit in the future... vector< vector > DeCalculator::removeObviousOutliers(vector< vector >& quantiles, int num) { try { vector< vector > quan; @@ -487,8 +493,9 @@ vector< vector > DeCalculator::removeObviousOutliers(vector< vector marked = returnObviousOutliers(quantiles, num); vector copyMarked = marked; @@ -539,7 +546,7 @@ cout << "high = " << high << endl; quan[i] = temp; } - +*/ return quan; } catch(exception& e) { @@ -565,24 +572,19 @@ vector DeCalculator::returnObviousOutliers(vector< vector > qua sort(quantiles[i].begin(), quantiles[i].end(), compareQuanMembers); float high = quantiles[i][int(quantiles[i].size() * 0.99)].score; - float low = quantiles[i][int(quantiles[i].size() * 0.01)].score; //look at each value in quantiles to see if it is an outlier for (int j = 0; j < quantiles[i].size(); j++) { - //is this score between 1 and 99% - if ((quantiles[i][j].score > low) && (quantiles[i][j].score < high)) { + //is this score between above 99% + if (quantiles[i][j].score > high) { + //find out how "bad" of an outlier you are - so you can rank the outliers + float dist = quantiles[i][j].score - high; + contributions[&(quantiles[i][j])] = dist; - }else { - float dist; - //add to contributions - if (quantiles[i][j].score < low) { - dist = low - quantiles[i][j].score; - contributions[&(quantiles[i][j])] = dist; - }else { //you are higher than high - dist = quantiles[i][j].score - high; - contributions[&(quantiles[i][j])] = dist; - } + //penalizing sequences for being in multiple outliers + marked[quantiles[i][j].member1]++; + marked[quantiles[i][j].member2]++; } } } @@ -597,9 +599,9 @@ vector DeCalculator::returnObviousOutliers(vector< vector > qua //if member1 has greater score mark him //if member2 has greater score mark her //if they are the same mark both - if (marked[outliers[i].member1] > marked[outliers[i].member2]) { marked[outliers[i].member1]++; } - else if (marked[outliers[i].member2] > marked[outliers[i].member1]) { marked[outliers[i].member2]++; } - else if (marked[outliers[i].member2] == marked[outliers[i].member1]) { marked[outliers[i].member2]++; marked[outliers[i].member1]++; } + if (marked[outliers[i].member1] > marked[outliers[i].member2]) { marked[outliers[i].member1]++; } + else if (marked[outliers[i].member2] > marked[outliers[i].member1]) { marked[outliers[i].member2]++; } + else if (marked[outliers[i].member2] == marked[outliers[i].member1]) { marked[outliers[i].member2]++; marked[outliers[i].member1]++; } } return marked; @@ -610,6 +612,7 @@ vector DeCalculator::returnObviousOutliers(vector< vector > qua } } //*************************************************************************************************************** +//put quanMember in the vector based on how far they are from the 99% or 1%. Biggest offenders in front. vector DeCalculator::sortContrib(map quan) { try{ @@ -624,7 +627,7 @@ vector DeCalculator::sortContrib(map quan) { for (it = quan.begin(); it != quan.end(); it++) { if (it->second > largest->second) { largest = it; } } - +cout << largest->second << '\t' << largest->first->score << '\t' << largest->first->member1 << '\t' << largest->first->member2 << endl; //save it newQuan.push_back(*(largest->first)); @@ -642,7 +645,8 @@ vector DeCalculator::sortContrib(map quan) { } //*************************************************************************************************************** -int DeCalculator::findLargestContrib(vector seen) { +//used by removeObviousOutliers which was attempt to increase sensitivity of chimera detection...not currently used... +/*int DeCalculator::findLargestContrib(vector seen) { try{ int largest = 0; @@ -684,7 +688,7 @@ void DeCalculator::removeContrib(int bad, vector& quan) { exit(1); } } - +*/ //*************************************************************************************************************** float DeCalculator::findAverage(vector myVector) { try{