X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=decalc.h;h=d1daf050e004333228e39fb5aaa7621dd1b0bcf6;hb=3914b0d6480f67df53b1e838f51c4e6155710434;hp=5818539dd8db0c94f19c62bfa942eb499368149b;hpb=dbd5da8043df1cb9f5ff7c6ddb5f550ea49b52c2;p=mothur.git diff --git a/decalc.h b/decalc.h index 5818539..d1daf05 100644 --- a/decalc.h +++ b/decalc.h @@ -14,31 +14,64 @@ /***********************************************************************/ -//This class was created using the algorythms described in the +//This class was created using the algorithms described in the // "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper //by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1. /***********************************************************************/ +//this structure is necessary to determine the sequence that contributed to the outliers when we remove them +//this way we can remove all scores that are contributed by outlier sequences. +struct quanMember { + float score; + int member1; + int member2; + quanMember (float s, int m, int n) : score(s), member1(m), member2(n) {} + quanMember() {} + +}; + +//******************************************************************************************************************** class DeCalculator { public: - - void trimSeqs(Sequence*, Sequence, map&); - vector readFreq(); - vector calcFreq(vector); - vector findPairs(int, int); - vector findWindows(Sequence*, int, int, int&); - vector calcObserved(Sequence*, Sequence, vector, int); + DeCalculator() { m = MothurOut::getInstance(); } + ~DeCalculator() {}; + + vector findClosest(Sequence, vector&, vector&, int, int); //takes querySeq, a reference db, filteredRefDB, numWanted, minSim + Sequence* findClosest(Sequence*, vector); + set getPos() { return h; } + void setMask(string); + void setAlignmentLength(int l) { alignLength = l; } + void runMask(Sequence*); + void trimSeqs(Sequence*, Sequence*, map&); + map trimSeqs(Sequence&, vector&); + void removeObviousOutliers(vector< vector >&, int); + vector calcFreq(vector, string); + vector findWindows(Sequence*, int, int, int&, int); + vector calcObserved(Sequence*, Sequence*, vector, int); vector calcExpected(vector, float); - vector findQav(vector, int); + vector findQav(vector, int, vector); float calcDE(vector, vector); - float calcDist(Sequence*, Sequence, int, int); + float calcDist(Sequence*, Sequence*, int, int); float getCoef(vector, vector); + vector< vector > getQuantiles(vector, vector, int, vector, int, int, int); + + vector returnObviousOutliers(vector< vector >, int); + + map getMaskMap() { return maskMap; } private: - + //vector sortContrib(map); //used by mallard + float findAverage(vector); + //int findLargestContrib(vector); + //void removeContrib(int, vector&); + string seqMask; + set h; + int alignLength; + map maskMap; + MothurOut* m; };