X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=decalc.h;fp=decalc.h;h=d6cca182e81937dd5a494d57c2c8221b6039ad45;hb=0caf3fbabaa3ece404f8ce77f4c883dc5b1bf1dc;hp=0000000000000000000000000000000000000000;hpb=1b73ff67c83892a025e597dabd9df6fe7b58206a;p=mothur.git diff --git a/decalc.h b/decalc.h new file mode 100644 index 0000000..d6cca18 --- /dev/null +++ b/decalc.h @@ -0,0 +1,80 @@ +#ifndef DECALC_H +#define DECALC_H +/* + * decalc.h + * Mothur + * + * Created by Sarah Westcott on 7/22/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. + * + */ + +#include "mothur.h" +#include "sequence.hpp" + +/***********************************************************************/ + +//This class was created using the algorithms described in the +// "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper +//by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1. + +/***********************************************************************/ + +//this structure is necessary to determine the sequence that contributed to the outliers when we remove them +//this way we can remove all scores that are contributed by outlier sequences. 
+struct quanMember { + float score; + int member1; + int member2; + quanMember (float s, int m, int n) : score(s), member1(m), member2(n) {} + quanMember() {} + +}; + +//******************************************************************************************************************** +class DeCalculator { + + public: + + DeCalculator() { m = MothurOut::getInstance(); } + ~DeCalculator() {}; + + vector findClosest(Sequence, vector&, vector&, int, int); //takes querySeq, a reference db, filteredRefDB, numWanted, minSim + Sequence* findClosest(Sequence*, vector); + set getPos() { return h; } + void setMask(string); + void setAlignmentLength(int l) { alignLength = l; } + void runMask(Sequence*); + void trimSeqs(Sequence*, Sequence*, map&); + map trimSeqs(Sequence&, vector&); + void removeObviousOutliers(vector< vector >&, int); + vector calcFreq(vector, string); + vector findWindows(Sequence*, int, int, int&, int); + vector calcObserved(Sequence*, Sequence*, vector, int); + vector calcExpected(vector, float); + vector findQav(vector, int, vector); + float calcDE(vector, vector); + float calcDist(Sequence*, Sequence*, int, int); + float getCoef(vector, vector); + vector< vector > getQuantiles(vector, vector, int, vector, int, int, int); + + vector returnObviousOutliers(vector< vector >, int); + + map getMaskMap() { return maskMap; } + + private: + //vector sortContrib(map); //used by mallard + float findAverage(vector); + //int findLargestContrib(vector); + //void removeContrib(int, vector&); + string seqMask; + set h; + int alignLength; + map maskMap; + MothurOut* m; + +}; + +/***********************************************************************/ + +#endif