X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=abstractrandomforest.hpp;fp=abstractrandomforest.hpp;h=3be91b9b9b47a8f04ffbb30ff31d4448549f3d50;hb=035f86272c776e1cccaa47021e26782e49cd41e7;hp=0000000000000000000000000000000000000000;hpb=96dbe925073caefaed6e6db85659c144a806aeb1;p=mothur.git diff --git a/abstractrandomforest.hpp b/abstractrandomforest.hpp new file mode 100755 index 0000000..3be91b9 --- /dev/null +++ b/abstractrandomforest.hpp @@ -0,0 +1,67 @@ +// +// abstractrandomforest.hpp +// rrf-fs-prototype +// +// Created by Abu Zaher Faridee on 7/20/12. +// Copyright (c) 2012 Schloss Lab. All rights reserved. +// + +#ifndef rrf_fs_prototype_abstractrandomforest_hpp +#define rrf_fs_prototype_abstractrandomforest_hpp + +#include "mothurout.h" +#include "macros.h" +#include "abstractdecisiontree.hpp" + +#define DEBUG_MODE + +/***********************************************************************/ + +class AbstractRandomForest{ +public: + // intialization with vectors + AbstractRandomForest(const std::vector < std::vector > dataSet, + const int numDecisionTrees, + const string); + virtual ~AbstractRandomForest(){ } + virtual int populateDecisionTrees() = 0; + virtual int calcForrestErrorRate() = 0; + virtual int calcForrestVariableImportance(string) = 0; + +/***********************************************************************/ + +protected: + + // TODO: create a better way of discarding feature + // currently we just set FEATURE_DISCARD_SD_THRESHOLD to 0 to solved this + // it can be tuned for better selection + // also, there might be other factors like Mean or other stuffs + // same would apply for createLocalDiscardedFeatureList in the TreeNode class + + // TODO: Another idea is getting an aggregated discarded feature indices after the run, from combining + // the local discarded feature indices + // this would penalize a feature, even if in global space the feature looks quite good + // the penalization would be averaged, so this woould unlikely to create a local optmina + + vector getGlobalDiscardedFeatureIndices(); + + int numDecisionTrees; + int numSamples; + int numFeatures; + vector< vector > dataSet; + vector globalDiscardedFeatureIndices; + vector globalVariableImportanceList; + string treeSplitCriterion; + // This is a map of each feature to outcome count of each classes + // e.g. 1 => [2 7] means feature 1 has 2 outcome of 0 and 7 outcome of 1 + map > globalOutOfBagEstimates; + + // TODO: fix this, do we use pointers? + vector decisionTrees; + + MothurOut* m; + +private: + +}; +#endif