X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=abstractdecisiontree.hpp;h=cc238b4687e57c6cab840f5ec31ce10adbc8a655;hb=541bab1dac00688b4c3a8c4a95ab464412663c50;hp=3445db4a511e8705ed5b0bed14626eb869ce5043;hpb=90708fe9701e3827e477c82fb3652539c3bf2a0d;p=mothur.git diff --git a/abstractdecisiontree.hpp b/abstractdecisiontree.hpp index 3445db4..cc238b4 100755 --- a/abstractdecisiontree.hpp +++ b/abstractdecisiontree.hpp @@ -6,8 +6,8 @@ // Copyright (c) 2012 Schloss Lab. All rights reserved. // -#ifndef rrf_fs_prototype_abstractdecisiontree_hpp -#define rrf_fs_prototype_abstractdecisiontree_hpp +#ifndef RF_ABSTRACTDECISIONTREE_HPP +#define RF_ABSTRACTDECISIONTREE_HPP #include "mothurout.h" #include "macros.h" @@ -17,14 +17,22 @@ /**************************************************************************************************/ +struct IntPairVectorSorter{ + bool operator() (const pair& firstPair, const pair& secondPair) { + return firstPair.first < secondPair.first; + } +}; + +/**************************************************************************************************/ + class AbstractDecisionTree{ public: - AbstractDecisionTree(vector >baseDataSet, - vector globalDiscardedFeatureIndices, - OptimumFeatureSubsetSelector optimumFeatureSubsetSelector, - string treeSplitCriterion); + AbstractDecisionTree(vector >& baseDataSet, + vector globalDiscardedFeatureIndices, + OptimumFeatureSubsetSelector optimumFeatureSubsetSelector, + string treeSplitCriterion); virtual ~AbstractDecisionTree(){} @@ -32,23 +40,29 @@ protected: virtual int createBootStrappedSamples(); virtual int getMinEntropyOfFeature(vector featureVector, vector outputVector, double& minEntropy, int& featureSplitValue, double& intrinsicValue); - virtual int getBestSplitAndMinEntropy(vector< vector > featureOutputPairs, vector splitPoints, double& minEntropy, int& minEntropyIndex, double& relatedIntrinsicValue); + virtual int getBestSplitAndMinEntropy(vector< pair > featureOutputPairs, vector splitPoints, double& minEntropy, int& minEntropyIndex, double& relatedIntrinsicValue); virtual double calcIntrinsicValue(int numLessThanValueAtSplitPoint, int numGreaterThanValueAtSplitPoint, int numSamples); - virtual double calcSplitEntropy(vector< vector > featureOutputPairs, int splitIndex, int numOutputClasses, bool); + virtual double calcSplitEntropy(vector< pair > featureOutputPairs, int splitIndex, int numOutputClasses, bool); + virtual int getSplitPopulation(RFTreeNode* node, vector< vector >& leftChildSamples, vector< vector >& rightChildSamples); virtual bool checkIfAlreadyClassified(RFTreeNode* treeNode, int& outputClass); - vector< vector > baseDataSet; + vector< vector >& baseDataSet; int numSamples; int numFeatures; int numOutputClasses; vector outputClasses; + vector< vector > bootstrappedTrainingSamples; vector bootstrappedTrainingSampleIndices; vector< vector > bootstrappedTestSamples; vector bootstrappedTestSampleIndices; + vector > testSampleFeatureVectors; + RFTreeNode* rootNode; + int nodeIdCount; + map nodeMisclassificationCounts; vector globalDiscardedFeatureIndices; int optimumFeatureSubsetSize; string treeSplitCriterion;