mlpack  2.2.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
hoeffding_categorical_split.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_CATEGORICAL_SPLIT_HPP
14 #define MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_CATEGORICAL_SPLIT_HPP
15 
16 #include <mlpack/prereqs.hpp>
18 
19 namespace mlpack {
20 namespace tree {
21 
43 template<typename FitnessFunction>
45 {
46  public:
49 
57  HoeffdingCategoricalSplit(const size_t numCategories,
58  const size_t numClasses);
59 
66  HoeffdingCategoricalSplit(const size_t numCategories,
67  const size_t numClasses,
68  const HoeffdingCategoricalSplit& other);
69 
76  template<typename eT>
77  void Train(eT value, const size_t label);
78 
89  void EvaluateFitnessFunction(double& bestFitness, double& secondBestFitness)
90  const;
91 
93  size_t NumChildren() const { return sufficientStatistics.n_cols; }
94 
102  void Split(arma::Col<size_t>& childMajorities, SplitInfo& splitInfo);
103 
105  size_t MajorityClass() const;
107  double MajorityProbability() const;
108 
110  template<typename Archive>
111  void Serialize(Archive& ar, const unsigned int /* version */)
112  {
113  ar & data::CreateNVP(sufficientStatistics, "sufficientStatistics");
114  }
115 
116  private:
120  arma::Mat<size_t> sufficientStatistics;
121 };
122 
123 } // namespace tree
124 } // namespace mlpack
125 
126 // Include implementation.
127 #include "hoeffding_categorical_split_impl.hpp"
128 
129 #endif
void Split(arma::Col< size_t > &childMajorities, SplitInfo &splitInfo)
Gather the information for a split: get the labels of the child majorities, and initialize the SplitInfo object.
void EvaluateFitnessFunction(double &bestFitness, double &secondBestFitness) const
Given the points seen so far, evaluate the fitness function, returning the gain for the best possible split (in bestFitness) and for the second best possible split (in secondBestFitness).
FirstShim< T > CreateNVP(T &t, const std::string &name, typename boost::enable_if< HasSerialize< T >>::type *=0)
Call this function to produce a name-value pair; this is similar to BOOST_SERIALIZATION_NVP(), but should be used for types that have a Serialize() function (or contain a type that has a Serialize() function) instead of a serialize() function.
void Serialize(Archive &ar, const unsigned int)
Serialize the categorical split.
HoeffdingCategoricalSplit(const size_t numCategories, const size_t numClasses)
Create the HoeffdingCategoricalSplit given a number of categories for this dimension and a number of classes.
The core includes that mlpack expects; standard C++ includes and Armadillo.
size_t MajorityClass() const
Get the majority class seen so far.
void Train(eT value, const size_t label)
Train on the given value with the given label.
double MajorityProbability() const
Get the probability of the majority class given the points seen so far.
CategoricalSplitInfo SplitInfo
The type of split information required by the HoeffdingCategoricalSplit.
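This is the standard Hoeffding-bound categorical feature split proposed in the following paper: P. Domingos and G. Hulten, "Mining High-Speed Data Streams", Proceedings of the Sixth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD '00), 2000, pp. 71-80.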
size_t NumChildren() const
Return the number of children, if the node were to split.