mlpack  2.2.5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
missing_policy.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_CORE_DATA_MAP_POLICIES_MISSING_POLICY_HPP
13 #define MLPACK_CORE_DATA_MAP_POLICIES_MISSING_POLICY_HPP
14 
15 #include <mlpack/prereqs.hpp>
16 #include <unordered_map>
17 #include <boost/bimap.hpp>
19 #include <limits>
20 
21 namespace mlpack {
22 namespace data {
23 
32 {
33  public:
34  // typedef of MappedType
35  using MappedType = double;
36 
38  {
39  // Nothing to initialize here.
40  }
41 
49  explicit MissingPolicy(std::set<std::string> missingSet) :
50  missingSet(std::move(missingSet))
51  {
52  // Nothing to initialize here.
53  }
54 
56  static const bool NeedsFirstPass = false;
57 
/**
 * First-pass hook; intentionally a no-op.
 *
 * Both arguments are ignored and no state is touched.
 *
 * @tparam T Unused by this implementation.
 */
template<typename T>
void MapFirstPass(const std::string& /* string */, const size_t /* dim */)
{
  // Intentionally empty.
}
67 
82  template<typename MapType, typename T>
83  T MapString(const std::string& string,
84  const size_t dimension,
85  MapType& maps,
86  std::vector<Datatype>& /* types */)
87  {
88  static_assert(std::numeric_limits<T>::has_quiet_NaN == true,
89  "Cannot use MissingPolicy with types where has_quiet_NaN() is false!");
90 
91  // If we can load the string then there is no need for mapping.
92  std::stringstream token;
93  token.str(string);
94  T t;
95  token >> t; // Could be sped up by only doing this if we need to.
96 
97  // If extraction of the value fails, or if it is a value that is supposed to
98  // be mapped, then do mapping.
99  if (token.fail() || !token.eof() ||
100  missingSet.find(string) != std::end(missingSet))
101  {
102  // Everything is mapped to NaN. However we must still keep track of
103  // everything that we have mapped, so we add it to the maps if needed.
104  if (maps.count(dimension) == 0 ||
105  maps[dimension].first.left.count(string) == 0)
106  {
107  // This string does not exist yet.
108  typedef boost::bimap<std::string, MappedType>::value_type PairType;
109  maps[dimension].first.insert(PairType(string,
110  std::numeric_limits<MappedType>::quiet_NaN()));
111  maps[dimension].second++;
112  }
113 
114  return std::numeric_limits<T>::quiet_NaN();
115  }
116  else
117  {
118  // We can just return the value that we read.
119  return t;
120  }
121  }
122 
139  template <typename eT, typename MapType>
140  void MapTokens(const std::vector<std::string>& tokens,
141  size_t& row,
142  arma::Mat<eT>& matrix,
143  MapType& maps,
144  std::vector<Datatype>& types)
145  {
146  // MissingPolicy allows double type matrix only, because it uses NaN.
147  static_assert(std::is_same<eT, double>::value, "You must use double type "
148  " matrix in order to apply MissingPolicy");
149 
150  std::stringstream token;
151  for (size_t i = 0; i != tokens.size(); ++i)
152  {
153  token.str(tokens[i]);
154  token>>matrix.at(row, i);
155  // if the token is not number, map it.
156  // or if token is a number, but is included in the missingSet, map it.
157  if (token.fail() || missingSet.find(tokens[i]) != std::end(missingSet))
158  {
159  const eT val = static_cast<eT>(this->MapString(tokens[i], row, maps,
160  types));
161  matrix.at(row, i) = val;
162  }
163  token.clear();
164  }
165  }
166 
167  private:
168  // Note that missingSet and maps are different.
169  // missingSet specifies which value/string should be mapped and may be a
170  // superset of 'maps'.
171  std::set<std::string> missingSet;
172 }; // class MissingPolicy
173 
174 } // namespace data
175 } // namespace mlpack
176 
177 #endif
void MapTokens(const std::vector< std::string > &tokens, size_t &row, arma::Mat< eT > &matrix, MapType &maps, std::vector< Datatype > &types)
MapTokens turns vector of strings into numeric variables and puts them into a given matrix...
The core includes that mlpack expects; standard C++ includes and Armadillo.
T MapString(const std::string &string, const size_t dimension, MapType &maps, std::vector< Datatype > &)
Given the string and the dimension to which it belongs by the user, and the maps and types given by t...
void MapFirstPass(const std::string &, const size_t)
There is nothing for us to do here, but this is required by the MapPolicy type.
static const bool NeedsFirstPass
This doesn't need a first pass over the data to set up.
MissingPolicy(std::set< std::string > missingSet)
Create the MissingPolicy object with the given missingSet.
MissingPolicy is used as a helper class for DatasetMapper.