12 #ifndef MLPACK_CORE_DATA_MAP_POLICIES_INCREMENT_POLICY_HPP
13 #define MLPACK_CORE_DATA_MAP_POLICIES_INCREMENT_POLICY_HPP
16 #include <unordered_map>
17 #include <boost/bimap.hpp>
44 std::vector<Datatype>& types)
55 std::stringstream token;
60 if (token.fail() || !token.eof())
80 template<
typename MapType,
typename T>
82 const size_t dimension,
84 std::vector<Datatype>& types)
92 std::stringstream token;
97 if (!token.fail() && token.eof())
109 if (maps.count(dimension) == 0 ||
110 maps[dimension].first.left.count(
string) == 0)
113 size_t& numMappings = maps[dimension].second;
116 if (numMappings == 0)
119 typedef boost::bimap<std::string, MappedType>::value_type PairType;
120 maps[dimension].first.insert(PairType(
string, numMappings));
121 return T(numMappings++);
126 return maps[dimension].first.left.at(
string);
145 template <
typename eT,
typename MapType>
148 arma::Mat<eT>& matrix,
150 std::vector<Datatype>& types)
152 auto notNumber = [](
const std::string& str)
155 std::stringstream token;
161 const bool notNumeric = std::any_of(std::begin(tokens),
162 std::end(tokens), notNumber);
165 for (
size_t i = 0; i != tokens.size(); ++i)
167 const eT val =
static_cast<eT
>(this->
MapString(tokens[i], row, maps,
169 matrix.at(row, i) = val;
174 std::stringstream token;
175 for (
size_t i = 0; i != tokens.size(); ++i)
177 token.str(tokens[i]);
178 token >> matrix.at(row, i);
IncrementPolicy is used as a helper class for DatasetMapper.
The core includes that mlpack expects; standard C++ includes and Armadillo.
void MapTokens(const std::vector< std::string > &tokens, size_t &row, arma::Mat< eT > &matrix, MapType &maps, std::vector< Datatype > &types)
MapTokens turns a vector of strings into numeric variables and puts them into a given matrix...
static const bool NeedsFirstPass
We do need a first pass over the data to set the dimension types right.
T MapString(const std::string &string, const size_t dimension, MapType &maps, std::vector< Datatype > &types)
Given the string and the dimension to which it belongs, and the maps and types given by the Datas...
void MapFirstPass(const std::string &string, const size_t dim, std::vector< Datatype > &types)
Determine if the dimension is numeric or categorical.