12 #ifndef MLPACK_CORE_DATA_LOAD_CSV_HPP
13 #define MLPACK_CORE_DATA_LOAD_CSV_HPP
15 #include <boost/spirit/include/qi.hpp>
16 #include <boost/algorithm/string/trim.hpp>
// Constructor declaration. Takes the file name by value and a `fatal` flag
// that defaults to false. `explicit` prevents implicit conversion from a
// std::string to a LoadCSV object.
// NOTE(review): presumably `file` and `fatal` initialize the `fileName` and
// `fatalIfOpenFail` members -- the definition is not visible here; confirm.
39 explicit LoadCSV(std::string file,
bool fatal =
false);
41 template<
typename T,
typename PolicyType>
51 TranposeParse(inout, infoSet);
55 NonTranposeParse(inout, infoSet);
72 template<
typename T,
typename MapPolicy>
82 inFile.seekg(0, std::ios::beg);
88 while (std::getline(inFile, line))
96 inFile.seekg(0, std::ios::beg);
98 while (std::getline(inFile, line))
105 auto findColSize = [&cols](iter_type) { ++cols; };
106 boost::spirit::qi::phrase_parse(line.begin(), line.end(),
107 CreateCharRule()[findColSize] %
",", boost::spirit::ascii::space);
112 if (MapPolicy::NeedsFirstPass)
115 auto firstPassMap = [&](
const iter_type& iter)
117 std::string str(iter.begin(), iter.end());
122 info.template MapFirstPass<T>(std::move(str), rows - 1);
126 boost::spirit::qi::phrase_parse(line.begin(), line.end(),
127 CreateCharRule()[firstPassMap] %
",", boost::spirit::ascii::space);
132 template<
typename T,
typename MapPolicy>
142 inFile.seekg(0, std::ios::beg);
147 while (std::getline(inFile, line))
154 auto findRowSize = [&rows](iter_type) { ++rows; };
155 boost::spirit::qi::phrase_parse(line.begin(), line.end(),
156 CreateCharRule()[findRowSize] %
",", boost::spirit::ascii::space);
163 if (MapPolicy::NeedsFirstPass)
168 auto firstPassMap = [&](
const iter_type& iter)
170 std::string str(iter.begin(), iter.end());
175 info.template MapFirstPass<T>(std::move(str), dim++);
179 boost::spirit::qi::phrase_parse(line.begin(), line.end(),
180 CreateCharRule()[firstPassMap] %
",", boost::spirit::ascii::space);
// Iterator range over a std::string. This is the argument type received by
// the semantic-action lambdas attached to CreateCharRule(); those lambdas
// build a std::string token from iter.begin() / iter.end().
186 using iter_type = boost::iterator_range<std::string::iterator>;
192 static typename std::enable_if<std::is_integral<T>::value,
193 boost::spirit::qi::int_parser<T>>::type
196 return boost::spirit::qi::int_parser<T>();
201 static typename std::enable_if<std::is_floating_point<T>::value,
202 boost::spirit::qi::real_parser<T>>::type
205 return boost::spirit::qi::real_parser<T>();
211 template<
typename T,
typename PolicyType>
212 void NonTranposeParse(arma::Mat<T> &inout, DatasetMapper<PolicyType> &infoSet)
214 using namespace boost::spirit;
218 GetMatrixSize<T>(rows, cols, infoSet);
221 inout.set_size(rows, cols);
228 inFile.seekg(0, std::ios::beg);
230 auto setCharClass = [&](iter_type
const &iter)
232 std::string str(iter.begin(), iter.end());
239 inout(row, col++) = infoSet.template MapString<T>(std::move(str), row);
242 auto charRule = CreateCharRule();
243 while (std::getline(inFile, line))
247 const bool canParse = qi::phrase_parse(line.begin(), line.end(),
248 charRule[setCharClass] %
",", ascii::space);
252 throw std::runtime_error(
"LoadCSV cannot parse categories");
259 template<
typename T,
typename PolicyType>
260 void TranposeParse(arma::Mat<T> &inout, DatasetMapper<PolicyType> &infoSet)
264 GetTransposeMatrixSize<T>(rows, cols, infoSet);
267 inout.set_size(rows, cols);
268 TranposeParseImpl(inout, infoSet);
271 template<
typename T,
typename PolicyType>
272 bool TranposeParseImpl(arma::Mat<T>& inout,
273 DatasetMapper<PolicyType>& infoSet)
275 using namespace boost::spirit;
281 inFile.seekg(0, std::ios::beg);
283 auto setCharClass = [&](iter_type
const &iter)
286 std::string str(iter.begin(), iter.end());
291 inout(row, col) = infoSet.template MapString<T>(std::move(str), row);
295 auto charRule = CreateCharRule();
296 while (std::getline(inFile, line))
303 const bool canParse = qi::phrase_parse(line.begin(), line.end(),
304 charRule[setCharClass] %
",",
308 throw std::runtime_error(
"LoadCSV cannot parse categories");
317 boost::spirit::qi::rule<std::string::iterator, T(), boost::spirit::ascii::space_type>
318 CreateNumRule()
const
320 using namespace boost::spirit;
323 auto elemParser = ElemParser::Parser<T>();
338 if(extension ==
"csv" || extension ==
"txt")
340 return elemParser >> &(qi::lit(
",") | qi::eol | qi::eoi);
344 return elemParser >> &(qi::lit(
"\t") | qi::eol | qi::eoi);
// Declaration only (the definition is not in this listing). Returns a
// boost::spirit::qi rule over std::string::iterator whose attribute is
// iter_type and whose skipper is ascii::space_type. Callers in this file
// attach a semantic action to the returned rule and combine it with `% ","`
// inside qi::phrase_parse to visit each element of a line.
348 boost::spirit::qi::rule<std::string::iterator, iter_type(), boost::spirit::ascii::space_type>
349 CreateCharRule()
const;
// File extension string. CreateNumRule() compares it against "csv" and "txt"
// to choose between the "," and "\t" look-ahead after an element.
351 std::string extension;
// NOTE(review): presumably set from the constructor's `fatal` argument; its
// use is not visible in this listing -- confirm.
352 bool fatalIfOpenFail;
// NOTE(review): presumably the path passed to the constructor -- confirm.
353 std::string fileName;
// Input stream that the size-detection and parse routines read line by line
// with std::getline and rewind with seekg(0, std::ios::beg).
354 std::ifstream inFile;
Auxiliary information for a dataset, including mappings to/from strings and the datatype of each dime...
Load the CSV file. This class uses boost::spirit to implement the parser; please refer to the following lin...
void Load(arma::Mat< T > &inout, DatasetMapper< PolicyType > &infoSet, bool transpose=true)
void GetTransposeMatrixSize(size_t &rows, size_t &cols, DatasetMapper< MapPolicy > &info)
void GetMatrixSize(size_t &rows, size_t &cols, DatasetMapper< MapPolicy > &info)
Peek at the file to determine the number of rows and columns in the matrix, assuming a non-transposed...
Include all of the base components required to write MLPACK methods, and the main MLPACK Doxygen docu...
LoadCSV(std::string file, bool fatal=false)