Alexandria  2.16
Please provide a description of the project.
AsciiReader.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2012-2020 Euclid Science Ground Segment
3  *
4  * This library is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU Lesser General Public License as published by the Free
6  * Software Foundation; either version 3.0 of the License, or (at your option)
7  * any later version.
8  *
9  * This library is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU Lesser General Public License
15  * along with this library; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
25 #include <fstream>
26 #include <set>
27 // The std regex library is not fully implemented in GCC 4.8, so Boost.Regex is
28 // used instead. The following lines can be switched back to <regex> once
29 // GCC 4.9 or later is the minimum supported compiler.
30 // #include <regex>
31 #include <boost/regex.hpp>
32 using boost::regex;
33 using boost::regex_match;
34 #include <boost/algorithm/string.hpp>
35 #include <boost/io/detail/quoted_manip.hpp>
36 
38 #include "Table/AsciiReader.h"
39 
40 #include "ReaderHelper.h"
41 #include "AsciiReaderHelper.h"
42 
43 namespace Euclid {
44 namespace Table {
45 
46 AsciiReader::AsciiReader(std::istream& stream) : AsciiReader(InstOrRefHolder<std::istream>::create(stream)) {
47 }
48 
49 AsciiReader::AsciiReader(const std::string& filename) : AsciiReader(create<std::ifstream>(filename)) {
50 }
51 
53  : m_stream_holder(std::move(stream_holder)) {
54 }
55 
57  if (m_reading_started) {
58  throw Elements::Exception() << "Changing comment indicator after reading "
59  << "has started is not allowed";
60  }
61  if (indicator.empty()) {
62  throw Elements::Exception() << "Empty string as comment indicator";
63  }
64  m_comment = indicator;
65  return *this;
66 }
67 
69  if (m_reading_started) {
70  throw Elements::Exception() << "Fixing the column names after reading "
71  << "has started is not allowed";
72  }
73 
74  m_column_names = std::move(column_names);
75 
76  std::set<std::string> set {};
77  regex vertical_whitespace {".*\\v.*"}; // Checks if input contains any whitespace characters
78  for (const auto& name : m_column_names) {
79  if (name.empty()) {
80  throw Elements::Exception() << "Empty string column names are not allowed";
81  }
82  if (regex_match(name, vertical_whitespace)) {
83  throw Elements::Exception() << "Column name '" << name << "' contains "
84  << "vertical whitespace characters";
85  }
86  if (!set.insert(name).second) { // Check for duplicate names
87  throw Elements::Exception() << "Duplicate column name " << name;
88  }
89  }
92  throw Elements::Exception() << "Different number of column names and types";
93  }
94 
95  return *this;
96 }
97 
99  if (m_reading_started) {
100  throw Elements::Exception() << "Fixing the column types after reading "
101  << "has started is not allowed";
102  }
103 
104  m_column_types = std::move(column_types);
105 
108  throw Elements::Exception() << "Different number of column names and types";
109  }
110 
111  return *this;
112 }
113 
115  if (m_column_info != nullptr) {
116  return;
117  }
118  m_reading_started = true;
119 
120  auto& in = m_stream_holder->ref();
121 
122  size_t columns_number = countColumns(in, m_comment);
123  if (!m_column_names.empty() && m_column_names.size() != columns_number) {
124  throw Elements::Exception() << "Columns number in stream (" << columns_number
125  << ") does not match the column names number ("
126  << m_column_names.size() << ")";
127  }
128  if (!m_column_types.empty() && m_column_types.size() != columns_number) {
129  throw Elements::Exception() << "Columns number in stream (" << columns_number
130  << ") does not match the column types number ("
131  << m_column_types.size() << ")";
132  }
133 
134  auto auto_names = autoDetectColumnNames(in, m_comment, columns_number);
135  auto auto_desc = autoDetectColumnDescriptions(in, m_comment);
136 
137  std::vector<std::string> names {};
139  std::vector<std::string> units {};
140  std::vector<std::string> descriptions {};
141  for (size_t i=0; i<columns_number; ++i) {
142  if (m_column_names.empty()) {
143  names.emplace_back(auto_names[i]);
144  } else {
145  names.emplace_back(m_column_names[i]);
146  }
147  auto info = auto_desc.find(auto_names[i]);
148  if (info != auto_desc.end()) {
149  if (m_column_types.empty()) {
150  types.emplace_back(info->second.type);
151  } else {
152  types.emplace_back(m_column_types[i]);
153  }
154  units.emplace_back(info->second.unit);
155  descriptions.emplace_back(info->second.description);
156  } else {
157  if (m_column_types.empty()) {
158  types.emplace_back(typeid(std::string));
159  } else {
160  types.emplace_back(m_column_types[i]);
161  }
162  units.emplace_back("");
163  descriptions.emplace_back("");
164  }
165  }
166  m_column_info = createColumnInfo(names, types, units, descriptions);
167 
168 }
169 
170 
172  readColumnInfo();
173  return *m_column_info;
174 }
175 
/**
 * Returns the next line of the stream without consuming it.
 *
 * The current read position is remembered, one line is read with getline and
 * the stream is then sought back to the remembered position.
 *
 * @param in the stream to peek into
 * @return the line that the next getline call on the stream would return
 */
static std::string _peekLine(std::istream& in) {
  std::string line;
  auto pos = in.tellg();
  getline(in, line);
  in.seekg(pos);
  return line;
}
183 
185  std::ostringstream comment;
186 
187  m_reading_started = true;
188  auto &in = m_stream_holder->ref();
189  while (in && _peekLine(in).compare(0, m_comment.size(), m_comment) == 0) {
190  std::string line;
191  getline(in, line);
192  line = line.substr(m_comment.size());
193  boost::trim(line);
194  comment << line << '\n';
195  }
196 
197  auto full_comment = comment.str();
198  boost::trim(full_comment);
199  return full_comment;
200 }
201 
203  readColumnInfo();
204  auto& in = m_stream_holder->ref();
205 
206  std::vector<Row> row_list;
207  while(in && rows != 0) {
208  std::string line;
209  getline(in, line);
210  size_t comment_pos = line.find(m_comment);
211  if (comment_pos != std::string::npos) {
212  line = line.substr(0, comment_pos);
213  }
214  boost::trim(line);
215  if (!line.empty()) {
216  --rows;
217  std::stringstream line_stream(line);
218  size_t count {0};
219  std::vector<Row::cell_type> values {};
220  std::string token;
221  line_stream >> token;
222  while (line_stream) {
223  if (count >= m_column_info->size()) {
224  throw Elements::Exception() << "Line with wrong number of cells: " << line;
225  }
226  values.push_back(convertToCellType(token, m_column_info->getDescription(count).type));
227  line_stream >> boost::io::quoted(token);
228  ++count;
229  }
230  row_list.push_back(Row{std::move(values), m_column_info});
231  }
232  }
233 
234  if (row_list.empty()) {
235  throw Elements::Exception() << "No more table rows left";
236  }
237  return Table{std::move(row_list)};
238 }
239 
240 void AsciiReader::skip(long rows) {
241  readColumnInfo();
242  auto& in = m_stream_holder->ref();
243 
244  while(in && rows != 0) {
245  std::string line;
246  getline(in, line);
247  size_t comment_pos = line.find(m_comment);
248  if (comment_pos != std::string::npos) {
249  line = line.substr(0, comment_pos);
250  }
251  boost::trim(line);
252  if (!line.empty()) {
253  --rows;
254  }
255  }
256 }
257 
259  return hasNextRow(m_stream_holder->ref(), m_comment);
260 }
261 
264 }
265 
266 } // Table namespace
267 } // Euclid namespace
268 
269 
270 
std::vector< std::type_index > m_column_types
Definition: AsciiReader.h:233
std::size_t countRemainingRows(std::istream &in, const std::string &comment)
AsciiReader & setCommentIndicator(const std::string &indicator)
Set the comment indicator.
Definition: AsciiReader.cpp:56
T empty(T... args)
std::shared_ptr< ColumnInfo > createColumnInfo(const std::vector< std::string > &names, const std::vector< std::type_index > &types, const std::vector< std::string > &units, const std::vector< std::string > &descriptions)
Creates a ColumnInfo object from the given names and types.
constexpr double second
std::shared_ptr< ColumnInfo > m_column_info
Definition: AsciiReader.h:235
STL namespace.
Row::cell_type convertToCellType(const std::string &value, std::type_index type)
Converts the given value to a Row::cell_type of the given type.
STL class.
const ColumnInfo & getInfo() override
Returns the column information of the table.
T seekg(T... args)
STL class.
T push_back(T... args)
std::size_t rowsLeft() override
Implements the TableReader::rowsLeft() contract.
std::unique_ptr< InstOrRefHolder< std::istream > > m_stream_holder
Definition: AsciiReader.h:230
Table readImpl(long rows) override
Reads the next rows into a Table.
AsciiReader(std::istream &stream)
Constructs an AsciiReader which reads from the given stream.
Definition: AsciiReader.cpp:46
TableReader implementation for reading ASCII tables from streams.
Definition: AsciiReader.h:87
T str(T... args)
T move(T... args)
T tellg(T... args)
std::map< std::string, ColumnDescription > autoDetectColumnDescriptions(std::istream &in, const std::string &comment)
Reads the column descriptions of the given stream.
Represents one row of a Table.
Definition: Row.h:64
Represents a table.
Definition: Table.h:49
bool hasNextRow(std::istream &in, const std::string &comment)
T find(T... args)
std::string quoted(const std::string &str)
T size(T... args)
STL class.
void skip(long rows) override
Implements the TableReader::skip() contract.
Provides information about the columns of a Table.
Definition: ColumnInfo.h:52
STL class.
AsciiReader & fixColumnTypes(std::vector< std::type_index > column_types)
Overrides the automatically detected column types.
Definition: AsciiReader.cpp:98
std::vector< std::string > m_column_names
Definition: AsciiReader.h:234
std::string getComment() override
T substr(T... args)
static std::string _peekLine(std::istream &in)
bool hasMoreRows() override
Implements the TableReader::hasMoreRows() contract.
size_t countColumns(std::istream &in, const std::string &comment)
Returns the number of whitespace separated tokens of the first non commented line.
AsciiReader & fixColumnNames(std::vector< std::string > column_names)
Overrides the automatically detected column names.
Definition: AsciiReader.cpp:68
std::vector< std::string > autoDetectColumnNames(std::istream &in, const std::string &comment, size_t columns_number)
Reads the column names of the given stream.
T emplace_back(T... args)