1 /*************************************************************************** 2 * Copyright (c) 2005-2020 by Christophe GONZALES(_at_AMU) and Pierre-Henri WUILLEMIN(_at_LIP6) * 3 * info_at_agrum_dot_org * 4 * * 5 * This program is free software; you can redistribute it and/or modify * 6 * it under the terms of the GNU General Public License as published by * 7 * the Free Software Foundation; either version 2 of the License, or * 8 * (at your option) any later version. * 9 * * 10 * This program is distributed in the hope that it will be useful, * 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 13 * GNU General Public License for more details. * 14 * * 15 * You should have received a copy of the GNU General Public License * 16 * along with this program; if not, write to the * 17 * Free Software Foundation, Inc., * 18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * 19 ***************************************************************************/ 20 /** @file 21 * @brief Class for fast parsing of CSV file (never more than one line in 22 * application memory) 23 * 24 * Typical use : 25 * @code 26 * // open the CSV file 27 * std::string filename="foo.csv" 28 * std::ifstream in(filename.c_str()); 29 * gum::learning::CSVParser csvp(in); 30 * 31 * // read each line in the CSV file 32 * while (csvp.next()) { 33 * csvp.current (); 34 * } 35 * 36 * in.close(); 37 * @endcode 38 * 39 * @author Pierre-Henri WUILLEMIN(_at_LIP6) & Christophe GONZALES(_at_AMU) 40 * 41 */ 42 43 #ifndef GUM_CSV_PARSER_H 44 #define GUM_CSV_PARSER_H 45 46 #include <istream> 47 #include <string> 48 #include <vector> 49 50 #include <agrum/agrum.h> 51 52 namespace gum { 53 54 namespace learning { 55 56 /** @class CSVParser 57 * @ingroup learning_database 58 * @headerfile CSVParser.h <agrum/tools/database/CSVParser.h> 59 * @brief Class for fast parsing of CSV file (never more than one 60 * line in application memory) 61 * 62 * Typical use: 63 * @code 64 * // open the CSV file 65 * std::string filename="foo.csv" 66 * std::ifstream in(filename.c_str()); 67 * gum::learning::CSVParser<> csvp(in); 68 * 69 * // read each line in the CSV file 70 * while (csvp.next()) { 71 * csvp.current (); 72 * } 73 * 74 * in.close(); 75 * @endcode 76 */ 77 template < template < typename > class ALLOC = std::allocator > 78 class CSVParser { 79 public: 80 /// type for the allocators passed in arguments of methods 81 using allocator_type = ALLOC< std::string >; 82 83 84 // ########################################################################## 85 /// @name Constructors / Destructors 86 // ########################################################################## 87 /// @{ 88 89 /// default constructor 90 /** @param in an input stream containing the CSV 91 * @param delimiter the character that acts as the column separator in 92 * the CSV 93 * @param commentmarker the character that marks the beginning of a comment 94 * @param quoteMarker the character that is used to quote the sentences 95 * in the CSV 96 * @param alloc the allocator used by all the methods 97 */ 98 CSVParser(std::istream& in, 99 const std::string& filename, 100 const std::string& delimiter = ",", 101 const char commentmarker = '#', 102 const char quoteMarker = '"', 103 const allocator_type& alloc = allocator_type()); 104 105 /// destructor 106 virtual ~CSVParser(); 107 108 /// @} 109 110 111 // ######################################################################## 112 /// @name Accessors / Modifiers 113 // ######################################################################## 114 /// @{ 115 116 /// gets the next line of the csv stream and parses it 117 /** @return false if there is no next line 118 */ 119 bool next(); 120 121 /// returns the current parsed line 122 /** @throw NullElement is raised if there is no data 123 */ 124 const std::vector< std::string, ALLOC< std::string > >& current() const; 125 126 /// returns the current line number within the stream 127 const std::size_t nbLine() const; 128 129 /// reopens a new input stream to parse 130 void useNewStream(std::istream& in, 131 const std::string& delimiter = ",", 132 const char commentmarker = '#', 133 const char quoteMarker = '"'); 134 135 /// @} 136 137 138 #ifndef DOXYGEN_SHOULD_SKIP_THIS 139 140 private: 141 void _getNextTriplet_(const std::string& str, 142 std::size_t& first_letter_token, 143 std::size_t& next_token, 144 std::size_t& last_letter_token, 145 std::size_t from) const; 146 147 void _tokenize_(const std::string& str); 148 149 std::size_t _correspondingQuoteMarker_(const std::string& str, std::size_t pos) const; 150 151 152 std::string _line_; 153 std::string _delimiter_; 154 std::string _spaces_; 155 std::string _delimiterPlusSpaces_; 156 std::size_t _nbLine_; 157 char _commentMarker_; 158 char _quoteMarker_; 159 bool _emptyData_; 160 161 std::istream* _instream_; 162 std::vector< std::string, ALLOC< std::string > > _data_; 163 const std::string _filename_; 164 165 #endif /* DOXYGEN_SHOULD_SKIP_THIS */ 166 }; 167 168 } // namespace learning 169 170 } // namespace gum 171 172 #include <agrum/tools/database/CSVParser_tpl.h> 173 174 #endif // GUM_CSV_PARSER_H 175