1 /*************************************************************************** 2 * Copyright (c) 2005-2020 by Christophe GONZALES(_at_AMU) and Pierre-Henri WUILLEMIN(_at_LIP6) * 3 * info_at_agrum_dot_org * 4 * * 5 * This program is free software; you can redistribute it and/or modify * 6 * it under the terms of the GNU General Public License as published by * 7 * the Free Software Foundation; either version 2 of the License, or * 8 * (at your option) any later version. * 9 * * 10 * This program is distributed in the hope that it will be useful, * 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 13 * GNU General Public License for more details. * 14 * * 15 * You should have received a copy of the GNU General Public License * 16 * along with this program; if not, write to the * 17 * Free Software Foundation, Inc., * 18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * 19 ***************************************************************************/ 20 /** @file 21 * @brief The class for parsing DatabaseTable rows and generating output rows 22 * 23 * @author Christophe GONZALES(_at_AMU) and Pierre-Henri WUILLEMIN(_at_LIP6) 24 */ 25 #ifndef GUM_LEARNING_DB_ROW_GENERATOR_PARSER_H 26 #define GUM_LEARNING_DB_ROW_GENERATOR_PARSER_H 27 28 #include <limits> 29 30 #include <agrum/agrum.h> 31 #include <agrum/tools/database/DBHandler.h> 32 #include <agrum/tools/database/databaseTable.h> 33 #include <agrum/tools/database/DBRowGeneratorSet.h> 34 35 namespace gum { 36 37 namespace learning { 38 39 /** @class DBRowGeneratorParser 40 * @headerfile DBRowGeneratorParser.h <agrum/tools/database/DBRowGeneratorParser.h> 41 * @ingroup learning_database 42 * @brief the class used to read a row in the database and to transform it 43 * into a set of DBRow instances that can be used for learning. 44 * 45 * A DBRowGeneratorParser contains a handler on a DatabaseTable that enables 46 * it to parse DBRows contained in the DatabaseTable. It also contains a 47 * DBRowGeneratorSet that is used to create output rows for each parsed 48 * DBRow. Note that if the DBRowGeneratorSet is empty, then 49 * DBRowGeneratorParser simply outputs each parsed DBRow without additional 50 * processing. To understand the difference between a DBRowGeneratorParser 51 * and a DBRowGeneratorSet, the latter is designed to take as input only 52 * one DBRow instance and to produce some output DBRow instances, whereas 53 * the former is designed to parse the content of a DatabaseTable and to 54 * produce from them some output DBRow instances. 55 * 56 * @par Usage example: 57 * @code 58 * // create and fill a database 59 * gum::learning::DatabaseTable<> database ( ... ); 60 * ..... 61 * 62 * // create a vector with the types of the columns of database 63 * const std::vector<gum::learning::DBTranslatedValueType> 64 * col_types ( 10, gum::learning::DBTranslatedValueType::DISCRETE ); 65 * 66 * // create a generator set 67 * gum::learning::MyGenerator<> generator1 ( col_types, 6 ); 68 * gum::learning::MyGenerator2<> generator2 ( col_types, 4 ); 69 * gum::learning::DBRowGeneratorSet<> genset; 70 * genset.insertGenerator ( generator1 ); 71 * genset.insertGenerator ( generator2 ); 72 * 73 * // create the DBRowGeneratorParser 74 * gum::learning::DBRowGeneratorParser<> 75 * parser ( database.handler (), genset ); 76 * 77 * // use the parser to parse all the database and to apply all the 78 * // transformations induced by generator1 and generator2 79 * while ( parser.hasRows () ) { 80 * const auto& dbrow = parser.row(); 81 * // do something with dbrow 82 * } 83 * @endcode 84 */ 85 template <template<typename> class ALLOC = std::allocator> 86 class DBRowGeneratorParser { 87 public: 88 89 /// type for the allocators passed in arguments of methods 90 using allocator_type = ALLOC<DBTranslatedValue>; 91 92 // ########################################################################## 93 /// @name Constructors / Destructors 94 // ########################################################################## 95 96 /// @{ 97 98 /// default constructor 99 DBRowGeneratorParser( const typename DatabaseTable<ALLOC>::Handler& handler, 100 const DBRowGeneratorSet<ALLOC>& generator_set, 101 const allocator_type& alloc = allocator_type () ); 102 103 /// copy constructor 104 DBRowGeneratorParser( const DBRowGeneratorParser<ALLOC>& from ); 105 106 /// copy constructor with a given allocator 107 DBRowGeneratorParser( const DBRowGeneratorParser<ALLOC>& from, 108 const allocator_type& alloc ); 109 110 /// move constructor 111 DBRowGeneratorParser(DBRowGeneratorParser<ALLOC>&& filter); 112 113 /// move constructor with a given allocator 114 DBRowGeneratorParser(DBRowGeneratorParser<ALLOC>&& filter, 115 const allocator_type& alloc ); 116 117 /// virtual copy constructor 118 virtual DBRowGeneratorParser<ALLOC>* clone () const; 119 120 /// virtual copy constructor with a given allocator 121 virtual DBRowGeneratorParser<ALLOC>* 122 clone (const allocator_type& alloc) const; 123 124 /// destructor 125 virtual ~DBRowGeneratorParser(); 126 127 /// @} 128 129 // ########################################################################## 130 /// @name Operators 131 // ########################################################################## 132 133 /// @{ 134 135 /// copy operator 136 DBRowGeneratorParser<ALLOC>& 137 operator=(const DBRowGeneratorParser<ALLOC>& from ); 138 139 /// move operator 140 DBRowGeneratorParser<ALLOC>& 141 operator=(DBRowGeneratorParser<ALLOC>&& from ); 142 143 /// @} 144 145 // ########################################################################## 146 /// @name Accessors / Modifiers 147 // ########################################################################## 148 149 /// @{ 150 151 /** @brief returns true if there are still rows that can be output by the 152 * DBRowGeneratorParser 153 * 154 * The usual way of calling this method is to encapsulate it into a while 155 * loop whose stopping condition is when the handler has no more rows. 156 * This loop shall be inside a try-catch statement that enables to 157 * stop properly the loop when the NotFound exception is raised. In most 158 * practical cases, this exception will never be raised, but if you use 159 * a row generator that enables to return 0 row (say, for instance an 160 * intelligent EM that does not return any row when there are too many 161 * missing data) and if the last rows of the database are such that this 162 * generator will return no row, then the exception will be raised. 163 * Actually, it is not efficient to parse all the database to detect such 164 * a case before trying to return the rows, especially because this 165 * situation is very unlikely to occur. So a correct code to use method 166 * row () is like: 167 * @code 168 * try { 169 * while ( parser.hasRows () ) { 170 * const auto& row = parser.row (); 171 * do_whatever_you_want_with_the_row... ; 172 * } 173 * } 174 * catch ( NotFound& ) { // stop, there are no more rows to process } 175 * @endcode 176 */ 177 bool hasRows(); 178 179 /// returns a new output row with its corresponding weight 180 /** The usual way of calling this method is to encapsulate it into a while 181 * loop whose stopping condition is when the handler has no more rows. 182 * This loop shall be inside a try-catch statement that enables to 183 * stop properly the loop when the NotFound exception is raised. In most 184 * practical cases, this exception will never be raised, but if you use 185 * a row generator that enables to return 0 row (say, for instance an 186 * intelligent EM that does not return any row when there are too many 187 * missing data) and if the last rows of the database are such that this 188 * generator will return no row, then the exception will be raised. 189 * Actually, it is not efficient to parse all the database to detect such 190 * a case before trying to return the rows, especially because this 191 * situation is very unlikely to occur. So a correct code to use method 192 * row () is like: 193 * @code 194 * try { 195 * while ( parser.hasRows () ) { 196 * const auto& row = parser.row (); 197 * do_whatever_you_want_with_the_row... ; 198 * } 199 * } 200 * catch ( NotFound& ) { // stop, there are no more rows to process } 201 * @endcode 202 */ 203 const DBRow<DBTranslatedValue,ALLOC>& row (); 204 205 /// resets the parser 206 void reset(); 207 208 /// returns the handler used by the parser 209 typename DatabaseTable<ALLOC>::Handler& handler(); 210 211 /// returns the handler used by the parser 212 const typename DatabaseTable<ALLOC>::Handler& handler() const; 213 214 /// returns a reference on the database 215 const DatabaseTable<ALLOC>& database () const; 216 217 /// returns the generator set that is actually used 218 DBRowGeneratorSet<ALLOC>& generatorSet(); 219 220 /// returns the generator set that is actually used 221 const DBRowGeneratorSet<ALLOC>& generatorSet() const; 222 223 /// sets the area in the database the handler will handle 224 /** In addition to setting the area that will be parsed by the handler, 225 * this method makes the handler point to the beginning of the area. 226 * @param begin the first row to be handled 227 * @param end the handler handles rows in interval [begin,end). Thus, 228 * the endth row is not included in the set of rows handled. 229 * @warning if begin is greater than end, these values are swapped. 230 * @throw NullElement is raised if the handler does not point to 231 * any database 232 * @throw SizeError is raised if end is greater than the number of 233 * rows of the database */ 234 void setRange(std::size_t begin, std::size_t end); 235 236 /** @brief sets the columns of interest: the output DBRow needs only 237 * contain values fot these columns 238 * 239 * This method is useful, e.g., for EM-like algorithms that need to know 240 * which unobserved variables/values need be filled. 241 * 242 * @throw OperationNotAllowed is raised if the generator set has already 243 * started generating output rows and is currently in a state where the 244 * generation is not completed yet (i.e., we still need to call the 245 * generate() method to complete it). */ 246 void setColumnsOfInterest ( 247 const std::vector<std::size_t,ALLOC<std::size_t>>& cols_of_interest ); 248 249 /** @brief sets the columns of interest: the output DBRow needs only 250 * contain values fot these columns 251 * 252 * This method is useful, e.g., for EM-like algorithms that need to know 253 * which unobserved variables/values need be filled. 254 * 255 * @throw OperationNotAllowed is raised if the generator set has already 256 * started generating output rows and is currently in a state where the 257 * generation is not completed yet (i.e., we still need to call the 258 * generate() method to complete it). */ 259 void setColumnsOfInterest ( 260 std::vector<std::size_t,ALLOC<std::size_t>>&& cols_of_interest ); 261 262 /// assign a new Bayes net to all the generators that depend on a BN 263 /** Typically, generators based on EM or K-means depend on a model to 264 * compute correctly their outputs. Method setBayesNet enables to 265 * update their BN model. 266 * @warning if one generator that relies on Bayes nets cannot be assigned 267 * new_bn, then no generator is updated and an exception is raised. */ 268 template < typename GUM_SCALAR > 269 void setBayesNet (const BayesNet<GUM_SCALAR>& new_bn); 270 271 /// returns the allocator used 272 allocator_type getAllocator () const; 273 274 /// @} 275 276 277 private: 278 279 /// the handler that is really used to parse the database 280 typename DatabaseTable<ALLOC>::Handler _handler_; 281 282 /// the set of DBRow generators (might be empty) 283 DBRowGeneratorSet<ALLOC> _generator_set_; 284 285 /// the size of the generator set 286 std::size_t _generator_size_; 287 288 }; 289 290 } /* namespace learning */ 291 292 } /* namespace gum */ 293 294 // always include the template implementation 295 #include <agrum/tools/database/DBRowGeneratorParser_tpl.h> 296 297 #endif /* GUM_LEARNING_DB_ROW_GENERATOR_PARSER_H */ 298