1 /** 2 * 3 * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(_at_LIP6) & Christophe GONZALES(_at_AMU) 4 * info_at_agrum_dot_org 5 * 6 * This library is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU Lesser General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public License 17 * along with this library. If not, see <http://www.gnu.org/licenses/>. 18 * 19 */ 20 21 22 /** @file 23 * @brief The databases' cell translators for integer variables 24 * 25 * @author Christophe GONZALES(_at_AMU) and Pierre-Henri WUILLEMIN(_at_LIP6) 26 */ 27 #ifndef GUM_LEARNING_DB_TRANSLATOR_4_INTEGER_VARIABLE_H 28 #define GUM_LEARNING_DB_TRANSLATOR_4_INTEGER_VARIABLE_H 29 30 #include <agrum/agrum.h> 31 #include <agrum/tools/database/DBTranslator.h> 32 #include <agrum/tools/variables/integerVariable.h> 33 34 35 namespace gum { 36 37 namespace learning { 38 39 40 /** @class DBTranslator4IntegerVariable 41 * @headerfile DBTranslator4IntegerVariable.h <agrum/tools/database/DBTranslator4IntegerVariable.h> 42 * @brief The databases' cell translators for integer variables 43 * 44 * Translators are used by DatabaseTable instances to transform datasets' 45 * strings into DBTranslatedValue instances. The point is that strings are 46 * not adequate for fast learning, they need to be preprocessed into a type 47 * that can be analyzed quickly (the so-called DBTranslatedValue type). 48 * 49 * A DBTranslator4IntegerVariable is a translator that contains and 50 * exploits a IntegerVariable for translations. Each time a string needs 51 * be translated, we ask the IntegerVariable which index contains the the number 52 * represented by the string. The DBTranslatedValue corresponding to the translation 53 * of the string contains in its discr_val field this number. 54 * 55 * @warning Translators for integer variables are not editable, that is, 56 * you must provide the const variable that will be used for translations. 57 * Enabling the editable mode would not make much sense because, during the 58 * translation, the DBTranslatedValue of an integer may change after translating 59 * another integer. 60 * 61 * @par Here is an example of how to use this class: 62 * @code 63 * // create the translator, with possible missing symbols: "N/A" and "???" 64 * // i.e., each time the translator reads a "N/A" or a "???" string, it 65 * // won't translate it into a number but into a missing value. 66 * std::vector<std::string> missing { "N/A", "???" }; 67 * gum::IntegerVariable var ( "X1", "" ); 68 * var.addValue( 1 ); 69 * var.addValue( 3 ); 70 * var.addValue( 10 ); 71 * gum::learning::DBTranslator4IntegerVariable<> translator(var, missing); 72 * 73 * // gets the DBTranslatedValue corresponding to some strings 74 * auto val1 = translator.translate("3"); 75 * auto val2 = translator << "1"; 76 * // at this point, val1 and val2 are equal to 77 * // gum::learning::DBTranslatedValue { std::size_t(1) } and 78 * // gum::learning::DBTranslatedValue { std::size_t(0) } respectively 79 * 80 * // if the string contains a number outside the domain of the 81 * // IntegerVariable, then a gum::NotFound exception is raised: 82 * auto val3 = translator << "17"; // NotFound raised 83 * 84 * // add the numbers assigned to val1, val2 85 * std::size_t sum = val1.discr_val + val2.discr_val; 86 * 87 * // translate missing values: val4 and val5 will be equal to: 88 * // DBTranslatedValue { std::numeric_limits<std::size_t>::max () } 89 * auto val4 = translator << "N/A"; 90 * auto val5 = translator.translate ( "???" ); 91 * 92 * // the following instructions raise TypeError exceptions because the 93 * // strings are not integers 94 * auto val6 = translator << "422.5"; 95 * auto val7 = translator.translate ( "xxx" ); 96 * 97 * // given a DBTranslatedValue that is supposed to contain the index of 98 * // an integer, get the string representing this integer. 99 * std::string str; 100 * str = translator.translateBack ( val1 ); // str = "3" 101 * str = translator >> val2; // str = "1" 102 * str = translator >> gum::learning::DBTranslatedValue {std::size_t(1)}; 103 * // str = "3" 104 * 105 * // translate back missing values: the string will corresponds to one of 106 * // the missing symbols known to the translator 107 * str = translator >> val4; // str = "N/A" or "???" 108 * str = translator >> val5; // str = "N/A" or "???" 109 * 110 * // get the variable stored within the translator 111 * const gum::IntegerVariable<float>* var = 112 * dynamic_cast<const gum::IntegerVariable*>(translator.variable()); 113 * @endcode 114 * 115 * @ingroup learning_database 116 */ 117 template < template < typename > class ALLOC = std::allocator > 118 class DBTranslator4IntegerVariable: public DBTranslator< ALLOC > { 119 public: 120 /// type for the allocators passed in arguments of methods 121 using allocator_type = typename DBTranslator< ALLOC >::allocator_type; 122 123 124 // ########################################################################## 125 /// @name Constructors / Destructors 126 // ########################################################################## 127 128 /// @{ 129 130 /// default constructor with an integer variable as translator 131 /** @param var an integer variable which will be used for translations. 132 * The translator keeps a copy of this variable 133 * @param missing_symbols the set of symbols in the dataset 134 * representing missing values 135 * @param max_dico_entries the max number of entries that the dictionary 136 * can contain. During the construction, we check that the integer 137 * variable passed in argument has fewer values than 138 * the admissible dictionary size 139 * @param alloc The allocator used to allocate memory for all the 140 * fields of the DBTranslator4IntegerVariable */ 141 template < template < typename > class XALLOC > 142 DBTranslator4IntegerVariable( 143 const IntegerVariable& var, 144 const std::vector< std::string, XALLOC< std::string > >& missing_symbols, 145 std::size_t max_dico_entries = std::numeric_limits< std::size_t >::max(), 146 const allocator_type& alloc = allocator_type()); 147 148 /** @brief default constructor with an integer variable as translator 149 * but without missing symbols 150 * 151 * @param var an integer variable which will be used for translations. 152 * The translator keeps a copy of this variable 153 * @param max_dico_entries the max number of entries that the dictionary 154 * can contain. During the construction, we check that the integer 155 * variable passed in argument has a domain size not larger than 156 * the admissible dictionary size 157 * @param alloc The allocator used to allocate memory for all the 158 * fields of the DBTranslator4IntegerVariable */ 159 DBTranslator4IntegerVariable(const IntegerVariable& var, 160 std::size_t max_dico_entries 161 = std::numeric_limits< std::size_t >::max(), 162 const allocator_type& alloc = allocator_type()); 163 164 /// copy constructor 165 DBTranslator4IntegerVariable(const DBTranslator4IntegerVariable< ALLOC >& from); 166 167 /// copy constructor with a given allocator 168 DBTranslator4IntegerVariable(const DBTranslator4IntegerVariable< ALLOC >& from, 169 const allocator_type& alloc); 170 171 /// move constructor 172 DBTranslator4IntegerVariable(DBTranslator4IntegerVariable< ALLOC >&& from); 173 174 /// move constructor with a given allocator 175 DBTranslator4IntegerVariable(DBTranslator4IntegerVariable< ALLOC >&& from, 176 const allocator_type& alloc); 177 178 /// virtual copy constructor 179 virtual DBTranslator4IntegerVariable< ALLOC >* clone() const; 180 181 /// virtual copy constructor with a given allocator 182 virtual DBTranslator4IntegerVariable< ALLOC >* clone(const allocator_type& alloc) const; 183 184 /// destructor 185 virtual ~DBTranslator4IntegerVariable(); 186 187 /// @} 188 189 190 // ########################################################################## 191 /// @name Operators 192 // ########################################################################## 193 194 /// @{ 195 196 /// copy operator 197 DBTranslator4IntegerVariable< ALLOC >& 198 operator=(const DBTranslator4IntegerVariable< ALLOC >& from); 199 200 /// move operator 201 DBTranslator4IntegerVariable< ALLOC >& 202 operator=(DBTranslator4IntegerVariable< ALLOC >&& from); 203 204 /// @} 205 206 207 // ########################################################################## 208 /// @name Accessors / Modifiers 209 // ########################################################################## 210 211 /// @{ 212 213 /// returns the translation of a string 214 /** This method tries to translate a given string into the 215 * DBTranslatedValue that should be stored into a databaseTable. If the 216 * translator cannot find the translation in its current dictionary, then 217 * the translator raises either a TypeError if the string is not a number 218 * or an UnknownLabelInDatabase exception. 219 * 220 * @warning Note that missing values (i.e., string encoded as missing 221 * symbols) are translated as std::numeric_limits<std::size_t>::max (). 222 * @warning If the variable contained into the translator has an integer 223 * that corresponds to a missing value symbol, the integer will be taken into 224 * account in the translation, not the missing symbol. 225 * @return the translated value of the string to be stored into a 226 * DatabaseTable 227 * @throws UnknownLabelInDatabase is raised if the translation cannot be 228 * found. 229 * @throws TypeError is raised if the translation cannot be found in 230 * the translator and the string does not correspond to a number. */ 231 virtual DBTranslatedValue translate(const std::string& str) final; 232 233 /// returns the original value for a given translation 234 /** @return the string that was translated into a given DBTranslatedValue. 235 * @throws UnknownLabelInDatabase is raised if this original value 236 * cannot be found */ 237 virtual std::string translateBack(const DBTranslatedValue translated_val) const final; 238 239 /// returns the domain size of the variable used for translations 240 /** @warning Note that missing values are encoded as 241 * std::numeric_limits<>::max () and are not taken into account in the 242 * domain sizes. */ 243 virtual std::size_t domainSize() const final; 244 245 /// indicates that the translator is never in editable dictionary mode 246 virtual bool hasEditableDictionary() const final; 247 248 /// sets/unset the editable dictionary mode 249 virtual void setEditableDictionaryMode(bool new_mode) final; 250 251 /// indicates that the translations should never be reordered 252 virtual bool needsReordering() const final; 253 254 /** @brief returns an empty HashTable to indicate that no reordering 255 * is needed. */ 256 virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > > 257 reorder() final; 258 259 /// returns the variable stored into the translator 260 virtual const IntegerVariable* variable() const final; 261 262 /// returns the translation of a missing value 263 virtual DBTranslatedValue missingValue() const final; 264 265 /// @} 266 267 268 #ifndef DOXYGEN_SHOULD_SKIP_THIS 269 270 private: 271 // the IntegerVariable used for translations 272 IntegerVariable _variable_; 273 274 275 #endif /* DOXYGEN_SHOULD_SKIP_THIS */ 276 }; 277 278 279 } /* namespace learning */ 280 281 } /* namespace gum */ 282 283 284 // always include the template implementation 285 #include <agrum/tools/database/DBTranslator4IntegerVariable_tpl.h> 286 287 #endif /* GUM_LEARNING_DB_TRANSLATOR_4_INTEGER_VARIABLE_H */ 288