1 /**
2  *
3  *   Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(_at_LIP6) & Christophe GONZALES(_at_AMU)
4  *   info_at_agrum_dot_org
5  *
6  *  This library is free software: you can redistribute it and/or modify
7  *  it under the terms of the GNU Lesser General Public License as published by
8  *  the Free Software Foundation, either version 3 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This library is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU Lesser General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Lesser General Public License
17  *  along with this library.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief The databases' cell translators for integer variables
24  *
25  * @author Christophe GONZALES(_at_AMU) and Pierre-Henri WUILLEMIN(_at_LIP6)
26  */
27 #ifndef GUM_LEARNING_DB_TRANSLATOR_4_INTEGER_VARIABLE_H
28 #define GUM_LEARNING_DB_TRANSLATOR_4_INTEGER_VARIABLE_H
29 
30 #include <agrum/agrum.h>
31 #include <agrum/tools/database/DBTranslator.h>
32 #include <agrum/tools/variables/integerVariable.h>
33 
34 
35 namespace gum {
36 
37   namespace learning {
38 
39 
40     /** @class DBTranslator4IntegerVariable
41      * @headerfile DBTranslator4IntegerVariable.h <agrum/tools/database/DBTranslator4IntegerVariable.h>
42      * @brief The databases' cell translators for integer variables
43      *
44      * Translators are used by DatabaseTable instances to transform datasets'
45      * strings into DBTranslatedValue instances. The point is that strings are
46      * not adequate for fast learning, they need to be preprocessed into a type
47      * that can be analyzed quickly (the so-called DBTranslatedValue type).
48      *
49      * A DBTranslator4IntegerVariable is a translator that contains and
50      * exploits an IntegerVariable for translations. Each time a string needs
51      * to be translated, we ask the IntegerVariable which index contains the number
52      * represented by the string. The DBTranslatedValue corresponding to the translation
53      * of the string contains this index in its discr_val field.
54      *
55      * @warning Translators for integer variables are not editable, that is,
56      * you must provide the const variable that will be used for translations.
57      * Enabling the editable mode would not make much sense because, during the
58      * translation, the DBTranslatedValue of an integer may change after translating
59      * another integer.
60      *
61      * @par Here is an example of how to use this class:
62      * @code
63      * // create the translator, with possible missing symbols: "N/A" and "???"
64      * // i.e., each time the translator reads a "N/A" or a "???" string, it
65      * // won't translate it into a number but into a missing value.
66      * std::vector<std::string> missing { "N/A", "???" };
67      * gum::IntegerVariable var ( "X1", "" );
68      * var.addValue( 1 );
69      * var.addValue( 3 );
70      * var.addValue( 10 );
71      * gum::learning::DBTranslator4IntegerVariable<> translator(var, missing);
72      *
73      * // gets the DBTranslatedValue corresponding to some strings
74      * auto val1 = translator.translate("3");
75      * auto val2 = translator << "1";
76      * // at this point, val1 and val2 are equal to
77      * // gum::learning::DBTranslatedValue { std::size_t(1) } and
78      * // gum::learning::DBTranslatedValue { std::size_t(0) } respectively
79      *
80      * // if the string contains a number outside the domain of the
81      * // IntegerVariable, then a gum::UnknownLabelInDatabase exception is raised:
82      * auto val3 = translator << "17"; // UnknownLabelInDatabase raised
83      *
84      * // add the numbers assigned to val1, val2
85      * std::size_t sum = val1.discr_val + val2.discr_val;
86      *
87      * // translate missing values: val4 and val5 will be equal to:
88      * // DBTranslatedValue { std::numeric_limits<std::size_t>::max () }
89      * auto val4 = translator << "N/A";
90      * auto val5 = translator.translate ( "???" );
91      *
92      * // the following instructions raise TypeError exceptions because the
93      * // strings are not integers
94      * auto val6 = translator << "422.5";
95      * auto val7 = translator.translate ( "xxx" );
96      *
97      * // given a DBTranslatedValue that is supposed to contain the index of
98      * // an integer, get the string representing this integer.
99      * std::string str;
100      * str = translator.translateBack ( val1 );        // str = "3"
101      * str = translator >> val2;                       // str = "1"
102      * str = translator >> gum::learning::DBTranslatedValue {std::size_t(1)};
103      *                                                 // str = "3"
104      *
105      * // translate back missing values: the string will correspond to one of
106      * // the missing symbols known to the translator
107      * str = translator >> val4; // str = "N/A" or "???"
108      * str = translator >> val5; // str = "N/A" or "???"
109      *
110      * // get the variable stored within the translator
111      * // (variable() already returns a const gum::IntegerVariable*: no cast is needed)
112      * const gum::IntegerVariable* stored_var = translator.variable();
113      * @endcode
114      *
115      * @ingroup learning_database
116      */
117     template < template < typename > class ALLOC = std::allocator >
118     class DBTranslator4IntegerVariable: public DBTranslator< ALLOC > {
119       public:
120       /// the allocator type used by this class' methods (that of DBTranslator< ALLOC >)
121       using allocator_type = typename DBTranslator< ALLOC >::allocator_type;
122 
123 
124       // ##########################################################################
125       /// @name Constructors / Destructors
126       // ##########################################################################
127 
128       /// @{
129 
130       /// constructor with an integer variable as translator and a set of missing symbols
131       /** @param var an integer variable which will be used for translations.
132        * The translator keeps a copy of this variable
133        * @param missing_symbols the set of symbols in the dataset
134        * representing missing values
135        * @param max_dico_entries the max number of entries that the dictionary
136        * can contain (by default, std::numeric_limits<std::size_t>::max ()). During
137        * the construction, we check that the integer variable passed in argument
138        * has fewer values than the admissible dictionary size
139        * @param alloc The allocator used to allocate memory for all the
140        * fields of the DBTranslator4IntegerVariable */
141       template < template < typename > class XALLOC >
142       DBTranslator4IntegerVariable(
143          const IntegerVariable&                                   var,
144          const std::vector< std::string, XALLOC< std::string > >& missing_symbols,
145          std::size_t           max_dico_entries = std::numeric_limits< std::size_t >::max(),
146          const allocator_type& alloc            = allocator_type());
147 
148       /** @brief constructor with an integer variable as translator
149        * but without missing symbols
150        *
151        * @param var an integer variable which will be used for translations.
152        * The translator keeps a copy of this variable
153        * @param max_dico_entries the max number of entries that the dictionary
154        * can contain (by default, std::numeric_limits<std::size_t>::max ()). During
155        * the construction, we check that the integer variable passed in argument
156        * has a domain size not larger than the admissible dictionary size
157        * @param alloc The allocator used to allocate memory for all the
158        * fields of the DBTranslator4IntegerVariable */
159       DBTranslator4IntegerVariable(const IntegerVariable& var,
160                                    std::size_t            max_dico_entries
161                                    = std::numeric_limits< std::size_t >::max(),
162                                    const allocator_type& alloc = allocator_type());
163 
164       /// copy constructor
165       DBTranslator4IntegerVariable(const DBTranslator4IntegerVariable< ALLOC >& from);
166 
167       /// copy constructor with a given allocator
168       DBTranslator4IntegerVariable(const DBTranslator4IntegerVariable< ALLOC >& from,
169                                    const allocator_type&                        alloc);
170 
171       /// move constructor
172       DBTranslator4IntegerVariable(DBTranslator4IntegerVariable< ALLOC >&& from);
173 
174       /// move constructor with a given allocator
175       DBTranslator4IntegerVariable(DBTranslator4IntegerVariable< ALLOC >&& from,
176                                    const allocator_type&                   alloc);
177 
178       /// virtual copy constructor (returns a pointer to a DBTranslator4IntegerVariable)
179       virtual DBTranslator4IntegerVariable< ALLOC >* clone() const;
180 
181       /// virtual copy constructor with a given allocator
182       virtual DBTranslator4IntegerVariable< ALLOC >* clone(const allocator_type& alloc) const;
183 
184       /// destructor
185       virtual ~DBTranslator4IntegerVariable();
186 
187       /// @}
188 
189 
190       // ##########################################################################
191       /// @name Operators
192       // ##########################################################################
193 
194       /// @{
195 
196       /// copy assignment operator
197       DBTranslator4IntegerVariable< ALLOC >&
198          operator=(const DBTranslator4IntegerVariable< ALLOC >& from);
199 
200       /// move assignment operator
201       DBTranslator4IntegerVariable< ALLOC >&
202          operator=(DBTranslator4IntegerVariable< ALLOC >&& from);
203 
204       /// @}
205 
206 
207       // ##########################################################################
208       /// @name Accessors / Modifiers
209       // ##########################################################################
210 
211       /// @{
212 
213       /// returns the translation of a string
214       /** This method tries to translate a given string into the
215        * DBTranslatedValue that should be stored into a DatabaseTable. If the
216        * translator cannot find the translation in its current dictionary, then
217        * the translator raises either a TypeError if the string is not a number
218        * or an UnknownLabelInDatabase exception.
219        *
220        * @warning Note that missing values (i.e., strings encoded as missing
221        * symbols) are translated as std::numeric_limits<std::size_t>::max ().
222        * @warning If the variable contained into the translator has an integer
223        * that corresponds to a missing value symbol, the integer will be taken into
224        * account in the translation, not the missing symbol.
225        * @param str the string to be translated
226        * @return the translated value to be stored into a DatabaseTable
227        * @throws UnknownLabelInDatabase is raised if the translation cannot be
228        * found.
229        * @throws TypeError is raised if the translation cannot be found in
230        * the translator and the string does not correspond to a number. */
231       virtual DBTranslatedValue translate(const std::string& str) final;
232 
233       /// returns the original value for a given translation
234       /** @return the string that was translated into a given DBTranslatedValue.
235        * @throws UnknownLabelInDatabase is raised if this original value
236        * cannot be found */
237       virtual std::string translateBack(const DBTranslatedValue translated_val) const final;
238 
239       /// returns the domain size of the variable used for translations
240       /** @warning Note that missing values are encoded as
241        * std::numeric_limits<>::max () and are not taken into account in the
242        * domain sizes. */
243       virtual std::size_t domainSize() const final;
244 
245       /// indicates that the translator is never in editable dictionary mode
246       virtual bool hasEditableDictionary() const final;
247 
248       /// sets/unsets the editable dictionary mode (NOTE(review): documented as never editable; confirm this is a no-op)
249       virtual void setEditableDictionaryMode(bool new_mode) final;
250 
251       /// indicates that the translations should never be reordered
252       virtual bool needsReordering() const final;
253 
254       /** @brief returns an empty HashTable to indicate that no reordering
255        * is needed. */
256       virtual HashTable< std::size_t, std::size_t, ALLOC< std::pair< std::size_t, std::size_t > > >
257          reorder() final;
258 
259       /// returns the variable stored into the translator
260       virtual const IntegerVariable* variable() const final;
261 
262       /// returns the translation of a missing value
263       virtual DBTranslatedValue missingValue() const final;
264 
265       /// @}
266 
267 
268 #ifndef DOXYGEN_SHOULD_SKIP_THIS
269 
270       private:
271       // the translator's own IntegerVariable (stored by value) used for translations
272       IntegerVariable _variable_;
273 
274 
275 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
276     };
277 
278 
279   } /* namespace learning */
280 
281 } /* namespace gum */
282 
283 
284 // always include the template implementation
285 #include <agrum/tools/database/DBTranslator4IntegerVariable_tpl.h>
286 
287 #endif /* GUM_LEARNING_DB_TRANSLATOR_4_INTEGER_VARIABLE_H */
288