1 /**
2  *
3  *   Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(_at_LIP6) & Christophe GONZALES(_at_AMU)
4  *   info_at_agrum_dot_org
5  *
6  *  This library is free software: you can redistribute it and/or modify
7  *  it under the terms of the GNU Lesser General Public License as published by
8  *  the Free Software Foundation, either version 3 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This library is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU Lesser General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Lesser General Public License
17  *  along with this library.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  * @brief Template implementation of AdaptiveRMaxPlaner classes.
25  *
26  * @author Pierre-Henri WUILLEMIN(_at_LIP6) and Jean-Christophe MAGNAN and Christophe
27  * GONZALES(_at_AMU)
28  */
29 
30 // =========================================================================
31 #include <queue>
32 #include <vector>
33 //#include <algorithm>
34 //#include <utility>
35 // =========================================================================
36 #include <agrum/tools/core/math/math_utils.h>
37 #include <agrum/tools/core/functors.h>
38 #include <agrum/tools/core/smallobjectallocator/smallObjectAllocator.h>
39 // =========================================================================
40 #include <agrum/tools/multidim/implementations/multiDimFunctionGraph.h>
41 #include <agrum/tools/multidim/instantiation.h>
42 #include <agrum/tools/multidim/potential.h>
43 // =========================================================================
44 #include <agrum/FMDP/planning/adaptiveRMaxPlaner.h>
45 // =========================================================================
46 
47 /// For shorter line and hence more comprehensive code purposes only
48 #define RECASTED(x) reinterpret_cast< const MultiDimFunctionGraph< double >* >(x)
49 
50 namespace gum {
51 
52   /* **************************************************************************************************
53    * **/
54   /* ** **/
55   /* **                                Constructors / Destructors **/
56   /* ** **/
57   /* **************************************************************************************************
58    * **/
59 
60   // ===========================================================================
61   // Default constructor
62   // ===========================================================================
AdaptiveRMaxPlaner(IOperatorStrategy<double> * opi,double discountFactor,double epsilon,const ILearningStrategy * learner,bool verbose)63   AdaptiveRMaxPlaner::AdaptiveRMaxPlaner(IOperatorStrategy< double >* opi,
64                                          double                       discountFactor,
65                                          double                       epsilon,
66                                          const ILearningStrategy*     learner,
67                                          bool                         verbose) :
68       StructuredPlaner(opi, discountFactor, epsilon, verbose),
69       IDecisionStrategy(), _fmdpLearner_(learner), _initialized_(false) {
70     GUM_CONSTRUCTOR(AdaptiveRMaxPlaner);
71   }
72 
73   // ===========================================================================
74   // Default destructor
75   // ===========================================================================
~AdaptiveRMaxPlaner()76   AdaptiveRMaxPlaner::~AdaptiveRMaxPlaner() {
77     GUM_DESTRUCTOR(AdaptiveRMaxPlaner);
78 
79     for (HashTableIteratorSafe< Idx, StatesCounter* > scIter = _counterTable_.beginSafe();
80          scIter != _counterTable_.endSafe();
81          ++scIter)
82       delete scIter.val();
83   }
84 
85   /* **************************************************************************************************
86    * **/
87   /* ** **/
88   /* **                                     Planning Methods **/
89   /* ** **/
90   /* **************************************************************************************************
91    * **/
92 
93   // ==========================================================================
94   // Initializes data structure needed for making the planning
95   // ==========================================================================
initialize(const FMDP<double> * fmdp)96   void AdaptiveRMaxPlaner::initialize(const FMDP< double >* fmdp) {
97     if (!_initialized_) {
98       StructuredPlaner::initialize(fmdp);
99       IDecisionStrategy::initialize(fmdp);
100       for (auto actionIter = fmdp->beginActions(); actionIter != fmdp->endActions(); ++actionIter) {
101         _counterTable_.insert(*actionIter, new StatesCounter());
102         _initializedTable_.insert(*actionIter, false);
103       }
104       _initialized_ = true;
105     }
106   }
107 
108   // ===========================================================================
109   // Performs a value iteration
110   // ===========================================================================
makePlanning(Idx nbStep)111   void AdaptiveRMaxPlaner::makePlanning(Idx nbStep) {
112     _makeRMaxFunctionGraphs_();
113 
114     StructuredPlaner::makePlanning(nbStep);
115 
116     _clearTables_();
117   }
118 
119   /* **************************************************************************************************
120    * **/
121   /* ** **/
122   /* **                                   Value Iteration Methods **/
123   /* ** **/
124   /* **************************************************************************************************
125    * **/
126 
127   // ===========================================================================
128   // Performs a single step of value iteration
129   // ===========================================================================
initVFunction_()130   void AdaptiveRMaxPlaner::initVFunction_() {
131     vFunction_->manager()->setRootNode(vFunction_->manager()->addTerminalNode(0.0));
132     for (auto actionIter = fmdp_->beginActions(); actionIter != fmdp_->endActions(); ++actionIter)
133       vFunction_ = this->operator_->add(vFunction_, RECASTED(this->fmdp_->reward(*actionIter)), 1);
134   }
135 
136   // ===========================================================================
137   // Performs a single step of value iteration
138   // ===========================================================================
valueIteration_()139   MultiDimFunctionGraph< double >* AdaptiveRMaxPlaner::valueIteration_() {
140     // *****************************************************************************************
141     // Loop reset
142     MultiDimFunctionGraph< double >* newVFunction = operator_->getFunctionInstance();
143     newVFunction->copyAndReassign(*vFunction_, fmdp_->mapMainPrime());
144 
145     // *****************************************************************************************
146     // For each action
147     std::vector< MultiDimFunctionGraph< double >* > qActionsSet;
148     for (auto actionIter = fmdp_->beginActions(); actionIter != fmdp_->endActions(); ++actionIter) {
149       MultiDimFunctionGraph< double >* qAction = evalQaction_(newVFunction, *actionIter);
150 
151       // *******************************************************************************************
152       // Next, we add the reward
153       qAction = addReward_(qAction, *actionIter);
154 
155       qAction = this->operator_->maximize(
156          _actionsRMaxTable_[*actionIter],
157          this->operator_->multiply(qAction, _actionsBoolTable_[*actionIter], 1),
158          2);
159 
160       qActionsSet.push_back(qAction);
161     }
162     delete newVFunction;
163 
164     // *****************************************************************************************
165     // Next to evaluate main value function, we take maximise over all action
166     // value, ...
167     newVFunction = maximiseQactions_(qActionsSet);
168 
169     return newVFunction;
170   }
171 
172   /* **************************************************************************************************
173    * **/
174   /* ** **/
175   /* **                                   Optimal Policy Evaluation Methods **/
176   /* ** **/
177   /* **************************************************************************************************
178    * **/
179 
180   // ===========================================================================
181   // Evals the policy corresponding to the given value function
182   // ===========================================================================
evalPolicy_()183   void AdaptiveRMaxPlaner::evalPolicy_() {
184     // *****************************************************************************************
185     // Loop reset
186     MultiDimFunctionGraph< double >* newVFunction = operator_->getFunctionInstance();
187     newVFunction->copyAndReassign(*vFunction_, fmdp_->mapMainPrime());
188 
189     std::vector< MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy >* >
190        argMaxQActionsSet;
191     // *****************************************************************************************
192     // For each action
193     for (auto actionIter = fmdp_->beginActions(); actionIter != fmdp_->endActions(); ++actionIter) {
194       MultiDimFunctionGraph< double >* qAction = this->evalQaction_(newVFunction, *actionIter);
195 
196       qAction = this->addReward_(qAction, *actionIter);
197 
198       qAction = this->operator_->maximize(
199          _actionsRMaxTable_[*actionIter],
200          this->operator_->multiply(qAction, _actionsBoolTable_[*actionIter], 1),
201          2);
202 
203       argMaxQActionsSet.push_back(makeArgMax_(qAction, *actionIter));
204     }
205     delete newVFunction;
206 
207     // *****************************************************************************************
208     // Next to evaluate main value function, we take maximise over all action
209     // value, ...
210     MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy >* argMaxVFunction
211        = argmaximiseQactions_(argMaxQActionsSet);
212 
213     // *****************************************************************************************
214     // Next to evaluate main value function, we take maximise over all action
215     // value, ...
216     extractOptimalPolicy_(argMaxVFunction);
217   }
218 
219   // ===========================================================================
220   //
221   // ===========================================================================
_makeRMaxFunctionGraphs_()222   void AdaptiveRMaxPlaner::_makeRMaxFunctionGraphs_() {
223     _rThreshold_ = _fmdpLearner_->modaMax() * 5 > 30 ? _fmdpLearner_->modaMax() * 5 : 30;
224     _rmax_       = _fmdpLearner_->rMax() / (1.0 - this->discountFactor_);
225 
226     for (auto actionIter = this->fmdp()->beginActions(); actionIter != this->fmdp()->endActions();
227          ++actionIter) {
228       std::vector< MultiDimFunctionGraph< double >* > rmaxs;
229       std::vector< MultiDimFunctionGraph< double >* > boolQs;
230 
231       for (auto varIter = this->fmdp()->beginVariables(); varIter != this->fmdp()->endVariables();
232            ++varIter) {
233         const IVisitableGraphLearner* visited = _counterTable_[*actionIter];
234 
235         MultiDimFunctionGraph< double >* varRMax  = this->operator_->getFunctionInstance();
236         MultiDimFunctionGraph< double >* varBoolQ = this->operator_->getFunctionInstance();
237 
238         visited->insertSetOfVars(varRMax);
239         visited->insertSetOfVars(varBoolQ);
240 
241         std::pair< NodeId, NodeId > rooty
242            = _visitLearner_(visited, visited->root(), varRMax, varBoolQ);
243         varRMax->manager()->setRootNode(rooty.first);
244         varRMax->manager()->reduce();
245         varRMax->manager()->clean();
246         varBoolQ->manager()->setRootNode(rooty.second);
247         varBoolQ->manager()->reduce();
248         varBoolQ->manager()->clean();
249 
250         rmaxs.push_back(varRMax);
251         boolQs.push_back(varBoolQ);
252 
253         //          std::cout << RECASTED(this->fmdp_->transition(*actionIter,
254         //          *varIter))->toDot() << std::endl;
255         //          for( auto varIter2 =
256         //          RECASTED(this->fmdp_->transition(*actionIter,
257         //          *varIter))->variablesSequence().beginSafe(); varIter2 !=
258         //          RECASTED(this->fmdp_->transition(*actionIter,
259         //          *varIter))->variablesSequence().endSafe(); ++varIter2 )
260         //              std::cout << (*varIter2)->name() << " | ";
261         //          std::cout << std::endl;
262 
263         //          std::cout << varRMax->toDot() << std::endl;
264         //          for( auto varIter =
265         //          varRMax->variablesSequence().beginSafe(); varIter !=
266         //          varRMax->variablesSequence().endSafe(); ++varIter )
267         //              std::cout << (*varIter)->name() << " | ";
268         //          std::cout << std::endl;
269 
270         //          std::cout << varBoolQ->toDot() << std::endl;
271         //          for( auto varIter =
272         //          varBoolQ->variablesSequence().beginSafe(); varIter !=
273         //          varBoolQ->variablesSequence().endSafe(); ++varIter )
274         //              std::cout << (*varIter)->name() << " | ";
275         //          std::cout << std::endl;
276       }
277 
278       //        std::cout << "Maximising" << std::endl;
279       _actionsRMaxTable_.insert(*actionIter, this->maximiseQactions_(rmaxs));
280       _actionsBoolTable_.insert(*actionIter, this->minimiseFunctions_(boolQs));
281     }
282   }
283 
284   // ===========================================================================
285   //
286   // ===========================================================================
287   std::pair< NodeId, NodeId >
_visitLearner_(const IVisitableGraphLearner * visited,NodeId currentNodeId,MultiDimFunctionGraph<double> * rmax,MultiDimFunctionGraph<double> * boolQ)288      AdaptiveRMaxPlaner::_visitLearner_(const IVisitableGraphLearner*    visited,
289                                         NodeId                           currentNodeId,
290                                         MultiDimFunctionGraph< double >* rmax,
291                                         MultiDimFunctionGraph< double >* boolQ) {
292     std::pair< NodeId, NodeId > rep;
293     if (visited->isTerminal(currentNodeId)) {
294       rep.first = rmax->manager()->addTerminalNode(
295          visited->nodeNbObservation(currentNodeId) < _rThreshold_ ? _rmax_ : 0.0);
296       rep.second = boolQ->manager()->addTerminalNode(
297          visited->nodeNbObservation(currentNodeId) < _rThreshold_ ? 0.0 : 1.0);
298       return rep;
299     }
300 
301     NodeId* rmaxsons = static_cast< NodeId* >(
302        SOA_ALLOCATE(sizeof(NodeId) * visited->nodeVar(currentNodeId)->domainSize()));
303     NodeId* bqsons = static_cast< NodeId* >(
304        SOA_ALLOCATE(sizeof(NodeId) * visited->nodeVar(currentNodeId)->domainSize()));
305 
306     for (Idx moda = 0; moda < visited->nodeVar(currentNodeId)->domainSize(); ++moda) {
307       std::pair< NodeId, NodeId > sonp
308          = _visitLearner_(visited, visited->nodeSon(currentNodeId, moda), rmax, boolQ);
309       rmaxsons[moda] = sonp.first;
310       bqsons[moda]   = sonp.second;
311     }
312 
313     rep.first  = rmax->manager()->addInternalNode(visited->nodeVar(currentNodeId), rmaxsons);
314     rep.second = boolQ->manager()->addInternalNode(visited->nodeVar(currentNodeId), bqsons);
315     return rep;
316   }
317 
318   // ===========================================================================
319   //
320   // ===========================================================================
_clearTables_()321   void AdaptiveRMaxPlaner::_clearTables_() {
322     for (auto actionIter = this->fmdp()->beginActions(); actionIter != this->fmdp()->endActions();
323          ++actionIter) {
324       delete _actionsBoolTable_[*actionIter];
325       delete _actionsRMaxTable_[*actionIter];
326     }
327     _actionsRMaxTable_.clear();
328     _actionsBoolTable_.clear();
329   }
330 
331 }   // end of namespace gum
332