1 /** 2 * 3 * Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(_at_LIP6) & Christophe GONZALES(_at_AMU) 4 * info_at_agrum_dot_org 5 * 6 * This library is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU Lesser General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public License 17 * along with this library. If not, see <http://www.gnu.org/licenses/>. 18 * 19 */ 20 21 22 /** 23 * @file 24 * @brief Template implementation of AdaptiveRMaxPlaner classes. 25 * 26 * @author Pierre-Henri WUILLEMIN(_at_LIP6) and Jean-Christophe MAGNAN and Christophe 27 * GONZALES(_at_AMU) 28 */ 29 30 // ========================================================================= 31 #include <queue> 32 #include <vector> 33 //#include <algorithm> 34 //#include <utility> 35 // ========================================================================= 36 #include <agrum/tools/core/math/math_utils.h> 37 #include <agrum/tools/core/functors.h> 38 #include <agrum/tools/core/smallobjectallocator/smallObjectAllocator.h> 39 // ========================================================================= 40 #include <agrum/tools/multidim/implementations/multiDimFunctionGraph.h> 41 #include <agrum/tools/multidim/instantiation.h> 42 #include <agrum/tools/multidim/potential.h> 43 // ========================================================================= 44 #include <agrum/FMDP/planning/adaptiveRMaxPlaner.h> 45 // ========================================================================= 46 47 /// For shorter line and hence more comprehensive code purposes only 48 #define RECASTED(x) reinterpret_cast< const MultiDimFunctionGraph< double >* >(x) 49 50 namespace gum { 51 52 /* ************************************************************************************************** 53 * **/ 54 /* ** **/ 55 /* ** Constructors / Destructors **/ 56 /* ** **/ 57 /* ************************************************************************************************** 58 * **/ 59 60 // =========================================================================== 61 // Default constructor 62 // =========================================================================== AdaptiveRMaxPlaner(IOperatorStrategy<double> * opi,double discountFactor,double epsilon,const ILearningStrategy * learner,bool verbose)63 AdaptiveRMaxPlaner::AdaptiveRMaxPlaner(IOperatorStrategy< double >* opi, 64 double discountFactor, 65 double epsilon, 66 const ILearningStrategy* learner, 67 bool verbose) : 68 StructuredPlaner(opi, discountFactor, epsilon, verbose), 69 IDecisionStrategy(), _fmdpLearner_(learner), _initialized_(false) { 70 GUM_CONSTRUCTOR(AdaptiveRMaxPlaner); 71 } 72 73 // =========================================================================== 74 // Default destructor 75 // =========================================================================== ~AdaptiveRMaxPlaner()76 AdaptiveRMaxPlaner::~AdaptiveRMaxPlaner() { 77 GUM_DESTRUCTOR(AdaptiveRMaxPlaner); 78 79 for (HashTableIteratorSafe< Idx, StatesCounter* > scIter = _counterTable_.beginSafe(); 80 scIter != _counterTable_.endSafe(); 81 ++scIter) 82 delete scIter.val(); 83 } 84 85 /* ************************************************************************************************** 86 * **/ 87 /* ** **/ 88 /* ** Planning Methods **/ 89 /* ** **/ 90 /* ************************************************************************************************** 91 * **/ 92 93 // ========================================================================== 94 // Initializes data structure needed for making the planning 95 // ========================================================================== initialize(const FMDP<double> * fmdp)96 void AdaptiveRMaxPlaner::initialize(const FMDP< double >* fmdp) { 97 if (!_initialized_) { 98 StructuredPlaner::initialize(fmdp); 99 IDecisionStrategy::initialize(fmdp); 100 for (auto actionIter = fmdp->beginActions(); actionIter != fmdp->endActions(); ++actionIter) { 101 _counterTable_.insert(*actionIter, new StatesCounter()); 102 _initializedTable_.insert(*actionIter, false); 103 } 104 _initialized_ = true; 105 } 106 } 107 108 // =========================================================================== 109 // Performs a value iteration 110 // =========================================================================== makePlanning(Idx nbStep)111 void AdaptiveRMaxPlaner::makePlanning(Idx nbStep) { 112 _makeRMaxFunctionGraphs_(); 113 114 StructuredPlaner::makePlanning(nbStep); 115 116 _clearTables_(); 117 } 118 119 /* ************************************************************************************************** 120 * **/ 121 /* ** **/ 122 /* ** Value Iteration Methods **/ 123 /* ** **/ 124 /* ************************************************************************************************** 125 * **/ 126 127 // =========================================================================== 128 // Performs a single step of value iteration 129 // =========================================================================== initVFunction_()130 void AdaptiveRMaxPlaner::initVFunction_() { 131 vFunction_->manager()->setRootNode(vFunction_->manager()->addTerminalNode(0.0)); 132 for (auto actionIter = fmdp_->beginActions(); actionIter != fmdp_->endActions(); ++actionIter) 133 vFunction_ = this->operator_->add(vFunction_, RECASTED(this->fmdp_->reward(*actionIter)), 1); 134 } 135 136 // =========================================================================== 137 // Performs a single step of value iteration 138 // =========================================================================== valueIteration_()139 MultiDimFunctionGraph< double >* AdaptiveRMaxPlaner::valueIteration_() { 140 // ***************************************************************************************** 141 // Loop reset 142 MultiDimFunctionGraph< double >* newVFunction = operator_->getFunctionInstance(); 143 newVFunction->copyAndReassign(*vFunction_, fmdp_->mapMainPrime()); 144 145 // ***************************************************************************************** 146 // For each action 147 std::vector< MultiDimFunctionGraph< double >* > qActionsSet; 148 for (auto actionIter = fmdp_->beginActions(); actionIter != fmdp_->endActions(); ++actionIter) { 149 MultiDimFunctionGraph< double >* qAction = evalQaction_(newVFunction, *actionIter); 150 151 // ******************************************************************************************* 152 // Next, we add the reward 153 qAction = addReward_(qAction, *actionIter); 154 155 qAction = this->operator_->maximize( 156 _actionsRMaxTable_[*actionIter], 157 this->operator_->multiply(qAction, _actionsBoolTable_[*actionIter], 1), 158 2); 159 160 qActionsSet.push_back(qAction); 161 } 162 delete newVFunction; 163 164 // ***************************************************************************************** 165 // Next to evaluate main value function, we take maximise over all action 166 // value, ... 167 newVFunction = maximiseQactions_(qActionsSet); 168 169 return newVFunction; 170 } 171 172 /* ************************************************************************************************** 173 * **/ 174 /* ** **/ 175 /* ** Optimal Policy Evaluation Methods **/ 176 /* ** **/ 177 /* ************************************************************************************************** 178 * **/ 179 180 // =========================================================================== 181 // Evals the policy corresponding to the given value function 182 // =========================================================================== evalPolicy_()183 void AdaptiveRMaxPlaner::evalPolicy_() { 184 // ***************************************************************************************** 185 // Loop reset 186 MultiDimFunctionGraph< double >* newVFunction = operator_->getFunctionInstance(); 187 newVFunction->copyAndReassign(*vFunction_, fmdp_->mapMainPrime()); 188 189 std::vector< MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy >* > 190 argMaxQActionsSet; 191 // ***************************************************************************************** 192 // For each action 193 for (auto actionIter = fmdp_->beginActions(); actionIter != fmdp_->endActions(); ++actionIter) { 194 MultiDimFunctionGraph< double >* qAction = this->evalQaction_(newVFunction, *actionIter); 195 196 qAction = this->addReward_(qAction, *actionIter); 197 198 qAction = this->operator_->maximize( 199 _actionsRMaxTable_[*actionIter], 200 this->operator_->multiply(qAction, _actionsBoolTable_[*actionIter], 1), 201 2); 202 203 argMaxQActionsSet.push_back(makeArgMax_(qAction, *actionIter)); 204 } 205 delete newVFunction; 206 207 // ***************************************************************************************** 208 // Next to evaluate main value function, we take maximise over all action 209 // value, ... 210 MultiDimFunctionGraph< ArgMaxSet< double, Idx >, SetTerminalNodePolicy >* argMaxVFunction 211 = argmaximiseQactions_(argMaxQActionsSet); 212 213 // ***************************************************************************************** 214 // Next to evaluate main value function, we take maximise over all action 215 // value, ... 216 extractOptimalPolicy_(argMaxVFunction); 217 } 218 219 // =========================================================================== 220 // 221 // =========================================================================== _makeRMaxFunctionGraphs_()222 void AdaptiveRMaxPlaner::_makeRMaxFunctionGraphs_() { 223 _rThreshold_ = _fmdpLearner_->modaMax() * 5 > 30 ? _fmdpLearner_->modaMax() * 5 : 30; 224 _rmax_ = _fmdpLearner_->rMax() / (1.0 - this->discountFactor_); 225 226 for (auto actionIter = this->fmdp()->beginActions(); actionIter != this->fmdp()->endActions(); 227 ++actionIter) { 228 std::vector< MultiDimFunctionGraph< double >* > rmaxs; 229 std::vector< MultiDimFunctionGraph< double >* > boolQs; 230 231 for (auto varIter = this->fmdp()->beginVariables(); varIter != this->fmdp()->endVariables(); 232 ++varIter) { 233 const IVisitableGraphLearner* visited = _counterTable_[*actionIter]; 234 235 MultiDimFunctionGraph< double >* varRMax = this->operator_->getFunctionInstance(); 236 MultiDimFunctionGraph< double >* varBoolQ = this->operator_->getFunctionInstance(); 237 238 visited->insertSetOfVars(varRMax); 239 visited->insertSetOfVars(varBoolQ); 240 241 std::pair< NodeId, NodeId > rooty 242 = _visitLearner_(visited, visited->root(), varRMax, varBoolQ); 243 varRMax->manager()->setRootNode(rooty.first); 244 varRMax->manager()->reduce(); 245 varRMax->manager()->clean(); 246 varBoolQ->manager()->setRootNode(rooty.second); 247 varBoolQ->manager()->reduce(); 248 varBoolQ->manager()->clean(); 249 250 rmaxs.push_back(varRMax); 251 boolQs.push_back(varBoolQ); 252 253 // std::cout << RECASTED(this->fmdp_->transition(*actionIter, 254 // *varIter))->toDot() << std::endl; 255 // for( auto varIter2 = 256 // RECASTED(this->fmdp_->transition(*actionIter, 257 // *varIter))->variablesSequence().beginSafe(); varIter2 != 258 // RECASTED(this->fmdp_->transition(*actionIter, 259 // *varIter))->variablesSequence().endSafe(); ++varIter2 ) 260 // std::cout << (*varIter2)->name() << " | "; 261 // std::cout << std::endl; 262 263 // std::cout << varRMax->toDot() << std::endl; 264 // for( auto varIter = 265 // varRMax->variablesSequence().beginSafe(); varIter != 266 // varRMax->variablesSequence().endSafe(); ++varIter ) 267 // std::cout << (*varIter)->name() << " | "; 268 // std::cout << std::endl; 269 270 // std::cout << varBoolQ->toDot() << std::endl; 271 // for( auto varIter = 272 // varBoolQ->variablesSequence().beginSafe(); varIter != 273 // varBoolQ->variablesSequence().endSafe(); ++varIter ) 274 // std::cout << (*varIter)->name() << " | "; 275 // std::cout << std::endl; 276 } 277 278 // std::cout << "Maximising" << std::endl; 279 _actionsRMaxTable_.insert(*actionIter, this->maximiseQactions_(rmaxs)); 280 _actionsBoolTable_.insert(*actionIter, this->minimiseFunctions_(boolQs)); 281 } 282 } 283 284 // =========================================================================== 285 // 286 // =========================================================================== 287 std::pair< NodeId, NodeId > _visitLearner_(const IVisitableGraphLearner * visited,NodeId currentNodeId,MultiDimFunctionGraph<double> * rmax,MultiDimFunctionGraph<double> * boolQ)288 AdaptiveRMaxPlaner::_visitLearner_(const IVisitableGraphLearner* visited, 289 NodeId currentNodeId, 290 MultiDimFunctionGraph< double >* rmax, 291 MultiDimFunctionGraph< double >* boolQ) { 292 std::pair< NodeId, NodeId > rep; 293 if (visited->isTerminal(currentNodeId)) { 294 rep.first = rmax->manager()->addTerminalNode( 295 visited->nodeNbObservation(currentNodeId) < _rThreshold_ ? _rmax_ : 0.0); 296 rep.second = boolQ->manager()->addTerminalNode( 297 visited->nodeNbObservation(currentNodeId) < _rThreshold_ ? 0.0 : 1.0); 298 return rep; 299 } 300 301 NodeId* rmaxsons = static_cast< NodeId* >( 302 SOA_ALLOCATE(sizeof(NodeId) * visited->nodeVar(currentNodeId)->domainSize())); 303 NodeId* bqsons = static_cast< NodeId* >( 304 SOA_ALLOCATE(sizeof(NodeId) * visited->nodeVar(currentNodeId)->domainSize())); 305 306 for (Idx moda = 0; moda < visited->nodeVar(currentNodeId)->domainSize(); ++moda) { 307 std::pair< NodeId, NodeId > sonp 308 = _visitLearner_(visited, visited->nodeSon(currentNodeId, moda), rmax, boolQ); 309 rmaxsons[moda] = sonp.first; 310 bqsons[moda] = sonp.second; 311 } 312 313 rep.first = rmax->manager()->addInternalNode(visited->nodeVar(currentNodeId), rmaxsons); 314 rep.second = boolQ->manager()->addInternalNode(visited->nodeVar(currentNodeId), bqsons); 315 return rep; 316 } 317 318 // =========================================================================== 319 // 320 // =========================================================================== _clearTables_()321 void AdaptiveRMaxPlaner::_clearTables_() { 322 for (auto actionIter = this->fmdp()->beginActions(); actionIter != this->fmdp()->endActions(); 323 ++actionIter) { 324 delete _actionsBoolTable_[*actionIter]; 325 delete _actionsRMaxTable_[*actionIter]; 326 } 327 _actionsRMaxTable_.clear(); 328 _actionsBoolTable_.clear(); 329 } 330 331 } // end of namespace gum 332