/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*                                                                           */
/*                  This file is part of the program and library             */
/*         SCIP --- Solving Constraint Integer Programs                      */
/*                                                                           */
/*    Copyright (C) 2002-2021 Konrad-Zuse-Zentrum                            */
/*                            fuer Informationstechnik Berlin                */
/*                                                                           */
/*  SCIP is distributed under the terms of the ZIB Academic License.         */
/*                                                                           */
/*  You should have received a copy of the ZIB Academic License              */
/*  along with SCIP; see the file COPYING. If not visit scipopt.org.         */
/*                                                                           */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/**@file   pub_bandit_epsgreedy.h
 * @ingroup PublicBanditMethods
 * @brief  public methods for the epsilon greedy bandit selector
 * @author Gregor Hendel
 */

/*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/

#ifndef SRC_SCIP_PUB_BANDIT_EPSGREEDY_H_
#define SRC_SCIP_PUB_BANDIT_EPSGREEDY_H_


#include "scip/def.h"
#include "scip/type_scip.h"
#include "scip/type_bandit.h"

#ifdef __cplusplus
extern "C" {
#endif

/**@addtogroup PublicBanditMethods
 *
 * ## Epsilon greedy
 *
 * Epsilon greedy is a randomized algorithm for the multi-armed bandit problem.
 *
 * In every iteration, it either
 * selects an action uniformly at random with
 * probability \f$ \varepsilon_t\f$
 * or it greedily exploits the best action seen so far with
 * probability \f$ 1 - \varepsilon_t \f$.
 * In this implementation, \f$ \varepsilon_t \f$ decreases over time
 * (number of selections performed), controlled by the epsilon parameter.
 *
 * @{
 */

/** creates and resets an epsilon greedy bandit algorithm
 *
 *  The new bandit is stored in the location pointed to by \p epsgreedy.
 *  The arguments \p decayfactor and \p avglim only matter if \p preferrecent is enabled.
 *
 *  @return a SCIP_RETCODE reporting the outcome of the creation
 *
 *  @note NOTE(review): presumably \p priorities, if given, must hold \p nactions entries; confirm against
 *        the implementation.
 */
SCIP_EXPORT
SCIP_RETCODE SCIPcreateBanditEpsgreedy(
   SCIP*                 scip,               /**< SCIP data structure */
   SCIP_BANDIT**         epsgreedy,          /**< pointer to store the epsilon greedy bandit algorithm */
   SCIP_Real*            priorities,         /**< nonnegative priorities for each action, or NULL if not needed */
   SCIP_Real             eps,                /**< epsilon parameter; increases the probability of exploration, i.e.,
                                              *   of selecting uniformly at random between all actions
                                              */
   SCIP_Bool             preferrecent,       /**< should the weights be updated in an exponentially decaying way? */
   SCIP_Real             decayfactor,        /**< the factor to reduce the weight of older observations if exponential
                                              *   decay is enabled
                                              */
   int                   avglim,             /**< nonnegative limit on observation number before the exponential decay
                                              *   starts, only relevant if exponential decay is enabled
                                              */
   int                   nactions,           /**< the number of possible actions */
   unsigned int          initseed            /**< initial seed for random number generation */
   );

/** gets the weights array of an epsilon greedy bandit algorithm
 *
 *  @return pointer to the weights array of \p epsgreedy
 *
 *  @note NOTE(review): presumably the array holds one weight per action and stays owned by the bandit;
 *        confirm against the implementation before modifying or freeing it.
 */
SCIP_EXPORT
SCIP_Real* SCIPgetWeightsEpsgreedy(
   SCIP_BANDIT*          epsgreedy           /**< epsilon greedy bandit algorithm */
   );

/** sets the epsilon parameter of an epsilon greedy bandit algorithm
 *
 *  The epsilon parameter controls the exploration probability \f$ \varepsilon_t \f$ described in the
 *  group documentation.
 */
SCIP_EXPORT
void SCIPsetEpsilonEpsgreedy(
   SCIP_BANDIT*          epsgreedy,          /**< epsilon greedy bandit algorithm */
   SCIP_Real             eps                 /**< epsilon parameter; increases the probability of exploration, i.e.,
                                              *   of selecting uniformly at random between all actions
                                              */
   );

/** @} */



#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif /* SRC_SCIP_PUB_BANDIT_EPSGREEDY_H_ */