1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 2 /* */ 3 /* This file is part of the program and library */ 4 /* SCIP --- Solving Constraint Integer Programs */ 5 /* */ 6 /* Copyright (C) 2002-2021 Konrad-Zuse-Zentrum */ 7 /* fuer Informationstechnik Berlin */ 8 /* */ 9 /* SCIP is distributed under the terms of the ZIB Academic License. */ 10 /* */ 11 /* You should have received a copy of the ZIB Academic License */ 12 /* along with SCIP; see the file COPYING. If not visit scipopt.org. */ 13 /* */ 14 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 15 16 /**@file pub_bandit_ucb.h 17 * @ingroup PublicBanditMethods 18 * @brief public methods for UCB bandit selection 19 * @author Gregor Hendel 20 */ 21 22 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/ 23 24 #ifndef SRC_SCIP_PUB_BANDIT_UCB_H_ 25 #define SRC_SCIP_PUB_BANDIT_UCB_H_ 26 27 #include "scip/def.h" 28 #include "scip/type_bandit.h" 29 #include "scip/type_retcode.h" 30 #include "scip/type_scip.h" 31 32 #ifdef __cplusplus 33 extern "C" { 34 #endif 35 36 37 /**@addtogroup PublicBanditMethods 38 * 39 * ## Upper Confidence Bounds (UCB) 40 * 41 * UCB (Upper confidence bounds) is a deterministic 42 * selection algorithm for the multi-armed bandit problem. 43 * In every iteration, UCB selects the action that maximizes 44 * a tradeoff between its performance in the past 45 * and a variance term. 46 * The influence of the variance (confidence width) can be 47 * controlled by the parameter \f$ \alpha \f$. 48 * 49 * @{ 50 */ 51 52 53 /** create and reset UCB bandit algorithm */ 54 SCIP_EXPORT 55 SCIP_RETCODE SCIPcreateBanditUcb( 56 SCIP* scip, /**< SCIP data structure */ 57 SCIP_BANDIT** ucb, /**< pointer to store bandit algorithm */ 58 SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */ 59 SCIP_Real alpha, /**< parameter to increase confidence width */ 60 int nactions, /**< the positive number of actions for this bandit algorithm */ 61 unsigned int initseed /**< initial random number seed */ 62 ); 63 64 /** returns the upper confidence bound of a selected action */ 65 SCIP_EXPORT 66 SCIP_Real SCIPgetConfidenceBoundUcb( 67 SCIP_BANDIT* ucb, /**< UCB bandit algorithm */ 68 int action /**< index of the queried action */ 69 ); 70 71 /** return start permutation of the UCB bandit algorithm */ 72 SCIP_EXPORT 73 int* SCIPgetStartPermutationUcb( 74 SCIP_BANDIT* ucb /**< UCB bandit algorithm */ 75 ); 76 77 /** @}*/ 78 79 80 #ifdef __cplusplus 81 } 82 #endif 83 84 #endif 85