1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /*                                                                           */
3 /*                  This file is part of the program and library             */
4 /*         SCIP --- Solving Constraint Integer Programs                      */
5 /*                                                                           */
6 /*    Copyright (C) 2002-2021 Konrad-Zuse-Zentrum                            */
7 /*                            fuer Informationstechnik Berlin                */
8 /*                                                                           */
9 /*  SCIP is distributed under the terms of the ZIB Academic License.         */
10 /*                                                                           */
11 /*  You should have received a copy of the ZIB Academic License              */
12 /*  along with SCIP; see the file COPYING. If not visit scipopt.org.         */
13 /*                                                                           */
14 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
15 
16 /**@file   pub_bandit_ucb.h
17  * @ingroup PublicBanditMethods
18  * @brief  public methods for UCB bandit selection
19  * @author Gregor Hendel
20  */
21 
22 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
23 
24 #ifndef SRC_SCIP_PUB_BANDIT_UCB_H_
25 #define SRC_SCIP_PUB_BANDIT_UCB_H_
26 
27 #include "scip/def.h"
28 #include "scip/type_bandit.h"
29 #include "scip/type_retcode.h"
30 #include "scip/type_scip.h"
31 
32 #ifdef __cplusplus
33 extern "C" {
34 #endif
35 
36 
37 /**@addtogroup PublicBanditMethods
38  *
39  * ## Upper Confidence Bounds (UCB)
40  *
41  * UCB (Upper confidence bounds) is a deterministic
42  * selection algorithm for the multi-armed bandit problem.
43  * In every iteration, UCB selects the action that maximizes
44  * a tradeoff between its performance in the past
45  * and a variance term.
46  * The influence of the variance (confidence width) can be
47  * controlled by the parameter \f$ \alpha \f$.
48  *
49  * @{
50  */
51 
52 
53 /** creates and resets a UCB bandit algorithm; the new bandit algorithm is stored in the pointer ucb */
54 SCIP_EXPORT
55 SCIP_RETCODE SCIPcreateBanditUcb(
56    SCIP*                 scip,               /**< SCIP data structure */
57    SCIP_BANDIT**         ucb,                /**< pointer to store the created bandit algorithm */
58    SCIP_Real*            priorities,         /**< nonnegative priorities, one for each action, or NULL if not needed */
59    SCIP_Real             alpha,              /**< parameter \f$ \alpha \f$ to increase the confidence width */
60    int                   nactions,           /**< the positive number of actions for this bandit algorithm */
61    unsigned int          initseed            /**< initial random number seed */
62    );
63 
64 /** returns the upper confidence bound of the action with the given index */
65 SCIP_EXPORT
66 SCIP_Real SCIPgetConfidenceBoundUcb(
67    SCIP_BANDIT*          ucb,                /**< UCB bandit algorithm */
68    int                   action              /**< index of the action whose confidence bound is queried */
69    );
70 
71 /** returns the start permutation of the UCB bandit algorithm as a pointer to int */
72 SCIP_EXPORT
73 int* SCIPgetStartPermutationUcb(
74    SCIP_BANDIT*          ucb                 /**< UCB bandit algorithm whose start permutation is queried */
75    );
76 
77 /** @}*/
78 
79 
80 #ifdef __cplusplus
81 }
82 #endif
83 
84 #endif
85