1 /*
2  * First Available Server load balancing algorithm.
3  *
4  * This file implements an algorithm which emerged during a discussion with
5  * Steen Larsen, initially inspired from Anshul Gandhi et.al.'s work now
6  * described as "packing" in section 3.5:
7  *
8  *    http://reports-archive.adm.cs.cmu.edu/anon/2012/CMU-CS-12-109.pdf
9  *
10  * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public License
14  * as published by the Free Software Foundation; either version
15  * 2 of the License, or (at your option) any later version.
16  *
17  */
18 
19 #include <common/compat.h>
20 #include <common/config.h>
21 #include <common/debug.h>
22 #include <eb32tree.h>
23 
24 #include <types/global.h>
25 #include <types/server.h>
26 
27 #include <proto/backend.h>
28 #include <proto/queue.h>
29 
30 
31 /* Remove a server from a tree. It must have previously been dequeued. This
32  * function is meant to be called when a server is going down or has its
33  * weight disabled.
34  */
fas_remove_from_tree(struct server * s)35 static inline void fas_remove_from_tree(struct server *s)
36 {
37 	s->lb_tree = NULL;
38 }
39 
40 /* simply removes a server from a tree */
fas_dequeue_srv(struct server * s)41 static inline void fas_dequeue_srv(struct server *s)
42 {
43 	eb32_delete(&s->lb_node);
44 }
45 
46 /* Queue a server in its associated tree, assuming the weight is >0.
47  * Servers are sorted by unique ID so that we send all connections to the first
48  * available server in declaration order (or ID order) until its maxconn is
49  * reached. It is important to understand that the server weight is not used
50  * here.
51  */
fas_queue_srv(struct server * s)52 static inline void fas_queue_srv(struct server *s)
53 {
54 	s->lb_node.key = s->puid;
55 	eb32_insert(s->lb_tree, &s->lb_node);
56 }
57 
58 /* Re-position the server in the FS tree after it has been assigned one
59  * connection or after it has released one. Note that it is possible that
60  * the server has been moved out of the tree due to failed health-checks.
61  */
fas_srv_reposition(struct server * s)62 static void fas_srv_reposition(struct server *s)
63 {
64 	if (!s->lb_tree)
65 		return;
66 	fas_dequeue_srv(s);
67 	fas_queue_srv(s);
68 }
69 
70 /* This function updates the server trees according to server <srv>'s new
71  * state. It should be called when server <srv>'s status changes to down.
72  * It is not important whether the server was already down or not. It is not
73  * important either that the new state is completely down (the caller may not
74  * know all the variables of a server's state).
75  */
fas_set_server_status_down(struct server * srv)76 static void fas_set_server_status_down(struct server *srv)
77 {
78 	struct proxy *p = srv->proxy;
79 
80 	if (!srv_lb_status_changed(srv))
81 		return;
82 
83 	if (srv_is_usable(srv))
84 		goto out_update_state;
85 
86 	if (!srv_was_usable(srv))
87 		/* server was already down */
88 		goto out_update_backend;
89 
90 	if (srv->flags & SRV_F_BACKUP) {
91 		p->lbprm.tot_wbck -= srv->prev_eweight;
92 		p->srv_bck--;
93 
94 		if (srv == p->lbprm.fbck) {
95 			/* we lost the first backup server in a single-backup
96 			 * configuration, we must search another one.
97 			 */
98 			struct server *srv2 = p->lbprm.fbck;
99 			do {
100 				srv2 = srv2->next;
101 			} while (srv2 &&
102 				 !((srv2->flags & SRV_F_BACKUP) &&
103 				   srv_is_usable(srv2)));
104 			p->lbprm.fbck = srv2;
105 		}
106 	} else {
107 		p->lbprm.tot_wact -= srv->prev_eweight;
108 		p->srv_act--;
109 	}
110 
111 	fas_dequeue_srv(srv);
112 	fas_remove_from_tree(srv);
113 
114 out_update_backend:
115 	/* check/update tot_used, tot_weight */
116 	update_backend_weight(p);
117  out_update_state:
118 	srv_lb_commit_status(srv);
119 }
120 
121 /* This function updates the server trees according to server <srv>'s new
122  * state. It should be called when server <srv>'s status changes to up.
123  * It is not important whether the server was already down or not. It is not
124  * important either that the new state is completely UP (the caller may not
125  * know all the variables of a server's state). This function will not change
126  * the weight of a server which was already up.
127  */
fas_set_server_status_up(struct server * srv)128 static void fas_set_server_status_up(struct server *srv)
129 {
130 	struct proxy *p = srv->proxy;
131 
132 	if (!srv_lb_status_changed(srv))
133 		return;
134 
135 	if (!srv_is_usable(srv))
136 		goto out_update_state;
137 
138 	if (srv_was_usable(srv))
139 		/* server was already up */
140 		goto out_update_backend;
141 
142 	if (srv->flags & SRV_F_BACKUP) {
143 		srv->lb_tree = &p->lbprm.fas.bck;
144 		p->lbprm.tot_wbck += srv->eweight;
145 		p->srv_bck++;
146 
147 		if (!(p->options & PR_O_USE_ALL_BK)) {
148 			if (!p->lbprm.fbck) {
149 				/* there was no backup server anymore */
150 				p->lbprm.fbck = srv;
151 			} else {
152 				/* we may have restored a backup server prior to fbck,
153 				 * in which case it should replace it.
154 				 */
155 				struct server *srv2 = srv;
156 				do {
157 					srv2 = srv2->next;
158 				} while (srv2 && (srv2 != p->lbprm.fbck));
159 				if (srv2)
160 					p->lbprm.fbck = srv;
161 			}
162 		}
163 	} else {
164 		srv->lb_tree = &p->lbprm.fas.act;
165 		p->lbprm.tot_wact += srv->eweight;
166 		p->srv_act++;
167 	}
168 
169 	/* note that eweight cannot be 0 here */
170 	fas_queue_srv(srv);
171 
172  out_update_backend:
173 	/* check/update tot_used, tot_weight */
174 	update_backend_weight(p);
175  out_update_state:
176 	srv_lb_commit_status(srv);
177 }
178 
179 /* This function must be called after an update to server <srv>'s effective
180  * weight. It may be called after a state change too.
181  */
fas_update_server_weight(struct server * srv)182 static void fas_update_server_weight(struct server *srv)
183 {
184 	int old_state, new_state;
185 	struct proxy *p = srv->proxy;
186 
187 	if (!srv_lb_status_changed(srv))
188 		return;
189 
190 	/* If changing the server's weight changes its state, we simply apply
191 	 * the procedures we already have for status change. If the state
192 	 * remains down, the server is not in any tree, so it's as easy as
193 	 * updating its values. If the state remains up with different weights,
194 	 * there are some computations to perform to find a new place and
195 	 * possibly a new tree for this server.
196 	 */
197 
198 	old_state = srv_was_usable(srv);
199 	new_state = srv_is_usable(srv);
200 
201 	if (!old_state && !new_state) {
202 		srv_lb_commit_status(srv);
203 		return;
204 	}
205 	else if (!old_state && new_state) {
206 		fas_set_server_status_up(srv);
207 		return;
208 	}
209 	else if (old_state && !new_state) {
210 		fas_set_server_status_down(srv);
211 		return;
212 	}
213 
214 	if (srv->lb_tree)
215 		fas_dequeue_srv(srv);
216 
217 	if (srv->flags & SRV_F_BACKUP) {
218 		p->lbprm.tot_wbck += srv->eweight - srv->prev_eweight;
219 		srv->lb_tree = &p->lbprm.fas.bck;
220 	} else {
221 		p->lbprm.tot_wact += srv->eweight - srv->prev_eweight;
222 		srv->lb_tree = &p->lbprm.fas.act;
223 	}
224 
225 	fas_queue_srv(srv);
226 
227 	update_backend_weight(p);
228 	srv_lb_commit_status(srv);
229 }
230 
231 /* This function is responsible for building the trees in case of fast
232  * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to
233  * uweight ratio. Both active and backup groups are initialized.
234  */
fas_init_server_tree(struct proxy * p)235 void fas_init_server_tree(struct proxy *p)
236 {
237 	struct server *srv;
238 	struct eb_root init_head = EB_ROOT;
239 
240 	p->lbprm.set_server_status_up   = fas_set_server_status_up;
241 	p->lbprm.set_server_status_down = fas_set_server_status_down;
242 	p->lbprm.update_server_eweight  = fas_update_server_weight;
243 	p->lbprm.server_take_conn = fas_srv_reposition;
244 	p->lbprm.server_drop_conn = fas_srv_reposition;
245 
246 	p->lbprm.wdiv = BE_WEIGHT_SCALE;
247 	for (srv = p->srv; srv; srv = srv->next) {
248 		srv->eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
249 		srv_lb_commit_status(srv);
250 	}
251 
252 	recount_servers(p);
253 	update_backend_weight(p);
254 
255 	p->lbprm.fas.act = init_head;
256 	p->lbprm.fas.bck = init_head;
257 
258 	/* queue active and backup servers in two distinct groups */
259 	for (srv = p->srv; srv; srv = srv->next) {
260 		if (!srv_is_usable(srv))
261 			continue;
262 		srv->lb_tree = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fas.bck : &p->lbprm.fas.act;
263 		fas_queue_srv(srv);
264 	}
265 }
266 
267 /* Return next server from the FS tree in backend <p>. If the tree is empty,
268  * return NULL. Saturated servers are skipped.
269  */
fas_get_next_server(struct proxy * p,struct server * srvtoavoid)270 struct server *fas_get_next_server(struct proxy *p, struct server *srvtoavoid)
271 {
272 	struct server *srv, *avoided;
273 	struct eb32_node *node;
274 
275 	srv = avoided = NULL;
276 
277 	if (p->srv_act)
278 		node = eb32_first(&p->lbprm.fas.act);
279 	else if (p->lbprm.fbck)
280 		return p->lbprm.fbck;
281 	else if (p->srv_bck)
282 		node = eb32_first(&p->lbprm.fas.bck);
283 	else
284 		return NULL;
285 
286 	while (node) {
287 		/* OK, we have a server. However, it may be saturated, in which
288 		 * case we don't want to reconsider it for now, so we'll simply
289 		 * skip it. Same if it's the server we try to avoid, in which
290 		 * case we simply remember it for later use if needed.
291 		 */
292 		struct server *s;
293 
294 		s = eb32_entry(node, struct server, lb_node);
295 		if (!s->maxconn || (!s->nbpend && s->served < srv_dynamic_maxconn(s))) {
296 			if (s != srvtoavoid) {
297 				srv = s;
298 				break;
299 			}
300 			avoided = s;
301 		}
302 		node = eb32_next(node);
303 	}
304 
305 	if (!srv)
306 		srv = avoided;
307 
308 	return srv;
309 }
310 
311 
312 /*
313  * Local variables:
314  *  c-indent-level: 8
315  *  c-basic-offset: 8
316  * End:
317  */
318