1 /*
2  * Copyright (C) 2009-2015 Red Hat, Inc.
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library.  If not, see
16  * <http://www.gnu.org/licenses/>.
17  */
18 
19 #include <config.h>
20 #include <unistd.h>
21 
22 #include "virnetdevbandwidth.h"
23 #include "vircommand.h"
24 #include "viralloc.h"
25 #include "virerror.h"
26 #include "virlog.h"
27 #include "virstring.h"
28 #include "virutil.h"
29 
30 #define VIR_FROM_THIS VIR_FROM_NONE
31 
32 VIR_LOG_INIT("util.netdevbandwidth");
33 
34 void
virNetDevBandwidthFree(virNetDevBandwidth * def)35 virNetDevBandwidthFree(virNetDevBandwidth *def)
36 {
37     if (!def)
38         return;
39 
40     g_free(def->in);
41     g_free(def->out);
42     g_free(def);
43 }
44 
45 static void
virNetDevBandwidthCmdAddOptimalQuantum(virCommand * cmd,const virNetDevBandwidthRate * rate)46 virNetDevBandwidthCmdAddOptimalQuantum(virCommand *cmd,
47                                        const virNetDevBandwidthRate *rate)
48 {
49     const unsigned long long mtu = 1500;
50     unsigned long long r2q;
51 
52     /* When two or more classes compete for unused bandwidth they are each
53      * given some number of bytes before serving other competing class. This
54      * number is called quantum. It's advised in HTB docs that the number
55      * should be equal to MTU. The class quantum is computed from its rate
56      * divided by global r2q parameter. However, if rate is too small the
57      * default value will not suffice and thus we must provide our own value.
58      * */
59 
60     r2q = rate->average * 1024 / 8 / mtu;
61     if (!r2q)
62         r2q = 1;
63 
64     virCommandAddArg(cmd, "quantum");
65     virCommandAddArgFormat(cmd, "%llu", r2q);
66 }
67 
68 /**
69  * virNetDevBandwidthManipulateFilter:
70  * @ifname: interface to operate on
71  * @ifmac_ptr: MAC of the interface to create filter over
72  * @id: filter ID
73  * @class_id: where to place traffic
74  * @remove_old: whether to remove the filter
75  * @create_new: whether to create the filter
76  *
77  * TC filters are as crucial for traffic shaping as QDiscs. While
78  * QDiscs act like black boxes deciding which packets should be
79  * held up and which should be sent immediately, it's the filter
80  * that places a packet into the box. So, we may end up
81  * constructing a set of filters on a single device (e.g. a
82  * bridge) and filter the traffic into QDiscs based on the
83  * originating vNET device.
84  *
85  * Long story short, @ifname is the interface where the filter
86  * should be created. The @ifmac_ptr is the MAC address for which
87  * the filter should be created (usually different to the MAC
88  * address of @ifname). Then, like everything - even filters have
89  * an @id which should be unique (per @ifname). And @class_id
90  * tells into which QDisc should filter place the traffic.
91  *
92  * This function can be used for both, removing stale filter
93  * (@remove_old set to true) and creating new one (@create_new
94  * set to true). Both at once for the same price!
95  *
96  * Returns: 0 on success,
97  *         -1 otherwise (with error reported).
98  */
99 static int ATTRIBUTE_NONNULL(1)
virNetDevBandwidthManipulateFilter(const char * ifname,const virMacAddr * ifmac_ptr,unsigned int id,const char * class_id,bool remove_old,bool create_new)100 virNetDevBandwidthManipulateFilter(const char *ifname,
101                                    const virMacAddr *ifmac_ptr,
102                                    unsigned int id,
103                                    const char *class_id,
104                                    bool remove_old,
105                                    bool create_new)
106 {
107     int ret = -1;
108     char *filter_id = NULL;
109     virCommand *cmd = NULL;
110     unsigned char ifmac[VIR_MAC_BUFLEN];
111     char *mac[2] = {NULL, NULL};
112 
113     if (!(remove_old || create_new)) {
114         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
115                        _("filter creation API error"));
116         goto cleanup;
117     }
118 
119     /* u32 filters must have 800:: prefix. Don't ask. */
120     filter_id = g_strdup_printf("800::%u", id);
121 
122     if (remove_old) {
123         int cmd_ret = 0;
124 
125         cmd = virCommandNew(TC);
126         virCommandAddArgList(cmd, "filter", "del", "dev", ifname,
127                              "prio", "2", "handle",  filter_id, "u32", NULL);
128 
129         if (virCommandRun(cmd, &cmd_ret) < 0)
130             goto cleanup;
131 
132     }
133 
134     if (create_new) {
135         virMacAddrGetRaw(ifmac_ptr, ifmac);
136 
137         mac[0] = g_strdup_printf("0x%02x%02x%02x%02x", ifmac[2],
138                                  ifmac[3], ifmac[4], ifmac[5]);
139         mac[1] = g_strdup_printf("0x%02x%02x", ifmac[0], ifmac[1]);
140 
141         virCommandFree(cmd);
142         cmd = virCommandNew(TC);
143         /* Okay, this not nice. But since libvirt does not necessarily track
144          * interface IP address(es), and tc fw filter simply refuse to use
145          * ebtables marks, we need to use u32 selector to match MAC address.
146          * If libvirt will ever know something, remove this FIXME
147          */
148         virCommandAddArgList(cmd, "filter", "add", "dev", ifname, "protocol", "ip",
149                              "prio", "2", "handle", filter_id, "u32",
150                              "match", "u16", "0x0800", "0xffff", "at", "-2",
151                              "match", "u32", mac[0], "0xffffffff", "at", "-12",
152                              "match", "u16", mac[1], "0xffff", "at", "-14",
153                              "flowid", class_id, NULL);
154 
155         if (virCommandRun(cmd, NULL) < 0)
156             goto cleanup;
157     }
158 
159     ret = 0;
160  cleanup:
161     VIR_FREE(mac[1]);
162     VIR_FREE(mac[0]);
163     VIR_FREE(filter_id);
164     virCommandFree(cmd);
165     return ret;
166 }
167 
168 
169 /**
170  * virNetDevBandwidthSet:
171  * @ifname: on which interface
172  * @bandwidth: rates to set (may be NULL)
173  * @hierarchical_class: whether to create hierarchical class
174  * @swapped: true if IN/OUT should be set contrariwise
175  *
176  * This function enables QoS on specified interface
177  * and set given traffic limits for both, incoming
178  * and outgoing traffic. Any previous setting get
179  * overwritten. If @hierarchical_class is TRUE, create
180  * hierarchical class. It is used to guarantee minimal
181  * throughput ('floor' attribute in NIC).
182  *
183  * If @swapped is set, the IN part of @bandwidth is set on
184  * @ifname's TX, and vice versa. If it is not set, IN is set on
185  * RX and OUT on TX. This is because for some types of interfaces
186  * domain and the host live on the same side of the interface (so
187  * domain's RX/TX is host's RX/TX), and for some it's swapped
188  * (domain's RX/TX is hosts's TX/RX).
189  *
190  * Return 0 on success, -1 otherwise.
191  */
192 int
virNetDevBandwidthSet(const char * ifname,const virNetDevBandwidth * bandwidth,bool hierarchical_class,bool swapped)193 virNetDevBandwidthSet(const char *ifname,
194                       const virNetDevBandwidth *bandwidth,
195                       bool hierarchical_class,
196                       bool swapped)
197 {
198     int ret = -1;
199     virNetDevBandwidthRate *rx = NULL; /* From domain POV */
200     virNetDevBandwidthRate *tx = NULL; /* From domain POV */
201     virCommand *cmd = NULL;
202     char *average = NULL;
203     char *peak = NULL;
204     char *burst = NULL;
205 
206     if (!bandwidth) {
207         /* nothing to be enabled */
208         ret = 0;
209         goto cleanup;
210     }
211 
212     if (geteuid() != 0) {
213         virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
214                        _("Network bandwidth tuning is not available"
215                          " in session mode"));
216         return -1;
217     }
218 
219     if (!ifname) {
220         virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
221                        _("Unable to set bandwidth for interface because "
222                          "device name is unknown"));
223         return -1;
224     }
225 
226     if (swapped) {
227         rx = bandwidth->out;
228         tx = bandwidth->in;
229     } else {
230         rx = bandwidth->in;
231         tx = bandwidth->out;
232     }
233 
234     virNetDevBandwidthClear(ifname);
235 
236     if (tx && tx->average) {
237         average = g_strdup_printf("%llukbps", tx->average);
238         if (tx->peak)
239             peak = g_strdup_printf("%llukbps", tx->peak);
240         if (tx->burst)
241             burst = g_strdup_printf("%llukb", tx->burst);
242 
243         cmd = virCommandNew(TC);
244         virCommandAddArgList(cmd, "qdisc", "add", "dev", ifname, "root",
245                              "handle", "1:", "htb", "default",
246                              hierarchical_class ? "2" : "1", NULL);
247         if (virCommandRun(cmd, NULL) < 0)
248             goto cleanup;
249 
250         /* If we are creating a hierarchical class, all non guaranteed traffic
251          * goes to the 1:2 class which will adjust 'rate' dynamically as NICs
252          * with guaranteed throughput are plugged and unplugged. Class 1:1
253          * exists so we don't exceed the maximum limit for the network. For each
254          * NIC with guaranteed throughput a separate classid will be created.
255          * NB '1:' is just a shorter notation of '1:0'.
256          *
257          * To get a picture how this works:
258          *
259          * +-----+     +---------+     +-----------+      +-----------+     +-----+
260          * |     |     |  qdisc  |     | class 1:1 |      | class 1:2 |     |     |
261          * | NIC |     | def 1:2 |     |   rate    |      |   rate    |     | sfq |
262          * |     | --> |         | --> |   peak    | -+-> |   peak    | --> |     |
263          * +-----+     +---------+     +-----------+  |   +-----------+     +-----+
264          *                                            |
265          *                                            |   +-----------+     +-----+
266          *                                            |   | class 1:3 |     |     |
267          *                                            |   |   rate    |     | sfq |
268          *                                            +-> |   peak    | --> |     |
269          *                                            |   +-----------+     +-----+
270          *                                           ...
271          *                                            |   +-----------+     +-----+
272          *                                            |   | class 1:n |     |     |
273          *                                            |   |   rate    |     | sfq |
274          *                                            +-> |   peak    | --> |     |
275          *                                                +-----------+     +-----+
276          *
277          * After the routing decision, when is it clear a packet is to be sent
278          * via a particular NIC, it is sent to the root qdisc (queuing
279          * discipline). In this case HTB (Hierarchical Token Bucket). It has
280          * only one direct child class (with id 1:1) which shapes the overall
281          * rate that is sent through the NIC.  This class has at least one child
282          * (1:2) which is meant for all non-privileged (non guaranteed) traffic
283          * from all domains. Then, for each interface with guaranteed
284          * throughput, a separate class (1:n) is created. Imagine a class is a
285          * box. Whenever a packet ends up in a class it is stored in this box
286          * until the kernel sends it, then it is removed from box. Packets are
287          * placed into boxes based on rules (filters) - e.g. depending on
288          * destination IP/MAC address. If there is no rule to be applied, the
289          * root qdisc has a default where such packets go (1:2 in this case).
290          * Packets come in over and over again and boxes get filled more and
291          * more. Imagine that kernel sends packets just once a second. So it
292          * starts to traverse through this tree. It starts with the root qdisc
293          * and through 1:1 it gets to 1:2. It sends packets up to 1:2's 'rate'.
294          * Then it moves to 1:3 and again sends packets up to 1:3's 'rate'.  The
295          * whole process is repeated until 1:n is processed. So now we have
296          * ensured each class its guaranteed bandwidth. If the sum of sent data
297          * doesn't exceed the 'rate' in 1:1 class, we can go further and send
298          * more packets. The rest of available bandwidth is distributed to the
299          * 1:2,1:3...1:n classes by ratio of their 'rate'. As soon as the root
300          * 'rate' limit is reached or there are no more packets to send, we stop
301          * sending and wait another second. Each class has an SFQ qdisc which
302          * shuffles packets in boxes stochastically, so one sender cannot
303          * starve others.
304          *
305          * Therefore, whenever we want to plug in a new guaranteed interface, we
306          * need to create a new class and adjust the 'rate' of the 1:2 class.
307          * When unplugging we do the exact opposite - remove the associated
308          * class, and adjust the 'rate'.
309          *
310          * This description is rather long, but it is still a good idea to read
311          * it before you dig into the code.
312          */
313         if (hierarchical_class) {
314             virCommandFree(cmd);
315             cmd = virCommandNew(TC);
316             virCommandAddArgList(cmd, "class", "add", "dev", ifname, "parent",
317                                  "1:", "classid", "1:1", "htb", "rate", average,
318                                  "ceil", peak ? peak : average, NULL);
319             virNetDevBandwidthCmdAddOptimalQuantum(cmd, tx);
320             if (virCommandRun(cmd, NULL) < 0)
321                 goto cleanup;
322         }
323         virCommandFree(cmd);
324         cmd = virCommandNew(TC);
325         virCommandAddArgList(cmd, "class", "add", "dev", ifname, "parent",
326                              hierarchical_class ? "1:1" : "1:", "classid",
327                              hierarchical_class ? "1:2" : "1:1", "htb",
328                              "rate", average, NULL);
329 
330         if (peak)
331             virCommandAddArgList(cmd, "ceil", peak, NULL);
332         if (burst)
333             virCommandAddArgList(cmd, "burst", burst, NULL);
334 
335         virNetDevBandwidthCmdAddOptimalQuantum(cmd, tx);
336         if (virCommandRun(cmd, NULL) < 0)
337             goto cleanup;
338 
339         virCommandFree(cmd);
340         cmd = virCommandNew(TC);
341         virCommandAddArgList(cmd, "qdisc", "add", "dev", ifname, "parent",
342                              hierarchical_class ? "1:2" : "1:1",
343                              "handle", "2:", "sfq", "perturb",
344                              "10", NULL);
345 
346         if (virCommandRun(cmd, NULL) < 0)
347             goto cleanup;
348 
349         virCommandFree(cmd);
350         cmd = virCommandNew(TC);
351         virCommandAddArgList(cmd, "filter", "add", "dev", ifname, "parent",
352                              "1:0", "protocol", "all", "prio", "1", "handle",
353                              "1", "fw", "flowid", "1", NULL);
354 
355         if (virCommandRun(cmd, NULL) < 0)
356             goto cleanup;
357 
358         VIR_FREE(average);
359         VIR_FREE(peak);
360         VIR_FREE(burst);
361     }
362 
363     if (rx) {
364         average = g_strdup_printf("%llukbps", rx->average);
365 
366         if (rx->burst) {
367             burst = g_strdup_printf("%llukb", rx->burst);
368         } else {
369             /* Internally, tc uses uint to store burst size (in bytes).
370              * Therefore, the largest value we can set is UINT_MAX bytes.
371              * We're outputting the vale in KiB though. */
372             unsigned long long avg = MIN(rx->average, UINT_MAX / 1024);
373 
374             burst = g_strdup_printf("%llukb", avg);
375         }
376 
377         virCommandFree(cmd);
378         cmd = virCommandNew(TC);
379             virCommandAddArgList(cmd, "qdisc", "add", "dev", ifname,
380                                  "ingress", NULL);
381 
382         if (virCommandRun(cmd, NULL) < 0)
383             goto cleanup;
384 
385         virCommandFree(cmd);
386         cmd = virCommandNew(TC);
387         /* Set filter to match all ingress traffic */
388         virCommandAddArgList(cmd, "filter", "add", "dev", ifname, "parent",
389                              "ffff:", "protocol", "all", "u32", "match", "u32",
390                              "0", "0", "police", "rate", average,
391                              "burst", burst, "mtu", "64kb", "drop", "flowid",
392                              ":1", NULL);
393 
394         if (virCommandRun(cmd, NULL) < 0)
395             goto cleanup;
396     }
397 
398     ret = 0;
399 
400  cleanup:
401     virCommandFree(cmd);
402     VIR_FREE(average);
403     VIR_FREE(peak);
404     VIR_FREE(burst);
405     return ret;
406 }
407 
408 /**
409  * virNetDevBandwidthClear:
410  * @ifname: on which interface
411  *
412  * This function tries to disable QoS on specified interface
413  * by deleting root and ingress qdisc. However, this may fail
414  * if we try to remove the default one.
415  *
416  * Return 0 on success, -1 otherwise.
417  */
418 int
virNetDevBandwidthClear(const char * ifname)419 virNetDevBandwidthClear(const char *ifname)
420 {
421     int ret = 0;
422     int dummy; /* for ignoring the exit status */
423     virCommand *cmd = NULL;
424 
425     if (!ifname)
426        return 0;
427 
428     cmd = virCommandNew(TC);
429     virCommandAddArgList(cmd, "qdisc", "del", "dev", ifname, "root", NULL);
430 
431     if (virCommandRun(cmd, &dummy) < 0)
432         ret = -1;
433 
434     virCommandFree(cmd);
435 
436     cmd = virCommandNew(TC);
437     virCommandAddArgList(cmd, "qdisc",  "del", "dev", ifname, "ingress", NULL);
438 
439     if (virCommandRun(cmd, &dummy) < 0)
440         ret = -1;
441 
442     virCommandFree(cmd);
443 
444     return ret;
445 }
446 
447 /*
448  * virNetDevBandwidthCopy:
449  * @dest: destination
450  * @src:  source (may be NULL)
451  *
452  * Returns -1 on OOM error (which gets reported),
453  * 0 otherwise.
454  */
455 int
virNetDevBandwidthCopy(virNetDevBandwidth ** dest,const virNetDevBandwidth * src)456 virNetDevBandwidthCopy(virNetDevBandwidth **dest,
457                        const virNetDevBandwidth *src)
458 {
459     *dest = NULL;
460     if (!src) {
461         /* nothing to be copied */
462         return 0;
463     }
464 
465     *dest = g_new0(virNetDevBandwidth, 1);
466 
467     if (src->in) {
468         (*dest)->in = g_new0(virNetDevBandwidthRate, 1);
469         memcpy((*dest)->in, src->in, sizeof(*src->in));
470     }
471 
472     if (src->out) {
473         (*dest)->out = g_new0(virNetDevBandwidthRate, 1);
474         memcpy((*dest)->out, src->out, sizeof(*src->out));
475     }
476 
477     return 0;
478 }
479 
480 bool
virNetDevBandwidthEqual(const virNetDevBandwidth * a,const virNetDevBandwidth * b)481 virNetDevBandwidthEqual(const virNetDevBandwidth *a,
482                         const virNetDevBandwidth *b)
483 {
484     if (!a && !b)
485         return true;
486 
487     if (!a || !b)
488         return false;
489 
490     /* in */
491     if (a->in) {
492         if (!b->in)
493             return false;
494 
495         if (a->in->average != b->in->average ||
496             a->in->peak != b->in->peak ||
497             a->in->floor != b->in->floor ||
498             a->in->burst != b->in->burst)
499             return false;
500     } else if (b->in) {
501         return false;
502     }
503 
504     /* out */
505     if (a->out) {
506         if (!b->out)
507             return false;
508 
509         if (a->out->average != b->out->average ||
510             a->out->peak != b->out->peak ||
511             a->out->floor != b->out->floor ||
512             a->out->burst != b->out->burst)
513             return false;
514     } else if (b->out) {
515         return false;
516     }
517 
518     return true;
519 }
520 
521 /*
522  * virNetDevBandwidthPlug:
523  * @brname: name of the bridge
524  * @net_bandwidth: QoS settings on @brname
525  * @ifmac_ptr: MAC of interface
526  * @bandwidth: QoS settings for interface
527  * @id: unique ID (MUST be greater than 2)
528  *
529  * Set bridge part of interface QoS settings, e.g. guaranteed
530  * bandwidth.  @id is an unique ID (among @brname) from which
531  * other identifiers for class, qdisc and filter are derived.
532  * However, two classes were already set up (by
533  * virNetDevBandwidthSet). That's why this @id MUST be greater
534  * than 2. You may want to keep passed @id, as it is used later
535  * by virNetDevBandwidthUnplug.
536  *
537  * Returns:
538  * 0 if QoS set successfully
539  * -1 otherwise.
540  */
541 int
virNetDevBandwidthPlug(const char * brname,virNetDevBandwidth * net_bandwidth,const virMacAddr * ifmac_ptr,virNetDevBandwidth * bandwidth,unsigned int id)542 virNetDevBandwidthPlug(const char *brname,
543                        virNetDevBandwidth *net_bandwidth,
544                        const virMacAddr *ifmac_ptr,
545                        virNetDevBandwidth *bandwidth,
546                        unsigned int id)
547 {
548     int ret = -1;
549     virCommand *cmd = NULL;
550     char *class_id = NULL;
551     char *qdisc_id = NULL;
552     char *floor = NULL;
553     char *ceil = NULL;
554     char ifmacStr[VIR_MAC_STRING_BUFLEN];
555 
556     if (id <= 2) {
557         virReportError(VIR_ERR_INTERNAL_ERROR, _("Invalid class ID %d"), id);
558         return -1;
559     }
560 
561     virMacAddrFormat(ifmac_ptr, ifmacStr);
562 
563     if (!net_bandwidth || !net_bandwidth->in) {
564         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
565                        _("Bridge '%s' has no QoS set, therefore "
566                          "unable to set 'floor' on '%s'"),
567                        brname, ifmacStr);
568         return -1;
569     }
570 
571     class_id = g_strdup_printf("1:%x", id);
572     qdisc_id = g_strdup_printf("%x:", id);
573     floor = g_strdup_printf("%llukbps", bandwidth->in->floor);
574     ceil = g_strdup_printf("%llukbps", net_bandwidth->in->peak ?
575                            net_bandwidth->in->peak :
576                            net_bandwidth->in->average);
577 
578     cmd = virCommandNew(TC);
579     virCommandAddArgList(cmd, "class", "add", "dev", brname, "parent", "1:1",
580                          "classid", class_id, "htb", "rate", floor,
581                          "ceil", ceil, NULL);
582     virNetDevBandwidthCmdAddOptimalQuantum(cmd, bandwidth->in);
583 
584     if (virCommandRun(cmd, NULL) < 0)
585         goto cleanup;
586 
587     virCommandFree(cmd);
588     cmd = virCommandNew(TC);
589     virCommandAddArgList(cmd, "qdisc", "add", "dev", brname, "parent",
590                          class_id, "handle", qdisc_id, "sfq", "perturb",
591                          "10", NULL);
592 
593     if (virCommandRun(cmd, NULL) < 0)
594         goto cleanup;
595 
596     if (virNetDevBandwidthManipulateFilter(brname, ifmac_ptr, id,
597                                            class_id, false, true) < 0)
598         goto cleanup;
599 
600     ret = 0;
601 
602  cleanup:
603     VIR_FREE(ceil);
604     VIR_FREE(floor);
605     VIR_FREE(qdisc_id);
606     VIR_FREE(class_id);
607     virCommandFree(cmd);
608     return ret;
609 }
610 
611 /*
612  * virNetDevBandwidthUnplug:
613  * @brname: from which bridge are we unplugging
614  * @id: unique identifier (MUST be greater than 2)
615  *
616  * Remove QoS settings from bridge.
617  *
618  * Returns 0 on success, -1 otherwise.
619  */
620 int
virNetDevBandwidthUnplug(const char * brname,unsigned int id)621 virNetDevBandwidthUnplug(const char *brname,
622                          unsigned int id)
623 {
624     int ret = -1;
625     int cmd_ret = 0;
626     virCommand *cmd = NULL;
627     char *class_id = NULL;
628     char *qdisc_id = NULL;
629 
630     if (id <= 2) {
631         virReportError(VIR_ERR_INTERNAL_ERROR, _("Invalid class ID %d"), id);
632         return -1;
633     }
634 
635     class_id = g_strdup_printf("1:%x", id);
636     qdisc_id = g_strdup_printf("%x:", id);
637 
638     cmd = virCommandNew(TC);
639     virCommandAddArgList(cmd, "qdisc", "del", "dev", brname,
640                          "handle", qdisc_id, NULL);
641 
642     /* Don't threat tc errors as fatal, but
643      * try to remove as much as possible */
644     if (virCommandRun(cmd, &cmd_ret) < 0)
645         goto cleanup;
646 
647     if (virNetDevBandwidthManipulateFilter(brname, NULL, id,
648                                            NULL, true, false) < 0)
649         goto cleanup;
650 
651     virCommandFree(cmd);
652     cmd = virCommandNew(TC);
653     virCommandAddArgList(cmd, "class", "del", "dev", brname,
654                          "classid", class_id, NULL);
655 
656     if (virCommandRun(cmd, &cmd_ret) < 0)
657         goto cleanup;
658 
659     ret = 0;
660 
661  cleanup:
662     VIR_FREE(qdisc_id);
663     VIR_FREE(class_id);
664     virCommandFree(cmd);
665     return ret;
666 }
667 
668 /**
669  * virNetDevBandwidthUpdateRate:
670  * @ifname: interface name
671  * @id: unique identifier
672  * @bandwidth: used to derive 'ceil' of class with @id
673  * @new_rate: new rate
674  *
675  * This function updates the 'rate' attribute of HTB class.
676  * It can be used whenever a new interface is plugged to a
677  * bridge to adjust average throughput of non guaranteed
678  * NICs.
679  *
680  * Returns 0 on success, -1 otherwise.
681  */
682 int
virNetDevBandwidthUpdateRate(const char * ifname,unsigned int id,virNetDevBandwidth * bandwidth,unsigned long long new_rate)683 virNetDevBandwidthUpdateRate(const char *ifname,
684                              unsigned int id,
685                              virNetDevBandwidth *bandwidth,
686                              unsigned long long new_rate)
687 {
688     int ret = -1;
689     virCommand *cmd = NULL;
690     char *class_id = NULL;
691     char *rate = NULL;
692     char *ceil = NULL;
693 
694     class_id = g_strdup_printf("1:%x", id);
695     rate = g_strdup_printf("%llukbps", new_rate);
696     ceil = g_strdup_printf("%llukbps", bandwidth->in->peak ?
697                            bandwidth->in->peak :
698                            bandwidth->in->average);
699 
700     cmd = virCommandNew(TC);
701     virCommandAddArgList(cmd, "class", "change", "dev", ifname,
702                          "classid", class_id, "htb", "rate", rate,
703                          "ceil", ceil, NULL);
704     virNetDevBandwidthCmdAddOptimalQuantum(cmd, bandwidth->in);
705 
706     if (virCommandRun(cmd, NULL) < 0)
707         goto cleanup;
708 
709     ret = 0;
710 
711  cleanup:
712     virCommandFree(cmd);
713     VIR_FREE(class_id);
714     VIR_FREE(rate);
715     VIR_FREE(ceil);
716     return ret;
717 }
718 
719 /**
720  * virNetDevBandwidthUpdateFilter:
721  * @ifname: interface to operate on
722  * @ifmac_ptr: new MAC to update the filter with
723  * @id: filter ID
724  *
725  * Sometimes the host environment is so dynamic, that even a
726  * guest's MAC addresses change on the fly. When that happens we
727  * must update our QoS hierarchy so that the guest's traffic is
728  * placed into the correct QDiscs.  This function updates the
729  * filter for the interface @ifname with the unique identifier
730  * @id so that it uses the new MAC address of the guest interface
731  * @ifmac_ptr.
732  *
733  * Returns: 0 on success,
734  *         -1 on failure (with error reported).
735  */
736 int
virNetDevBandwidthUpdateFilter(const char * ifname,const virMacAddr * ifmac_ptr,unsigned int id)737 virNetDevBandwidthUpdateFilter(const char *ifname,
738                                const virMacAddr *ifmac_ptr,
739                                unsigned int id)
740 {
741     int ret = -1;
742     char *class_id = NULL;
743 
744     class_id = g_strdup_printf("1:%x", id);
745 
746     if (virNetDevBandwidthManipulateFilter(ifname, ifmac_ptr, id,
747                                            class_id, true, true) < 0)
748         goto cleanup;
749 
750     ret = 0;
751  cleanup:
752     VIR_FREE(class_id);
753     return ret;
754 }
755 
756 
757 
758 /**
759  * virNetDevBandwidthSetRootQDisc:
760  * @ifname: the interface name
761  * @qdisc: queueing discipline to set
762  *
763  * For given interface @ifname set its root queueing discipline
764  * to @qdisc. This can be used to replace the default qdisc
765  * (usually pfifo_fast or whatever is set in
766  * /proc/sys/net/core/default_qdisc) with different qdisc.
767  *
768  * Returns: 0 on success,
769  *         -1 if failed to exec tc (with error reported)
770  *         -2 if tc failed (with no error reported)
771  */
772 int
virNetDevBandwidthSetRootQDisc(const char * ifname,const char * qdisc)773 virNetDevBandwidthSetRootQDisc(const char *ifname,
774                                const char *qdisc)
775 {
776     g_autoptr(virCommand) cmd = NULL;
777     g_autofree char *outbuf = NULL;
778     g_autofree char *errbuf = NULL;
779     int status;
780 
781     /* Ideally, we would have a netlink implementation and just
782      * call it here.  But honestly, I tried and failed miserably.
783      * Fallback to spawning tc. */
784     cmd = virCommandNewArgList(TC, "qdisc", "add", "dev", ifname,
785                                "root", "handle", "0:", qdisc,
786                                NULL);
787 
788     virCommandAddEnvString(cmd, "LC_ALL=C");
789     virCommandSetOutputBuffer(cmd, &outbuf);
790     virCommandSetErrorBuffer(cmd, &errbuf);
791 
792     if (virCommandRun(cmd, &status) < 0)
793         return -1;
794 
795     if (status != 0) {
796         VIR_DEBUG("Setting qdisc failed: output='%s' err='%s'", outbuf, errbuf);
797         return -2;
798     }
799 
800     return 0;
801 }
802