191336b40SDon Lewis /* 291336b40SDon Lewis * FQ_Codel - The FlowQueue-Codel scheduler/AQM 391336b40SDon Lewis * 491336b40SDon Lewis * $FreeBSD$ 591336b40SDon Lewis * 691336b40SDon Lewis * Copyright (C) 2016 Centre for Advanced Internet Architectures, 791336b40SDon Lewis * Swinburne University of Technology, Melbourne, Australia. 891336b40SDon Lewis * Portions of this code were made possible in part by a gift from 991336b40SDon Lewis * The Comcast Innovation Fund. 1091336b40SDon Lewis * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au> 1191336b40SDon Lewis * 1291336b40SDon Lewis * Redistribution and use in source and binary forms, with or without 1391336b40SDon Lewis * modification, are permitted provided that the following conditions 1491336b40SDon Lewis * are met: 1591336b40SDon Lewis * 1. Redistributions of source code must retain the above copyright 1691336b40SDon Lewis * notice, this list of conditions and the following disclaimer. 1791336b40SDon Lewis * 2. Redistributions in binary form must reproduce the above copyright 1891336b40SDon Lewis * notice, this list of conditions and the following disclaimer in the 1991336b40SDon Lewis * documentation and/or other materials provided with the distribution. 2091336b40SDon Lewis * 2191336b40SDon Lewis * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 2291336b40SDon Lewis * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2391336b40SDon Lewis * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2491336b40SDon Lewis * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 2591336b40SDon Lewis * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2691336b40SDon Lewis * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2791336b40SDon Lewis * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2891336b40SDon Lewis * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2991336b40SDon Lewis * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3091336b40SDon Lewis * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3191336b40SDon Lewis * SUCH DAMAGE. 3291336b40SDon Lewis */ 3391336b40SDon Lewis 3491336b40SDon Lewis #ifdef _KERNEL 3591336b40SDon Lewis #include <sys/malloc.h> 3691336b40SDon Lewis #include <sys/socket.h> 3791336b40SDon Lewis //#include <sys/socketvar.h> 3891336b40SDon Lewis #include <sys/kernel.h> 3991336b40SDon Lewis #include <sys/mbuf.h> 4091336b40SDon Lewis #include <sys/module.h> 4191336b40SDon Lewis #include <net/if.h> /* IFNAMSIZ */ 4291336b40SDon Lewis #include <netinet/in.h> 4391336b40SDon Lewis #include <netinet/ip_var.h> /* ipfw_rule_ref */ 4491336b40SDon Lewis #include <netinet/ip_fw.h> /* flow_id */ 4591336b40SDon Lewis #include <netinet/ip_dummynet.h> 4691336b40SDon Lewis 473f289c3fSJeff Roberson #include <sys/lock.h> 4891336b40SDon Lewis #include <sys/proc.h> 4991336b40SDon Lewis #include <sys/rwlock.h> 5091336b40SDon Lewis 5191336b40SDon Lewis #include <netpfil/ipfw/ip_fw_private.h> 5291336b40SDon Lewis #include <sys/sysctl.h> 5391336b40SDon Lewis #include <netinet/ip.h> 5491336b40SDon Lewis #include <netinet/ip6.h> 5591336b40SDon Lewis #include <netinet/ip_icmp.h> 5691336b40SDon Lewis #include <netinet/tcp.h> 5791336b40SDon Lewis #include <netinet/udp.h> 5891336b40SDon Lewis #include <sys/queue.h> 5991336b40SDon Lewis #include <sys/hash.h> 6091336b40SDon Lewis 6191336b40SDon Lewis #include <netpfil/ipfw/dn_heap.h> 6291336b40SDon Lewis #include <netpfil/ipfw/ip_dn_private.h> 6391336b40SDon Lewis 6491336b40SDon Lewis #include <netpfil/ipfw/dn_aqm.h> 6591336b40SDon Lewis #include <netpfil/ipfw/dn_aqm_codel.h> 6691336b40SDon Lewis #include <netpfil/ipfw/dn_sched.h> 6791336b40SDon Lewis #include <netpfil/ipfw/dn_sched_fq_codel.h> 6891336b40SDon Lewis #include <netpfil/ipfw/dn_sched_fq_codel_helper.h> 6991336b40SDon Lewis 7091336b40SDon Lewis #else 7191336b40SDon Lewis #include <dn_test.h> 7291336b40SDon Lewis #endif 7391336b40SDon Lewis 7491336b40SDon Lewis /* NOTE: In fq_codel module, we reimplements CoDel AQM functions 7591336b40SDon Lewis * because fq_codel use different flows (sub-queues) structure and 7691336b40SDon Lewis * dn_queue includes many variables not needed by a flow (sub-queue 7791336b40SDon Lewis * )i.e. avoid extra overhead (88 bytes vs 208 bytes). 7891336b40SDon Lewis * Also, CoDel functions manages stats of sub-queues as well as the main queue. 7991336b40SDon Lewis */ 8091336b40SDon Lewis 8191336b40SDon Lewis #define DN_SCHED_FQ_CODEL 6 8291336b40SDon Lewis 8391336b40SDon Lewis static struct dn_alg fq_codel_desc; 8491336b40SDon Lewis 8591336b40SDon Lewis /* fq_codel default parameters including codel */ 8691336b40SDon Lewis struct dn_sch_fq_codel_parms 8791336b40SDon Lewis fq_codel_sysctl = {{5000 * AQM_TIME_1US, 100000 * AQM_TIME_1US, 8891336b40SDon Lewis CODEL_ECN_ENABLED}, 1024, 10240, 1514}; 8991336b40SDon Lewis 9091336b40SDon Lewis static int 9191336b40SDon Lewis fqcodel_sysctl_interval_handler(SYSCTL_HANDLER_ARGS) 9291336b40SDon Lewis { 9391336b40SDon Lewis int error; 9491336b40SDon Lewis long value; 9591336b40SDon Lewis 9691336b40SDon Lewis value = fq_codel_sysctl.ccfg.interval; 9791336b40SDon Lewis value /= AQM_TIME_1US; 9891336b40SDon Lewis error = sysctl_handle_long(oidp, &value, 0, req); 9991336b40SDon Lewis if (error != 0 || req->newptr == NULL) 10091336b40SDon Lewis return (error); 10191336b40SDon Lewis if (value < 1 || value > 100 * AQM_TIME_1S) 10291336b40SDon Lewis return (EINVAL); 10391336b40SDon Lewis fq_codel_sysctl.ccfg.interval = value * AQM_TIME_1US ; 10491336b40SDon Lewis 10591336b40SDon Lewis return (0); 10691336b40SDon Lewis } 10791336b40SDon Lewis 10891336b40SDon Lewis static int 10991336b40SDon Lewis fqcodel_sysctl_target_handler(SYSCTL_HANDLER_ARGS) 11091336b40SDon Lewis { 11191336b40SDon Lewis int error; 11291336b40SDon Lewis long value; 11391336b40SDon Lewis 11491336b40SDon Lewis value = fq_codel_sysctl.ccfg.target; 11591336b40SDon Lewis value /= AQM_TIME_1US; 11691336b40SDon Lewis error = sysctl_handle_long(oidp, &value, 0, req); 11791336b40SDon Lewis if (error != 0 || req->newptr == NULL) 11891336b40SDon Lewis return (error); 11991336b40SDon Lewis if (value < 1 || value > 5 * AQM_TIME_1S) 12091336b40SDon Lewis return (EINVAL); 12191336b40SDon Lewis fq_codel_sysctl.ccfg.target = value * AQM_TIME_1US ; 12291336b40SDon Lewis 12391336b40SDon Lewis return (0); 12491336b40SDon Lewis } 12591336b40SDon Lewis 12691336b40SDon Lewis SYSBEGIN(f4) 12791336b40SDon Lewis 12891336b40SDon Lewis SYSCTL_DECL(_net_inet); 12991336b40SDon Lewis SYSCTL_DECL(_net_inet_ip); 13091336b40SDon Lewis SYSCTL_DECL(_net_inet_ip_dummynet); 13191336b40SDon Lewis static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, fqcodel, 1327029da5cSPawel Biernacki CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 1337029da5cSPawel Biernacki "FQ_CODEL"); 13491336b40SDon Lewis 13591336b40SDon Lewis #ifdef SYSCTL_NODE 13691336b40SDon Lewis 13791336b40SDon Lewis SYSCTL_PROC(_net_inet_ip_dummynet_fqcodel, OID_AUTO, target, 1387029da5cSPawel Biernacki CTLTYPE_LONG | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 1397029da5cSPawel Biernacki NULL, 0, fqcodel_sysctl_target_handler, "L", 14091336b40SDon Lewis "FQ_CoDel target in microsecond"); 14191336b40SDon Lewis SYSCTL_PROC(_net_inet_ip_dummynet_fqcodel, OID_AUTO, interval, 1427029da5cSPawel Biernacki CTLTYPE_LONG | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 1437029da5cSPawel Biernacki NULL, 0, fqcodel_sysctl_interval_handler, "L", 14491336b40SDon Lewis "FQ_CoDel interval in microsecond"); 14591336b40SDon Lewis 14691336b40SDon Lewis SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, quantum, 14791336b40SDon Lewis CTLFLAG_RW, &fq_codel_sysctl.quantum, 1514, "FQ_CoDel quantum"); 14891336b40SDon Lewis SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, flows, 14991336b40SDon Lewis CTLFLAG_RW, &fq_codel_sysctl.flows_cnt, 1024, 15091336b40SDon Lewis "Number of queues for FQ_CoDel"); 15191336b40SDon Lewis SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, limit, 15291336b40SDon Lewis CTLFLAG_RW, &fq_codel_sysctl.limit, 10240, "FQ_CoDel queues size limit"); 15391336b40SDon Lewis #endif 15491336b40SDon Lewis 15591336b40SDon Lewis /* Drop a packet form the head of codel queue */ 15691336b40SDon Lewis static void 15791336b40SDon Lewis codel_drop_head(struct fq_codel_flow *q, struct fq_codel_si *si) 15891336b40SDon Lewis { 15991336b40SDon Lewis struct mbuf *m = q->mq.head; 16091336b40SDon Lewis 16191336b40SDon Lewis if (m == NULL) 16291336b40SDon Lewis return; 16391336b40SDon Lewis q->mq.head = m->m_nextpkt; 16491336b40SDon Lewis 16591336b40SDon Lewis fq_update_stats(q, si, -m->m_pkthdr.len, 1); 16691336b40SDon Lewis 16791336b40SDon Lewis if (si->main_q.ni.length == 0) /* queue is now idle */ 16891336b40SDon Lewis si->main_q.q_time = dn_cfg.curr_time; 16991336b40SDon Lewis 17091336b40SDon Lewis FREE_PKT(m); 17191336b40SDon Lewis } 17291336b40SDon Lewis 17391336b40SDon Lewis /* Enqueue a packet 'm' to a queue 'q' and add timestamp to that packet. 17491336b40SDon Lewis * Return 1 when unable to add timestamp, otherwise return 0 17591336b40SDon Lewis */ 17691336b40SDon Lewis static int 17791336b40SDon Lewis codel_enqueue(struct fq_codel_flow *q, struct mbuf *m, struct fq_codel_si *si) 17891336b40SDon Lewis { 17991336b40SDon Lewis uint64_t len; 18091336b40SDon Lewis 18191336b40SDon Lewis len = m->m_pkthdr.len; 18291336b40SDon Lewis /* finding maximum packet size */ 18391336b40SDon Lewis if (len > q->cst.maxpkt_size) 18491336b40SDon Lewis q->cst.maxpkt_size = len; 18591336b40SDon Lewis 18691336b40SDon Lewis /* Add timestamp to mbuf as MTAG */ 18791336b40SDon Lewis struct m_tag *mtag; 18891336b40SDon Lewis mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL); 18991336b40SDon Lewis if (mtag == NULL) 19091336b40SDon Lewis mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, sizeof(aqm_time_t), 19191336b40SDon Lewis M_NOWAIT); 192c4a6258dSMark Johnston if (mtag == NULL) 19391336b40SDon Lewis goto drop; 19491336b40SDon Lewis *(aqm_time_t *)(mtag + 1) = AQM_UNOW; 19591336b40SDon Lewis m_tag_prepend(m, mtag); 19691336b40SDon Lewis 19791336b40SDon Lewis mq_append(&q->mq, m); 19891336b40SDon Lewis fq_update_stats(q, si, len, 0); 19991336b40SDon Lewis return 0; 20091336b40SDon Lewis 20191336b40SDon Lewis drop: 20291336b40SDon Lewis fq_update_stats(q, si, len, 1); 20391336b40SDon Lewis m_freem(m); 20491336b40SDon Lewis return 1; 20591336b40SDon Lewis } 20691336b40SDon Lewis 20791336b40SDon Lewis /* 20891336b40SDon Lewis * Classify a packet to queue number using Jenkins hash function. 20991336b40SDon Lewis * Return: queue number 21091336b40SDon Lewis * the input of the hash are protocol no, perturbation, src IP, dst IP, 21191336b40SDon Lewis * src port, dst port, 21291336b40SDon Lewis */ 21391336b40SDon Lewis static inline int 21491336b40SDon Lewis fq_codel_classify_flow(struct mbuf *m, uint16_t fcount, struct fq_codel_si *si) 21591336b40SDon Lewis { 21691336b40SDon Lewis struct ip *ip; 21791336b40SDon Lewis struct tcphdr *th; 21891336b40SDon Lewis struct udphdr *uh; 21991336b40SDon Lewis uint8_t tuple[41]; 22091336b40SDon Lewis uint16_t hash=0; 22191336b40SDon Lewis 2224001fcbeSDon Lewis ip = (struct ip *)mtodo(m, dn_tag_get(m)->iphdr_off); 22391336b40SDon Lewis //#ifdef INET6 22491336b40SDon Lewis struct ip6_hdr *ip6; 22591336b40SDon Lewis int isip6; 2264001fcbeSDon Lewis isip6 = (ip->ip_v == 6); 22791336b40SDon Lewis 22891336b40SDon Lewis if(isip6) { 2294001fcbeSDon Lewis ip6 = (struct ip6_hdr *)ip; 23091336b40SDon Lewis *((uint8_t *) &tuple[0]) = ip6->ip6_nxt; 23191336b40SDon Lewis *((uint32_t *) &tuple[1]) = si->perturbation; 23291336b40SDon Lewis memcpy(&tuple[5], ip6->ip6_src.s6_addr, 16); 23391336b40SDon Lewis memcpy(&tuple[21], ip6->ip6_dst.s6_addr, 16); 23491336b40SDon Lewis 23591336b40SDon Lewis switch (ip6->ip6_nxt) { 23691336b40SDon Lewis case IPPROTO_TCP: 23791336b40SDon Lewis th = (struct tcphdr *)(ip6 + 1); 23891336b40SDon Lewis *((uint16_t *) &tuple[37]) = th->th_dport; 23991336b40SDon Lewis *((uint16_t *) &tuple[39]) = th->th_sport; 24091336b40SDon Lewis break; 24191336b40SDon Lewis 24291336b40SDon Lewis case IPPROTO_UDP: 24391336b40SDon Lewis uh = (struct udphdr *)(ip6 + 1); 24491336b40SDon Lewis *((uint16_t *) &tuple[37]) = uh->uh_dport; 24591336b40SDon Lewis *((uint16_t *) &tuple[39]) = uh->uh_sport; 24691336b40SDon Lewis break; 24791336b40SDon Lewis default: 24891336b40SDon Lewis memset(&tuple[37], 0, 4); 24991336b40SDon Lewis } 25091336b40SDon Lewis 25191336b40SDon Lewis hash = jenkins_hash(tuple, 41, HASHINIT) % fcount; 25291336b40SDon Lewis return hash; 25391336b40SDon Lewis } 25491336b40SDon Lewis //#endif 25591336b40SDon Lewis 25691336b40SDon Lewis /* IPv4 */ 25791336b40SDon Lewis *((uint8_t *) &tuple[0]) = ip->ip_p; 25891336b40SDon Lewis *((uint32_t *) &tuple[1]) = si->perturbation; 25991336b40SDon Lewis *((uint32_t *) &tuple[5]) = ip->ip_src.s_addr; 26091336b40SDon Lewis *((uint32_t *) &tuple[9]) = ip->ip_dst.s_addr; 26191336b40SDon Lewis 26291336b40SDon Lewis switch (ip->ip_p) { 26391336b40SDon Lewis case IPPROTO_TCP: 26491336b40SDon Lewis th = (struct tcphdr *)(ip + 1); 26591336b40SDon Lewis *((uint16_t *) &tuple[13]) = th->th_dport; 26691336b40SDon Lewis *((uint16_t *) &tuple[15]) = th->th_sport; 26791336b40SDon Lewis break; 26891336b40SDon Lewis 26991336b40SDon Lewis case IPPROTO_UDP: 27091336b40SDon Lewis uh = (struct udphdr *)(ip + 1); 27191336b40SDon Lewis *((uint16_t *) &tuple[13]) = uh->uh_dport; 27291336b40SDon Lewis *((uint16_t *) &tuple[15]) = uh->uh_sport; 27391336b40SDon Lewis break; 27491336b40SDon Lewis default: 27591336b40SDon Lewis memset(&tuple[13], 0, 4); 27691336b40SDon Lewis } 27791336b40SDon Lewis hash = jenkins_hash(tuple, 17, HASHINIT) % fcount; 27891336b40SDon Lewis 27991336b40SDon Lewis return hash; 28091336b40SDon Lewis } 28191336b40SDon Lewis 28291336b40SDon Lewis /* 28391336b40SDon Lewis * Enqueue a packet into an appropriate queue according to 28491336b40SDon Lewis * FQ_CODEL algorithm. 28591336b40SDon Lewis */ 28691336b40SDon Lewis static int 28791336b40SDon Lewis fq_codel_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q, 28891336b40SDon Lewis struct mbuf *m) 28991336b40SDon Lewis { 29091336b40SDon Lewis struct fq_codel_si *si; 29191336b40SDon Lewis struct fq_codel_schk *schk; 29291336b40SDon Lewis struct dn_sch_fq_codel_parms *param; 29391336b40SDon Lewis struct dn_queue *mainq; 29491336b40SDon Lewis int idx, drop, i, maxidx; 29591336b40SDon Lewis 29691336b40SDon Lewis mainq = (struct dn_queue *)(_si + 1); 29791336b40SDon Lewis si = (struct fq_codel_si *)_si; 29891336b40SDon Lewis schk = (struct fq_codel_schk *)(si->_si.sched+1); 29991336b40SDon Lewis param = &schk->cfg; 30091336b40SDon Lewis 30191336b40SDon Lewis /* classify a packet to queue number*/ 30291336b40SDon Lewis idx = fq_codel_classify_flow(m, param->flows_cnt, si); 30391336b40SDon Lewis /* enqueue packet into appropriate queue using CoDel AQM. 30491336b40SDon Lewis * Note: 'codel_enqueue' function returns 1 only when it unable to 30591336b40SDon Lewis * add timestamp to packet (no limit check)*/ 30691336b40SDon Lewis drop = codel_enqueue(&si->flows[idx], m, si); 30791336b40SDon Lewis 30891336b40SDon Lewis /* codel unable to timestamp a packet */ 30991336b40SDon Lewis if (drop) 31091336b40SDon Lewis return 1; 31191336b40SDon Lewis 31291336b40SDon Lewis /* If the flow (sub-queue) is not active ,then add it to the tail of 31391336b40SDon Lewis * new flows list, initialize and activate it. 31491336b40SDon Lewis */ 31591336b40SDon Lewis if (!si->flows[idx].active ) { 31691336b40SDon Lewis STAILQ_INSERT_TAIL(&si->newflows, &si->flows[idx], flowchain); 31791336b40SDon Lewis si->flows[idx].deficit = param->quantum; 31891336b40SDon Lewis si->flows[idx].cst.dropping = false; 31991336b40SDon Lewis si->flows[idx].cst.first_above_time = 0; 32091336b40SDon Lewis si->flows[idx].active = 1; 32191336b40SDon Lewis //D("activate %d",idx); 32291336b40SDon Lewis } 32391336b40SDon Lewis 32491336b40SDon Lewis /* check the limit for all queues and remove a packet from the 32591336b40SDon Lewis * largest one 32691336b40SDon Lewis */ 32791336b40SDon Lewis if (mainq->ni.length > schk->cfg.limit) { D("over limit"); 32891336b40SDon Lewis /* find first active flow */ 32991336b40SDon Lewis for (maxidx = 0; maxidx < schk->cfg.flows_cnt; maxidx++) 33091336b40SDon Lewis if (si->flows[maxidx].active) 33191336b40SDon Lewis break; 33291336b40SDon Lewis if (maxidx < schk->cfg.flows_cnt) { 33391336b40SDon Lewis /* find the largest sub- queue */ 33491336b40SDon Lewis for (i = maxidx + 1; i < schk->cfg.flows_cnt; i++) 33591336b40SDon Lewis if (si->flows[i].active && si->flows[i].stats.length > 33691336b40SDon Lewis si->flows[maxidx].stats.length) 33791336b40SDon Lewis maxidx = i; 33891336b40SDon Lewis codel_drop_head(&si->flows[maxidx], si); 33991336b40SDon Lewis D("maxidx = %d",maxidx); 34091336b40SDon Lewis drop = 1; 34191336b40SDon Lewis } 34291336b40SDon Lewis } 34391336b40SDon Lewis 34491336b40SDon Lewis return drop; 34591336b40SDon Lewis } 34691336b40SDon Lewis 34791336b40SDon Lewis /* 34891336b40SDon Lewis * Dequeue a packet from an appropriate queue according to 34991336b40SDon Lewis * FQ_CODEL algorithm. 35091336b40SDon Lewis */ 35191336b40SDon Lewis static struct mbuf * 35291336b40SDon Lewis fq_codel_dequeue(struct dn_sch_inst *_si) 35391336b40SDon Lewis { 35491336b40SDon Lewis struct fq_codel_si *si; 35591336b40SDon Lewis struct fq_codel_schk *schk; 35691336b40SDon Lewis struct dn_sch_fq_codel_parms *param; 35791336b40SDon Lewis struct fq_codel_flow *f; 35891336b40SDon Lewis struct mbuf *mbuf; 35991336b40SDon Lewis struct fq_codel_list *fq_codel_flowlist; 36091336b40SDon Lewis 36191336b40SDon Lewis si = (struct fq_codel_si *)_si; 36291336b40SDon Lewis schk = (struct fq_codel_schk *)(si->_si.sched+1); 36391336b40SDon Lewis param = &schk->cfg; 36491336b40SDon Lewis 36591336b40SDon Lewis do { 36691336b40SDon Lewis /* select a list to start with */ 36791336b40SDon Lewis if (STAILQ_EMPTY(&si->newflows)) 36891336b40SDon Lewis fq_codel_flowlist = &si->oldflows; 36991336b40SDon Lewis else 37091336b40SDon Lewis fq_codel_flowlist = &si->newflows; 37191336b40SDon Lewis 37291336b40SDon Lewis /* Both new and old queue lists are empty, return NULL */ 37391336b40SDon Lewis if (STAILQ_EMPTY(fq_codel_flowlist)) 37491336b40SDon Lewis return NULL; 37591336b40SDon Lewis 37691336b40SDon Lewis f = STAILQ_FIRST(fq_codel_flowlist); 37791336b40SDon Lewis while (f != NULL) { 37891336b40SDon Lewis /* if there is no flow(sub-queue) deficit, increase deficit 37991336b40SDon Lewis * by quantum, move the flow to the tail of old flows list 38091336b40SDon Lewis * and try another flow. 38191336b40SDon Lewis * Otherwise, the flow will be used for dequeue. 38291336b40SDon Lewis */ 38391336b40SDon Lewis if (f->deficit < 0) { 38491336b40SDon Lewis f->deficit += param->quantum; 38591336b40SDon Lewis STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain); 38691336b40SDon Lewis STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain); 38791336b40SDon Lewis } else 38891336b40SDon Lewis break; 38991336b40SDon Lewis 39091336b40SDon Lewis f = STAILQ_FIRST(fq_codel_flowlist); 39191336b40SDon Lewis } 39291336b40SDon Lewis 39391336b40SDon Lewis /* the new flows list is empty, try old flows list */ 39491336b40SDon Lewis if (STAILQ_EMPTY(fq_codel_flowlist)) 39591336b40SDon Lewis continue; 39691336b40SDon Lewis 39791336b40SDon Lewis /* Dequeue a packet from the selected flow */ 39891336b40SDon Lewis mbuf = fqc_codel_dequeue(f, si); 39991336b40SDon Lewis 40091336b40SDon Lewis /* Codel did not return a packet */ 40191336b40SDon Lewis if (!mbuf) { 40291336b40SDon Lewis /* If the selected flow belongs to new flows list, then move 40391336b40SDon Lewis * it to the tail of old flows list. Otherwise, deactivate it and 40491336b40SDon Lewis * remove it from the old list and 40591336b40SDon Lewis */ 40691336b40SDon Lewis if (fq_codel_flowlist == &si->newflows) { 40791336b40SDon Lewis STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain); 40891336b40SDon Lewis STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain); 40991336b40SDon Lewis } else { 41091336b40SDon Lewis f->active = 0; 41191336b40SDon Lewis STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain); 41291336b40SDon Lewis } 41391336b40SDon Lewis /* start again */ 41491336b40SDon Lewis continue; 41591336b40SDon Lewis } 41691336b40SDon Lewis 41791336b40SDon Lewis /* we have a packet to return, 41891336b40SDon Lewis * update flow deficit and return the packet*/ 41991336b40SDon Lewis f->deficit -= mbuf->m_pkthdr.len; 42091336b40SDon Lewis return mbuf; 42191336b40SDon Lewis 42291336b40SDon Lewis } while (1); 42391336b40SDon Lewis 42491336b40SDon Lewis /* unreachable point */ 42591336b40SDon Lewis return NULL; 42691336b40SDon Lewis } 42791336b40SDon Lewis 42891336b40SDon Lewis /* 42991336b40SDon Lewis * Initialize fq_codel scheduler instance. 43091336b40SDon Lewis * also, allocate memory for flows array. 43191336b40SDon Lewis */ 43291336b40SDon Lewis static int 43391336b40SDon Lewis fq_codel_new_sched(struct dn_sch_inst *_si) 43491336b40SDon Lewis { 43591336b40SDon Lewis struct fq_codel_si *si; 43691336b40SDon Lewis struct dn_queue *q; 43791336b40SDon Lewis struct fq_codel_schk *schk; 43891336b40SDon Lewis int i; 43991336b40SDon Lewis 44091336b40SDon Lewis si = (struct fq_codel_si *)_si; 44191336b40SDon Lewis schk = (struct fq_codel_schk *)(_si->sched+1); 44291336b40SDon Lewis 44391336b40SDon Lewis if(si->flows) { 44491336b40SDon Lewis D("si already configured!"); 44591336b40SDon Lewis return 0; 44691336b40SDon Lewis } 44791336b40SDon Lewis 44891336b40SDon Lewis /* init the main queue */ 44991336b40SDon Lewis q = &si->main_q; 45091336b40SDon Lewis set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q)); 45191336b40SDon Lewis q->_si = _si; 45291336b40SDon Lewis q->fs = _si->sched->fs; 45391336b40SDon Lewis 45491336b40SDon Lewis /* allocate memory for flows array */ 455454529cdSPedro F. Giffuni si->flows = mallocarray(schk->cfg.flows_cnt, 456454529cdSPedro F. Giffuni sizeof(struct fq_codel_flow), M_DUMMYNET, M_NOWAIT | M_ZERO); 45791336b40SDon Lewis if (si->flows == NULL) { 45891336b40SDon Lewis D("cannot allocate memory for fq_codel configuration parameters"); 45991336b40SDon Lewis return ENOMEM ; 46091336b40SDon Lewis } 46191336b40SDon Lewis 46291336b40SDon Lewis /* init perturbation for this si */ 46391336b40SDon Lewis si->perturbation = random(); 46491336b40SDon Lewis 46591336b40SDon Lewis /* init the old and new flows lists */ 46691336b40SDon Lewis STAILQ_INIT(&si->newflows); 46791336b40SDon Lewis STAILQ_INIT(&si->oldflows); 46891336b40SDon Lewis 46991336b40SDon Lewis /* init the flows (sub-queues) */ 47091336b40SDon Lewis for (i = 0; i < schk->cfg.flows_cnt; i++) { 47191336b40SDon Lewis /* init codel */ 47291336b40SDon Lewis si->flows[i].cst.maxpkt_size = 500; 47391336b40SDon Lewis } 47491336b40SDon Lewis 47591336b40SDon Lewis fq_codel_desc.ref_count++; 47691336b40SDon Lewis return 0; 47791336b40SDon Lewis } 47891336b40SDon Lewis 47991336b40SDon Lewis /* 48091336b40SDon Lewis * Free fq_codel scheduler instance. 48191336b40SDon Lewis */ 48291336b40SDon Lewis static int 48391336b40SDon Lewis fq_codel_free_sched(struct dn_sch_inst *_si) 48491336b40SDon Lewis { 48591336b40SDon Lewis struct fq_codel_si *si = (struct fq_codel_si *)_si ; 48691336b40SDon Lewis 48791336b40SDon Lewis /* free the flows array */ 48891336b40SDon Lewis free(si->flows , M_DUMMYNET); 48991336b40SDon Lewis si->flows = NULL; 49091336b40SDon Lewis fq_codel_desc.ref_count--; 49191336b40SDon Lewis 49291336b40SDon Lewis return 0; 49391336b40SDon Lewis } 49491336b40SDon Lewis 49591336b40SDon Lewis /* 49691336b40SDon Lewis * Configure fq_codel scheduler. 49791336b40SDon Lewis * the configurations for the scheduler is passed from userland. 49891336b40SDon Lewis */ 49991336b40SDon Lewis static int 50091336b40SDon Lewis fq_codel_config(struct dn_schk *_schk) 50191336b40SDon Lewis { 50291336b40SDon Lewis struct fq_codel_schk *schk; 50391336b40SDon Lewis struct dn_extra_parms *ep; 50491336b40SDon Lewis struct dn_sch_fq_codel_parms *fqc_cfg; 50591336b40SDon Lewis 50691336b40SDon Lewis schk = (struct fq_codel_schk *)(_schk+1); 50791336b40SDon Lewis ep = (struct dn_extra_parms *) _schk->cfg; 50891336b40SDon Lewis 50991336b40SDon Lewis /* par array contains fq_codel configuration as follow 51091336b40SDon Lewis * Codel: 0- target,1- interval, 2- flags 51191336b40SDon Lewis * FQ_CODEL: 3- quantum, 4- limit, 5- flows 51291336b40SDon Lewis */ 51391336b40SDon Lewis if (ep && ep->oid.len ==sizeof(*ep) && 51491336b40SDon Lewis ep->oid.subtype == DN_SCH_PARAMS) { 51591336b40SDon Lewis fqc_cfg = &schk->cfg; 51691336b40SDon Lewis if (ep->par[0] < 0) 51791336b40SDon Lewis fqc_cfg->ccfg.target = fq_codel_sysctl.ccfg.target; 51891336b40SDon Lewis else 51991336b40SDon Lewis fqc_cfg->ccfg.target = ep->par[0] * AQM_TIME_1US; 52091336b40SDon Lewis 52191336b40SDon Lewis if (ep->par[1] < 0) 52291336b40SDon Lewis fqc_cfg->ccfg.interval = fq_codel_sysctl.ccfg.interval; 52391336b40SDon Lewis else 52491336b40SDon Lewis fqc_cfg->ccfg.interval = ep->par[1] * AQM_TIME_1US; 52591336b40SDon Lewis 52691336b40SDon Lewis if (ep->par[2] < 0) 52791336b40SDon Lewis fqc_cfg->ccfg.flags = 0; 52891336b40SDon Lewis else 52991336b40SDon Lewis fqc_cfg->ccfg.flags = ep->par[2]; 53091336b40SDon Lewis 53191336b40SDon Lewis /* FQ configurations */ 53291336b40SDon Lewis if (ep->par[3] < 0) 53391336b40SDon Lewis fqc_cfg->quantum = fq_codel_sysctl.quantum; 53491336b40SDon Lewis else 53591336b40SDon Lewis fqc_cfg->quantum = ep->par[3]; 53691336b40SDon Lewis 53791336b40SDon Lewis if (ep->par[4] < 0) 53891336b40SDon Lewis fqc_cfg->limit = fq_codel_sysctl.limit; 53991336b40SDon Lewis else 54091336b40SDon Lewis fqc_cfg->limit = ep->par[4]; 54191336b40SDon Lewis 54291336b40SDon Lewis if (ep->par[5] < 0) 54391336b40SDon Lewis fqc_cfg->flows_cnt = fq_codel_sysctl.flows_cnt; 54491336b40SDon Lewis else 54591336b40SDon Lewis fqc_cfg->flows_cnt = ep->par[5]; 54691336b40SDon Lewis 54791336b40SDon Lewis /* Bound the configurations */ 54891336b40SDon Lewis fqc_cfg->ccfg.target = BOUND_VAR(fqc_cfg->ccfg.target, 1 , 54991336b40SDon Lewis 5 * AQM_TIME_1S); ; 55091336b40SDon Lewis fqc_cfg->ccfg.interval = BOUND_VAR(fqc_cfg->ccfg.interval, 1, 55191336b40SDon Lewis 100 * AQM_TIME_1S); 55291336b40SDon Lewis 55391336b40SDon Lewis fqc_cfg->quantum = BOUND_VAR(fqc_cfg->quantum,1, 9000); 55491336b40SDon Lewis fqc_cfg->limit= BOUND_VAR(fqc_cfg->limit,1,20480); 55591336b40SDon Lewis fqc_cfg->flows_cnt= BOUND_VAR(fqc_cfg->flows_cnt,1,65536); 55691336b40SDon Lewis } 55791336b40SDon Lewis else 55891336b40SDon Lewis return 1; 55991336b40SDon Lewis 56091336b40SDon Lewis return 0; 56191336b40SDon Lewis } 56291336b40SDon Lewis 56391336b40SDon Lewis /* 56491336b40SDon Lewis * Return fq_codel scheduler configurations 56591336b40SDon Lewis * the configurations for the scheduler is passed to userland. 56691336b40SDon Lewis */ 56791336b40SDon Lewis static int 56891336b40SDon Lewis fq_codel_getconfig (struct dn_schk *_schk, struct dn_extra_parms *ep) { 56991336b40SDon Lewis struct fq_codel_schk *schk = (struct fq_codel_schk *)(_schk+1); 57091336b40SDon Lewis struct dn_sch_fq_codel_parms *fqc_cfg; 57191336b40SDon Lewis 57291336b40SDon Lewis fqc_cfg = &schk->cfg; 57391336b40SDon Lewis 57491336b40SDon Lewis strcpy(ep->name, fq_codel_desc.name); 57591336b40SDon Lewis ep->par[0] = fqc_cfg->ccfg.target / AQM_TIME_1US; 57691336b40SDon Lewis ep->par[1] = fqc_cfg->ccfg.interval / AQM_TIME_1US; 57791336b40SDon Lewis ep->par[2] = fqc_cfg->ccfg.flags; 57891336b40SDon Lewis 57991336b40SDon Lewis ep->par[3] = fqc_cfg->quantum; 58091336b40SDon Lewis ep->par[4] = fqc_cfg->limit; 58191336b40SDon Lewis ep->par[5] = fqc_cfg->flows_cnt; 58291336b40SDon Lewis 58391336b40SDon Lewis return 0; 58491336b40SDon Lewis } 58591336b40SDon Lewis 58691336b40SDon Lewis /* 58791336b40SDon Lewis * fq_codel scheduler descriptor 58891336b40SDon Lewis * contains the type of the scheduler, the name, the size of extra 58991336b40SDon Lewis * data structures, and function pointers. 59091336b40SDon Lewis */ 59191336b40SDon Lewis static struct dn_alg fq_codel_desc = { 59291336b40SDon Lewis _SI( .type = ) DN_SCHED_FQ_CODEL, 59391336b40SDon Lewis _SI( .name = ) "FQ_CODEL", 59491336b40SDon Lewis _SI( .flags = ) 0, 59591336b40SDon Lewis 59691336b40SDon Lewis _SI( .schk_datalen = ) sizeof(struct fq_codel_schk), 59791336b40SDon Lewis _SI( .si_datalen = ) sizeof(struct fq_codel_si) - sizeof(struct dn_sch_inst), 59891336b40SDon Lewis _SI( .q_datalen = ) 0, 59991336b40SDon Lewis 60091336b40SDon Lewis _SI( .enqueue = ) fq_codel_enqueue, 60191336b40SDon Lewis _SI( .dequeue = ) fq_codel_dequeue, 60291336b40SDon Lewis _SI( .config = ) fq_codel_config, /* new sched i.e. sched X config ...*/ 60391336b40SDon Lewis _SI( .destroy = ) NULL, /*sched x delete */ 60491336b40SDon Lewis _SI( .new_sched = ) fq_codel_new_sched, /* new schd instance */ 60591336b40SDon Lewis _SI( .free_sched = ) fq_codel_free_sched, /* delete schd instance */ 60691336b40SDon Lewis _SI( .new_fsk = ) NULL, 60791336b40SDon Lewis _SI( .free_fsk = ) NULL, 60891336b40SDon Lewis _SI( .new_queue = ) NULL, 60991336b40SDon Lewis _SI( .free_queue = ) NULL, 61091336b40SDon Lewis _SI( .getconfig = ) fq_codel_getconfig, 61191336b40SDon Lewis _SI( .ref_count = ) 0 61291336b40SDon Lewis }; 61391336b40SDon Lewis 61491336b40SDon Lewis DECLARE_DNSCHED_MODULE(dn_fq_codel, &fq_codel_desc); 615