1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include "opt_netlink.h"
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/malloc.h>
34 #include <sys/lock.h>
35 #include <sys/rmlock.h>
36 #include <sys/mbuf.h>
37 #include <sys/ck.h>
38 #include <sys/socket.h>
39 #include <sys/socketvar.h>
40 #include <sys/syslog.h>
41 
42 #include <netlink/netlink.h>
43 #include <netlink/netlink_ctl.h>
44 #include <netlink/netlink_linux.h>
45 #include <netlink/netlink_var.h>
46 
47 #define	DEBUG_MOD_NAME	nl_writer
48 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
49 #include <netlink/netlink_debug.h>
50 _DECLARE_DEBUG(LOG_INFO);
51 
52 /*
53  * The goal of this file is to provide convenient message writing KPI on top of
54  * different storage methods (mbufs, uio, temporary memory chunks).
55  *
 * The main KPI guarantee is that the (last) message always resides in a contiguous
 *  memory buffer, so one is able to update the header after writing the entire message.
58  *
59  * This guarantee comes with a side effect of potentially reallocating underlying
60  *  buffer, so one needs to update the desired pointers after something is added
61  *  to the header.
62  *
63  * Messaging layer contains hooks performing transparent Linux translation for the messages.
64  *
65  * There are 3 types of supported targets:
66  *  * socket (adds mbufs to the socket buffer, used for message replies)
67  *  * group (sends mbuf/chain to the specified groups, used for the notifications)
68  *  * chain (returns mbuf chain, used in Linux message translation code)
69  *
70  * There are 3 types of storage:
71  * * NS_WRITER_TYPE_MBUF (mbuf-based, most efficient, used when a single message
72  *    fits in MCLBYTES)
73  * * NS_WRITER_TYPE_BUF (fallback, malloc-based, used when a single message needs
74  *    to be larger than one supported by NS_WRITER_TYPE_MBUF)
75  * * NS_WRITER_TYPE_LBUF (malloc-based, similar to NS_WRITER_TYPE_BUF, used for
76  *    Linux sockets, calls translation hook prior to sending messages to the socket).
77  *
78  * Internally, KPI switches between different types of storage when memory requirements
79  *  change. It happens transparently to the caller.
80  */
81 
82 
83 typedef bool nlwriter_op_init(struct nl_writer *nw, int size, bool waitok);
84 typedef bool nlwriter_op_write(struct nl_writer *nw, void *buf, int buflen, int cnt);
85 
86 struct nlwriter_ops {
87 	nlwriter_op_init	*init;
88 	nlwriter_op_write	*write_socket;
89 	nlwriter_op_write	*write_group;
90 	nlwriter_op_write	*write_chain;
91 };
92 
93 /*
94  * NS_WRITER_TYPE_BUF
95  * Writes message to a temporary memory buffer,
96  * flushing to the socket/group when buffer size limit is reached
97  */
98 static bool
99 nlmsg_get_ns_buf(struct nl_writer *nw, int size, bool waitok)
100 {
101 	int mflag = waitok ? M_WAITOK : M_NOWAIT;
102 	nw->_storage = malloc(size, M_NETLINK, mflag | M_ZERO);
103 	if (__predict_false(nw->_storage == NULL))
104 		return (false);
105 	nw->alloc_len = size;
106 	nw->offset = 0;
107 	nw->hdr = NULL;
108 	nw->data = nw->_storage;
109 	nw->writer_type = NS_WRITER_TYPE_BUF;
110 	nw->malloc_flag = mflag;
111 	nw->num_messages = 0;
112 	nw->enomem = false;
113 	return (true);
114 }
115 
116 static bool
117 nlmsg_write_socket_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
118 {
119 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
120 	if (__predict_false(datalen == 0)) {
121 		free(buf, M_NETLINK);
122 		return (true);
123 	}
124 
125 	struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
126 	if (__predict_false(m == NULL)) {
127 		/* XXX: should we set sorcverr? */
128 		free(buf, M_NETLINK);
129 		return (false);
130 	}
131 	m_append(m, datalen, buf);
132 	free(buf, M_NETLINK);
133 
134 	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
135 	return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags));
136 }
137 
138 static bool
139 nlmsg_write_group_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
140 {
141 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen,
142 	    nw->arg.group.proto, nw->arg.group.id);
143 	if (__predict_false(datalen == 0)) {
144 		free(buf, M_NETLINK);
145 		return (true);
146 	}
147 
148 	struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
149 	if (__predict_false(m == NULL)) {
150 		free(buf, M_NETLINK);
151 		return (false);
152 	}
153 	bool success = m_append(m, datalen, buf) != 0;
154 	free(buf, M_NETLINK);
155 
156 	if (!success)
157 		return (false);
158 
159 	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
160 	return (true);
161 }
162 
163 static bool
164 nlmsg_write_chain_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
165 {
166 	struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr);
167 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
168 
169 	if (__predict_false(datalen == 0)) {
170 		free(buf, M_NETLINK);
171 		return (true);
172 	}
173 
174 	if (*m0 == NULL) {
175 		struct mbuf *m;
176 
177 		m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
178 		if (__predict_false(m == NULL)) {
179 			free(buf, M_NETLINK);
180 			return (false);
181 		}
182 		*m0 = m;
183 	}
184 	if (__predict_false(m_append(*m0, datalen, buf) == 0)) {
185 		free(buf, M_NETLINK);
186 		return (false);
187 	}
188 	return (true);
189 }
190 
191 
192 /*
193  * NS_WRITER_TYPE_MBUF
194  * Writes message to the allocated mbuf,
195  * flushing to socket/group when mbuf size limit is reached.
196  * This is the most efficient mechanism as it avoids double-copying.
197  *
198  * Allocates a single mbuf suitable to store up to @size bytes of data.
 * If size < MHLEN (around 160 bytes), allocates mbuf with pkthdr
200  * If size <= MCLBYTES (2k), allocate a single mbuf cluster
201  * Otherwise, return NULL.
202  */
203 static bool
204 nlmsg_get_ns_mbuf(struct nl_writer *nw, int size, bool waitok)
205 {
206 	struct mbuf *m;
207 
208 	int mflag = waitok ? M_WAITOK : M_NOWAIT;
209 	m = m_get2(size, mflag, MT_DATA, M_PKTHDR);
210 	if (__predict_false(m == NULL))
211 		return (false);
212 	nw->alloc_len = M_TRAILINGSPACE(m);
213 	nw->offset = 0;
214 	nw->hdr = NULL;
215 	nw->_storage = (void *)m;
216 	nw->data = mtod(m, void *);
217 	nw->writer_type = NS_WRITER_TYPE_MBUF;
218 	nw->malloc_flag = mflag;
219 	nw->num_messages = 0;
220 	nw->enomem = false;
221 	memset(nw->data, 0, size);
222 	NL_LOG(LOG_DEBUG2, "alloc mbuf %p req_len %d alloc_len %d data_ptr %p",
223 	    m, size, nw->alloc_len, nw->data);
224 	return (true);
225 }
226 
227 static bool
228 nlmsg_write_socket_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
229 {
230 	struct mbuf *m = (struct mbuf *)buf;
231 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
232 
233 	if (__predict_false(datalen == 0)) {
234 		m_freem(m);
235 		return (true);
236 	}
237 
238 	m->m_pkthdr.len = datalen;
239 	m->m_len = datalen;
240 	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
241 	return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags));
242 }
243 
244 static bool
245 nlmsg_write_group_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
246 {
247 	struct mbuf *m = (struct mbuf *)buf;
248 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen,
249 	    nw->arg.group.proto, nw->arg.group.id);
250 
251 	if (__predict_false(datalen == 0)) {
252 		m_freem(m);
253 		return (true);
254 	}
255 
256 	m->m_pkthdr.len = datalen;
257 	m->m_len = datalen;
258 	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
259 	return (true);
260 }
261 
262 static bool
263 nlmsg_write_chain_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
264 {
265 	struct mbuf *m_new = (struct mbuf *)buf;
266 	struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr);
267 
268 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
269 
270 	if (__predict_false(datalen == 0)) {
271 		m_freem(m_new);
272 		return (true);
273 	}
274 
275 	m_new->m_pkthdr.len = datalen;
276 	m_new->m_len = datalen;
277 
278 	if (*m0 == NULL) {
279 		*m0 = m_new;
280 	} else {
281 		struct mbuf *m_last;
282 		for (m_last = *m0; m_last->m_next != NULL; m_last = m_last->m_next)
283 			;
284 		m_last->m_next = m_new;
285 		(*m0)->m_pkthdr.len += datalen;
286 	}
287 
288 	return (true);
289 }
290 
291 /*
292  * NS_WRITER_TYPE_LBUF
293  * Writes message to the allocated memory buffer,
294  * flushing to socket/group when mbuf size limit is reached.
295  * Calls linux handler to rewrite messages before sending to the socket.
296  */
297 static bool
298 nlmsg_get_ns_lbuf(struct nl_writer *nw, int size, bool waitok)
299 {
300 	int mflag = waitok ? M_WAITOK : M_NOWAIT;
301 	size = roundup2(size, sizeof(void *));
302 	int add_size = sizeof(struct linear_buffer) + SCRATCH_BUFFER_SIZE;
303 	char *buf = malloc(add_size + size * 2, M_NETLINK, mflag | M_ZERO);
304 	if (__predict_false(buf == NULL))
305 		return (false);
306 
307 	/* Fill buffer header first */
308 	struct linear_buffer *lb = (struct linear_buffer *)buf;
309 	lb->base = &buf[sizeof(struct linear_buffer) + size];
310 	lb->size = size + SCRATCH_BUFFER_SIZE;
311 
312 	nw->alloc_len = size;
313 	nw->offset = 0;
314 	nw->hdr = NULL;
315 	nw->_storage = buf;
316 	nw->data = (char *)(lb + 1);
317 	nw->malloc_flag = mflag;
318 	nw->writer_type = NS_WRITER_TYPE_LBUF;
319 	nw->num_messages = 0;
320 	nw->enomem = false;
321 	return (true);
322 }
323 
324 static bool
325 nlmsg_write_socket_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
326 {
327 	struct linear_buffer *lb = (struct linear_buffer *)buf;
328 	char *data = (char *)(lb + 1);
329 	struct nlpcb *nlp = (struct nlpcb *)(nw->arg.ptr);
330 
331 	if (__predict_false(datalen == 0)) {
332 		free(buf, M_NETLINK);
333 		return (true);
334 	}
335 
336 	struct mbuf *m = NULL;
337 	if (linux_netlink_p != NULL)
338 		m = linux_netlink_p->msgs_to_linux(nlp->nl_proto, data, datalen, nlp);
339 	free(buf, M_NETLINK);
340 
341 	if (__predict_false(m == NULL)) {
342 		/* XXX: should we set sorcverr? */
343 		return (false);
344 	}
345 
346 	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
347 	return (nl_send_one(m, nlp, cnt, io_flags));
348 }
349 
350 /* Shouldn't be called (maybe except Linux code originating message) */
351 static bool
352 nlmsg_write_group_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
353 {
354 	struct linear_buffer *lb = (struct linear_buffer *)buf;
355 	char *data = (char *)(lb + 1);
356 
357 	if (__predict_false(datalen == 0)) {
358 		free(buf, M_NETLINK);
359 		return (true);
360 	}
361 
362 	struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
363 	if (__predict_false(m == NULL)) {
364 		free(buf, M_NETLINK);
365 		return (false);
366 	}
367 	m_append(m, datalen, data);
368 	free(buf, M_NETLINK);
369 
370 	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
371 	return (true);
372 }
373 
374 static const struct nlwriter_ops nlmsg_writers[] = {
375 	/* NS_WRITER_TYPE_MBUF */
376 	{
377 		.init = nlmsg_get_ns_mbuf,
378 		.write_socket = nlmsg_write_socket_mbuf,
379 		.write_group = nlmsg_write_group_mbuf,
380 		.write_chain = nlmsg_write_chain_mbuf,
381 	},
382 	/* NS_WRITER_TYPE_BUF */
383 	{
384 		.init = nlmsg_get_ns_buf,
385 		.write_socket = nlmsg_write_socket_buf,
386 		.write_group = nlmsg_write_group_buf,
387 		.write_chain = nlmsg_write_chain_buf,
388 	},
389 	/* NS_WRITER_TYPE_LBUF */
390 	{
391 		.init = nlmsg_get_ns_lbuf,
392 		.write_socket = nlmsg_write_socket_lbuf,
393 		.write_group = nlmsg_write_group_lbuf,
394 	},
395 };
396 
397 static void
398 nlmsg_set_callback(struct nl_writer *nw)
399 {
400 	const struct nlwriter_ops *pops = &nlmsg_writers[nw->writer_type];
401 
402 	switch (nw->writer_target) {
403 	case NS_WRITER_TARGET_SOCKET:
404 		nw->cb = pops->write_socket;
405 		break;
406 	case NS_WRITER_TARGET_GROUP:
407 		nw->cb = pops->write_group;
408 		break;
409 	case NS_WRITER_TARGET_CHAIN:
410 		nw->cb = pops->write_chain;
411 		break;
412 	default:
413 		panic("not implemented");
414 	}
415 }
416 
417 static bool
418 nlmsg_get_buf_type(struct nl_writer *nw, int size, int type, bool waitok)
419 {
420 	MPASS(type + 1 <= sizeof(nlmsg_writers) / sizeof(nlmsg_writers[0]));
421 	NL_LOG(LOG_DEBUG3, "Setting up nw %p size %d type %d", nw, size, type);
422 	return (nlmsg_writers[type].init(nw, size, waitok));
423 }
424 
425 static bool
426 nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux)
427 {
428 	int type;
429 
430 	if (!is_linux) {
431 		if (__predict_true(size <= MCLBYTES))
432 			type = NS_WRITER_TYPE_MBUF;
433 		else
434 			type = NS_WRITER_TYPE_BUF;
435 	} else
436 		type = NS_WRITER_TYPE_LBUF;
437 	return (nlmsg_get_buf_type(nw, size, type, waitok));
438 }
439 
440 bool
441 _nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp)
442 {
443 	if (!nlmsg_get_buf(nw, size, false, nlp->nl_linux))
444 		return (false);
445 	nw->arg.ptr = (void *)nlp;
446 	nw->writer_target = NS_WRITER_TARGET_SOCKET;
447 	nlmsg_set_callback(nw);
448 	return (true);
449 }
450 
451 bool
452 _nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id)
453 {
454 	if (!nlmsg_get_buf(nw, size, false, false))
455 		return (false);
456 	nw->arg.group.proto = protocol;
457 	nw->arg.group.id = group_id;
458 	nw->writer_target = NS_WRITER_TARGET_GROUP;
459 	nlmsg_set_callback(nw);
460 	return (true);
461 }
462 
463 bool
464 _nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm)
465 {
466 	if (!nlmsg_get_buf(nw, size, false, false))
467 		return (false);
468 	*pm = NULL;
469 	nw->arg.ptr = (void *)pm;
470 	nw->writer_target = NS_WRITER_TARGET_CHAIN;
471 	nlmsg_set_callback(nw);
472 	NL_LOG(LOG_DEBUG3, "setup cb %p (need %p)", nw->cb, &nlmsg_write_chain_mbuf);
473 	return (true);
474 }
475 
/*
 * Marks @nw so that the socket flush handlers pass NL_IOF_IGNORE_LIMIT
 * to nl_send_one() when sending the written messages.
 */
void
_nlmsg_ignore_limit(struct nl_writer *nw)
{
	nw->ignore_limit = true;
}
481 
482 bool
483 _nlmsg_flush(struct nl_writer *nw)
484 {
485 
486 	if (__predict_false(nw->hdr != NULL)) {
487 		/* Last message has not been completed, skip it. */
488 		int completed_len = (char *)nw->hdr - nw->data;
489 		/* Send completed messages */
490 		nw->offset -= nw->offset - completed_len;
491 		nw->hdr = NULL;
492 	}
493 
494 	NL_LOG(LOG_DEBUG2, "OUT");
495 	bool result = nw->cb(nw, nw->_storage, nw->offset, nw->num_messages);
496 	nw->_storage = NULL;
497 
498 	if (!result) {
499 		NL_LOG(LOG_DEBUG, "nw %p offset %d: flush with %p() failed", nw, nw->offset, nw->cb);
500 	}
501 
502 	return (result);
503 }
504 
505 /*
506  * Flushes previous data and allocates new underlying storage
507  *  sufficient for holding at least @required_len bytes.
508  * Return true on success.
509  */
510 bool
511 _nlmsg_refill_buffer(struct nl_writer *nw, int required_len)
512 {
513 	struct nl_writer ns_new = {};
514 	int completed_len, new_len;
515 
516 	if (nw->enomem)
517 		return (false);
518 
519 	NL_LOG(LOG_DEBUG3, "no space at offset %d/%d (want %d), trying to reclaim",
520 	    nw->offset, nw->alloc_len, required_len);
521 
522 	/* Calculated new buffer size and allocate it s*/
523 	completed_len = (nw->hdr != NULL) ? (char *)nw->hdr - nw->data : nw->offset;
524 	if (completed_len > 0 && required_len < MCLBYTES) {
525 		/* We already ran out of space, use the largest effective size */
526 		new_len = max(nw->alloc_len, MCLBYTES);
527 	} else {
528 		if (nw->alloc_len < MCLBYTES)
529 			new_len = MCLBYTES;
530 		else
531 			new_len = nw->alloc_len * 2;
532 		while (new_len < required_len)
533 			new_len *= 2;
534 	}
535 	bool waitok = (nw->malloc_flag == M_WAITOK);
536 	bool is_linux = (nw->writer_type == NS_WRITER_TYPE_LBUF);
537 	if (!nlmsg_get_buf(&ns_new, new_len, waitok, is_linux)) {
538 		nw->enomem = true;
539 		NL_LOG(LOG_DEBUG, "getting new buf failed, setting ENOMEM");
540 		return (false);
541 	}
542 	if (nw->ignore_limit)
543 		nlmsg_ignore_limit(&ns_new);
544 
545 	/* Update callback data */
546 	ns_new.writer_target = nw->writer_target;
547 	nlmsg_set_callback(&ns_new);
548 	ns_new.arg = nw->arg;
549 
550 	/* Copy last (unfinished) header to the new storage */
551 	int last_len = nw->offset - completed_len;
552 	if (last_len > 0) {
553 		memcpy(ns_new.data, nw->hdr, last_len);
554 		ns_new.hdr = (struct nlmsghdr *)ns_new.data;
555 		ns_new.offset = last_len;
556 	}
557 
558 	NL_LOG(LOG_DEBUG2, "completed: %d bytes, copied: %d bytes", completed_len, last_len);
559 
560 	/* Flush completed headers & switch to the new nw */
561 	nlmsg_flush(nw);
562 	memcpy(nw, &ns_new, sizeof(struct nl_writer));
563 	NL_LOG(LOG_DEBUG2, "switched buffer: used %d/%d bytes", nw->offset, nw->alloc_len);
564 
565 	return (true);
566 }
567 
568 bool
569 _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type,
570     uint16_t flags, uint32_t len)
571 {
572 	struct nlmsghdr *hdr;
573 
574 	MPASS(nw->hdr == NULL);
575 
576 	int required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr));
577 	if (__predict_false(nw->offset + required_len > nw->alloc_len)) {
578 		if (!nlmsg_refill_buffer(nw, required_len))
579 			return (false);
580 	}
581 
582 	hdr = (struct nlmsghdr *)(&nw->data[nw->offset]);
583 
584 	hdr->nlmsg_len = len;
585 	hdr->nlmsg_type = type;
586 	hdr->nlmsg_flags = flags;
587 	hdr->nlmsg_seq = seq;
588 	hdr->nlmsg_pid = portid;
589 
590 	nw->hdr = hdr;
591 	nw->offset += sizeof(struct nlmsghdr);
592 
593 	return (true);
594 }
595 
596 bool
597 _nlmsg_end(struct nl_writer *nw)
598 {
599 	MPASS(nw->hdr != NULL);
600 
601 	if (nw->enomem) {
602 		NL_LOG(LOG_DEBUG, "ENOMEM when dumping message");
603 		nlmsg_abort(nw);
604 		return (false);
605 	}
606 
607 	nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr);
608 	NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u",
609 	    nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags,
610 	    nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid);
611 	nw->hdr = NULL;
612 	nw->num_messages++;
613 	return (true);
614 }
615 
616 void
617 _nlmsg_abort(struct nl_writer *nw)
618 {
619 	if (nw->hdr != NULL) {
620 		nw->offset = (uint32_t)((char *)nw->hdr - nw->data);
621 		nw->hdr = NULL;
622 	}
623 }
624 
625 void
626 nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *hdr,
627     struct nl_pstate *npt)
628 {
629 	struct nlmsgerr *errmsg;
630 	int payload_len;
631 	uint32_t flags = nlp->nl_flags;
632 	struct nl_writer *nw = npt->nw;
633 	bool cap_ack;
634 
635 	payload_len = sizeof(struct nlmsgerr);
636 
637 	/*
638 	 * The only case when we send the full message in the
639 	 * reply is when there is an error and NETLINK_CAP_ACK
640 	 * is not set.
641 	 */
642 	cap_ack = (error == 0) || (flags & NLF_CAP_ACK);
643 	if (!cap_ack)
644 		payload_len += hdr->nlmsg_len - sizeof(struct nlmsghdr);
645 	payload_len = NETLINK_ALIGN(payload_len);
646 
647 	uint16_t nl_flags = cap_ack ? NLM_F_CAPPED : 0;
648 	if ((npt->err_msg || npt->err_off) && nlp->nl_flags & NLF_EXT_ACK)
649 		nl_flags |= NLM_F_ACK_TLVS;
650 
651 	NL_LOG(LOG_DEBUG3, "acknowledging message type %d seq %d",
652 	    hdr->nlmsg_type, hdr->nlmsg_seq);
653 
654 	if (!nlmsg_add(nw, nlp->nl_port, hdr->nlmsg_seq, NLMSG_ERROR, nl_flags, payload_len))
655 		goto enomem;
656 
657 	errmsg = nlmsg_reserve_data(nw, payload_len, struct nlmsgerr);
658 	errmsg->error = error;
659 	/* In case of error copy the whole message, else just the header */
660 	memcpy(&errmsg->msg, hdr, cap_ack ? sizeof(*hdr) : hdr->nlmsg_len);
661 
662 	if (npt->err_msg != NULL && nlp->nl_flags & NLF_EXT_ACK)
663 		nlattr_add_string(nw, NLMSGERR_ATTR_MSG, npt->err_msg);
664 	if (npt->err_off != 0 && nlp->nl_flags & NLF_EXT_ACK)
665 		nlattr_add_u32(nw, NLMSGERR_ATTR_OFFS, npt->err_off);
666 	if (npt->cookie != NULL)
667 		nlattr_add_raw(nw, npt->cookie);
668 
669 	if (nlmsg_end(nw))
670 		return;
671 enomem:
672 	NLP_LOG(LOG_DEBUG, nlp, "error allocating ack data for message %d seq %u",
673 	    hdr->nlmsg_type, hdr->nlmsg_seq);
674 	nlmsg_abort(nw);
675 }
676 
677 bool
678 _nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr)
679 {
680 	if (!nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, NLMSG_DONE, 0, sizeof(int))) {
681 		NL_LOG(LOG_DEBUG, "Error finalizing table dump");
682 		return (false);
683 	}
684 	/* Save operation result */
685 	int *perror = nlmsg_reserve_object(nw, int);
686 	NL_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error,
687 	    nw->offset, perror);
688 	*perror = error;
689 	nlmsg_end(nw);
690 	nw->suppress_ack = true;
691 
692 	return (true);
693 }
694