/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*
 *     Copyright 2014 Couchbase, Inc.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 */

#ifndef NETBUF_H
#define NETBUF_H

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @file
 * @brief Netbuf write buffers
 */

/**
 * @defgroup netbufs Netbufs
 *
 * # Introduction
 *
 * ## GOALS
 *
 * 1.  Provide a simple buffer allocation API.
 *     From the caller's perspective it is simplest to deal with a single
 *     contiguous buffer per packet.
 *
 * 2.  Provide an efficient way of sending multiple contiguous packets. This
 *     reduces IOV fragmentation and the number of trips to the I/O plugin
 *     for multiple writes. Currently this is done very efficiently with the
 *     ringbuffer; however, that comes at the cost of copying all request
 *     data into the ringbuffer itself. Our aim is to reduce the number of
 *     copies while still maintaining a packed buffer.
 *
 * 3.  Allow a pluggable method by which user-provided data can be
 *     integrated into the span/cursor/flush architecture.
 *
 * @addtogroup netbufs
 * @{
 */

#include "sllist.h"
#include "netbuf-defs.h"
#include "netbuf-mblock.h"

/**
 * @brief Structure representing a buffer within netbufs
 *
 * @note It is recommended that you maintain the individual fields in your
 * own structure and then re-create them as needed. The span structure is 16
 * bytes on 64-bit systems, but can be reduced to 12 if needed. Additionally,
 * you may already have the 'size' field stored/calculated elsewhere.
 */
typedef struct {
    /** @private Parent block */
    nb_MBLOCK *parent;

    /** @private Offset from root at which this buffer begins */
    nb_SIZE offset;

    /** write-once: Allocation size */
    nb_SIZE size;
} nb_SPAN;

#define NETBUF_INVALID_OFFSET ((nb_SIZE)-1)

/**
 * Creates a span from a buffer _not_ owned by netbufs.
 * @param span the span to initialize
 * @param buf the buffer
 * @param len the length of the buffer
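 *
 * A minimal usage sketch (the buffer name and size here are hypothetical;
 * note that the span merely aliases the caller's memory, it does not copy
 * it):
 * @code{.c}
 * char reqbuf[64];
 * nb_SPAN span;
 * CREATE_STANDALONE_SPAN(&span, reqbuf, sizeof reqbuf);
 * // SPAN_BUFFER(&span) now evaluates back to reqbuf
 * @endcode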
 */
#define CREATE_STANDALONE_SPAN(span, buf, len) do { \
    (span)->parent = (nb_MBLOCK *)(void *)(buf); \
    (span)->offset = NETBUF_INVALID_OFFSET; \
    (span)->size = (len); \
} while (0)

/** @private */
typedef struct {
    sllist_node slnode;
    char *base;
    nb_SIZE len;
    /* Note: 4 bytes of padding here on 64-bit platforms */
    const void *parent; /* mc_PACKET */
} nb_SNDQELEM;

/** @private */
typedef struct {
    /** Linked list of pending spans to send */
    sllist_root pending;

    /**
     * List of PDUs to be flushed. A PDU is composed of one or more IOVs
     * (or even a subsection thereof)
     */
    sllist_root pdus;

    /** The last element which was part of the previous fill call */
    nb_SNDQELEM *last_requested;

    /**
     * Number of bytes enqueued in the 'last requested' element. This is
     * needed because it is possible for the last element to grow in length
     * during a subsequent flush.
     */
    nb_SIZE last_offset;

    /** Offset into the last PDU which was partially flushed */
    nb_SIZE pdu_offset;

    /** Pool of elements to utilize */
    nb_MBPOOL elempool;
} nb_SENDQ;

struct netbuf_st {
    /** Send Queue */
    nb_SENDQ sendq;

    /** Pool for variable-size data */
    nb_MBPOOL datapool;

    nb_SETTINGS settings;
};

/**
 * Quick way to get the buffer from a span, when the span is *known* to
 * be standalone (i.e. initialized via CREATE_STANDALONE_SPAN())
 * @param span The span from which to extract the buffer
 * @return a pointer to the buffer
 */
#define SPAN_SABUFFER_NC(span) ((char *)(span)->parent)

/**
 * Quick way to get the buffer from a span when the span is known *not*
 * to be standalone
 * @param span The span from which to extract the buffer
 * @return A pointer to a buffer
 */
#define SPAN_MBUFFER_NC(span) ((span)->parent->root + (span)->offset)

/**
 * @brief Retrieves a pointer to the buffer related to this span.
 * @param span the span from which to extract the buffer
 * @return a pointer to the buffer.
 *
 * @see SPAN_SABUFFER_NC
 * @see SPAN_MBUFFER_NC
 */
#define SPAN_BUFFER(span) \
        (((span)->offset == NETBUF_INVALID_OFFSET) ? SPAN_SABUFFER_NC(span) : SPAN_MBUFFER_NC(span))

/**
 * @brief allocate a span
 *
 * Reserve a contiguous region of memory, in order, for a given span. The
 * span will be reserved from the last block to be flushed to the network.
 *
 * The contents of the span are guaranteed to be contiguous (though not
 * aligned) and are available via the SPAN_BUFFER macro.
 *
 * @return 0 if successful, -1 on error
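 *
 * A minimal sketch (assumes @c mgr was initialized via netbuf_init(), that
 * the caller sets the span's @c size field before reserving, and that
 * @c hdr is a hypothetical source buffer):
 * @code{.c}
 * nb_SPAN span;
 * span.size = 24;
 * if (netbuf_mblock_reserve(mgr, &span) == 0) {
 *     memcpy(SPAN_BUFFER(&span), hdr, span.size);
 * }
 * @endcode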
 */
int
netbuf_mblock_reserve(nb_MGR *mgr, nb_SPAN *span);

/**
 * @brief release a span
 *
 * Release a span previously allocated via netbuf_mblock_reserve(). It is
 * assumed that one of the following is true of the span's contents:
 *
 * 1. They have been successfully sent to the network
 * 2. They have just been scheduled (and are being removed due to error
 *    handling)
 * 3. They have been partially sent to a connection which is being closed
 *
 * @param mgr the manager in which this span is reserved
 * @param span the span
 */
void
netbuf_mblock_release(nb_MGR *mgr, nb_SPAN *span);

/**
 * @brief Enqueue a span for serialization
 *
 * Schedules an IOV to be placed inside the send queue. The storage of the
 * underlying buffer must not be freed or otherwise modified until it has
 * been sent.
 *
 * With the current usage model, flush status is implicitly completed once
 * a response has arrived.
 *
 * Note that you may create the IOV from a SPAN object like so:
 * @code{.c}
 * iov->iov_len = span->size;
 * iov->iov_base = SPAN_BUFFER(span);
 * @endcode
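 *
 * As a convenience, netbuf_enqueue_span() below performs this conversion
 * internally. An end-to-end sketch (the @c payload buffer is hypothetical,
 * and the opaque @c parent cookie is left NULL purely for illustration):
 * @code{.c}
 * nb_SPAN span;
 * span.size = payload_len;
 * netbuf_mblock_reserve(mgr, &span);
 * memcpy(SPAN_BUFFER(&span), payload, payload_len);
 * netbuf_enqueue_span(mgr, &span, NULL);
 * @endcode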
 */
void
netbuf_enqueue(nb_MGR *mgr, const nb_IOV *bufinfo, const void *parent);

void
netbuf_enqueue_span(nb_MGR *mgr, nb_SPAN *span, const void *parent);

/**
 * Gets the number of IOV structures required to flush the entire contents of
 * all buffers.
 */
unsigned int
netbuf_get_niov(nb_MGR *mgr);

/**
 * @brief
 * Populates an iovec structure for flushing a set of bytes from the various
 * blocks.
 *
 * You may call this function multiple times, so long as each call to
 * start_flush is eventually matched with a call to end_flush.
 *
 * @code{.c}
 * netbuf_start_flush(mgr, iov1, niov1, &nused1);
 * netbuf_start_flush(mgr, iov2, niov2, &nused2);
 * ...
 * netbuf_end_flush(mgr, nbytes1);
 * netbuf_end_flush(mgr, nbytes2);
 * @endcode
 *
 * Additionally, only the LAST end_flush call may be supplied an nflushed
 * parameter which is smaller than the size returned by start_flush. If the
 * entire contents of the `iovs` structure cannot be flushed immediately and
 * you do not wish to persist it until such a time that it may be flushed,
 * then netbuf_reset_flush() should be called. See that function's
 * documentation for more details.
 *
 * This function may be thought of as advancing a virtual cursor which is
 * mapped to a send queue. Each time this function is called, the cursor is
 * advanced by the number of bytes that the library expects you to flush
 * (i.e. the return value of this function). Typically the cursor is never
 * rewound, and any operation that advances the cursor places the burden on
 * the user to actually flush the data contained within the IOV objects.
 * The netbuf_end_flush() function merely has the task of releasing any
 * memory used for the mapping of already-flushed data.
 *
 * From a different perspective, each call to netbuf_start_flush() establishes
 * a contract between the library and the user: The library guarantees that
 * this specific region (referred to within the IOVs) will not be flushed by
 * any other subsystem (i.e. nothing else will try to flush the same data).
 * The user guarantees that the data will eventually be flushed, and that the
 * data will be flushed in the order it was received via start_flush().
 *
 * @param mgr the manager object
 * @param iovs an array of iovec structures
 * @param niov the number of iovec structures allocated.
 * @param[out] nused how many IOVs are actually required
 *
 * @return the number of bytes which can be flushed in this IOV. If the
 * return value is 0 then there are no more bytes to flush.
 *
 * Note that the return value is limited by the number of IOV structures
 * provided and should not be taken as an indicator of how many bytes are
 * used overall.
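 *
 * A sketch of one complete flush cycle (assumes a POSIX writev() on a
 * connected socket @c fd, and that @c nb_IOV is layout-compatible with
 * @c struct iovec on this platform; error handling omitted):
 * @code{.c}
 * nb_IOV iovs[8];
 * int nused;
 * nb_SIZE toflush = netbuf_start_flush(mgr, iovs, 8, &nused);
 * if (toflush > 0) {
 *     ssize_t nw = writev(fd, (struct iovec *)iovs, nused);
 *     if (nw > 0) {
 *         netbuf_end_flush(mgr, (nb_SIZE)nw);
 *     }
 * }
 * @endcode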
 */
nb_SIZE
netbuf_start_flush(nb_MGR *mgr, nb_IOV *iovs, int niov, int *nused);

/**
 * @brief Indicate that a flush has completed.
 *
 * Indicate that a number of bytes have been flushed. This should be called
 * after the data retrieved by netbuf_start_flush() has been flushed to the
 * TCP buffers.
 *
 * @param mgr the manager object
 * @param nflushed how much data in bytes was flushed to the network.
 */
void
netbuf_end_flush(nb_MGR *mgr, nb_SIZE nflushed);

/**
 * Reset the flush context for the buffer. This should be called only when
 * both of the following are true:
 *
 * (1) There is only a single open call to netbuf_start_flush
 * (2) The last call to end_flush did not pass all the bytes which were to
 *     be flushed.
 *
 * In this case, the next call to start_flush() will return an IOV which
 * begins where the last end_flush() finished, rather than where the last
 * start_flush() ended. As a consequence, any IOV previously populated by
 * start_flush() is no longer valid and start_flush() should be called again.
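 *
 * A sketch of the recovery pattern described above (@c nsent is a
 * hypothetical count, assumed smaller than the size returned by
 * start_flush()):
 * @code{.c}
 * nb_SIZE sz = netbuf_start_flush(mgr, iovs, niov, &nused);
 * netbuf_end_flush(mgr, nsent);
 * netbuf_reset_flush(mgr);
 * // the next start_flush() resumes where end_flush() left off
 * @endcode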
 */
#define netbuf_reset_flush(mgr) do { \
    (mgr)->sendq.last_requested = NULL; \
    (mgr)->sendq.last_offset = 0; \
} while (0)

/**
 * Informational function to get the total size of all data in the
 * buffers. This traverses all blocks, so call this for debugging only.
 */
nb_SIZE
netbuf_get_size(const nb_MGR *mgr);

/**
 * Get the maximum size of a span which can be satisfied without using an
 * additional block.
 *
 * @param mgr the manager
 *
 * @param allow_wrap
 * Whether to take wrapping into consideration. If this is true then the span
 * size will allow wrapping. If disabled, then only the packed size will be
 * available. Consider:
 * <pre>
 * [ ooooooo{S:10}xxxxxxxxx{C:10}ooooo{A:5} ]
 * </pre>
 * If wrapping is allowed, then the maximum span size will be 10, from 0..10,
 * but the last 5 bytes at the end will be lost for the duration of the block.
 * If wrapping is not allowed then the maximum span size will be 5.
 *
 * @return
 * the maximum span size without requiring additional blocks.
 */
nb_SIZE
netbuf_mblock_get_next_size(const nb_MGR *mgr, int allow_wrap);

/**
 * @brief Initializes an nb_MGR structure
 * @param mgr the manager to initialize
 * @param settings the settings to use (see netbuf_default_settings())
 */
void
netbuf_init(nb_MGR *mgr, const nb_SETTINGS *settings);

/**
 * @brief Frees up any allocated resources for a given manager
 * @param mgr the manager for which to release resources
 */
void
netbuf_cleanup(nb_MGR *mgr);

/**
 * Populates the settings structure with the default settings. This structure
 * may then be modified or tuned and passed to netbuf_init().
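 *
 * A typical lifecycle, as a sketch:
 * @code{.c}
 * nb_SETTINGS settings;
 * nb_MGR mgr;
 * netbuf_default_settings(&settings);
 * // optionally tune fields of settings here
 * netbuf_init(&mgr, &settings);
 * // ... reserve spans, enqueue, flush ...
 * netbuf_cleanup(&mgr);
 * @endcode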
 */
void
netbuf_default_settings(nb_SETTINGS *settings);

/**
 * Dump the internal structure of the manager to the given file. Useful for
 * debugging.
 */
void
netbuf_dump_status(nb_MGR *mgr, FILE *fp);


/**
 * Mark a PDU as being enqueued. This should be called whenever the final IOV
 * for a given PDU has just been enqueued.
 *
 * The PDU itself must remain valid and is assumed to contain an 'slnode'
 * structure which will point to the next PDU. The PDU will later be removed
 * from the queue once netbuf_end_flush() has been called for a range that
 * includes it.
 *
 * @param mgr The manager
 *
 * @param pdu An opaque pointer
 *
 * @param lloff The offset from the pdu pointer at which the sllist_node
 *        structure may be found.
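 *
 * The offset is typically computed with offsetof(). A sketch, using a
 * hypothetical PDU type with an embedded @c sllist_node member:
 * @code{.c}
 * #include <stddef.h>
 *
 * typedef struct {
 *     sllist_node slnode;
 *     // ... other packet fields ...
 * } my_pdu;
 *
 * netbuf_pdu_enqueue(mgr, pdu, offsetof(my_pdu, slnode));
 * @endcode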
 */
void
netbuf_pdu_enqueue(nb_MGR *mgr, void *pdu, nb_SIZE lloff);


/**
 * This callback is invoked during netbuf_end_flush2().
 *
 * @param pdu The PDU pointer enqueued via netbuf_pdu_enqueue()
 *
 * @param remaining A hint passed to the callback indicating how many bytes
 *        remain on the stream. If (and only if) remaining is at least the
 *        size of the PDU, the callback may change internal state within the
 *        packet to mark it as flushed.
 *
 *        XXX:
 *        If remaining < pdusize then the value <b>must</b> be ignored. It
 *        may have been previously passed to the same callback during a
 *        prior iteration.
 *
 *        This is done by design, in order to avoid forcing each PDU to
 *        maintain a variable indicating "how much was flushed".
 *
 * @param arg A pointer passed to the netbuf_end_flush2() call; used to
 *        correlate any data common to the queue itself.
 *
 * @return The size of the PDU. This will be used to determine future calls
 *         to the callback for subsequent PDUs.
 *
 *         If size <= remaining then this PDU will be popped off the PDU queue
 *         and is deemed to be no longer utilized by the send queue (it may
 *         then be released from the mblock allocator, if one is in use).
 *
 *         If size > remaining then no further callbacks will be invoked on
 *         the relevant PDUs.
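 *
 * A hypothetical callback, as a sketch (the @c my_pdu type, its
 * @c total_size field and its @c flags bit are illustrative only):
 * @code{.c}
 * static nb_SIZE my_getsize(void *p, nb_SIZE remaining, void *arg)
 * {
 *     my_pdu *pdu = p;
 *     (void)arg;
 *     if (remaining >= pdu->total_size) {
 *         pdu->flags |= MY_PDU_FLUSHED; // the entire PDU was flushed
 *     }
 *     return pdu->total_size;
 * }
 * @endcode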
 */
typedef nb_SIZE (*nb_getsize_fn)(void *pdu, nb_SIZE remaining, void *arg);

void
netbuf_end_flush2(nb_MGR *mgr,
                  unsigned int nflushed,
                  nb_getsize_fn callback,
                  nb_SIZE lloff, void *arg);


/**
 * Ensures that the given internal structures of the manager are not allocated
 * or otherwise in use by other systems. This is useful for testing, to ensure
 * that we don't mistakenly report everything as OK while resources are still
 * in use.
 *
 * Because resources are released at the block level, we might have had blocks
 * which were partially allocated.
 *
 * This checks the PDU and send queues as well.
 */
int
netbuf_is_clean(nb_MGR *mgr);

/**
 * Determines if there is any data to be flushed to the network
 */
int
netbuf_has_flushdata(nb_MGR *mgr);

/**@}*/

#ifdef __cplusplus
}
#endif

#endif /* NETBUF_H */