1 /* tvbuff.c
2  *
3  * Testy, Virtual(-izable) Buffer of guint8*'s
4  *
5  * "Testy" -- the buffer gets mad when an attempt to access data
6  *		beyond the bounds of the buffer. An exception is thrown.
7  *
8  * "Virtual" -- the buffer can have its own data, can use a subset of
9  *		the data of a backing tvbuff, or can be a composite of
10  *		other tvbuffs.
11  *
12  * Copyright (c) 2000 by Gilbert Ramirez <gram@alumni.rice.edu>
13  *
14  * Code to convert IEEE floating point formats to native floating point
15  * derived from code Copyright (c) Ashok Narayanan, 2000
16  *
17  * Wireshark - Network traffic analyzer
18  * By Gerald Combs <gerald@wireshark.org>
19  * Copyright 1998 Gerald Combs
20  *
21  * SPDX-License-Identifier: GPL-2.0-or-later
22  */
23 
24 #include "config.h"
25 
26 #include <string.h>
27 #include <stdio.h>
28 #include <errno.h>
29 
30 #include "wsutil/pint.h"
31 #include "wsutil/sign_ext.h"
32 #include "wsutil/unicode-utils.h"
33 #include "wsutil/nstime.h"
34 #include "wsutil/time_util.h"
35 #include <wsutil/ws_assert.h>
36 #include "tvbuff.h"
37 #include "tvbuff-int.h"
38 #include "strutil.h"
39 #include "to_str.h"
40 #include "charsets.h"
41 #include "proto.h"	/* XXX - only used for DISSECTOR_ASSERT, probably a new header file? */
42 #include "exceptions.h"
43 
44 /*
45  * Just make sure we include the prototype for strptime as well
46  * (needed for glibc 2.2) but make sure we do this only if not
47  * yet defined.
48  */
49 #include <time.h>
50 /*#ifndef HAVE_STRPTIME*/
51 #ifndef strptime
52 #include "wsutil/strptime.h"
53 #endif
54  /*#endif*/
55 
56 static guint64
57 _tvb_get_bits64(tvbuff_t *tvb, guint bit_offset, const gint total_no_of_bits);
58 
59 static guint64
60 _tvb_get_bits64_le(tvbuff_t *tvb, guint bit_offset, const gint total_no_of_bits);
61 
62 static inline gint
63 _tvb_captured_length_remaining(const tvbuff_t *tvb, const gint offset);
64 
65 static inline const guint8*
66 ensure_contiguous(tvbuff_t *tvb, const gint offset, const gint length);
67 
68 static inline guint8 *
69 tvb_get_raw_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, const gint length);
70 
71 tvbuff_t *
tvb_new(const struct tvb_ops * ops)72 tvb_new(const struct tvb_ops *ops)
73 {
74 	tvbuff_t *tvb;
75 	gsize     size = ops->tvb_size;
76 
77 	ws_assert(size >= sizeof(*tvb));
78 
79 	tvb = (tvbuff_t *) g_slice_alloc(size);
80 
81 	tvb->next		 = NULL;
82 	tvb->ops		 = ops;
83 	tvb->initialized	 = FALSE;
84 	tvb->flags		 = 0;
85 	tvb->length		 = 0;
86 	tvb->reported_length	 = 0;
87 	tvb->contained_length	 = 0;
88 	tvb->real_data		 = NULL;
89 	tvb->raw_offset		 = -1;
90 	tvb->ds_tvb		 = NULL;
91 
92 	return tvb;
93 }
94 
95 static void
tvb_free_internal(tvbuff_t * tvb)96 tvb_free_internal(tvbuff_t *tvb)
97 {
98 	gsize     size;
99 
100 	DISSECTOR_ASSERT(tvb);
101 
102 	if (tvb->ops->tvb_free)
103 		tvb->ops->tvb_free(tvb);
104 
105 	size = tvb->ops->tvb_size;
106 
107 	g_slice_free1(size, tvb);
108 }
109 
110 /* XXX: just call tvb_free_chain();
111  *      Not removed so that existing dissectors using tvb_free() need not be changed.
112  *      I'd argue that existing calls to tvb_free() should have actually beeen
113  *      calls to tvb_free_chain() although the calls were OK as long as no
114  *      subsets, etc had been created on the tvb. */
115 void
tvb_free(tvbuff_t * tvb)116 tvb_free(tvbuff_t *tvb)
117 {
118 	tvb_free_chain(tvb);
119 }
120 
121 void
tvb_free_chain(tvbuff_t * tvb)122 tvb_free_chain(tvbuff_t  *tvb)
123 {
124 	tvbuff_t *next_tvb;
125 	DISSECTOR_ASSERT(tvb);
126 	while (tvb) {
127 		next_tvb = tvb->next;
128 		tvb_free_internal(tvb);
129 		tvb  = next_tvb;
130 	}
131 }
132 
133 tvbuff_t *
tvb_new_chain(tvbuff_t * parent,tvbuff_t * backing)134 tvb_new_chain(tvbuff_t *parent, tvbuff_t *backing)
135 {
136 	tvbuff_t *tvb = tvb_new_proxy(backing);
137 
138 	tvb_add_to_chain(parent, tvb);
139 	return tvb;
140 }
141 
142 void
tvb_add_to_chain(tvbuff_t * parent,tvbuff_t * child)143 tvb_add_to_chain(tvbuff_t *parent, tvbuff_t *child)
144 {
145 	tvbuff_t *tmp = child;
146 
147 	DISSECTOR_ASSERT(parent);
148 	DISSECTOR_ASSERT(child);
149 
150 	while (child) {
151 		tmp   = child;
152 		child = child->next;
153 
154 		tmp->next    = parent->next;
155 		parent->next = tmp;
156 	}
157 }
158 
159 /*
160  * Check whether that offset goes more than one byte past the
161  * end of the buffer.
162  *
163  * If not, return 0; otherwise, return exception
164  */
165 static inline int
validate_offset(const tvbuff_t * tvb,const guint abs_offset)166 validate_offset(const tvbuff_t *tvb, const guint abs_offset)
167 {
168 	if (G_LIKELY(abs_offset <= tvb->length)) {
169 		/* It's OK. */
170 		return 0;
171 	}
172 
173 	/*
174 	 * It's not OK, but why?  Which boundaries is it
175 	 * past?
176 	 */
177 	if (abs_offset <= tvb->contained_length) {
178 		/*
179 		 * It's past the captured length, but not past
180 		 * the reported end of any parent tvbuffs from
181 		 * which this is constructed, or the reported
182 		 * end of this tvbuff, so it's out of bounds
183 		 * solely because we're past the end of the
184 		 * captured data.
185 		 */
186 		return BoundsError;
187 	}
188 
189 	/*
190 	 * There's some actual packet boundary, not just the
191 	 * artificial boundary imposed by packet slicing, that
192 	 * we're past.
193 	 */
194 	if (abs_offset <= tvb->reported_length) {
195 		/*
196 		 * We're within the bounds of what this tvbuff
197 		 * purportedly contains, based on some length
198 		 * value, but we're not within the bounds of
199 		 * something from which this tvbuff was
200 		 * extracted, so that length value ran past
201 		 * the end of some parent tvbuff.
202 		 */
203 		return ContainedBoundsError;
204 	}
205 
206 	/*
207 	 * OK, we're past the bounds of what this tvbuff
208 	 * purportedly contains.
209 	 */
210 	if (tvb->flags & TVBUFF_FRAGMENT) {
211 		/*
212 		 * This tvbuff is the first fragment of a larger
213 		 * packet that hasn't been reassembled, so we
214 		 * assume that's the source of the prblem - if
215 		 * we'd reassembled the packet, we wouldn't
216 		 * have gone past the end.
217 		 *
218 		 * That might not be true, but for at least
219 		 * some forms of reassembly, such as IP
220 		 * reassembly, you don't know how big the
221 		 * reassembled packet is unless you reassemble
222 		 * it, so, in those cases, we can't determine
223 		 * whether we would have gone past the end
224 		 * had we reassembled the packet.
225 		 */
226 		return FragmentBoundsError;
227 	}
228 
229 	/*
230 	 * OK, it looks as if we ran past the claimed length
231 	 * of data.
232 	 */
233 	return ReportedBoundsError;
234 }
235 
236 static inline int
compute_offset(const tvbuff_t * tvb,const gint offset,guint * offset_ptr)237 compute_offset(const tvbuff_t *tvb, const gint offset, guint *offset_ptr)
238 {
239 	if (offset >= 0) {
240 		/* Positive offset - relative to the beginning of the packet. */
241 		if (G_LIKELY((guint) offset <= tvb->length)) {
242 			*offset_ptr = offset;
243 		} else if ((guint) offset <= tvb->contained_length) {
244 			return BoundsError;
245 		} else if ((guint) offset <= tvb->reported_length) {
246 			return ContainedBoundsError;
247 		} else if (tvb->flags & TVBUFF_FRAGMENT) {
248 			return FragmentBoundsError;
249 		} else {
250 			return ReportedBoundsError;
251 		}
252 	}
253 	else {
254 		/* Negative offset - relative to the end of the packet. */
255 		if (G_LIKELY((guint) -offset <= tvb->length)) {
256 			*offset_ptr = tvb->length + offset;
257 		} else if ((guint) -offset <= tvb->contained_length) {
258 			return BoundsError;
259 		} else if ((guint) -offset <= tvb->reported_length) {
260 			return ContainedBoundsError;
261 		} else if (tvb->flags & TVBUFF_FRAGMENT) {
262 			return FragmentBoundsError;
263 		} else {
264 			return ReportedBoundsError;
265 		}
266 	}
267 
268 	return 0;
269 }
270 
271 static inline int
compute_offset_and_remaining(const tvbuff_t * tvb,const gint offset,guint * offset_ptr,guint * rem_len)272 compute_offset_and_remaining(const tvbuff_t *tvb, const gint offset, guint *offset_ptr, guint *rem_len)
273 {
274 	int exception;
275 
276 	exception = compute_offset(tvb, offset, offset_ptr);
277 	if (!exception)
278 		*rem_len = tvb->length - *offset_ptr;
279 
280 	return exception;
281 }
282 
283 /* Computes the absolute offset and length based on a possibly-negative offset
284  * and a length that is possible -1 (which means "to the end of the data").
285  * Returns integer indicating whether the offset is in bounds (0) or
286  * not (exception number). The integer ptrs are modified with the new offset,
287  * captured (available) length, and contained length (amount that's present
288  * in the parent tvbuff based on its reported length).
289  * No exception is thrown; on success, we return 0, otherwise we return an
290  * exception for the caller to throw if appropriate.
291  *
292  * XXX - we return success (0), if the offset is positive and right
293  * after the end of the tvbuff (i.e., equal to the length).  We do this
294  * so that a dissector constructing a subset tvbuff for the next protocol
295  * will get a zero-length tvbuff, not an exception, if there's no data
296  * left for the next protocol - we want the next protocol to be the one
297  * that gets an exception, so the error is reported as an error in that
298  * protocol rather than the containing protocol.  */
299 static inline int
check_offset_length_no_exception(const tvbuff_t * tvb,const gint offset,gint const length_val,guint * offset_ptr,guint * length_ptr)300 check_offset_length_no_exception(const tvbuff_t *tvb,
301 				 const gint offset, gint const length_val,
302 				 guint *offset_ptr, guint *length_ptr)
303 {
304 	guint end_offset;
305 	int   exception;
306 
307 	DISSECTOR_ASSERT(offset_ptr);
308 	DISSECTOR_ASSERT(length_ptr);
309 
310 	/* Compute the offset */
311 	exception = compute_offset(tvb, offset, offset_ptr);
312 	if (exception)
313 		return exception;
314 
315 	if (length_val < -1) {
316 		/* XXX - ReportedBoundsError? */
317 		return BoundsError;
318 	}
319 
320 	/* Compute the length */
321 	if (length_val == -1)
322 		*length_ptr = tvb->length - *offset_ptr;
323 	else
324 		*length_ptr = length_val;
325 
326 	/*
327 	 * Compute the offset of the first byte past the length.
328 	 */
329 	end_offset = *offset_ptr + *length_ptr;
330 
331 	/*
332 	 * Check for an overflow
333 	 */
334 	if (end_offset < *offset_ptr)
335 		return BoundsError;
336 
337 	return validate_offset(tvb, end_offset);
338 }
339 
340 /* Checks (+/-) offset and length and throws an exception if
341  * either is out of bounds. Sets integer ptrs to the new offset
342  * and length. */
343 static inline void
check_offset_length(const tvbuff_t * tvb,const gint offset,gint const length_val,guint * offset_ptr,guint * length_ptr)344 check_offset_length(const tvbuff_t *tvb,
345 		    const gint offset, gint const length_val,
346 		    guint *offset_ptr, guint *length_ptr)
347 {
348 	int exception;
349 
350 	exception = check_offset_length_no_exception(tvb, offset, length_val, offset_ptr, length_ptr);
351 	if (exception)
352 		THROW(exception);
353 }
354 
355 void
tvb_check_offset_length(const tvbuff_t * tvb,const gint offset,gint const length_val,guint * offset_ptr,guint * length_ptr)356 tvb_check_offset_length(const tvbuff_t *tvb,
357 		        const gint offset, gint const length_val,
358 		        guint *offset_ptr, guint *length_ptr)
359 {
360 	check_offset_length(tvb, offset, length_val, offset_ptr, length_ptr);
361 }
362 
/*
 * Masks that keep the N most significant bits of an octet, indexed by N
 * for N in 1..7.  Index 0 (no partial octet) keeps all eight bits.
 */
static const unsigned char left_aligned_bitmask[] = {
	0xff, 0x80, 0xc0, 0xe0,
	0xf0, 0xf8, 0xfc, 0xfe
};
373 
374 tvbuff_t *
tvb_new_octet_aligned(tvbuff_t * tvb,guint32 bit_offset,gint32 no_of_bits)375 tvb_new_octet_aligned(tvbuff_t *tvb, guint32 bit_offset, gint32 no_of_bits)
376 {
377 	tvbuff_t     *sub_tvb = NULL;
378 	guint32       byte_offset;
379 	gint32        datalen, i;
380 	guint8        left, right, remaining_bits, *buf;
381 	const guint8 *data;
382 
383 	DISSECTOR_ASSERT(tvb && tvb->initialized);
384 
385 	byte_offset = bit_offset >> 3;
386 	left = bit_offset % 8; /* for left-shifting */
387 	right = 8 - left; /* for right-shifting */
388 
389 	if (no_of_bits == -1) {
390 		datalen = _tvb_captured_length_remaining(tvb, byte_offset);
391 		remaining_bits = 0;
392 	} else {
393 		datalen = no_of_bits >> 3;
394 		remaining_bits = no_of_bits % 8;
395 		if (remaining_bits) {
396 			datalen++;
397 		}
398 	}
399 
400 	/* already aligned -> shortcut */
401 	if ((left == 0) && (remaining_bits == 0)) {
402 		return tvb_new_subset_length_caplen(tvb, byte_offset, datalen, datalen);
403 	}
404 
405 	DISSECTOR_ASSERT(datalen>0);
406 
407 	/* if at least one trailing byte is available, we must use the content
408 	* of that byte for the last shift (i.e. tvb_get_ptr() must use datalen + 1
409 	* if non extra byte is available, the last shifted byte requires
410 	* special treatment
411 	*/
412 	if (_tvb_captured_length_remaining(tvb, byte_offset) > datalen) {
413 		data = ensure_contiguous(tvb, byte_offset, datalen + 1); /* tvb_get_ptr */
414 
415 		/* Do this allocation AFTER tvb_get_ptr() (which could throw an exception) */
416 		buf = (guint8 *)g_malloc(datalen);
417 
418 		/* shift tvb data bit_offset bits to the left */
419 		for (i = 0; i < datalen; i++)
420 			buf[i] = (data[i] << left) | (data[i+1] >> right);
421 	} else {
422 		data = ensure_contiguous(tvb, byte_offset, datalen); /* tvb_get_ptr() */
423 
424 		/* Do this allocation AFTER tvb_get_ptr() (which could throw an exception) */
425 		buf = (guint8 *)g_malloc(datalen);
426 
427 		/* shift tvb data bit_offset bits to the left */
428 		for (i = 0; i < (datalen-1); i++)
429 			buf[i] = (data[i] << left) | (data[i+1] >> right);
430 		buf[datalen-1] = data[datalen-1] << left; /* set last octet */
431 	}
432 	buf[datalen-1] &= left_aligned_bitmask[remaining_bits];
433 
434 	sub_tvb = tvb_new_child_real_data(tvb, buf, datalen, datalen);
435 	tvb_set_free_cb(sub_tvb, g_free);
436 
437 	return sub_tvb;
438 }
439 
440 tvbuff_t *
tvb_new_octet_right_aligned(tvbuff_t * tvb,guint32 bit_offset,gint32 no_of_bits)441 tvb_new_octet_right_aligned(tvbuff_t *tvb, guint32 bit_offset, gint32 no_of_bits)
442 {
443 	tvbuff_t     *sub_tvb = NULL;
444 	guint32       byte_offset;
445 	gint          src_len, dst_len, i;
446 	guint8        left, right, remaining_bits, *buf;
447 	const guint8 *data;
448 
449 	DISSECTOR_ASSERT(tvb && tvb->initialized);
450 
451 	byte_offset = bit_offset / 8;
452 	/* right shift to put bits in place and discard least significant bits */
453 	right = bit_offset % 8;
454 	/* left shift to get most significant bits from next octet */
455 	left = 8 - right;
456 
457 	if (no_of_bits == -1) {
458 		dst_len = _tvb_captured_length_remaining(tvb, byte_offset);
459 		remaining_bits = 0;
460 	} else {
461 		dst_len = no_of_bits / 8;
462 		remaining_bits = no_of_bits % 8;
463 		if (remaining_bits) {
464 			dst_len++;
465 		}
466 	}
467 
468 	/* already aligned -> shortcut */
469 	if ((right == 0) && (remaining_bits == 0)) {
470 		return tvb_new_subset_length_caplen(tvb, byte_offset, dst_len, dst_len);
471 	}
472 
473 	DISSECTOR_ASSERT(dst_len>0);
474 
475 	if (_tvb_captured_length_remaining(tvb, byte_offset) > dst_len) {
476 		/* last octet will get data from trailing octet */
477 		src_len = dst_len + 1;
478 	} else {
479 		/* last octet will be zero padded */
480 		src_len = dst_len;
481 	}
482 
483 	data = ensure_contiguous(tvb, byte_offset, src_len); /* tvb_get_ptr */
484 
485 	/* Do this allocation AFTER tvb_get_ptr() (which could throw an exception) */
486 	buf = (guint8 *)g_malloc(dst_len);
487 
488 	for (i = 0; i < (dst_len - 1); i++)
489 		buf[i] = (data[i] >> right) | (data[i+1] << left);
490 
491 	/* Special handling for last octet */
492 	buf[i] = (data[i] >> right);
493 	/* Shift most significant bits from trailing octet if available */
494 	if (src_len > dst_len)
495 		buf[i] |= (data[i+1] << left);
496 	/* Preserve only remaining bits in last octet if not multiple of 8 */
497 	if (remaining_bits)
498 		buf[i] &= ((1 << remaining_bits) - 1);
499 
500 	sub_tvb = tvb_new_child_real_data(tvb, buf, dst_len, dst_len);
501 	tvb_set_free_cb(sub_tvb, g_free);
502 
503 	return sub_tvb;
504 }
505 
506 static tvbuff_t *
tvb_generic_clone_offset_len(tvbuff_t * tvb,guint offset,guint len)507 tvb_generic_clone_offset_len(tvbuff_t *tvb, guint offset, guint len)
508 {
509 	tvbuff_t *cloned_tvb;
510 	guint8 *data;
511 
512 	DISSECTOR_ASSERT(tvb_bytes_exist(tvb, offset, len));
513 
514 	data = (guint8 *) g_malloc(len);
515 
516 	tvb_memcpy(tvb, data, offset, len);
517 
518 	cloned_tvb = tvb_new_real_data(data, len, len);
519 	tvb_set_free_cb(cloned_tvb, g_free);
520 
521 	return cloned_tvb;
522 }
523 
524 tvbuff_t *
tvb_clone_offset_len(tvbuff_t * tvb,guint offset,guint len)525 tvb_clone_offset_len(tvbuff_t *tvb, guint offset, guint len)
526 {
527 	if (tvb->ops->tvb_clone) {
528 		tvbuff_t *cloned_tvb;
529 
530 		cloned_tvb = tvb->ops->tvb_clone(tvb, offset, len);
531 		if (cloned_tvb)
532 			return cloned_tvb;
533 	}
534 
535 	return tvb_generic_clone_offset_len(tvb, offset, len);
536 }
537 
538 tvbuff_t *
tvb_clone(tvbuff_t * tvb)539 tvb_clone(tvbuff_t *tvb)
540 {
541 	return tvb_clone_offset_len(tvb, 0, tvb->length);
542 }
543 
544 guint
tvb_captured_length(const tvbuff_t * tvb)545 tvb_captured_length(const tvbuff_t *tvb)
546 {
547 	DISSECTOR_ASSERT(tvb && tvb->initialized);
548 
549 	return tvb->length;
550 }
551 
552 /* For tvbuff internal use */
553 static inline gint
_tvb_captured_length_remaining(const tvbuff_t * tvb,const gint offset)554 _tvb_captured_length_remaining(const tvbuff_t *tvb, const gint offset)
555 {
556 	guint abs_offset = 0, rem_length;
557 	int   exception;
558 
559 	exception = compute_offset_and_remaining(tvb, offset, &abs_offset, &rem_length);
560 	if (exception)
561 		return 0;
562 
563 	return rem_length;
564 }
565 
566 gint
tvb_captured_length_remaining(const tvbuff_t * tvb,const gint offset)567 tvb_captured_length_remaining(const tvbuff_t *tvb, const gint offset)
568 {
569 	guint abs_offset = 0, rem_length;
570 	int   exception;
571 
572 	DISSECTOR_ASSERT(tvb && tvb->initialized);
573 
574 	exception = compute_offset_and_remaining(tvb, offset, &abs_offset, &rem_length);
575 	if (exception)
576 		return 0;
577 
578 	return rem_length;
579 }
580 
581 guint
tvb_ensure_captured_length_remaining(const tvbuff_t * tvb,const gint offset)582 tvb_ensure_captured_length_remaining(const tvbuff_t *tvb, const gint offset)
583 {
584 	guint abs_offset = 0, rem_length = 0;
585 	int   exception;
586 
587 	DISSECTOR_ASSERT(tvb && tvb->initialized);
588 
589 	exception = compute_offset_and_remaining(tvb, offset, &abs_offset, &rem_length);
590 	if (exception)
591 		THROW(exception);
592 
593 	if (rem_length == 0) {
594 		/*
595 		 * This routine ensures there's at least one byte available.
596 		 * There aren't any bytes available, so throw the appropriate
597 		 * exception.
598 		 */
599 		if (abs_offset < tvb->contained_length) {
600 			THROW(BoundsError);
601 		} else if (abs_offset < tvb->reported_length) {
602 			THROW(ContainedBoundsError);
603 		} else if (tvb->flags & TVBUFF_FRAGMENT) {
604 			THROW(FragmentBoundsError);
605 		} else {
606 			THROW(ReportedBoundsError);
607 		}
608 	}
609 	return rem_length;
610 }
611 
612 /* Validates that 'length' bytes are available starting from
613  * offset (pos/neg). Does not throw an exception. */
614 gboolean
tvb_bytes_exist(const tvbuff_t * tvb,const gint offset,const gint length)615 tvb_bytes_exist(const tvbuff_t *tvb, const gint offset, const gint length)
616 {
617 	guint abs_offset = 0, abs_length;
618 	int   exception;
619 
620 	DISSECTOR_ASSERT(tvb && tvb->initialized);
621 
622 	/*
623 	 * Negative lengths are not possible and indicate a bug (e.g. arithmetic
624 	 * error or an overly large value from packet data).
625 	 */
626 	if (length < 0)
627 		return FALSE;
628 
629 	exception = check_offset_length_no_exception(tvb, offset, length, &abs_offset, &abs_length);
630 	if (exception)
631 		return FALSE;
632 
633 	return TRUE;
634 }
635 
636 /* Validates that 'length' bytes, where 'length' is a 64-bit unsigned
637  * integer, are available starting from offset (pos/neg). Throws an
638  * exception if they aren't. */
639 void
tvb_ensure_bytes_exist64(const tvbuff_t * tvb,const gint offset,const guint64 length)640 tvb_ensure_bytes_exist64(const tvbuff_t *tvb, const gint offset, const guint64 length)
641 {
642 	/*
643 	 * Make sure the value fits in a signed integer; if not, assume
644 	 * that means that it's too big.
645 	 */
646 	if (length > G_MAXINT) {
647 		THROW(ReportedBoundsError);
648 	}
649 
650 	/* OK, now cast it and try it with tvb_ensure_bytes_exist(). */
651 	tvb_ensure_bytes_exist(tvb, offset, (gint)length);
652 }
653 
654 /* Validates that 'length' bytes are available starting from
655  * offset (pos/neg). Throws an exception if they aren't. */
656 void
tvb_ensure_bytes_exist(const tvbuff_t * tvb,const gint offset,const gint length)657 tvb_ensure_bytes_exist(const tvbuff_t *tvb, const gint offset, const gint length)
658 {
659 	guint real_offset, end_offset;
660 
661 	DISSECTOR_ASSERT(tvb && tvb->initialized);
662 
663 	/*
664 	 * -1 doesn't mean "until end of buffer", as that's pointless
665 	 * for this routine.  We must treat it as a Really Large Positive
666 	 * Number, so that we throw an exception; we throw
667 	 * ReportedBoundsError, as if it were past even the end of a
668 	 * reassembled packet, and past the end of even the data we
669 	 * didn't capture.
670 	 *
671 	 * We do the same with other negative lengths.
672 	 */
673 	if (length < 0) {
674 		THROW(ReportedBoundsError);
675 	}
676 
677 	/* XXX: Below this point could be replaced with a call to
678 	 * check_offset_length with no functional change, however this is a
679 	 * *very* hot path and check_offset_length is not well-optimized for
680 	 * this case, so we eat some code duplication for a lot of speedup. */
681 
682 	if (offset >= 0) {
683 		/* Positive offset - relative to the beginning of the packet. */
684 		if (G_LIKELY((guint) offset <= tvb->length)) {
685 			real_offset = offset;
686 		} else if ((guint) offset <= tvb->contained_length) {
687 			THROW(BoundsError);
688 		} else if ((guint) offset <= tvb->reported_length) {
689 			THROW(ContainedBoundsError);
690 		} else if (tvb->flags & TVBUFF_FRAGMENT) {
691 			THROW(FragmentBoundsError);
692 		} else {
693 			THROW(ReportedBoundsError);
694 		}
695 	}
696 	else {
697 		/* Negative offset - relative to the end of the packet. */
698 		if (G_LIKELY((guint) -offset <= tvb->length)) {
699 			real_offset = tvb->length + offset;
700 		} else if ((guint) -offset <= tvb->contained_length) {
701 			THROW(BoundsError);
702 		} else if ((guint) -offset <= tvb->reported_length) {
703 			THROW(ContainedBoundsError);
704 		} else if (tvb->flags & TVBUFF_FRAGMENT) {
705 			THROW(FragmentBoundsError);
706 		} else {
707 			THROW(ReportedBoundsError);
708 		}
709 	}
710 
711 	/*
712 	 * Compute the offset of the first byte past the length.
713 	 */
714 	end_offset = real_offset + length;
715 
716 	/*
717 	 * Check for an overflow
718 	 */
719 	if (end_offset < real_offset)
720 		THROW(BoundsError);
721 
722 	if (G_LIKELY(end_offset <= tvb->length))
723 		return;
724 	else if (end_offset <= tvb->contained_length)
725 		THROW(BoundsError);
726 	else if (end_offset <= tvb->reported_length)
727 		THROW(ContainedBoundsError);
728 	else if (tvb->flags & TVBUFF_FRAGMENT)
729 		THROW(FragmentBoundsError);
730 	else
731 		THROW(ReportedBoundsError);
732 }
733 
734 gboolean
tvb_offset_exists(const tvbuff_t * tvb,const gint offset)735 tvb_offset_exists(const tvbuff_t *tvb, const gint offset)
736 {
737 	guint abs_offset = 0;
738 	int   exception;
739 
740 	DISSECTOR_ASSERT(tvb && tvb->initialized);
741 
742 	exception = compute_offset(tvb, offset, &abs_offset);
743 	if (exception)
744 		return FALSE;
745 
746 	/* compute_offset only throws an exception on >, not >= because of the
747 	 * comment above check_offset_length_no_exception, but here we want the
748 	 * opposite behaviour so we check ourselves... */
749 	if (abs_offset < tvb->length) {
750 		return TRUE;
751 	}
752 	else {
753 		return FALSE;
754 	}
755 }
756 
757 guint
tvb_reported_length(const tvbuff_t * tvb)758 tvb_reported_length(const tvbuff_t *tvb)
759 {
760 	DISSECTOR_ASSERT(tvb && tvb->initialized);
761 
762 	return tvb->reported_length;
763 }
764 
765 gint
tvb_reported_length_remaining(const tvbuff_t * tvb,const gint offset)766 tvb_reported_length_remaining(const tvbuff_t *tvb, const gint offset)
767 {
768 	guint abs_offset = 0;
769 	int   exception;
770 
771 	DISSECTOR_ASSERT(tvb && tvb->initialized);
772 
773 	exception = compute_offset(tvb, offset, &abs_offset);
774 	if (exception)
775 		return 0;
776 
777 	if (tvb->reported_length >= abs_offset)
778 		return tvb->reported_length - abs_offset;
779 	else
780 		return 0;
781 }
782 
783 guint
tvb_ensure_reported_length_remaining(const tvbuff_t * tvb,const gint offset)784 tvb_ensure_reported_length_remaining(const tvbuff_t *tvb, const gint offset)
785 {
786 	guint abs_offset = 0;
787 	int   exception;
788 
789 	DISSECTOR_ASSERT(tvb && tvb->initialized);
790 
791 	exception = compute_offset(tvb, offset, &abs_offset);
792 	if (exception)
793 		THROW(exception);
794 
795 	if (tvb->reported_length >= abs_offset)
796 		return tvb->reported_length - abs_offset;
797 	else
798 		THROW(ReportedBoundsError);
799 }
800 
801 /* Set the reported length of a tvbuff to a given value; used for protocols
802  * whose headers contain an explicit length and where the calling
803  * dissector's payload may include padding as well as the packet for
804  * this protocol.
805  * Also adjusts the available and contained length. */
806 void
tvb_set_reported_length(tvbuff_t * tvb,const guint reported_length)807 tvb_set_reported_length(tvbuff_t *tvb, const guint reported_length)
808 {
809 	DISSECTOR_ASSERT(tvb && tvb->initialized);
810 
811 	if (reported_length > tvb->reported_length)
812 		THROW(ReportedBoundsError);
813 
814 	tvb->reported_length = reported_length;
815 	if (reported_length < tvb->length)
816 		tvb->length = reported_length;
817 	if (reported_length < tvb->contained_length)
818 		tvb->contained_length = reported_length;
819 }
820 
821 guint
tvb_offset_from_real_beginning_counter(const tvbuff_t * tvb,const guint counter)822 tvb_offset_from_real_beginning_counter(const tvbuff_t *tvb, const guint counter)
823 {
824 	if (tvb->ops->tvb_offset)
825 		return tvb->ops->tvb_offset(tvb, counter);
826 
827 	DISSECTOR_ASSERT_NOT_REACHED();
828 	return 0;
829 }
830 
831 guint
tvb_offset_from_real_beginning(const tvbuff_t * tvb)832 tvb_offset_from_real_beginning(const tvbuff_t *tvb)
833 {
834 	return tvb_offset_from_real_beginning_counter(tvb, 0);
835 }
836 
837 static inline const guint8*
ensure_contiguous_no_exception(tvbuff_t * tvb,const gint offset,const gint length,int * pexception)838 ensure_contiguous_no_exception(tvbuff_t *tvb, const gint offset, const gint length, int *pexception)
839 {
840 	guint abs_offset = 0, abs_length = 0;
841 	int   exception;
842 
843 	exception = check_offset_length_no_exception(tvb, offset, length, &abs_offset, &abs_length);
844 	if (exception) {
845 		if (pexception)
846 			*pexception = exception;
847 		return NULL;
848 	}
849 
850 	/*
851 	 * Special case: if the caller (e.g. tvb_get_ptr) requested no data,
852 	 * then it is acceptable to have an empty tvb (!tvb->real_data).
853 	 */
854 	if (length == 0) {
855 		return NULL;
856 	}
857 
858 	/*
859 	 * We know that all the data is present in the tvbuff, so
860 	 * no exceptions should be thrown.
861 	 */
862 	if (tvb->real_data)
863 		return tvb->real_data + abs_offset;
864 
865 	if (tvb->ops->tvb_get_ptr)
866 		return tvb->ops->tvb_get_ptr(tvb, abs_offset, abs_length);
867 
868 	DISSECTOR_ASSERT_NOT_REACHED();
869 	return NULL;
870 }
871 
872 static inline const guint8*
ensure_contiguous(tvbuff_t * tvb,const gint offset,const gint length)873 ensure_contiguous(tvbuff_t *tvb, const gint offset, const gint length)
874 {
875 	int           exception = 0;
876 	const guint8 *p;
877 
878 	p = ensure_contiguous_no_exception(tvb, offset, length, &exception);
879 	if (p == NULL && length != 0) {
880 		DISSECTOR_ASSERT(exception > 0);
881 		THROW(exception);
882 	}
883 	return p;
884 }
885 
886 static inline const guint8*
fast_ensure_contiguous(tvbuff_t * tvb,const gint offset,const guint length)887 fast_ensure_contiguous(tvbuff_t *tvb, const gint offset, const guint length)
888 {
889 	guint end_offset;
890 	guint u_offset;
891 
892 	DISSECTOR_ASSERT(tvb && tvb->initialized);
893 	/* We don't check for overflow in this fast path so we only handle simple types */
894 	DISSECTOR_ASSERT(length <= 8);
895 
896 	if (offset < 0 || !tvb->real_data) {
897 		return ensure_contiguous(tvb, offset, length);
898 	}
899 
900 	u_offset = offset;
901 	end_offset = u_offset + length;
902 
903 	if (G_LIKELY(end_offset <= tvb->length)) {
904 		return tvb->real_data + u_offset;
905 	} else if (end_offset <= tvb->contained_length) {
906 		THROW(BoundsError);
907 	} else if (end_offset <= tvb->reported_length) {
908 		THROW(ContainedBoundsError);
909 	} else if (tvb->flags & TVBUFF_FRAGMENT) {
910 		THROW(FragmentBoundsError);
911 	} else {
912 		THROW(ReportedBoundsError);
913 	}
914 	/* not reached */
915 	return NULL;
916 }
917 
918 
919 
920 /************** ACCESSORS **************/
921 
922 void *
tvb_memcpy(tvbuff_t * tvb,void * target,const gint offset,size_t length)923 tvb_memcpy(tvbuff_t *tvb, void *target, const gint offset, size_t length)
924 {
925 	guint	abs_offset = 0, abs_length = 0;
926 
927 	DISSECTOR_ASSERT(tvb && tvb->initialized);
928 
929 	/*
930 	 * XXX - we should eliminate the "length = -1 means 'to the end
931 	 * of the tvbuff'" convention, and use other means to achieve
932 	 * that; this would let us eliminate a bunch of checks for
933 	 * negative lengths in cases where the protocol has a 32-bit
934 	 * length field.
935 	 *
936 	 * Allowing -1 but throwing an assertion on other negative
937 	 * lengths is a bit more work with the length being a size_t;
938 	 * instead, we check for a length <= 2^31-1.
939 	 */
940 	DISSECTOR_ASSERT(length <= 0x7FFFFFFF);
941 	check_offset_length(tvb, offset, (gint) length, &abs_offset, &abs_length);
942 
943 	if (tvb->real_data) {
944 		return memcpy(target, tvb->real_data + abs_offset, abs_length);
945 	}
946 
947 	if (tvb->ops->tvb_memcpy)
948 		return tvb->ops->tvb_memcpy(tvb, target, abs_offset, abs_length);
949 
950 	/*
951 	 * If the length is 0, there's nothing to do.
952 	 * (tvb->real_data could be null if it's allocated with
953 	 * a size of length.)
954 	 */
955 	if (length != 0) {
956 		/*
957 		 * XXX, fallback to slower method
958 		 */
959 		DISSECTOR_ASSERT_NOT_REACHED();
960 	}
961 	return NULL;
962 }
963 
964 
965 /*
966  * XXX - this doesn't treat a length of -1 as an error.
967  * If it did, this could replace some code that calls
968  * "tvb_ensure_bytes_exist()" and then allocates a buffer and copies
969  * data to it.
970  *
971  * "composite_get_ptr()" depends on -1 not being
972  * an error; does anything else depend on this routine treating -1 as
973  * meaning "to the end of the buffer"?
974  *
975  * If scope is NULL, memory is allocated with g_malloc() and user must
976  * explicitly free it with g_free().
977  * If scope is not NULL, memory is allocated with the corresponding pool
978  * lifetime.
979  */
980 void *
tvb_memdup(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,size_t length)981 tvb_memdup(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, size_t length)
982 {
983 	guint  abs_offset = 0, abs_length = 0;
984 	void  *duped;
985 
986 	DISSECTOR_ASSERT(tvb && tvb->initialized);
987 
988 	check_offset_length(tvb, offset, (gint) length, &abs_offset, &abs_length);
989 
990 	duped = wmem_alloc(scope, abs_length);
991 	return tvb_memcpy(tvb, duped, abs_offset, abs_length);
992 }
993 
994 
995 
996 const guint8*
tvb_get_ptr(tvbuff_t * tvb,const gint offset,const gint length)997 tvb_get_ptr(tvbuff_t *tvb, const gint offset, const gint length)
998 {
999 	return ensure_contiguous(tvb, offset, length);
1000 }
1001 
1002 /* ---------------- */
1003 guint8
tvb_get_guint8(tvbuff_t * tvb,const gint offset)1004 tvb_get_guint8(tvbuff_t *tvb, const gint offset)
1005 {
1006 	const guint8 *ptr;
1007 
1008 	ptr = fast_ensure_contiguous(tvb, offset, 1);
1009 	return *ptr;
1010 }
1011 
1012 gint8
tvb_get_gint8(tvbuff_t * tvb,const gint offset)1013 tvb_get_gint8(tvbuff_t *tvb, const gint offset)
1014 {
1015 	const guint8 *ptr;
1016 
1017 	ptr = fast_ensure_contiguous(tvb, offset, 1);
1018 	return *ptr;
1019 }
1020 
1021 guint16
tvb_get_ntohs(tvbuff_t * tvb,const gint offset)1022 tvb_get_ntohs(tvbuff_t *tvb, const gint offset)
1023 {
1024 	const guint8 *ptr;
1025 
1026 	ptr = fast_ensure_contiguous(tvb, offset, 2);
1027 	return pntoh16(ptr);
1028 }
1029 
1030 gint16
tvb_get_ntohis(tvbuff_t * tvb,const gint offset)1031 tvb_get_ntohis(tvbuff_t *tvb, const gint offset)
1032 {
1033 	const guint8 *ptr;
1034 
1035 	ptr = fast_ensure_contiguous(tvb, offset, 2);
1036 	return pntoh16(ptr);
1037 }
1038 
1039 guint32
tvb_get_ntoh24(tvbuff_t * tvb,const gint offset)1040 tvb_get_ntoh24(tvbuff_t *tvb, const gint offset)
1041 {
1042 	const guint8 *ptr;
1043 
1044 	ptr = fast_ensure_contiguous(tvb, offset, 3);
1045 	return pntoh24(ptr);
1046 }
1047 
1048 gint32
tvb_get_ntohi24(tvbuff_t * tvb,const gint offset)1049 tvb_get_ntohi24(tvbuff_t *tvb, const gint offset)
1050 {
1051 	guint32 ret;
1052 
1053 	ret = ws_sign_ext32(tvb_get_ntoh24(tvb, offset), 24);
1054 
1055 	return (gint32)ret;
1056 }
1057 
1058 guint32
tvb_get_ntohl(tvbuff_t * tvb,const gint offset)1059 tvb_get_ntohl(tvbuff_t *tvb, const gint offset)
1060 {
1061 	const guint8 *ptr;
1062 
1063 	ptr = fast_ensure_contiguous(tvb, offset, 4);
1064 	return pntoh32(ptr);
1065 }
1066 
1067 gint32
tvb_get_ntohil(tvbuff_t * tvb,const gint offset)1068 tvb_get_ntohil(tvbuff_t *tvb, const gint offset)
1069 {
1070 	const guint8 *ptr;
1071 
1072 	ptr = fast_ensure_contiguous(tvb, offset, 4);
1073 	return pntoh32(ptr);
1074 }
1075 
1076 guint64
tvb_get_ntoh40(tvbuff_t * tvb,const gint offset)1077 tvb_get_ntoh40(tvbuff_t *tvb, const gint offset)
1078 {
1079 	const guint8 *ptr;
1080 
1081 	ptr = fast_ensure_contiguous(tvb, offset, 5);
1082 	return pntoh40(ptr);
1083 }
1084 
1085 gint64
tvb_get_ntohi40(tvbuff_t * tvb,const gint offset)1086 tvb_get_ntohi40(tvbuff_t *tvb, const gint offset)
1087 {
1088 	guint64 ret;
1089 
1090 	ret = ws_sign_ext64(tvb_get_ntoh40(tvb, offset), 40);
1091 
1092 	return (gint64)ret;
1093 }
1094 
1095 guint64
tvb_get_ntoh48(tvbuff_t * tvb,const gint offset)1096 tvb_get_ntoh48(tvbuff_t *tvb, const gint offset)
1097 {
1098 	const guint8 *ptr;
1099 
1100 	ptr = fast_ensure_contiguous(tvb, offset, 6);
1101 	return pntoh48(ptr);
1102 }
1103 
1104 gint64
tvb_get_ntohi48(tvbuff_t * tvb,const gint offset)1105 tvb_get_ntohi48(tvbuff_t *tvb, const gint offset)
1106 {
1107 	guint64 ret;
1108 
1109 	ret = ws_sign_ext64(tvb_get_ntoh48(tvb, offset), 48);
1110 
1111 	return (gint64)ret;
1112 }
1113 
1114 guint64
tvb_get_ntoh56(tvbuff_t * tvb,const gint offset)1115 tvb_get_ntoh56(tvbuff_t *tvb, const gint offset)
1116 {
1117 	const guint8 *ptr;
1118 
1119 	ptr = fast_ensure_contiguous(tvb, offset, 7);
1120 	return pntoh56(ptr);
1121 }
1122 
1123 gint64
tvb_get_ntohi56(tvbuff_t * tvb,const gint offset)1124 tvb_get_ntohi56(tvbuff_t *tvb, const gint offset)
1125 {
1126 	guint64 ret;
1127 
1128 	ret = ws_sign_ext64(tvb_get_ntoh56(tvb, offset), 56);
1129 
1130 	return (gint64)ret;
1131 }
1132 
1133 guint64
tvb_get_ntoh64(tvbuff_t * tvb,const gint offset)1134 tvb_get_ntoh64(tvbuff_t *tvb, const gint offset)
1135 {
1136 	const guint8 *ptr;
1137 
1138 	ptr = fast_ensure_contiguous(tvb, offset, 8);
1139 	return pntoh64(ptr);
1140 }
1141 
1142 gint64
tvb_get_ntohi64(tvbuff_t * tvb,const gint offset)1143 tvb_get_ntohi64(tvbuff_t *tvb, const gint offset)
1144 {
1145 	const guint8 *ptr;
1146 
1147 	ptr = fast_ensure_contiguous(tvb, offset, 8);
1148 	return pntoh64(ptr);
1149 }
1150 
1151 guint16
tvb_get_guint16(tvbuff_t * tvb,const gint offset,const guint encoding)1152 tvb_get_guint16(tvbuff_t *tvb, const gint offset, const guint encoding) {
1153 	if (encoding & ENC_LITTLE_ENDIAN) {
1154 		return tvb_get_letohs(tvb, offset);
1155 	} else {
1156 		return tvb_get_ntohs(tvb, offset);
1157 	}
1158 }
1159 
1160 gint16
tvb_get_gint16(tvbuff_t * tvb,const gint offset,const guint encoding)1161 tvb_get_gint16(tvbuff_t *tvb, const gint offset, const guint encoding) {
1162 	if (encoding & ENC_LITTLE_ENDIAN) {
1163 		return tvb_get_letohis(tvb, offset);
1164 	} else {
1165 		return tvb_get_ntohis(tvb, offset);
1166 	}
1167 }
1168 
1169 guint32
tvb_get_guint24(tvbuff_t * tvb,const gint offset,const guint encoding)1170 tvb_get_guint24(tvbuff_t *tvb, const gint offset, const guint encoding) {
1171 	if (encoding & ENC_LITTLE_ENDIAN) {
1172 		return tvb_get_letoh24(tvb, offset);
1173 	} else {
1174 		return tvb_get_ntoh24(tvb, offset);
1175 	}
1176 }
1177 
1178 gint32
tvb_get_gint24(tvbuff_t * tvb,const gint offset,const guint encoding)1179 tvb_get_gint24(tvbuff_t *tvb, const gint offset, const guint encoding) {
1180 	if (encoding & ENC_LITTLE_ENDIAN) {
1181 		return tvb_get_letohi24(tvb, offset);
1182 	} else {
1183 		return tvb_get_ntohi24(tvb, offset);
1184 	}
1185 }
1186 
1187 guint32
tvb_get_guint32(tvbuff_t * tvb,const gint offset,const guint encoding)1188 tvb_get_guint32(tvbuff_t *tvb, const gint offset, const guint encoding) {
1189 	if (encoding & ENC_LITTLE_ENDIAN) {
1190 		return tvb_get_letohl(tvb, offset);
1191 	} else {
1192 		return tvb_get_ntohl(tvb, offset);
1193 	}
1194 }
1195 
1196 gint32
tvb_get_gint32(tvbuff_t * tvb,const gint offset,const guint encoding)1197 tvb_get_gint32(tvbuff_t *tvb, const gint offset, const guint encoding) {
1198 	if (encoding & ENC_LITTLE_ENDIAN) {
1199 		return tvb_get_letohil(tvb, offset);
1200 	} else {
1201 		return tvb_get_ntohil(tvb, offset);
1202 	}
1203 }
1204 
1205 guint64
tvb_get_guint40(tvbuff_t * tvb,const gint offset,const guint encoding)1206 tvb_get_guint40(tvbuff_t *tvb, const gint offset, const guint encoding) {
1207 	if (encoding & ENC_LITTLE_ENDIAN) {
1208 		return tvb_get_letoh40(tvb, offset);
1209 	} else {
1210 		return tvb_get_ntoh40(tvb, offset);
1211 	}
1212 }
1213 
1214 gint64
tvb_get_gint40(tvbuff_t * tvb,const gint offset,const guint encoding)1215 tvb_get_gint40(tvbuff_t *tvb, const gint offset, const guint encoding) {
1216 	if (encoding & ENC_LITTLE_ENDIAN) {
1217 		return tvb_get_letohi40(tvb, offset);
1218 	} else {
1219 		return tvb_get_ntohi40(tvb, offset);
1220 	}
1221 }
1222 
1223 guint64
tvb_get_guint48(tvbuff_t * tvb,const gint offset,const guint encoding)1224 tvb_get_guint48(tvbuff_t *tvb, const gint offset, const guint encoding) {
1225 	if (encoding & ENC_LITTLE_ENDIAN) {
1226 		return tvb_get_letoh48(tvb, offset);
1227 	} else {
1228 		return tvb_get_ntoh48(tvb, offset);
1229 	}
1230 }
1231 
1232 gint64
tvb_get_gint48(tvbuff_t * tvb,const gint offset,const guint encoding)1233 tvb_get_gint48(tvbuff_t *tvb, const gint offset, const guint encoding) {
1234 	if (encoding & ENC_LITTLE_ENDIAN) {
1235 		return tvb_get_letohi48(tvb, offset);
1236 	} else {
1237 		return tvb_get_ntohi48(tvb, offset);
1238 	}
1239 }
1240 
1241 guint64
tvb_get_guint56(tvbuff_t * tvb,const gint offset,const guint encoding)1242 tvb_get_guint56(tvbuff_t *tvb, const gint offset, const guint encoding) {
1243 	if (encoding & ENC_LITTLE_ENDIAN) {
1244 		return tvb_get_letoh56(tvb, offset);
1245 	} else {
1246 		return tvb_get_ntoh56(tvb, offset);
1247 	}
1248 }
1249 
1250 gint64
tvb_get_gint56(tvbuff_t * tvb,const gint offset,const guint encoding)1251 tvb_get_gint56(tvbuff_t *tvb, const gint offset, const guint encoding) {
1252 	if (encoding & ENC_LITTLE_ENDIAN) {
1253 		return tvb_get_letohi56(tvb, offset);
1254 	} else {
1255 		return tvb_get_ntohi56(tvb, offset);
1256 	}
1257 }
1258 
1259 guint64
tvb_get_guint64(tvbuff_t * tvb,const gint offset,const guint encoding)1260 tvb_get_guint64(tvbuff_t *tvb, const gint offset, const guint encoding) {
1261 	if (encoding & ENC_LITTLE_ENDIAN) {
1262 		return tvb_get_letoh64(tvb, offset);
1263 	} else {
1264 		return tvb_get_ntoh64(tvb, offset);
1265 	}
1266 }
1267 
1268 gint64
tvb_get_gint64(tvbuff_t * tvb,const gint offset,const guint encoding)1269 tvb_get_gint64(tvbuff_t *tvb, const gint offset, const guint encoding) {
1270 	if (encoding & ENC_LITTLE_ENDIAN) {
1271 		return tvb_get_letohi64(tvb, offset);
1272 	} else {
1273 		return tvb_get_ntohi64(tvb, offset);
1274 	}
1275 }
1276 
1277 gfloat
tvb_get_ieee_float(tvbuff_t * tvb,const gint offset,const guint encoding)1278 tvb_get_ieee_float(tvbuff_t *tvb, const gint offset, const guint encoding) {
1279 	if (encoding & ENC_LITTLE_ENDIAN) {
1280 		return tvb_get_letohieee_float(tvb, offset);
1281 	} else {
1282 		return tvb_get_ntohieee_float(tvb, offset);
1283 	}
1284 }
1285 
1286 gdouble
tvb_get_ieee_double(tvbuff_t * tvb,const gint offset,const guint encoding)1287 tvb_get_ieee_double(tvbuff_t *tvb, const gint offset, const guint encoding) {
1288 	if (encoding & ENC_LITTLE_ENDIAN) {
1289 		return tvb_get_letohieee_double(tvb, offset);
1290 	} else {
1291 		return tvb_get_ntohieee_double(tvb, offset);
1292 	}
1293 }
1294 
1295 /*
1296  * Stuff for IEEE float handling on platforms that don't have IEEE
1297  * format as the native floating-point format.
1298  *
1299  * For now, we treat only the VAX as such a platform.
1300  *
1301  * XXX - other non-IEEE boxes that can run UN*X include some Crays,
1302  * and possibly other machines.  However, I don't know whether there
1303  * are any other machines that could run Wireshark and that don't use
1304  * IEEE format.  As far as I know, all of the main current and past
1305  * commercial microprocessor families on which OSes that support
1306  * Wireshark can run use IEEE format (x86, ARM, 68k, SPARC, MIPS,
1307  * PA-RISC, Alpha, IA-64, and so on), and it appears that the official
1308  * Linux port to System/390 and zArchitecture uses IEEE format floating-
1309  * point rather than IBM hex floating-point (not a huge surprise), so
1310  * I'm not sure that leaves any 32-bit or larger UN*X or Windows boxes,
1311  * other than VAXes, that don't use IEEE format.  If you're not running
1312  * UN*X or Windows, the floating-point format is probably going to be
1313  * the least of your problems in a port.
1314  */
1315 
1316 #if defined(vax)
1317 
1318 #include <math.h>
1319 
1320 /*
1321  * Single-precision.
1322  */
1323 #define IEEE_SP_NUMBER_WIDTH	32	/* bits in number */
1324 #define IEEE_SP_EXP_WIDTH	8	/* bits in exponent */
1325 #define IEEE_SP_MANTISSA_WIDTH	23	/* IEEE_SP_NUMBER_WIDTH - 1 - IEEE_SP_EXP_WIDTH */
1326 
1327 #define IEEE_SP_SIGN_MASK	0x80000000
1328 #define IEEE_SP_EXPONENT_MASK	0x7F800000
1329 #define IEEE_SP_MANTISSA_MASK	0x007FFFFF
1330 #define IEEE_SP_INFINITY	IEEE_SP_EXPONENT_MASK
1331 
1332 #define IEEE_SP_IMPLIED_BIT (1 << IEEE_SP_MANTISSA_WIDTH)
1333 #define IEEE_SP_INFINITE ((1 << IEEE_SP_EXP_WIDTH) - 1)
1334 #define IEEE_SP_BIAS ((1 << (IEEE_SP_EXP_WIDTH - 1)) - 1)
1335 
1336 static int
ieee_float_is_zero(const guint32 w)1337 ieee_float_is_zero(const guint32 w)
1338 {
1339 	return ((w & ~IEEE_SP_SIGN_MASK) == 0);
1340 }
1341 
1342 static gfloat
get_ieee_float(const guint32 w)1343 get_ieee_float(const guint32 w)
1344 {
1345 	long sign;
1346 	long exponent;
1347 	long mantissa;
1348 
1349 	sign = w & IEEE_SP_SIGN_MASK;
1350 	exponent = w & IEEE_SP_EXPONENT_MASK;
1351 	mantissa = w & IEEE_SP_MANTISSA_MASK;
1352 
1353 	if (ieee_float_is_zero(w)) {
1354 		/* number is zero, unnormalized, or not-a-number */
1355 		return 0.0;
1356 	}
1357 #if 0
1358 	/*
1359 	 * XXX - how to handle this?
1360 	 */
1361 	if (IEEE_SP_INFINITY == exponent) {
1362 		/*
1363 		 * number is positive or negative infinity, or a special value
1364 		 */
1365 		return (sign? MINUS_INFINITY: PLUS_INFINITY);
1366 	}
1367 #endif
1368 
1369 	exponent = ((exponent >> IEEE_SP_MANTISSA_WIDTH) - IEEE_SP_BIAS) -
1370 		IEEE_SP_MANTISSA_WIDTH;
1371 	mantissa |= IEEE_SP_IMPLIED_BIT;
1372 
1373 	if (sign)
1374 		return -mantissa * pow(2, exponent);
1375 	else
1376 		return mantissa * pow(2, exponent);
1377 }
1378 
1379 /*
1380  * Double-precision.
1381  * We assume that if you don't have IEEE floating-point, you have a
1382  * compiler that understands 64-bit integral quantities.
1383  */
1384 #define IEEE_DP_NUMBER_WIDTH	64	/* bits in number */
1385 #define IEEE_DP_EXP_WIDTH	11	/* bits in exponent */
1386 #define IEEE_DP_MANTISSA_WIDTH	52	/* IEEE_DP_NUMBER_WIDTH - 1 - IEEE_DP_EXP_WIDTH */
1387 
1388 #define IEEE_DP_SIGN_MASK	G_GINT64_CONSTANT(0x8000000000000000)
1389 #define IEEE_DP_EXPONENT_MASK	G_GINT64_CONSTANT(0x7FF0000000000000)
1390 #define IEEE_DP_MANTISSA_MASK	G_GINT64_CONSTANT(0x000FFFFFFFFFFFFF)
1391 #define IEEE_DP_INFINITY	IEEE_DP_EXPONENT_MASK
1392 
1393 #define IEEE_DP_IMPLIED_BIT (G_GINT64_CONSTANT(1) << IEEE_DP_MANTISSA_WIDTH)
1394 #define IEEE_DP_INFINITE ((1 << IEEE_DP_EXP_WIDTH) - 1)
1395 #define IEEE_DP_BIAS ((1 << (IEEE_DP_EXP_WIDTH - 1)) - 1)
1396 
1397 static int
ieee_double_is_zero(const guint64 w)1398 ieee_double_is_zero(const guint64 w)
1399 {
1400 	return ((w & ~IEEE_SP_SIGN_MASK) == 0);
1401 }
1402 
1403 static gdouble
get_ieee_double(const guint64 w)1404 get_ieee_double(const guint64 w)
1405 {
1406 	gint64 sign;
1407 	gint64 exponent;
1408 	gint64 mantissa;
1409 
1410 	sign = w & IEEE_DP_SIGN_MASK;
1411 	exponent = w & IEEE_DP_EXPONENT_MASK;
1412 	mantissa = w & IEEE_DP_MANTISSA_MASK;
1413 
1414 	if (ieee_double_is_zero(w)) {
1415 		/* number is zero, unnormalized, or not-a-number */
1416 		return 0.0;
1417 	}
1418 #if 0
1419 	/*
1420 	 * XXX - how to handle this?
1421 	 */
1422 	if (IEEE_DP_INFINITY == exponent) {
1423 		/*
1424 		 * number is positive or negative infinity, or a special value
1425 		 */
1426 		return (sign? MINUS_INFINITY: PLUS_INFINITY);
1427 	}
1428 #endif
1429 
1430 	exponent = ((exponent >> IEEE_DP_MANTISSA_WIDTH) - IEEE_DP_BIAS) -
1431 		IEEE_DP_MANTISSA_WIDTH;
1432 	mantissa |= IEEE_DP_IMPLIED_BIT;
1433 
1434 	if (sign)
1435 		return -mantissa * pow(2, exponent);
1436 	else
1437 		return mantissa * pow(2, exponent);
1438 }
1439 #endif
1440 
1441 /*
1442  * Fetches an IEEE single-precision floating-point number, in
1443  * big-endian form, and returns a "float".
1444  *
1445  * XXX - should this be "double", in case there are IEEE single-
1446  * precision numbers that won't fit in some platform's native
1447  * "float" format?
1448  */
1449 gfloat
tvb_get_ntohieee_float(tvbuff_t * tvb,const int offset)1450 tvb_get_ntohieee_float(tvbuff_t *tvb, const int offset)
1451 {
1452 #if defined(vax)
1453 	return get_ieee_float(tvb_get_ntohl(tvb, offset));
1454 #else
1455 	union {
1456 		gfloat	f;
1457 		guint32 w;
1458 	} ieee_fp_union;
1459 
1460 	ieee_fp_union.w = tvb_get_ntohl(tvb, offset);
1461 	return ieee_fp_union.f;
1462 #endif
1463 }
1464 
1465 /*
1466  * Fetches an IEEE double-precision floating-point number, in
1467  * big-endian form, and returns a "double".
1468  */
1469 gdouble
tvb_get_ntohieee_double(tvbuff_t * tvb,const int offset)1470 tvb_get_ntohieee_double(tvbuff_t *tvb, const int offset)
1471 {
1472 #if defined(vax)
1473 	union {
1474 		guint32 w[2];
1475 		guint64 dw;
1476 	} ieee_fp_union;
1477 #else
1478 	union {
1479 		gdouble d;
1480 		guint32 w[2];
1481 	} ieee_fp_union;
1482 #endif
1483 
1484 #if G_BYTE_ORDER == G_BIG_ENDIAN
1485 	ieee_fp_union.w[0] = tvb_get_ntohl(tvb, offset);
1486 	ieee_fp_union.w[1] = tvb_get_ntohl(tvb, offset+4);
1487 #else
1488 	ieee_fp_union.w[0] = tvb_get_ntohl(tvb, offset+4);
1489 	ieee_fp_union.w[1] = tvb_get_ntohl(tvb, offset);
1490 #endif
1491 #if defined(vax)
1492 	return get_ieee_double(ieee_fp_union.dw);
1493 #else
1494 	return ieee_fp_union.d;
1495 #endif
1496 }
1497 
1498 guint16
tvb_get_letohs(tvbuff_t * tvb,const gint offset)1499 tvb_get_letohs(tvbuff_t *tvb, const gint offset)
1500 {
1501 	const guint8 *ptr;
1502 
1503 	ptr = fast_ensure_contiguous(tvb, offset, 2);
1504 	return pletoh16(ptr);
1505 }
1506 
1507 gint16
tvb_get_letohis(tvbuff_t * tvb,const gint offset)1508 tvb_get_letohis(tvbuff_t *tvb, const gint offset)
1509 {
1510 	const guint8 *ptr;
1511 
1512 	ptr = fast_ensure_contiguous(tvb, offset, 2);
1513 	return pletoh16(ptr);
1514 }
1515 
1516 guint32
tvb_get_letoh24(tvbuff_t * tvb,const gint offset)1517 tvb_get_letoh24(tvbuff_t *tvb, const gint offset)
1518 {
1519 	const guint8 *ptr;
1520 
1521 	ptr = fast_ensure_contiguous(tvb, offset, 3);
1522 	return pletoh24(ptr);
1523 }
1524 
1525 gint32
tvb_get_letohi24(tvbuff_t * tvb,const gint offset)1526 tvb_get_letohi24(tvbuff_t *tvb, const gint offset)
1527 {
1528 	guint32 ret;
1529 
1530 	ret = ws_sign_ext32(tvb_get_letoh24(tvb, offset), 24);
1531 
1532 	return (gint32)ret;
1533 }
1534 
1535 guint32
tvb_get_letohl(tvbuff_t * tvb,const gint offset)1536 tvb_get_letohl(tvbuff_t *tvb, const gint offset)
1537 {
1538 	const guint8 *ptr;
1539 
1540 	ptr = fast_ensure_contiguous(tvb, offset, 4);
1541 	return pletoh32(ptr);
1542 }
1543 
1544 gint32
tvb_get_letohil(tvbuff_t * tvb,const gint offset)1545 tvb_get_letohil(tvbuff_t *tvb, const gint offset)
1546 {
1547 	const guint8 *ptr;
1548 
1549 	ptr = fast_ensure_contiguous(tvb, offset, 4);
1550 	return pletoh32(ptr);
1551 }
1552 
1553 guint64
tvb_get_letoh40(tvbuff_t * tvb,const gint offset)1554 tvb_get_letoh40(tvbuff_t *tvb, const gint offset)
1555 {
1556 	const guint8 *ptr;
1557 
1558 	ptr = fast_ensure_contiguous(tvb, offset, 5);
1559 	return pletoh40(ptr);
1560 }
1561 
1562 gint64
tvb_get_letohi40(tvbuff_t * tvb,const gint offset)1563 tvb_get_letohi40(tvbuff_t *tvb, const gint offset)
1564 {
1565 	guint64 ret;
1566 
1567 	ret = ws_sign_ext64(tvb_get_letoh40(tvb, offset), 40);
1568 
1569 	return (gint64)ret;
1570 }
1571 
1572 guint64
tvb_get_letoh48(tvbuff_t * tvb,const gint offset)1573 tvb_get_letoh48(tvbuff_t *tvb, const gint offset)
1574 {
1575 	const guint8 *ptr;
1576 
1577 	ptr = fast_ensure_contiguous(tvb, offset, 6);
1578 	return pletoh48(ptr);
1579 }
1580 
1581 gint64
tvb_get_letohi48(tvbuff_t * tvb,const gint offset)1582 tvb_get_letohi48(tvbuff_t *tvb, const gint offset)
1583 {
1584 	guint64 ret;
1585 
1586 	ret = ws_sign_ext64(tvb_get_letoh48(tvb, offset), 48);
1587 
1588 	return (gint64)ret;
1589 }
1590 
1591 guint64
tvb_get_letoh56(tvbuff_t * tvb,const gint offset)1592 tvb_get_letoh56(tvbuff_t *tvb, const gint offset)
1593 {
1594 	const guint8 *ptr;
1595 
1596 	ptr = fast_ensure_contiguous(tvb, offset, 7);
1597 	return pletoh56(ptr);
1598 }
1599 
1600 gint64
tvb_get_letohi56(tvbuff_t * tvb,const gint offset)1601 tvb_get_letohi56(tvbuff_t *tvb, const gint offset)
1602 {
1603 	guint64 ret;
1604 
1605 	ret = ws_sign_ext64(tvb_get_letoh56(tvb, offset), 56);
1606 
1607 	return (gint64)ret;
1608 }
1609 
1610 guint64
tvb_get_letoh64(tvbuff_t * tvb,const gint offset)1611 tvb_get_letoh64(tvbuff_t *tvb, const gint offset)
1612 {
1613 	const guint8 *ptr;
1614 
1615 	ptr = fast_ensure_contiguous(tvb, offset, 8);
1616 	return pletoh64(ptr);
1617 }
1618 
1619 gint64
tvb_get_letohi64(tvbuff_t * tvb,const gint offset)1620 tvb_get_letohi64(tvbuff_t *tvb, const gint offset)
1621 {
1622 	const guint8 *ptr;
1623 
1624 	ptr = fast_ensure_contiguous(tvb, offset, 8);
1625 	return pletoh64(ptr);
1626 }
1627 
1628 /*
1629  * Fetches an IEEE single-precision floating-point number, in
1630  * little-endian form, and returns a "float".
1631  *
1632  * XXX - should this be "double", in case there are IEEE single-
1633  * precision numbers that won't fit in some platform's native
1634  * "float" format?
1635  */
1636 gfloat
tvb_get_letohieee_float(tvbuff_t * tvb,const int offset)1637 tvb_get_letohieee_float(tvbuff_t *tvb, const int offset)
1638 {
1639 #if defined(vax)
1640 	return get_ieee_float(tvb_get_letohl(tvb, offset));
1641 #else
1642 	union {
1643 		gfloat f;
1644 		guint32 w;
1645 	} ieee_fp_union;
1646 
1647 	ieee_fp_union.w = tvb_get_letohl(tvb, offset);
1648 	return ieee_fp_union.f;
1649 #endif
1650 }
1651 
1652 /*
1653  * Fetches an IEEE double-precision floating-point number, in
1654  * little-endian form, and returns a "double".
1655  */
1656 gdouble
tvb_get_letohieee_double(tvbuff_t * tvb,const int offset)1657 tvb_get_letohieee_double(tvbuff_t *tvb, const int offset)
1658 {
1659 #if defined(vax)
1660 	union {
1661 		guint32 w[2];
1662 		guint64 dw;
1663 	} ieee_fp_union;
1664 #else
1665 	union {
1666 		gdouble d;
1667 		guint32 w[2];
1668 	} ieee_fp_union;
1669 #endif
1670 
1671 #if G_BYTE_ORDER == G_BIG_ENDIAN
1672 	ieee_fp_union.w[0] = tvb_get_letohl(tvb, offset+4);
1673 	ieee_fp_union.w[1] = tvb_get_letohl(tvb, offset);
1674 #else
1675 	ieee_fp_union.w[0] = tvb_get_letohl(tvb, offset);
1676 	ieee_fp_union.w[1] = tvb_get_letohl(tvb, offset+4);
1677 #endif
1678 #if defined(vax)
1679 	return get_ieee_double(ieee_fp_union.dw);
1680 #else
1681 	return ieee_fp_union.d;
1682 #endif
1683 }
1684 
/* This function is a slight misnomer. It accepts all encodings that are
 * ASCII "enough", which means encodings that are the same as US-ASCII
 * for textual representations of dates and hex bytes; i.e., the same
 * for the hex digits and Z (in practice, all alphanumerics), and the
 * four separators ':' '-' '.' and ' '
 * That means that any encoding that keeps the ISO/IEC 646 invariant
 * characters the same (including the T.61 8 bit encoding and multibyte
 * encodings like EUC-KR and GB18030) are OK, even if they replace characters
 * like '$' '#' and '\' with national variants, but not encodings like UTF-16
 * that include extra null bytes.
 * For our current purposes, the unpacked GSM 7-bit default alphabet (but not
 * all National Language Shift Tables) also satisfies this requirement, but
 * note that it does *not* keep all ISO/IEC 646 invariant characters the same.
 * If this internal function gets used for additional purposes than currently,
 * the set of encodings that it accepts could change.
 *
 * Reports a dissector bug (via REPORT_DISSECTOR_BUG) when the checked
 * value matches one of the rejected encodings listed below, or when it
 * is zero; otherwise it returns without doing anything.
 *
 * NOTE(review): the value that is checked is "encoding" with the
 * ENC_CHARENCODING_MASK bits *cleared*, yet the case labels are
 * character-encoding constants.  If ENC_CHARENCODING_MASK selects the
 * character-encoding bits, "encoding & ENC_CHARENCODING_MASK" may have
 * been intended - confirm against the definitions in proto.h.
 * */
static inline void
validate_single_byte_ascii_encoding(const guint encoding)
{
	/* "encoding" with the ENC_CHARENCODING_MASK bits removed */
	const guint enc = encoding & ~ENC_CHARENCODING_MASK;

	switch (enc) {
	    /* encodings that are rejected outright */
	    case ENC_UTF_16:
	    case ENC_UCS_2:
	    case ENC_UCS_4:
	    case ENC_3GPP_TS_23_038_7BITS_PACKED:
	    case ENC_ASCII_7BITS:
	    case ENC_EBCDIC:
	    case ENC_EBCDIC_CP037:
	    case ENC_BCD_DIGITS_0_9:
	    case ENC_KEYPAD_ABC_TBCD:
	    case ENC_KEYPAD_BC_TBCD:
	    case ENC_ETSI_TS_102_221_ANNEX_A:
	    case ENC_APN_STR:
	    REPORT_DISSECTOR_BUG("Invalid string encoding type passed to tvb_get_string_XXX");
	    break;
	    default:
	    /* anything else is accepted here */
	    break;
	}
	/* make sure something valid was set */
	if (enc == 0)
	    REPORT_DISSECTOR_BUG("No string encoding type passed to tvb_get_string_XXX");
}
1728 
1729 GByteArray*
tvb_get_string_bytes(tvbuff_t * tvb,const gint offset,const gint length,const guint encoding,GByteArray * bytes,gint * endoff)1730 tvb_get_string_bytes(tvbuff_t *tvb, const gint offset, const gint length,
1731 		     const guint encoding, GByteArray *bytes, gint *endoff)
1732 {
1733 	gchar *ptr;
1734 	const gchar *begin;
1735 	const gchar *end    = NULL;
1736 	GByteArray  *retval = NULL;
1737 
1738 	errno = EDOM;
1739 
1740 	validate_single_byte_ascii_encoding(encoding);
1741 
1742 	ptr = (gchar*) tvb_get_raw_string(NULL, tvb, offset, length);
1743 	begin = ptr;
1744 
1745 	if (endoff) *endoff = 0;
1746 
1747 	while (*begin == ' ') begin++;
1748 
1749 	if (*begin && bytes) {
1750 		if (hex_str_to_bytes_encoding(begin, bytes, &end, encoding, FALSE)) {
1751 			if (bytes->len > 0) {
1752 				if (endoff) *endoff = offset + (gint)(end - ptr);
1753 				errno = 0;
1754 				retval = bytes;
1755 			}
1756 		}
1757 	}
1758 
1759 	wmem_free(NULL, ptr);
1760 
1761 	return retval;
1762 }
1763 
1764 static gboolean
parse_month_name(const char * name,int * tm_mon)1765 parse_month_name(const char *name, int *tm_mon)
1766 {
1767 	static const char months[][4] = { "Jan", "Feb", "Mar", "Apr", "May",
1768 		"Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
1769 	for (int i = 0; i < 12; i++) {
1770 		if (memcmp(months[i], name, 4) == 0) {
1771 			*tm_mon = i;
1772 			return TRUE;
1773 		}
1774 	}
1775 	return FALSE;
1776 }
1777 
1778 /* support hex-encoded time values? */
1779 nstime_t*
tvb_get_string_time(tvbuff_t * tvb,const gint offset,const gint length,const guint encoding,nstime_t * ns,gint * endoff)1780 tvb_get_string_time(tvbuff_t *tvb, const gint offset, const gint length,
1781 		    const guint encoding, nstime_t *ns, gint *endoff)
1782 {
1783 	gchar *begin;
1784 	const gchar *ptr;
1785 	const gchar *end       = NULL;
1786 	struct tm    tm;
1787 	nstime_t*    retval    = NULL;
1788 	char	     sign      = '+';
1789 	int	     off_hr    = 0;
1790 	int	     off_min   = 0;
1791 	int	     num_chars = 0;
1792 	gboolean     matched   = FALSE;
1793 
1794 	errno = EDOM;
1795 
1796 	validate_single_byte_ascii_encoding(encoding);
1797 
1798 	DISSECTOR_ASSERT(ns);
1799 
1800 	begin = (gchar*) tvb_get_raw_string(NULL, tvb, offset, length);
1801 	ptr = begin;
1802 
1803 	memset(&tm, 0, sizeof(tm));
1804 	tm.tm_isdst = -1;
1805 	ns->secs    = 0;
1806 	ns->nsecs   = 0;
1807 
1808 	while (*ptr == ' ') ptr++;
1809 
1810 	if (*ptr) {
1811 		/* note: sscanf is known to be inconsistent across platforms with respect
1812 		   to whether a %n is counted as a return value or not, so we have to use
1813 		   '>=' a lot */
1814 		if ((encoding & ENC_ISO_8601_DATE_TIME) == ENC_ISO_8601_DATE_TIME) {
1815 			/* TODO: using sscanf this many times is probably slow; might want
1816 			   to parse it by hand in the future */
1817 			/* 2014-04-07T05:41:56+00:00 */
1818 			if (sscanf(ptr, "%d-%d-%d%*c%d:%d:%d%c%d:%d%n",
1819 			    &tm.tm_year,
1820 			    &tm.tm_mon,
1821 			    &tm.tm_mday,
1822 			    &tm.tm_hour,
1823 			    &tm.tm_min,
1824 			    &tm.tm_sec,
1825 			    &sign,
1826 			    &off_hr,
1827 			    &off_min,
1828 			    &num_chars) >= 9)
1829 			{
1830 				matched = TRUE;
1831 			}
1832 			/* no seconds is ok */
1833 			else if (sscanf(ptr, "%d-%d-%d%*c%d:%d%c%d:%d%n",
1834 			    &tm.tm_year,
1835 			    &tm.tm_mon,
1836 			    &tm.tm_mday,
1837 			    &tm.tm_hour,
1838 			    &tm.tm_min,
1839 			    &sign,
1840 			    &off_hr,
1841 			    &off_min,
1842 			    &num_chars) >= 8)
1843 			{
1844 				matched = TRUE;
1845 			}
1846 			/* 2007-04-05T14:30:56Z */
1847 			else if (sscanf(ptr, "%d-%d-%d%*c%d:%d:%dZ%n",
1848 			    &tm.tm_year,
1849 			    &tm.tm_mon,
1850 			    &tm.tm_mday,
1851 			    &tm.tm_hour,
1852 			    &tm.tm_min,
1853 			    &tm.tm_sec,
1854 			    &num_chars) >= 6)
1855 			{
1856 				matched = TRUE;
1857 				off_hr = 0;
1858 				off_min = 0;
1859 			}
1860 			/* 2007-04-05T14:30Z no seconds is ok */
1861 			else if (sscanf(ptr, "%d-%d-%d%*c%d:%dZ%n",
1862 			    &tm.tm_year,
1863 			    &tm.tm_mon,
1864 			    &tm.tm_mday,
1865 			    &tm.tm_hour,
1866 			    &tm.tm_min,
1867 			    &num_chars) >= 5)
1868 			{
1869 				matched = TRUE;
1870 				off_hr = 0;
1871 				off_min = 0;
1872 			}
1873 
1874 			if (matched) {
1875 				errno = 0;
1876 				end = ptr + num_chars;
1877 				tm.tm_mon--;
1878 				if (tm.tm_year > 1900) tm.tm_year -= 1900;
1879 				if (sign == '-') off_hr = -off_hr;
1880 			}
1881 		}
1882 		else if (encoding & ENC_ISO_8601_DATE) {
1883 			/* 2014-04-07 */
1884 			if (sscanf(ptr, "%d-%d-%d%n",
1885 			    &tm.tm_year,
1886 			    &tm.tm_mon,
1887 			    &tm.tm_mday,
1888 			    &num_chars) >= 3)
1889 			{
1890 				errno = 0;
1891 				end = ptr + num_chars;
1892 				tm.tm_mon--;
1893 				if (tm.tm_year > 1900) tm.tm_year -= 1900;
1894 			}
1895 		}
1896 		else if (encoding & ENC_ISO_8601_TIME) {
1897 			/* 2014-04-07 */
1898 			if (sscanf(ptr, "%d:%d:%d%n",
1899 			    &tm.tm_hour,
1900 			    &tm.tm_min,
1901 			    &tm.tm_sec,
1902 			    &num_chars) >= 2)
1903 			{
1904 				/* what should we do about day/month/year? */
1905 				/* setting it to "now" for now */
1906 				time_t time_now = time(NULL);
1907 				struct tm *tm_now = gmtime(&time_now);
1908 				if (tm_now != NULL) {
1909 					tm.tm_year = tm_now->tm_year;
1910 					tm.tm_mon  = tm_now->tm_mon;
1911 					tm.tm_mday = tm_now->tm_mday;
1912 				} else {
1913 					/* The second before the Epoch */
1914 					tm.tm_year = 69;
1915 					tm.tm_mon = 12;
1916 					tm.tm_mday = 31;
1917 				}
1918 				end = ptr + num_chars;
1919 				errno = 0;
1920 
1921 			}
1922 		}
1923 		else if (encoding & ENC_RFC_822 || encoding & ENC_RFC_1123) {
1924 			/*
1925 			 * Match [dow,] day month year hh:mm[:ss] with two-digit
1926 			 * years (RFC 822) or four-digit years (RFC 1123). Skip
1927 			 * the day of week since it is locale dependent and does
1928 			 * not affect the resulting date anyway.
1929 			 */
1930 			if (g_ascii_isalpha(ptr[0]) && g_ascii_isalpha(ptr[1]) && g_ascii_isalpha(ptr[2]) && ptr[3] == ',')
1931 				ptr += 4;   /* Skip day of week. */
1932 			char month_name[4] = { 0 };
1933 			if (sscanf(ptr, "%d %3s %d %d:%d%n:%d%n",
1934 			    &tm.tm_mday,
1935 			    month_name,
1936 			    &tm.tm_year,
1937 			    &tm.tm_hour,
1938 			    &tm.tm_min,
1939 			    &num_chars,
1940 			    &tm.tm_sec,
1941 			    &num_chars) >= 5)
1942 			{
1943 				if (encoding & ENC_RFC_822) {
1944 					/* Match strptime behavior: years 00-68
1945 					 * are in the 21th century. */
1946 					if (tm.tm_year <= 68) {
1947 						tm.tm_year += 100;
1948 						matched = TRUE;
1949 					} else if (tm.tm_year <= 99) {
1950 						matched = TRUE;
1951 					}
1952 				} else if (encoding & ENC_RFC_1123) {
1953 					tm.tm_year -= 1900;
1954 					matched = TRUE;
1955 				}
1956 				if (!parse_month_name(month_name, &tm.tm_mon))
1957 					matched = FALSE;
1958 				if (matched)
1959 					end = ptr + num_chars;
1960 			}
1961 			if (end) {
1962 				errno = 0;
1963 				if (*end == ' ') end++;
1964 				if (g_ascii_strncasecmp(end, "UT", 2) == 0)
1965 				{
1966 					end += 2;
1967 				}
1968 				else if (g_ascii_strncasecmp(end, "GMT", 3) == 0)
1969 				{
1970 					end += 3;
1971 				}
1972 				else if (sscanf(end, "%c%2d%2d%n",
1973 				    &sign,
1974 				    &off_hr,
1975 				    &off_min,
1976 				    &num_chars) < 3)
1977 				{
1978 					errno = ERANGE;
1979 				}
1980 				if (sign == '-') off_hr = -off_hr;
1981 			}
1982 		}
1983 	}
1984 
1985 	if (errno == 0) {
1986 		ns->secs = mktime_utc (&tm);
1987 		if (off_hr > 0)
1988 			ns->secs += (off_hr * 3600) + (off_min * 60);
1989 		else if (off_hr < 0)
1990 			ns->secs -= ((-off_hr) * 3600) + (off_min * 60);
1991 		retval = ns;
1992 		if (endoff)
1993 		    *endoff = (gint)(offset + (end - begin));
1994 	}
1995 
1996 	wmem_free(NULL, begin);
1997 
1998 	return retval;
1999 }
2000 
2001 /* Fetch an IPv4 address, in network byte order.
2002  * We do *not* convert them to host byte order; we leave them in
2003  * network byte order. */
2004 guint32
tvb_get_ipv4(tvbuff_t * tvb,const gint offset)2005 tvb_get_ipv4(tvbuff_t *tvb, const gint offset)
2006 {
2007 	const guint8 *ptr;
2008 	guint32       addr;
2009 
2010 	ptr = fast_ensure_contiguous(tvb, offset, sizeof(guint32));
2011 	memcpy(&addr, ptr, sizeof addr);
2012 	return addr;
2013 }
2014 
2015 /* Fetch an IPv6 address. */
2016 void
tvb_get_ipv6(tvbuff_t * tvb,const gint offset,ws_in6_addr * addr)2017 tvb_get_ipv6(tvbuff_t *tvb, const gint offset, ws_in6_addr *addr)
2018 {
2019 	const guint8 *ptr;
2020 
2021 	ptr = ensure_contiguous(tvb, offset, sizeof(*addr));
2022 	memcpy(addr, ptr, sizeof *addr);
2023 }
2024 
2025 /* Fetch a GUID. */
2026 void
tvb_get_ntohguid(tvbuff_t * tvb,const gint offset,e_guid_t * guid)2027 tvb_get_ntohguid(tvbuff_t *tvb, const gint offset, e_guid_t *guid)
2028 {
2029 	const guint8 *ptr = ensure_contiguous(tvb, offset, GUID_LEN);
2030 
2031 	guid->data1 = pntoh32(ptr + 0);
2032 	guid->data2 = pntoh16(ptr + 4);
2033 	guid->data3 = pntoh16(ptr + 6);
2034 	memcpy(guid->data4, ptr + 8, sizeof guid->data4);
2035 }
2036 
2037 void
tvb_get_letohguid(tvbuff_t * tvb,const gint offset,e_guid_t * guid)2038 tvb_get_letohguid(tvbuff_t *tvb, const gint offset, e_guid_t *guid)
2039 {
2040 	const guint8 *ptr = ensure_contiguous(tvb, offset, GUID_LEN);
2041 
2042 	guid->data1 = pletoh32(ptr + 0);
2043 	guid->data2 = pletoh16(ptr + 4);
2044 	guid->data3 = pletoh16(ptr + 6);
2045 	memcpy(guid->data4, ptr + 8, sizeof guid->data4);
2046 }
2047 
2048 /*
2049  * NOTE: to support code written when proto_tree_add_item() took a
2050  * gboolean as its last argument, with FALSE meaning "big-endian"
2051  * and TRUE meaning "little-endian", we treat any non-zero value of
2052  * "encoding" as meaning "little-endian".
2053  */
2054 void
tvb_get_guid(tvbuff_t * tvb,const gint offset,e_guid_t * guid,const guint encoding)2055 tvb_get_guid(tvbuff_t *tvb, const gint offset, e_guid_t *guid, const guint encoding)
2056 {
2057 	if (encoding) {
2058 		tvb_get_letohguid(tvb, offset, guid);
2059 	} else {
2060 		tvb_get_ntohguid(tvb, offset, guid);
2061 	}
2062 }
2063 
2064 static const guint8 bit_mask8[] = {
2065 	0x00,
2066 	0x01,
2067 	0x03,
2068 	0x07,
2069 	0x0f,
2070 	0x1f,
2071 	0x3f,
2072 	0x7f,
2073 	0xff
2074 };
2075 
2076 
2077 /* Get a variable ammount of bits
2078  *
2079  * Return a byte array with bit limited data.
2080  * When encoding is ENC_BIG_ENDIAN, the data is aligned to the left.
2081  * When encoding is ENC_LITTLE_ENDIAN, the data is aligned to the right.
2082  */
2083 guint8 *
tvb_get_bits_array(wmem_allocator_t * scope,tvbuff_t * tvb,const gint bit_offset,size_t no_of_bits,size_t * data_length,const guint encoding)2084 tvb_get_bits_array(wmem_allocator_t *scope, tvbuff_t *tvb, const gint bit_offset,
2085 		   size_t no_of_bits, size_t *data_length, const guint encoding)
2086 {
2087 	tvbuff_t *sub_tvb;
2088 	if (encoding & ENC_LITTLE_ENDIAN) {
2089 		sub_tvb = tvb_new_octet_right_aligned(tvb, bit_offset, (gint32) no_of_bits);
2090 	} else {
2091 		sub_tvb = tvb_new_octet_aligned(tvb, bit_offset, (gint32) no_of_bits);
2092 	}
2093 	*data_length = tvb_reported_length(sub_tvb);
2094 	return (guint8*)tvb_memdup(scope, sub_tvb, 0, *data_length);
2095 }
2096 
2097 /* Get 1 - 8 bits */
2098 guint8
tvb_get_bits8(tvbuff_t * tvb,guint bit_offset,const gint no_of_bits)2099 tvb_get_bits8(tvbuff_t *tvb, guint bit_offset, const gint no_of_bits)
2100 {
2101 	return (guint8)_tvb_get_bits64(tvb, bit_offset, no_of_bits);
2102 }
2103 
2104 /* Get 1 - 16 bits */
2105 guint16
tvb_get_bits16(tvbuff_t * tvb,guint bit_offset,const gint no_of_bits,const guint encoding)2106 tvb_get_bits16(tvbuff_t *tvb, guint bit_offset, const gint no_of_bits, const guint encoding)
2107 {
2108 	return (guint16)tvb_get_bits64(tvb, bit_offset, no_of_bits, encoding);
2109 }
2110 
2111 /* Get 1 - 32 bits */
2112 guint32
tvb_get_bits32(tvbuff_t * tvb,guint bit_offset,const gint no_of_bits,const guint encoding)2113 tvb_get_bits32(tvbuff_t *tvb, guint bit_offset, const gint no_of_bits, const guint encoding)
2114 {
2115 	return (guint32)tvb_get_bits64(tvb, bit_offset, no_of_bits, encoding);
2116 }
2117 
2118 /* Get 1 - 64 bits */
2119 guint64
tvb_get_bits64(tvbuff_t * tvb,guint bit_offset,const gint no_of_bits,const guint encoding)2120 tvb_get_bits64(tvbuff_t *tvb, guint bit_offset, const gint no_of_bits, const guint encoding)
2121 {
2122 	/* encoding determines bit numbering within octet array */
2123 	if (encoding & ENC_LITTLE_ENDIAN) {
2124 		return _tvb_get_bits64_le(tvb, bit_offset, no_of_bits);
2125 	} else {
2126 		return _tvb_get_bits64(tvb, bit_offset, no_of_bits);
2127 	}
2128 }
2129 
2130 /*
2131  * This function will dissect a sequence of bits that does not need to be byte aligned; the bits
2132  * set will be shown in the tree as ..10 10.. and the integer value returned if return_value is set.
2133  * Offset should be given in bits from the start of the tvb.
2134  * Bits within octet are numbered from MSB (0) to LSB (7). Bit at bit_offset is return value most significant bit.
2135  * The function tolerates requests for more than 64 bits, but will only return the least significant 64 bits.
2136  */
2137 static guint64
_tvb_get_bits64(tvbuff_t * tvb,guint bit_offset,const gint total_no_of_bits)2138 _tvb_get_bits64(tvbuff_t *tvb, guint bit_offset, const gint total_no_of_bits)
2139 {
2140 	guint64 value;
2141 	guint	octet_offset = bit_offset >> 3;
2142 	guint8	required_bits_in_first_octet = 8 - (bit_offset % 8);
2143 
2144 	if(required_bits_in_first_octet > total_no_of_bits)
2145 	{
2146 		/* the required bits don't extend to the end of the first octet */
2147 		guint8 right_shift = required_bits_in_first_octet - total_no_of_bits;
2148 		value = (tvb_get_guint8(tvb, octet_offset) >> right_shift) & bit_mask8[total_no_of_bits % 8];
2149 	}
2150 	else
2151 	{
2152 		guint8 remaining_bit_length = total_no_of_bits;
2153 
2154 		/* get the bits up to the first octet boundary */
2155 		value = 0;
2156 		required_bits_in_first_octet %= 8;
2157 		if(required_bits_in_first_octet != 0)
2158 		{
2159 			value = tvb_get_guint8(tvb, octet_offset) & bit_mask8[required_bits_in_first_octet];
2160 			remaining_bit_length -= required_bits_in_first_octet;
2161 			octet_offset ++;
2162 		}
2163 		/* take the biggest words, shorts or octets that we can */
2164 		while (remaining_bit_length > 7)
2165 		{
2166 			switch (remaining_bit_length >> 4)
2167 			{
2168 			case 0:
2169 				/* 8 - 15 bits. (note that 0 - 7 would have dropped out of the while() loop) */
2170 				value <<= 8;
2171 				value += tvb_get_guint8(tvb, octet_offset);
2172 				remaining_bit_length -= 8;
2173 				octet_offset ++;
2174 				break;
2175 
2176 			case 1:
2177 				/* 16 - 31 bits */
2178 				value <<= 16;
2179 				value += tvb_get_ntohs(tvb, octet_offset);
2180 				remaining_bit_length -= 16;
2181 				octet_offset += 2;
2182 				break;
2183 
2184 			case 2:
2185 			case 3:
2186 				/* 32 - 63 bits */
2187 				value <<= 32;
2188 				value += tvb_get_ntohl(tvb, octet_offset);
2189 				remaining_bit_length -= 32;
2190 				octet_offset += 4;
2191 				break;
2192 
2193 			default:
2194 				/* 64 bits (or more???) */
2195 				value = tvb_get_ntoh64(tvb, octet_offset);
2196 				remaining_bit_length -= 64;
2197 				octet_offset += 8;
2198 				break;
2199 			}
2200 		}
2201 		/* get bits from any partial octet at the tail */
2202 		if(remaining_bit_length)
2203 		{
2204 			value <<= remaining_bit_length;
2205 			value += (tvb_get_guint8(tvb, octet_offset) >> (8 - remaining_bit_length));
2206 		}
2207 	}
2208 	return value;
2209 }
2210 
2211 /*
2212  * Offset should be given in bits from the start of the tvb.
2213  * Bits within octet are numbered from LSB (0) to MSB (7). Bit at bit_offset is return value least significant bit.
2214  * The function tolerates requests for more than 64 bits, but will only return the least significant 64 bits.
2215  */
2216 static guint64
_tvb_get_bits64_le(tvbuff_t * tvb,guint bit_offset,const gint total_no_of_bits)2217 _tvb_get_bits64_le(tvbuff_t *tvb, guint bit_offset, const gint total_no_of_bits)
2218 {
2219 	guint64 value = 0;
2220 	guint octet_offset = bit_offset / 8;
2221 	gint remaining_bits = total_no_of_bits;
2222 	gint shift = 0;
2223 
2224 	if (remaining_bits > 64)
2225 	{
2226 		remaining_bits = 64;
2227 	}
2228 
2229 	if (bit_offset % 8)
2230 	{
2231 		/* not aligned, extract bits from first octet */
2232 		shift = 8 - (bit_offset % 8);
2233 		value = tvb_get_guint8(tvb, octet_offset) >> (bit_offset % 8);
2234 		if (shift > total_no_of_bits)
2235 		{
2236 			/* keep only the requested bits */
2237 			value &= (G_GUINT64_CONSTANT(1) << total_no_of_bits) - 1;
2238 			remaining_bits = 0;
2239 		}
2240 		else
2241 		{
2242 			remaining_bits = total_no_of_bits - shift;
2243 		}
2244 		octet_offset++;
2245 	}
2246 
2247 	while (remaining_bits > 0)
2248 	{
2249 		/* take the biggest words, shorts or octets that we can */
2250 		if (remaining_bits >= 32)
2251 		{
2252 			value |= ((guint64)tvb_get_letohl(tvb, octet_offset) << shift);
2253 			shift += 32;
2254 			remaining_bits -= 32;
2255 			octet_offset += 4;
2256 		}
2257 		else if (remaining_bits >= 16)
2258 		{
2259 			value |= ((guint64)tvb_get_letohs(tvb, octet_offset) << shift);
2260 			shift += 16;
2261 			remaining_bits -= 16;
2262 			octet_offset += 2;
2263 		}
2264 		else if (remaining_bits >= 8)
2265 		{
2266 			value |= ((guint64)tvb_get_guint8(tvb, octet_offset) << shift);
2267 			shift += 8;
2268 			remaining_bits -= 8;
2269 			octet_offset += 1;
2270 		}
2271 		else
2272 		{
2273 			guint mask = (1 << remaining_bits) - 1;
2274 			value |= (((guint64)tvb_get_guint8(tvb, octet_offset) & mask) << shift);
2275 			shift += remaining_bits;
2276 			remaining_bits = 0;
2277 			octet_offset += 1;
2278 		}
2279 	}
2280 	return value;
2281 }
2282 
2283 /* Get 1 - 32 bits (should be deprecated as same as tvb_get_bits32??) */
2284 guint32
tvb_get_bits(tvbuff_t * tvb,const guint bit_offset,const gint no_of_bits,const guint encoding)2285 tvb_get_bits(tvbuff_t *tvb, const guint bit_offset, const gint no_of_bits, const guint encoding)
2286 {
2287 	return (guint32)tvb_get_bits64(tvb, bit_offset, no_of_bits, encoding);
2288 }
2289 
2290 static gint
tvb_find_guint8_generic(tvbuff_t * tvb,guint abs_offset,guint limit,guint8 needle)2291 tvb_find_guint8_generic(tvbuff_t *tvb, guint abs_offset, guint limit, guint8 needle)
2292 {
2293 	const guint8 *ptr;
2294 	const guint8 *result;
2295 
2296 	ptr = ensure_contiguous(tvb, abs_offset, limit); /* tvb_get_ptr() */
2297 
2298 	result = (const guint8 *) memchr(ptr, needle, limit);
2299 	if (!result)
2300 		return -1;
2301 
2302 	return (gint) ((result - ptr) + abs_offset);
2303 }
2304 
2305 /* Find first occurrence of needle in tvbuff, starting at offset. Searches
2306  * at most maxlength number of bytes; if maxlength is -1, searches to
2307  * end of tvbuff.
2308  * Returns the offset of the found needle, or -1 if not found.
2309  * Will not throw an exception, even if maxlength exceeds boundary of tvbuff;
2310  * in that case, -1 will be returned if the boundary is reached before
2311  * finding needle. */
2312 gint
tvb_find_guint8(tvbuff_t * tvb,const gint offset,const gint maxlength,const guint8 needle)2313 tvb_find_guint8(tvbuff_t *tvb, const gint offset, const gint maxlength, const guint8 needle)
2314 {
2315 	const guint8 *result;
2316 	guint	      abs_offset = 0;
2317 	guint	      limit = 0;
2318 	int           exception;
2319 
2320 	DISSECTOR_ASSERT(tvb && tvb->initialized);
2321 
2322 	exception = compute_offset_and_remaining(tvb, offset, &abs_offset, &limit);
2323 	if (exception)
2324 		THROW(exception);
2325 
2326 	/* Only search to end of tvbuff, w/o throwing exception. */
2327 	if (maxlength >= 0 && limit > (guint) maxlength) {
2328 		/* Maximum length doesn't go past end of tvbuff; search
2329 		   to that value. */
2330 		limit = (guint) maxlength;
2331 	}
2332 
2333 	/* If we have real data, perform our search now. */
2334 	if (tvb->real_data) {
2335 		result = (const guint8 *)memchr(tvb->real_data + abs_offset, needle, limit);
2336 		if (result == NULL) {
2337 			return -1;
2338 		}
2339 		else {
2340 			return (gint) (result - tvb->real_data);
2341 		}
2342 	}
2343 
2344 	if (tvb->ops->tvb_find_guint8)
2345 		return tvb->ops->tvb_find_guint8(tvb, abs_offset, limit, needle);
2346 
2347 	return tvb_find_guint8_generic(tvb, offset, limit, needle);
2348 }
2349 
2350 /* Same as tvb_find_guint8() with 16bit needle. */
2351 gint
tvb_find_guint16(tvbuff_t * tvb,const gint offset,const gint maxlength,const guint16 needle)2352 tvb_find_guint16(tvbuff_t *tvb, const gint offset, const gint maxlength,
2353 		 const guint16 needle)
2354 {
2355 	const guint8 needle1 = ((needle & 0xFF00) >> 8);
2356 	const guint8 needle2 = ((needle & 0x00FF) >> 0);
2357 	gint searched_bytes = 0;
2358 	gint pos = offset;
2359 
2360 	do {
2361 		gint offset1 =
2362 			tvb_find_guint8(tvb, pos, maxlength - searched_bytes, needle1);
2363 		gint offset2 = -1;
2364 
2365 		if (offset1 == -1) {
2366 			return -1;
2367 		}
2368 
2369 		searched_bytes = offset - pos + 1;
2370 
2371 		if ((maxlength != -1) && (searched_bytes >= maxlength)) {
2372 			return -1;
2373 		}
2374 
2375 		offset2 = tvb_find_guint8(tvb, offset1 + 1, 1, needle2);
2376 
2377 		searched_bytes += 1;
2378 
2379 		if (offset2 != -1) {
2380 			if ((maxlength != -1) && (searched_bytes > maxlength)) {
2381 				return -1;
2382 			}
2383 			return offset1;
2384 		}
2385 
2386 		pos = offset1 + 1;
2387 	} while (searched_bytes < maxlength);
2388 
2389 	return -1;
2390 }
2391 
2392 static inline gint
tvb_ws_mempbrk_guint8_generic(tvbuff_t * tvb,guint abs_offset,guint limit,const ws_mempbrk_pattern * pattern,guchar * found_needle)2393 tvb_ws_mempbrk_guint8_generic(tvbuff_t *tvb, guint abs_offset, guint limit, const ws_mempbrk_pattern* pattern, guchar *found_needle)
2394 {
2395 	const guint8 *ptr;
2396 	const guint8 *result;
2397 
2398 	ptr = ensure_contiguous(tvb, abs_offset, limit); /* tvb_get_ptr */
2399 
2400 	result = ws_mempbrk_exec(ptr, limit, pattern, found_needle);
2401 	if (!result)
2402 		return -1;
2403 
2404 	return (gint) ((result - ptr) + abs_offset);
2405 }
2406 
2407 
2408 /* Find first occurrence of any of the pattern chars in tvbuff, starting at offset.
2409  * Searches at most maxlength number of bytes; if maxlength is -1, searches
2410  * to end of tvbuff.
2411  * Returns the offset of the found needle, or -1 if not found.
2412  * Will not throw an exception, even if maxlength exceeds boundary of tvbuff;
2413  * in that case, -1 will be returned if the boundary is reached before
2414  * finding needle. */
2415 gint
tvb_ws_mempbrk_pattern_guint8(tvbuff_t * tvb,const gint offset,const gint maxlength,const ws_mempbrk_pattern * pattern,guchar * found_needle)2416 tvb_ws_mempbrk_pattern_guint8(tvbuff_t *tvb, const gint offset, const gint maxlength,
2417 			const ws_mempbrk_pattern* pattern, guchar *found_needle)
2418 {
2419 	const guint8 *result;
2420 	guint	      abs_offset = 0;
2421 	guint	      limit = 0;
2422 	int           exception;
2423 
2424 	DISSECTOR_ASSERT(tvb && tvb->initialized);
2425 
2426 	exception = compute_offset_and_remaining(tvb, offset, &abs_offset, &limit);
2427 	if (exception)
2428 		THROW(exception);
2429 
2430 	/* Only search to end of tvbuff, w/o throwing exception. */
2431 	if (limit > (guint) maxlength) {
2432 		/* Maximum length doesn't go past end of tvbuff; search
2433 		   to that value. */
2434 		limit = maxlength;
2435 	}
2436 
2437 	/* If we have real data, perform our search now. */
2438 	if (tvb->real_data) {
2439 		result = ws_mempbrk_exec(tvb->real_data + abs_offset, limit, pattern, found_needle);
2440 		if (result == NULL) {
2441 			return -1;
2442 		}
2443 		else {
2444 			return (gint) (result - tvb->real_data);
2445 		}
2446 	}
2447 
2448 	if (tvb->ops->tvb_ws_mempbrk_pattern_guint8)
2449 		return tvb->ops->tvb_ws_mempbrk_pattern_guint8(tvb, abs_offset, limit, pattern, found_needle);
2450 
2451 	return tvb_ws_mempbrk_guint8_generic(tvb, abs_offset, limit, pattern, found_needle);
2452 }
2453 
2454 /* Find size of stringz (NUL-terminated string) by looking for terminating
2455  * NUL.  The size of the string includes the terminating NUL.
2456  *
2457  * If the NUL isn't found, it throws the appropriate exception.
2458  */
2459 guint
tvb_strsize(tvbuff_t * tvb,const gint offset)2460 tvb_strsize(tvbuff_t *tvb, const gint offset)
2461 {
2462 	guint abs_offset = 0, junk_length;
2463 	gint  nul_offset;
2464 
2465 	DISSECTOR_ASSERT(tvb && tvb->initialized);
2466 
2467 	check_offset_length(tvb, offset, 0, &abs_offset, &junk_length);
2468 	nul_offset = tvb_find_guint8(tvb, abs_offset, -1, 0);
2469 	if (nul_offset == -1) {
2470 		/*
2471 		 * OK, we hit the end of the tvbuff, so we should throw
2472 		 * an exception.
2473 		 */
2474 		if (tvb->length < tvb->contained_length) {
2475 			THROW(BoundsError);
2476 		} else if (tvb->length < tvb->reported_length) {
2477 			THROW(ContainedBoundsError);
2478 		} else if (tvb->flags & TVBUFF_FRAGMENT) {
2479 			THROW(FragmentBoundsError);
2480 		} else {
2481 			THROW(ReportedBoundsError);
2482 		}
2483 	}
2484 	return (nul_offset - abs_offset) + 1;
2485 }
2486 
2487 /* UTF-16/UCS-2 version of tvb_strsize */
2488 /* Returns number of bytes including the (two-bytes) null terminator */
2489 guint
tvb_unicode_strsize(tvbuff_t * tvb,const gint offset)2490 tvb_unicode_strsize(tvbuff_t *tvb, const gint offset)
2491 {
2492 	guint     i = 0;
2493 	gunichar2 uchar;
2494 
2495 	DISSECTOR_ASSERT(tvb && tvb->initialized);
2496 
2497 	do {
2498 		/* Endianness doesn't matter when looking for null */
2499 		uchar = tvb_get_ntohs(tvb, offset + i);
2500 		i += 2;
2501 	} while(uchar != 0);
2502 
2503 	return i;
2504 }
2505 
2506 /* Find length of string by looking for end of string ('\0'), up to
2507  * 'maxlength' characters'; if 'maxlength' is -1, searches to end
2508  * of tvbuff.
2509  * Returns -1 if 'maxlength' reached before finding EOS. */
2510 gint
tvb_strnlen(tvbuff_t * tvb,const gint offset,const guint maxlength)2511 tvb_strnlen(tvbuff_t *tvb, const gint offset, const guint maxlength)
2512 {
2513 	gint  result_offset;
2514 	guint abs_offset = 0, junk_length;
2515 
2516 	DISSECTOR_ASSERT(tvb && tvb->initialized);
2517 
2518 	check_offset_length(tvb, offset, 0, &abs_offset, &junk_length);
2519 
2520 	result_offset = tvb_find_guint8(tvb, abs_offset, maxlength, 0);
2521 
2522 	if (result_offset == -1) {
2523 		return -1;
2524 	}
2525 	else {
2526 		return result_offset - abs_offset;
2527 	}
2528 }
2529 
2530 /*
2531  * Implement strneql etc
2532  */
2533 
2534 /*
2535  * Call strncmp after checking if enough chars left, returning 0 if
2536  * it returns 0 (meaning "equal") and -1 otherwise, otherwise return -1.
2537  */
2538 gint
tvb_strneql(tvbuff_t * tvb,const gint offset,const gchar * str,const size_t size)2539 tvb_strneql(tvbuff_t *tvb, const gint offset, const gchar *str, const size_t size)
2540 {
2541 	const guint8 *ptr;
2542 
2543 	ptr = ensure_contiguous_no_exception(tvb, offset, (gint)size, NULL);
2544 
2545 	if (ptr) {
2546 		int cmp = strncmp((const char *)ptr, str, size);
2547 
2548 		/*
2549 		 * Return 0 if equal, -1 otherwise.
2550 		 */
2551 		return (cmp == 0 ? 0 : -1);
2552 	} else {
2553 		/*
2554 		 * Not enough characters in the tvbuff to match the
2555 		 * string.
2556 		 */
2557 		return -1;
2558 	}
2559 }
2560 
2561 /*
2562  * Call g_ascii_strncasecmp after checking if enough chars left, returning
2563  * 0 if it returns 0 (meaning "equal") and -1 otherwise, otherwise return -1.
2564  */
2565 gint
tvb_strncaseeql(tvbuff_t * tvb,const gint offset,const gchar * str,const size_t size)2566 tvb_strncaseeql(tvbuff_t *tvb, const gint offset, const gchar *str, const size_t size)
2567 {
2568 	const guint8 *ptr;
2569 
2570 	ptr = ensure_contiguous_no_exception(tvb, offset, (gint)size, NULL);
2571 
2572 	if (ptr) {
2573 		int cmp = g_ascii_strncasecmp((const char *)ptr, str, size);
2574 
2575 		/*
2576 		 * Return 0 if equal, -1 otherwise.
2577 		 */
2578 		return (cmp == 0 ? 0 : -1);
2579 	} else {
2580 		/*
2581 		 * Not enough characters in the tvbuff to match the
2582 		 * string.
2583 		 */
2584 		return -1;
2585 	}
2586 }
2587 
2588 /*
2589  * Check that the tvbuff contains at least size bytes, starting at
2590  * offset, and that those bytes are equal to str. Return 0 for success
2591  * and -1 for error. This function does not throw an exception.
2592  */
2593 gint
tvb_memeql(tvbuff_t * tvb,const gint offset,const guint8 * str,size_t size)2594 tvb_memeql(tvbuff_t *tvb, const gint offset, const guint8 *str, size_t size)
2595 {
2596 	const guint8 *ptr;
2597 
2598 	ptr = ensure_contiguous_no_exception(tvb, offset, (gint) size, NULL);
2599 
2600 	if (ptr) {
2601 		int cmp = memcmp(ptr, str, size);
2602 
2603 		/*
2604 		 * Return 0 if equal, -1 otherwise.
2605 		 */
2606 		return (cmp == 0 ? 0 : -1);
2607 	} else {
2608 		/*
2609 		 * Not enough characters in the tvbuff to match the
2610 		 * string.
2611 		 */
2612 		return -1;
2613 	}
2614 }
2615 
2616 /**
2617  * Format the data in the tvb from offset for size.  Returned string is
2618  * wmem packet_scoped so call must be in that scope.
2619  */
2620 gchar *
tvb_format_text(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,const gint size)2621 tvb_format_text(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, const gint size)
2622 {
2623 	const guint8 *ptr;
2624 	gint          len;
2625 
2626 	len = (size > 0) ? size : 0;
2627 
2628 	ptr = ensure_contiguous(tvb, offset, size);
2629 	return format_text(scope, ptr, len);
2630 }
2631 
2632 /*
2633  * Format the data in the tvb from offset for length ...
2634  */
2635 gchar *
tvb_format_text_wsp(wmem_allocator_t * allocator,tvbuff_t * tvb,const gint offset,const gint size)2636 tvb_format_text_wsp(wmem_allocator_t* allocator, tvbuff_t *tvb, const gint offset, const gint size)
2637 {
2638 	const guint8 *ptr;
2639 	gint          len;
2640 
2641 	len = (size > 0) ? size : 0;
2642 
2643 	ptr = ensure_contiguous(tvb, offset, size);
2644 	return format_text_wsp(allocator, ptr, len);
2645 }
2646 
2647 /**
2648  * Like "tvb_format_text()", but for null-padded strings; don't show
2649  * the null padding characters as "\000".  Returned string is wmem packet_scoped
2650  * so call must be in that scope.
2651  */
2652 gchar *
tvb_format_stringzpad(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,const gint size)2653 tvb_format_stringzpad(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, const gint size)
2654 {
2655 	const guint8 *ptr, *p;
2656 	gint          len;
2657 	gint          stringlen;
2658 
2659 	len = (size > 0) ? size : 0;
2660 
2661 	ptr = ensure_contiguous(tvb, offset, size);
2662 	for (p = ptr, stringlen = 0; stringlen < len && *p != '\0'; p++, stringlen++)
2663 		;
2664 	return format_text(scope, ptr, stringlen);
2665 }
2666 
2667 /*
2668  * Like "tvb_format_text_wsp()", but for null-padded strings; don't show
2669  * the null padding characters as "\000".
2670  */
2671 gchar *
tvb_format_stringzpad_wsp(wmem_allocator_t * allocator,tvbuff_t * tvb,const gint offset,const gint size)2672 tvb_format_stringzpad_wsp(wmem_allocator_t* allocator, tvbuff_t *tvb, const gint offset, const gint size)
2673 {
2674 	const guint8 *ptr, *p;
2675 	gint          len;
2676 	gint          stringlen;
2677 
2678 	len = (size > 0) ? size : 0;
2679 
2680 	ptr = ensure_contiguous(tvb, offset, size);
2681 	for (p = ptr, stringlen = 0; stringlen < len && *p != '\0'; p++, stringlen++)
2682 		;
2683 	return format_text_wsp(allocator, ptr, stringlen);
2684 }
2685 
2686 /* Unicode REPLACEMENT CHARACTER */
2687 #define UNREPL 0x00FFFD
2688 
2689 /*
2690  * All string functions below take a scope as an argument.
2691  *
2692  *
2693  * If scope is NULL, memory is allocated with g_malloc() and user must
2694  * explicitly free it with g_free().
2695  * If scope is not NULL, memory is allocated with the corresponding pool
2696  * lifetime.
2697  *
2698  * All functions throw an exception if the tvbuff ends before the string
2699  * does.
2700  */
2701 
2702 /*
2703  * Given a wmem scope, a tvbuff, an offset, and a length, treat the string
2704  * of bytes referred to by the tvbuff, offset, and length as an ASCII string,
2705  * with all bytes with the high-order bit set being invalid, and return a
2706  * pointer to a UTF-8 string, allocated using the wmem scope.
2707  *
2708  * Octets with the highest bit set will be converted to the Unicode
2709  * REPLACEMENT CHARACTER.
2710  */
2711 static guint8 *
tvb_get_ascii_string(wmem_allocator_t * scope,tvbuff_t * tvb,gint offset,gint length)2712 tvb_get_ascii_string(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint length)
2713 {
2714 	const guint8  *ptr;
2715 
2716 	ptr = ensure_contiguous(tvb, offset, length);
2717 	return get_ascii_string(scope, ptr, length);
2718 }
2719 
2720 /*
2721  * Given a wmem scope, a tvbuff, an offset, a length, and a translation table,
2722  * treat the string of bytes referred to by the tvbuff, offset, and length
2723  * as a string encoded using one octet per character, with octets with the
2724  * high-order bit clear being mapped by the translation table to 2-byte
2725  * Unicode Basic Multilingual Plane characters (including REPLACEMENT
2726  * CHARACTER) and octets with the high-order bit set being mapped to
2727  * REPLACEMENT CHARACTER, and return a pointer to a UTF-8 string,
2728  * allocated using the wmem scope.
2729  *
2730  * Octets with the highest bit set will be converted to the Unicode
2731  * REPLACEMENT CHARACTER.
2732  */
2733 static guint8 *
tvb_get_iso_646_string(wmem_allocator_t * scope,tvbuff_t * tvb,gint offset,gint length,const gunichar2 table[0x80])2734 tvb_get_iso_646_string(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint length, const gunichar2 table[0x80])
2735 {
2736 	const guint8  *ptr;
2737 
2738 	ptr = ensure_contiguous(tvb, offset, length);
2739 	return get_iso_646_string(scope, ptr, length, table);
2740 }
2741 
2742 /*
2743  * Given a wmem scope, a tvbuff, an offset, and a length, treat the string
2744  * of bytes referred to by the tvbuff, the offset. and the length as a UTF-8
2745  * string, and return a pointer to a UTF-8 string, allocated using the wmem
2746  * scope, with all ill-formed sequences replaced with the Unicode REPLACEMENT
2747  * CHARACTER according to the recommended "best practices" given in the Unicode
2748  * Standard and specified by W3C/WHATWG.
2749  *
2750  * Note that in conformance with the Unicode Standard, this treats three
2751  * byte sequences corresponding to UTF-16 surrogate halves (paired or unpaired)
2752  * and two byte overlong encodings of 7-bit ASCII characters as invalid and
2753  * substitutes REPLACEMENT CHARACTER for them. Explicit support for nonstandard
2754  * derivative encoding formats (e.g. CESU-8, Java Modified UTF-8, WTF-8) could
2755  * be added later.
2756  */
2757 static guint8 *
tvb_get_utf_8_string(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,const gint length)2758 tvb_get_utf_8_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, const gint length)
2759 {
2760 	const guint8  *ptr;
2761 
2762 	ptr = ensure_contiguous(tvb, offset, length);
2763 	return get_utf_8_string(scope, ptr, length);
2764 }
2765 
2766 /*
2767  * Given a wmem scope, a tvbuff, an offset, and a length, treat the string
2768  * of bytes referred to by the tvbuff, the offset, and the length as a
2769  * raw string, and return a pointer to that string, allocated using the
2770  * wmem scope. This means a null is appended at the end, but no replacement
2771  * checking is done otherwise, unlike tvb_get_utf_8_string().
2772  *
2773  * Also, this one allows a length of -1 to mean get all, but does not
2774  * allow a negative offset.
2775  */
2776 static inline guint8 *
tvb_get_raw_string(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,const gint length)2777 tvb_get_raw_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, const gint length)
2778 {
2779 	guint8 *strbuf;
2780 	gint    abs_length = length;
2781 
2782 	DISSECTOR_ASSERT(offset     >=  0);
2783 	DISSECTOR_ASSERT(abs_length >= -1);
2784 
2785 	if (abs_length < 0)
2786 		abs_length = tvb->length - offset;
2787 
2788 	tvb_ensure_bytes_exist(tvb, offset, abs_length);
2789 	strbuf = (guint8 *)wmem_alloc(scope, abs_length + 1);
2790 	tvb_memcpy(tvb, strbuf, offset, abs_length);
2791 	strbuf[abs_length] = '\0';
2792 	return strbuf;
2793 }
2794 
2795 /*
2796  * Given a wmem scope, a tvbuff, an offset, and a length, treat the string
2797  * of bytes referred to by the tvbuff, the offset, and the length as an
2798  * ISO 8859/1 string, and return a pointer to a UTF-8 string, allocated
2799  * using the wmem scope.
2800  */
2801 static guint8 *
tvb_get_string_8859_1(wmem_allocator_t * scope,tvbuff_t * tvb,gint offset,gint length)2802 tvb_get_string_8859_1(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint length)
2803 {
2804 	const guint8  *ptr;
2805 
2806 	ptr = ensure_contiguous(tvb, offset, length);
2807 	return get_8859_1_string(scope, ptr, length);
2808 }
2809 
2810 /*
2811  * Given a wmem scope, a tvbuff, an offset, a length, and a translation
2812  * table, treat the string of bytes referred to by the tvbuff, the offset,
2813  * and the length as a string encoded using one octet per character, with
2814  * octets with the high-order bit clear being ASCII and octets with the
2815  * high-order bit set being mapped by the translation table to 2-byte
2816  * Unicode Basic Multilingual Plane characters (including REPLACEMENT
2817  * CHARACTER), and return a pointer to a UTF-8 string, allocated with the
2818  * wmem scope.
2819  */
2820 static guint8 *
tvb_get_string_unichar2(wmem_allocator_t * scope,tvbuff_t * tvb,gint offset,gint length,const gunichar2 table[0x80])2821 tvb_get_string_unichar2(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint length, const gunichar2 table[0x80])
2822 {
2823 	const guint8  *ptr;
2824 
2825 	ptr = ensure_contiguous(tvb, offset, length);
2826 	return get_unichar2_string(scope, ptr, length, table);
2827 }
2828 
2829 /*
2830  * Given a wmem scope, a tvbuff, an offset, a length, and an encoding
2831  * giving the byte order, treat the string of bytes referred to by the
2832  * tvbuff, the offset, and the length as a UCS-2 encoded string in
2833  * the byte order in question, containing characters from the Basic
2834  * Multilingual Plane (plane 0) of Unicode, and return a pointer to a
2835  * UTF-8 string, allocated with the wmem scope.
2836  *
2837  * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN.
2838  *
2839  * Specify length in bytes.
2840  *
2841  * XXX - should map lead and trail surrogate values to REPLACEMENT
2842  * CHARACTERs (0xFFFD)?
2843  * XXX - if there are an odd number of bytes, should put a
2844  * REPLACEMENT CHARACTER at the end.
2845  */
2846 static guint8 *
tvb_get_ucs_2_string(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,gint length,const guint encoding)2847 tvb_get_ucs_2_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint length, const guint encoding)
2848 {
2849 	const guint8  *ptr;
2850 
2851 	ptr = ensure_contiguous(tvb, offset, length);
2852 	return get_ucs_2_string(scope, ptr, length, encoding);
2853 }
2854 
2855 /*
2856  * Given a wmem scope, a tvbuff, an offset, a length, and an encoding
2857  * giving the byte order, treat the string of bytes referred to by the
2858  * tvbuff, the offset, and the length as a UTF-16 encoded string in
2859  * the byte order in question, and return a pointer to a UTF-8 string,
2860  * allocated with the wmem scope.
2861  *
2862  * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN.
2863  *
2864  * Specify length in bytes.
2865  *
2866  * XXX - should map surrogate errors to REPLACEMENT CHARACTERs (0xFFFD).
2867  * XXX - should map code points > 10FFFF to REPLACEMENT CHARACTERs.
2868  * XXX - if there are an odd number of bytes, should put a
2869  * REPLACEMENT CHARACTER at the end.
2870  */
2871 static guint8 *
tvb_get_utf_16_string(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,gint length,const guint encoding)2872 tvb_get_utf_16_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint length, const guint encoding)
2873 {
2874 	const guint8  *ptr;
2875 
2876 	ptr = ensure_contiguous(tvb, offset, length);
2877 	return get_utf_16_string(scope, ptr, length, encoding);
2878 }
2879 
2880 /*
2881  * Given a wmem scope, a tvbuff, an offset, a length, and an encoding
2882  * giving the byte order, treat the string of bytes referred to by the
2883  * tvbuff, the offset, and the length as a UCS-4 encoded string in
2884  * the byte order in question, and return a pointer to a UTF-8 string,
2885  * allocated with the wmem scope.
2886  *
2887  * Encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN
2888  *
2889  * Specify length in bytes
2890  *
2891  * XXX - should map lead and trail surrogate values to a "substitute"
2892  * UTF-8 character?
2893  * XXX - should map code points > 10FFFF to REPLACEMENT CHARACTERs.
2894  * XXX - if the number of bytes isn't a multiple of 4, should put a
2895  * REPLACEMENT CHARACTER at the end.
2896  */
2897 static gchar *
tvb_get_ucs_4_string(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,gint length,const guint encoding)2898 tvb_get_ucs_4_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint length, const guint encoding)
2899 {
2900 	const guint8 *ptr;
2901 
2902 	ptr = ensure_contiguous(tvb, offset, length);
2903 	return get_ucs_4_string(scope, ptr, length, encoding);
2904 }
2905 
2906 gchar *
tvb_get_ts_23_038_7bits_string_packed(wmem_allocator_t * scope,tvbuff_t * tvb,const gint bit_offset,gint no_of_chars)2907 tvb_get_ts_23_038_7bits_string_packed(wmem_allocator_t *scope, tvbuff_t *tvb,
2908 	const gint bit_offset, gint no_of_chars)
2909 {
2910 	gint           in_offset = bit_offset >> 3; /* Current pointer to the input buffer */
2911 	gint           length = ((no_of_chars + 1) * 7 + (bit_offset & 0x07)) >> 3;
2912 	const guint8  *ptr;
2913 
2914 	DISSECTOR_ASSERT(tvb && tvb->initialized);
2915 
2916 	ptr = ensure_contiguous(tvb, in_offset, length);
2917 	return get_ts_23_038_7bits_string_packed(scope, ptr, bit_offset, no_of_chars);
2918 }
2919 
2920 gchar *
tvb_get_ts_23_038_7bits_string_unpacked(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,gint length)2921 tvb_get_ts_23_038_7bits_string_unpacked(wmem_allocator_t *scope, tvbuff_t *tvb,
2922 	const gint offset, gint length)
2923 {
2924 	const guint8  *ptr;
2925 
2926 	DISSECTOR_ASSERT(tvb && tvb->initialized);
2927 
2928 	ptr = ensure_contiguous(tvb, offset, length);
2929 	return get_ts_23_038_7bits_string_unpacked(scope, ptr, length);
2930 }
2931 
2932 gchar *
tvb_get_etsi_ts_102_221_annex_a_string(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,gint length)2933 tvb_get_etsi_ts_102_221_annex_a_string(wmem_allocator_t *scope, tvbuff_t *tvb,
2934 	const gint offset, gint length)
2935 {
2936 	const guint8  *ptr;
2937 
2938 	DISSECTOR_ASSERT(tvb && tvb->initialized);
2939 
2940 	ptr = ensure_contiguous(tvb, offset, length);
2941 	return get_etsi_ts_102_221_annex_a_string(scope, ptr, length);
2942 }
2943 
2944 gchar *
tvb_get_ascii_7bits_string(wmem_allocator_t * scope,tvbuff_t * tvb,const gint bit_offset,gint no_of_chars)2945 tvb_get_ascii_7bits_string(wmem_allocator_t *scope, tvbuff_t *tvb,
2946 	const gint bit_offset, gint no_of_chars)
2947 {
2948 	gint           in_offset = bit_offset >> 3; /* Current pointer to the input buffer */
2949 	gint           length = ((no_of_chars + 1) * 7 + (bit_offset & 0x07)) >> 3;
2950 	const guint8  *ptr;
2951 
2952 	DISSECTOR_ASSERT(tvb && tvb->initialized);
2953 
2954 	ptr = ensure_contiguous(tvb, in_offset, length);
2955 	return get_ascii_7bits_string(scope, ptr, bit_offset, no_of_chars);
2956 }
2957 
2958 /*
2959  * Given a wmem scope, a tvbuff, an offset, a length, and a translation
2960  * table, treat the string of bytes referred to by the tvbuff, the offset,
2961  * and the length as a string encoded using one octet per character, with
2962  * octets being mapped by the translation table to 2-byte Unicode Basic
2963  * Multilingual Plane characters (including REPLACEMENT CHARACTER), and
2964  * return a pointer to a UTF-8 string, allocated with the wmem scope.
2965  */
2966 static guint8 *
tvb_get_nonascii_unichar2_string(wmem_allocator_t * scope,tvbuff_t * tvb,gint offset,gint length,const gunichar2 table[256])2967 tvb_get_nonascii_unichar2_string(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint length, const gunichar2 table[256])
2968 {
2969 	const guint8  *ptr;
2970 
2971 	ptr = ensure_contiguous(tvb, offset, length);
2972 	return get_nonascii_unichar2_string(scope, ptr, length, table);
2973 }
2974 
2975 /*
2976  * Given a wmem scope, a tvbuff, an offset, and a length, treat the bytes
2977  * referred to by the tvbuff, offset, and length as a GB18030 encoded string,
2978  * and return a pointer to a UTF-8 string, allocated with the wmem scope,
2979  * converted having substituted REPLACEMENT CHARACTER according to the
2980  * Unicode Standard 5.22 U+FFFD Substitution for Conversion.
2981  * ( https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf )
2982  *
2983  * As expected, this will also decode GBK and GB2312 strings.
2984  */
2985 static guint8 *
tvb_get_gb18030_string(wmem_allocator_t * scope,tvbuff_t * tvb,gint offset,gint length)2986 tvb_get_gb18030_string(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint length)
2987 {
2988 	const guint8  *ptr;
2989 
2990 	ptr = ensure_contiguous(tvb, offset, length);
2991 	return get_gb18030_string(scope, ptr, length);
2992 }
2993 
2994 /*
2995  * Given a wmem scope, a tvbuff, an offset, and a length, treat the bytes
2996  * referred to by the tvbuff, offset, and length as a EUC-KR encoded string,
2997  * and return a pointer to a UTF-8 string, allocated with the wmem scope,
2998  * converted having substituted REPLACEMENT CHARACTER according to the
2999  * Unicode Standard 5.22 U+FFFD Substitution for Conversion.
3000  * ( https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf )
3001  */
3002 static guint8 *
tvb_get_euc_kr_string(wmem_allocator_t * scope,tvbuff_t * tvb,gint offset,gint length)3003 tvb_get_euc_kr_string(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint length)
3004 {
3005 	const guint8  *ptr;
3006 
3007 	ptr = ensure_contiguous(tvb, offset, length);
3008 	return get_euc_kr_string(scope, ptr, length);
3009 }
3010 
3011 static guint8 *
tvb_get_t61_string(wmem_allocator_t * scope,tvbuff_t * tvb,gint offset,gint length)3012 tvb_get_t61_string(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint length)
3013 {
3014 	const guint8  *ptr;
3015 
3016 	ptr = ensure_contiguous(tvb, offset, length);
3017 	return get_t61_string(scope, ptr, length);
3018 }
3019 
3020 /*
3021  * Encoding tables for BCD strings.
3022  */
3023 static const dgt_set_t Dgt0_9_bcd = {
3024 	{
3025 		/*  0   1   2   3   4   5   6   7   8   9   a   b   c   d   e  f */
3026 		   '0','1','2','3','4','5','6','7','8','9','?','?','?','?','?','?'
3027 	}
3028 };
3029 
3030 static const dgt_set_t Dgt_keypad_abc_tbcd = {
3031 	{
3032 		/*  0   1   2   3   4   5   6   7   8   9   a   b   c   d   e  f */
3033 		   '0','1','2','3','4','5','6','7','8','9','*','#','a','b','c','?'
3034 	}
3035 };
3036 
3037 static const dgt_set_t Dgt_ansi_tbcd = {
3038 	{
3039 		/*  0   1   2   3   4   5   6   7   8   9   a   b   c   d   e  f */
3040 		   '0','1','2','3','4','5','6','7','8','9','?','B','C','*','#','?'
3041 	}
3042 };
3043 
3044 static guint8 *
tvb_get_apn_string(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,gint length)3045 tvb_get_apn_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
3046 			     gint length)
3047 {
3048 	wmem_strbuf_t *str;
3049 
3050 	/*
3051 	 * This is a domain name.
3052 	 *
3053 	 * 3GPP TS 23.003, section 19.4.2 "Fully Qualified Domain Names
3054 	 * (FQDNs)", subsection 19.4.2.1 "General", says:
3055 	 *
3056 	 *    The encoding of any identifier used as part of a Fully
3057 	 *    Qualifed Domain Name (FQDN) shall follow the Name Syntax
3058 	 *    defined in IETF RFC 2181 [18], IETF RFC 1035 [19] and
3059 	 *    IETF RFC 1123 [20].  An FQDN consists of one or more
3060 	 *    labels. Each label is coded as a one octet length field
3061 	 *    followed by that number of octets coded as 8 bit ASCII
3062 	 *    characters.
3063 	 *
3064 	 * so this does not appear to use full-blown DNS compression -
3065 	 * the upper 2 bits of the length don't indicate that it's a
3066 	 * pointer or an extended label (RFC 2673).
3067 	 */
3068 	str = wmem_strbuf_sized_new(scope, length + 1, 0);
3069 	if (length > 0) {
3070 		const guint8 *ptr;
3071 
3072 		ptr = ensure_contiguous(tvb, offset, length);
3073 
3074 		for (;;) {
3075 			guint label_len;
3076 
3077 			/*
3078 			 * Process this label.
3079 			 */
3080 			label_len = *ptr;
3081 			ptr++;
3082 			length--;
3083 
3084 			while (label_len != 0) {
3085 				guint8 ch;
3086 
3087 				if (length == 0)
3088 					goto end;
3089 
3090 				ch = *ptr;
3091 				if (ch < 0x80)
3092 					wmem_strbuf_append_c(str, ch);
3093 				else
3094 					wmem_strbuf_append_unichar(str, UNREPL);
3095 				ptr++;
3096 				label_len--;
3097 				length--;
3098 			}
3099 
3100 			if (length == 0)
3101 				goto end;
3102 
3103 			wmem_strbuf_append_c(str, '.');
3104 		}
3105 	}
3106 
3107 end:
3108 	return (guint8 *) wmem_strbuf_finalize(str);
3109 }
3110 
3111 /*
3112  * Given a tvbuff, an offset, a length, and an encoding, allocate a
3113  * buffer big enough to hold a non-null-terminated string of that length
3114  * at that offset, plus a trailing '\0', copy into the buffer the
3115  * string as converted from the appropriate encoding to UTF-8, and
3116  * return a pointer to the string.
3117  */
3118 guint8 *
tvb_get_string_enc(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,const gint length,const guint encoding)3119 tvb_get_string_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
3120 			     const gint length, const guint encoding)
3121 {
3122 	guint8 *strptr;
3123 	gboolean odd, skip_first;
3124 
3125 	DISSECTOR_ASSERT(tvb && tvb->initialized);
3126 
3127 	/* make sure length = -1 fails */
3128 	if (length < 0) {
3129 		THROW(ReportedBoundsError);
3130 	}
3131 
3132 	switch (encoding & ENC_CHARENCODING_MASK) {
3133 
3134 	case ENC_ASCII:
3135 	default:
3136 		/*
3137 		 * For now, we treat bogus values as meaning
3138 		 * "ASCII" rather than reporting an error,
3139 		 * for the benefit of old dissectors written
3140 		 * when the last argument to proto_tree_add_item()
3141 		 * was a gboolean for the byte order, not an
3142 		 * encoding value, and passed non-zero values
3143 		 * other than TRUE to mean "little-endian".
3144 		 */
3145 		strptr = tvb_get_ascii_string(scope, tvb, offset, length);
3146 		break;
3147 
3148 	case ENC_UTF_8:
3149 		strptr = tvb_get_utf_8_string(scope, tvb, offset, length);
3150 		break;
3151 
3152 	case ENC_UTF_16:
3153 		strptr = tvb_get_utf_16_string(scope, tvb, offset, length,
3154 		    encoding & ENC_LITTLE_ENDIAN);
3155 		break;
3156 
3157 	case ENC_UCS_2:
3158 		strptr = tvb_get_ucs_2_string(scope, tvb, offset, length,
3159 		    encoding & ENC_LITTLE_ENDIAN);
3160 		break;
3161 
3162 	case ENC_UCS_4:
3163 		strptr = tvb_get_ucs_4_string(scope, tvb, offset, length,
3164 		    encoding & ENC_LITTLE_ENDIAN);
3165 		break;
3166 
3167 	case ENC_ISO_8859_1:
3168 		/*
3169 		 * ISO 8859-1 printable code point values are equal
3170 		 * to the equivalent Unicode code point value, so
3171 		 * no translation table is needed.
3172 		 */
3173 		strptr = tvb_get_string_8859_1(scope, tvb, offset, length);
3174 		break;
3175 
3176 	case ENC_ISO_8859_2:
3177 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_2);
3178 		break;
3179 
3180 	case ENC_ISO_8859_3:
3181 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_3);
3182 		break;
3183 
3184 	case ENC_ISO_8859_4:
3185 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_4);
3186 		break;
3187 
3188 	case ENC_ISO_8859_5:
3189 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_5);
3190 		break;
3191 
3192 	case ENC_ISO_8859_6:
3193 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_6);
3194 		break;
3195 
3196 	case ENC_ISO_8859_7:
3197 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_7);
3198 		break;
3199 
3200 	case ENC_ISO_8859_8:
3201 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_8);
3202 		break;
3203 
3204 	case ENC_ISO_8859_9:
3205 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_9);
3206 		break;
3207 
3208 	case ENC_ISO_8859_10:
3209 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_10);
3210 		break;
3211 
3212 	case ENC_ISO_8859_11:
3213 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_11);
3214 		break;
3215 
3216 	case ENC_ISO_8859_13:
3217 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_13);
3218 		break;
3219 
3220 	case ENC_ISO_8859_14:
3221 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_14);
3222 		break;
3223 
3224 	case ENC_ISO_8859_15:
3225 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_15);
3226 		break;
3227 
3228 	case ENC_ISO_8859_16:
3229 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_iso_8859_16);
3230 		break;
3231 
3232 	case ENC_WINDOWS_1250:
3233 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp1250);
3234 		break;
3235 
3236 	case ENC_WINDOWS_1251:
3237 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp1251);
3238 		break;
3239 
3240 	case ENC_WINDOWS_1252:
3241 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp1252);
3242 		break;
3243 
3244 	case ENC_MAC_ROMAN:
3245 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_mac_roman);
3246 		break;
3247 
3248 	case ENC_CP437:
3249 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp437);
3250 		break;
3251 
3252 	case ENC_CP855:
3253 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp855);
3254 		break;
3255 
3256 	case ENC_CP866:
3257 		strptr = tvb_get_string_unichar2(scope, tvb, offset, length, charset_table_cp866);
3258 		break;
3259 
3260 	case ENC_ISO_646_BASIC:
3261 		strptr = tvb_get_iso_646_string(scope, tvb, offset, length, charset_table_iso_646_basic);
3262 		break;
3263 
3264 	case ENC_3GPP_TS_23_038_7BITS_PACKED:
3265 		{
3266 			gint bit_offset  = offset << 3;
3267 			gint no_of_chars = (length << 3) / 7;
3268 			strptr = tvb_get_ts_23_038_7bits_string_packed(scope, tvb, bit_offset, no_of_chars);
3269 		}
3270 		break;
3271 
3272 	case ENC_ASCII_7BITS:
3273 		{
3274 			gint bit_offset  = offset << 3;
3275 			gint no_of_chars = (length << 3) / 7;
3276 			strptr = tvb_get_ascii_7bits_string(scope, tvb, bit_offset, no_of_chars);
3277 		}
3278 		break;
3279 
3280 	case ENC_EBCDIC:
3281 		/*
3282 		 * "Common" EBCDIC, covering all characters with the
3283 		 * same code point in all Roman-alphabet EBCDIC code
3284 		 * pages.
3285 		 */
3286 		strptr = tvb_get_nonascii_unichar2_string(scope, tvb, offset, length, charset_table_ebcdic);
3287 		break;
3288 
3289 	case ENC_EBCDIC_CP037:
3290 		/*
3291 		 * EBCDIC code page 037.
3292 		 */
3293 		strptr = tvb_get_nonascii_unichar2_string(scope, tvb, offset, length, charset_table_ebcdic_cp037);
3294 		break;
3295 
3296 	case ENC_T61:
3297 		strptr = tvb_get_t61_string(scope, tvb, offset, length);
3298 		break;
3299 
3300 	case ENC_BCD_DIGITS_0_9:
3301 		/*
3302 		 * Packed BCD, with digits 0-9.
3303 		 */
3304 		odd = (encoding & ENC_BCD_ODD_NUM_DIG) >> 16;
3305 		skip_first = (encoding & ENC_BCD_SKIP_FIRST) >> 17;
3306 		strptr = tvb_get_bcd_string(scope, tvb, offset, length, &Dgt0_9_bcd, skip_first, odd, FALSE);
3307 		break;
3308 
3309 	case ENC_KEYPAD_ABC_TBCD:
3310 		/*
3311 		 * Keypad-with-a/b/c "telephony BCD" - packed BCD, with
3312 		 * digits 0-9 and symbols *, #, a, b, and c.
3313 		 */
3314 		odd = (encoding & ENC_BCD_ODD_NUM_DIG) >> 16;
3315 		skip_first = (encoding & ENC_BCD_SKIP_FIRST) >> 17;
3316 		strptr = tvb_get_bcd_string(scope, tvb, offset, length, &Dgt_keypad_abc_tbcd, skip_first, odd, FALSE);
3317 		break;
3318 
3319 	case ENC_KEYPAD_BC_TBCD:
3320 		/*
3321 		 * Keypad-with-B/C "telephony BCD" - packed BCD, with
3322 		 * digits 0-9 and symbols B, C, *, and #.
3323 		 */
3324 		odd = (encoding & ENC_BCD_ODD_NUM_DIG) >> 16;
3325 		skip_first = (encoding & ENC_BCD_SKIP_FIRST) >> 17;
3326 		strptr = tvb_get_bcd_string(scope, tvb, offset, length, &Dgt_ansi_tbcd, skip_first, odd, FALSE);
3327 		break;
3328 
3329 	case ENC_3GPP_TS_23_038_7BITS_UNPACKED:
3330 		strptr = tvb_get_ts_23_038_7bits_string_unpacked(scope, tvb, offset, length);
3331 		break;
3332 
3333 	case ENC_ETSI_TS_102_221_ANNEX_A:
3334 		strptr = tvb_get_etsi_ts_102_221_annex_a_string(scope, tvb, offset, length);
3335 		break;
3336 
3337 	case ENC_GB18030:
3338 		strptr = tvb_get_gb18030_string(scope, tvb, offset, length);
3339 		break;
3340 
3341 	case ENC_EUC_KR:
3342 		strptr = tvb_get_euc_kr_string(scope, tvb, offset, length);
3343 		break;
3344 
3345 	case ENC_APN_STR:
3346 		strptr = tvb_get_apn_string(scope, tvb, offset, length);
3347 		break;
3348 	}
3349 	return strptr;
3350 }
3351 
3352 /*
3353  * This is like tvb_get_string_enc(), except that it handles null-padded
3354  * strings.
3355  *
3356  * Currently, string values are stored as UTF-8 null-terminated strings,
3357  * so nothing needs to be done differently for null-padded strings; we
3358  * could save a little memory by not storing the null padding.
3359  *
3360  * If we ever store string values differently, in a fashion that doesn't
3361  * involve null termination, that might change.
3362  */
3363 guint8 *
tvb_get_stringzpad(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,const gint length,const guint encoding)3364 tvb_get_stringzpad(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset,
3365 		   const gint length, const guint encoding)
3366 {
3367 	return tvb_get_string_enc(scope, tvb, offset, length, encoding);
3368 }
3369 
3370 /*
3371  * These routines are like the above routines, except that they handle
3372  * null-terminated strings.  They find the length of that string (and
3373  * throw an exception if the tvbuff ends before we find the null), and
3374  * also return through a pointer the length of the string, in bytes,
3375  * including the terminating null (the terminating null being 2 bytes
3376  * for UCS-2 and UTF-16, 4 bytes for UCS-4, and 1 byte for other
3377  * encodings).
3378  */
3379 static guint8 *
tvb_get_ascii_stringz(wmem_allocator_t * scope,tvbuff_t * tvb,gint offset,gint * lengthp)3380 tvb_get_ascii_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint *lengthp)
3381 {
3382 	guint	       size;
3383 	const guint8  *ptr;
3384 
3385 	size = tvb_strsize(tvb, offset);
3386 	ptr  = ensure_contiguous(tvb, offset, size);
3387 	/* XXX, conversion between signed/unsigned integer */
3388 	if (lengthp)
3389 		*lengthp = size;
3390 	return get_ascii_string(scope, ptr, size);
3391 }
3392 
3393 static guint8 *
tvb_get_iso_646_stringz(wmem_allocator_t * scope,tvbuff_t * tvb,gint offset,gint * lengthp,const gunichar2 table[0x80])3394 tvb_get_iso_646_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint *lengthp, const gunichar2 table[0x80])
3395 {
3396 	guint	       size;
3397 	const guint8  *ptr;
3398 
3399 	size = tvb_strsize(tvb, offset);
3400 	ptr  = ensure_contiguous(tvb, offset, size);
3401 	/* XXX, conversion between signed/unsigned integer */
3402 	if (lengthp)
3403 		*lengthp = size;
3404 	return get_iso_646_string(scope, ptr, size, table);
3405 }
3406 
3407 static guint8 *
tvb_get_utf_8_stringz(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,gint * lengthp)3408 tvb_get_utf_8_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint *lengthp)
3409 {
3410 	guint   size;
3411 	const guint8  *ptr;
3412 
3413 	size   = tvb_strsize(tvb, offset);
3414 	ptr = ensure_contiguous(tvb, offset, size);
3415 	/* XXX, conversion between signed/unsigned integer */
3416 	if (lengthp)
3417 		*lengthp = size;
3418 	return get_utf_8_string(scope, ptr, size);
3419 }
3420 
3421 static guint8 *
tvb_get_stringz_8859_1(wmem_allocator_t * scope,tvbuff_t * tvb,gint offset,gint * lengthp)3422 tvb_get_stringz_8859_1(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint *lengthp)
3423 {
3424 	guint size;
3425 	const guint8  *ptr;
3426 
3427 	size = tvb_strsize(tvb, offset);
3428 	ptr = ensure_contiguous(tvb, offset, size);
3429 	/* XXX, conversion between signed/unsigned integer */
3430 	if (lengthp)
3431 		*lengthp = size;
3432 	return get_8859_1_string(scope, ptr, size);
3433 }
3434 
3435 static guint8 *
tvb_get_stringz_unichar2(wmem_allocator_t * scope,tvbuff_t * tvb,gint offset,gint * lengthp,const gunichar2 table[0x80])3436 tvb_get_stringz_unichar2(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint *lengthp, const gunichar2 table[0x80])
3437 {
3438 	guint size;
3439 	const guint8  *ptr;
3440 
3441 	size = tvb_strsize(tvb, offset);
3442 	ptr = ensure_contiguous(tvb, offset, size);
3443 	/* XXX, conversion between signed/unsigned integer */
3444 	if (lengthp)
3445 		*lengthp = size;
3446 	return get_unichar2_string(scope, ptr, size, table);
3447 }
3448 
3449 /*
3450  * Given a tvbuff and an offset, with the offset assumed to refer to
3451  * a null-terminated string, find the length of that string (and throw
3452  * an exception if the tvbuff ends before we find the null), ensure that
3453  * the TVB is flat, and return a pointer to the string (in the TVB).
3454  * Also return the length of the string (including the terminating null)
3455  * through a pointer.
3456  *
3457  * As long as we aren't using composite TVBs, this saves the cycles used
3458  * (often unnecessariliy) in allocating a buffer and copying the string into
3459  * it.  (If we do start using composite TVBs, we may want to replace this
3460  * function with the _ephemeral version.)
3461  */
3462 const guint8 *
tvb_get_const_stringz(tvbuff_t * tvb,const gint offset,gint * lengthp)3463 tvb_get_const_stringz(tvbuff_t *tvb, const gint offset, gint *lengthp)
3464 {
3465 	guint         size;
3466 	const guint8 *strptr;
3467 
3468 	size   = tvb_strsize(tvb, offset);
3469 	strptr = ensure_contiguous(tvb, offset, size);
3470 	if (lengthp)
3471 		*lengthp = size;
3472 	return strptr;
3473 }
3474 
3475 static gchar *
tvb_get_ucs_2_stringz(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,gint * lengthp,const guint encoding)3476 tvb_get_ucs_2_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint *lengthp, const guint encoding)
3477 {
3478 	gint           size;    /* Number of bytes in string */
3479 	const guint8  *ptr;
3480 
3481 	size = tvb_unicode_strsize(tvb, offset);
3482 	ptr = ensure_contiguous(tvb, offset, size);
3483 	/* XXX, conversion between signed/unsigned integer */
3484 	if (lengthp)
3485 		*lengthp = size;
3486 	return get_ucs_2_string(scope, ptr, size, encoding);
3487 }
3488 
3489 static gchar *
tvb_get_utf_16_stringz(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,gint * lengthp,const guint encoding)3490 tvb_get_utf_16_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint *lengthp, const guint encoding)
3491 {
3492 	gint           size;
3493 	const guint8  *ptr;
3494 
3495 	size = tvb_unicode_strsize(tvb, offset);
3496 	ptr = ensure_contiguous(tvb, offset, size);
3497 	/* XXX, conversion between signed/unsigned integer */
3498 	if (lengthp)
3499 		*lengthp = size;
3500 	return get_utf_16_string(scope, ptr, size, encoding);
3501 }
3502 
3503 static gchar *
tvb_get_ucs_4_stringz(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,gint * lengthp,const guint encoding)3504 tvb_get_ucs_4_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint *lengthp, const guint encoding)
3505 {
3506 	gint           size;
3507 	gunichar       uchar;
3508 	const guint8  *ptr;
3509 
3510 	size = 0;
3511 	do {
3512 		/* Endianness doesn't matter when looking for null */
3513 		uchar = tvb_get_ntohl(tvb, offset + size);
3514 		size += 4;
3515 	} while(uchar != 0);
3516 
3517 	ptr = ensure_contiguous(tvb, offset, size);
3518 	/* XXX, conversion between signed/unsigned integer */
3519 	if (lengthp)
3520 		*lengthp = size;
3521 	return get_ucs_4_string(scope, ptr, size, encoding);
3522 }
3523 
3524 static guint8 *
tvb_get_nonascii_unichar2_stringz(wmem_allocator_t * scope,tvbuff_t * tvb,gint offset,gint * lengthp,const gunichar2 table[256])3525 tvb_get_nonascii_unichar2_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint *lengthp, const gunichar2 table[256])
3526 {
3527 	guint	       size;
3528 	const guint8  *ptr;
3529 
3530 	size = tvb_strsize(tvb, offset);
3531 	ptr  = ensure_contiguous(tvb, offset, size);
3532 	/* XXX, conversion between signed/unsigned integer */
3533 	if (lengthp)
3534 		*lengthp = size;
3535 	return get_nonascii_unichar2_string(scope, ptr, size, table);
3536 }
3537 
3538 static guint8 *
tvb_get_t61_stringz(wmem_allocator_t * scope,tvbuff_t * tvb,gint offset,gint * lengthp)3539 tvb_get_t61_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint *lengthp)
3540 {
3541 	guint	       size;
3542 	const guint8  *ptr;
3543 
3544 	size = tvb_strsize(tvb, offset);
3545 	ptr  = ensure_contiguous(tvb, offset, size);
3546 	/* XXX, conversion between signed/unsigned integer */
3547 	if (lengthp)
3548 		*lengthp = size;
3549 	return get_t61_string(scope, ptr, size);
3550 }
3551 
3552 static guint8 *
tvb_get_gb18030_stringz(wmem_allocator_t * scope,tvbuff_t * tvb,gint offset,gint * lengthp)3553 tvb_get_gb18030_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint *lengthp)
3554 {
3555 	guint          size;
3556 	const guint8  *ptr;
3557 
3558 	size = tvb_strsize(tvb, offset);
3559 	ptr  = ensure_contiguous(tvb, offset, size);
3560 	/* XXX, conversion between signed/unsigned integer */
3561 	if (lengthp)
3562 		*lengthp = size;
3563 	return get_gb18030_string(scope, ptr, size);
3564 }
3565 
3566 static guint8 *
tvb_get_euc_kr_stringz(wmem_allocator_t * scope,tvbuff_t * tvb,gint offset,gint * lengthp)3567 tvb_get_euc_kr_stringz(wmem_allocator_t *scope, tvbuff_t *tvb, gint offset, gint  *lengthp)
3568 {
3569 	guint          size;
3570 	const guint8  *ptr;
3571 
3572 	size = tvb_strsize(tvb, offset);
3573 	ptr  = ensure_contiguous(tvb, offset, size);
3574 	/* XXX, conversion between signed/unsigned integer */
3575 	if (lengthp)
3576 		*lengthp = size;
3577 	return get_euc_kr_string(scope, ptr, size);
3578 }
3579 
3580 guint8 *
tvb_get_stringz_enc(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,gint * lengthp,const guint encoding)3581 tvb_get_stringz_enc(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint *lengthp, const guint encoding)
3582 {
3583 	guint8 *strptr;
3584 
3585 	DISSECTOR_ASSERT(tvb && tvb->initialized);
3586 
3587 	switch (encoding & ENC_CHARENCODING_MASK) {
3588 
3589 	case ENC_ASCII:
3590 	default:
3591 		/*
3592 		 * For now, we treat bogus values as meaning
3593 		 * "ASCII" rather than reporting an error,
3594 		 * for the benefit of old dissectors written
3595 		 * when the last argument to proto_tree_add_item()
3596 		 * was a gboolean for the byte order, not an
3597 		 * encoding value, and passed non-zero values
3598 		 * other than TRUE to mean "little-endian".
3599 		 */
3600 		strptr = tvb_get_ascii_stringz(scope, tvb, offset, lengthp);
3601 		break;
3602 
3603 	case ENC_UTF_8:
3604 		/*
3605 		 * XXX - should map all invalid UTF-8 sequences
3606 		 * to a "substitute" UTF-8 character.
3607 		 * XXX - should map code points > 10FFFF to REPLACEMENT
3608 		 * CHARACTERs.
3609 		 */
3610 		strptr = tvb_get_utf_8_stringz(scope, tvb, offset, lengthp);
3611 		break;
3612 
3613 	case ENC_UTF_16:
3614 		strptr = tvb_get_utf_16_stringz(scope, tvb, offset, lengthp,
3615 		    encoding & ENC_LITTLE_ENDIAN);
3616 		break;
3617 
3618 	case ENC_UCS_2:
3619 		strptr = tvb_get_ucs_2_stringz(scope, tvb, offset, lengthp,
3620 		    encoding & ENC_LITTLE_ENDIAN);
3621 		break;
3622 
3623 	case ENC_UCS_4:
3624 		strptr = tvb_get_ucs_4_stringz(scope, tvb, offset, lengthp,
3625 		    encoding & ENC_LITTLE_ENDIAN);
3626 		break;
3627 
3628 	case ENC_ISO_8859_1:
3629 		/*
3630 		 * ISO 8859-1 printable code point values are equal
3631 		 * to the equivalent Unicode code point value, so
3632 		 * no translation table is needed.
3633 		 */
3634 		strptr = tvb_get_stringz_8859_1(scope, tvb, offset, lengthp);
3635 		break;
3636 
3637 	case ENC_ISO_8859_2:
3638 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_2);
3639 		break;
3640 
3641 	case ENC_ISO_8859_3:
3642 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_3);
3643 		break;
3644 
3645 	case ENC_ISO_8859_4:
3646 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_4);
3647 		break;
3648 
3649 	case ENC_ISO_8859_5:
3650 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_5);
3651 		break;
3652 
3653 	case ENC_ISO_8859_6:
3654 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_6);
3655 		break;
3656 
3657 	case ENC_ISO_8859_7:
3658 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_7);
3659 		break;
3660 
3661 	case ENC_ISO_8859_8:
3662 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_8);
3663 		break;
3664 
3665 	case ENC_ISO_8859_9:
3666 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_9);
3667 		break;
3668 
3669 	case ENC_ISO_8859_10:
3670 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_10);
3671 		break;
3672 
3673 	case ENC_ISO_8859_11:
3674 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_11);
3675 		break;
3676 
3677 	case ENC_ISO_8859_13:
3678 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_13);
3679 		break;
3680 
3681 	case ENC_ISO_8859_14:
3682 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_14);
3683 		break;
3684 
3685 	case ENC_ISO_8859_15:
3686 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_15);
3687 		break;
3688 
3689 	case ENC_ISO_8859_16:
3690 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_iso_8859_16);
3691 		break;
3692 
3693 	case ENC_WINDOWS_1250:
3694 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp1250);
3695 		break;
3696 
3697 	case ENC_WINDOWS_1251:
3698 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp1251);
3699 		break;
3700 
3701 	case ENC_WINDOWS_1252:
3702 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp1252);
3703 		break;
3704 
3705 	case ENC_MAC_ROMAN:
3706 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_mac_roman);
3707 		break;
3708 
3709 	case ENC_CP437:
3710 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp437);
3711 		break;
3712 
3713 	case ENC_CP855:
3714 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp855);
3715 		break;
3716 
3717 	case ENC_CP866:
3718 		strptr = tvb_get_stringz_unichar2(scope, tvb, offset, lengthp, charset_table_cp866);
3719 		break;
3720 
3721 	case ENC_ISO_646_BASIC:
3722 		strptr = tvb_get_iso_646_stringz(scope, tvb, offset, lengthp, charset_table_iso_646_basic);
3723 		break;
3724 
3725 	case ENC_3GPP_TS_23_038_7BITS_PACKED:
3726 	case ENC_3GPP_TS_23_038_7BITS_UNPACKED:
3727 	case ENC_ETSI_TS_102_221_ANNEX_A:
3728 		REPORT_DISSECTOR_BUG("TS 23.038 7bits has no null character and doesn't support null-terminated strings");
3729 		break;
3730 
3731 	case ENC_ASCII_7BITS:
3732 		REPORT_DISSECTOR_BUG("tvb_get_stringz_enc function with ENC_ASCII_7BITS not implemented yet");
3733 		break;
3734 
3735 	case ENC_EBCDIC:
3736 		/*
3737 		 * "Common" EBCDIC, covering all characters with the
3738 		 * same code point in all Roman-alphabet EBCDIC code
3739 		 * pages.
3740 		 */
3741 		strptr = tvb_get_nonascii_unichar2_stringz(scope, tvb, offset, lengthp, charset_table_ebcdic);
3742 		break;
3743 
3744 	case ENC_EBCDIC_CP037:
3745 		/*
3746 		 * EBCDIC code page 037.
3747 		 */
3748 		strptr = tvb_get_nonascii_unichar2_stringz(scope, tvb, offset, lengthp, charset_table_ebcdic_cp037);
3749 		break;
3750 
3751 	case ENC_T61:
3752 		strptr = tvb_get_t61_stringz(scope, tvb, offset, lengthp);
3753 		break;
3754 
3755 	case ENC_GB18030:
3756 		strptr = tvb_get_gb18030_stringz(scope, tvb, offset, lengthp);
3757 		break;
3758 
3759 	case ENC_EUC_KR:
3760 		strptr = tvb_get_euc_kr_stringz(scope, tvb, offset, lengthp);
3761 		break;
3762 	}
3763 
3764 	return strptr;
3765 }
3766 
3767 /* Looks for a stringz (NUL-terminated string) in tvbuff and copies
3768  * no more than bufsize number of bytes, including terminating NUL, to buffer.
3769  * Returns length of string (not including terminating NUL), or -1 if the string was
3770  * truncated in the buffer due to not having reached the terminating NUL.
3771  * In this way, it acts like g_snprintf().
3772  *
3773  * bufsize MUST be greater than 0.
3774  *
3775  * When processing a packet where the remaining number of bytes is less
3776  * than bufsize, an exception is not thrown if the end of the packet
3777  * is reached before the NUL is found. If no NUL is found before reaching
3778  * the end of the short packet, -1 is still returned, and the string
3779  * is truncated with a NUL, albeit not at buffer[bufsize - 1], but
3780  * at the correct spot, terminating the string.
3781  *
3782  * *bytes_copied will contain the number of bytes actually copied,
3783  * including the terminating-NUL.
3784  */
3785 static gint
_tvb_get_nstringz(tvbuff_t * tvb,const gint offset,const guint bufsize,guint8 * buffer,gint * bytes_copied)3786 _tvb_get_nstringz(tvbuff_t *tvb, const gint offset, const guint bufsize, guint8* buffer, gint *bytes_copied)
3787 {
3788 	gint     stringlen;
3789 	guint    abs_offset = 0;
3790 	gint     limit, len = 0;
3791 	gboolean decreased_max = FALSE;
3792 
3793 	/* Only read to end of tvbuff, w/o throwing exception. */
3794 	check_offset_length(tvb, offset, -1, &abs_offset, &len);
3795 
3796 	/* There must at least be room for the terminating NUL. */
3797 	DISSECTOR_ASSERT(bufsize != 0);
3798 
3799 	/* If there's no room for anything else, just return the NUL. */
3800 	if (bufsize == 1) {
3801 		buffer[0] = 0;
3802 		*bytes_copied = 1;
3803 		return 0;
3804 	}
3805 
3806 	/* check_offset_length() won't throw an exception if we're
3807 	 * looking at the byte immediately after the end of the tvbuff. */
3808 	if (len == 0) {
3809 		THROW(ReportedBoundsError);
3810 	}
3811 
3812 	/* This should not happen because check_offset_length() would
3813 	 * have already thrown an exception if 'offset' were out-of-bounds.
3814 	 */
3815 	DISSECTOR_ASSERT(len != -1);
3816 
3817 	/*
3818 	 * If we've been passed a negative number, bufsize will
3819 	 * be huge.
3820 	 */
3821 	DISSECTOR_ASSERT(bufsize <= G_MAXINT);
3822 
3823 	if ((guint)len < bufsize) {
3824 		limit = len;
3825 		decreased_max = TRUE;
3826 	}
3827 	else {
3828 		limit = bufsize;
3829 	}
3830 
3831 	stringlen = tvb_strnlen(tvb, abs_offset, limit - 1);
3832 	/* If NUL wasn't found, copy the data and return -1 */
3833 	if (stringlen == -1) {
3834 		tvb_memcpy(tvb, buffer, abs_offset, limit);
3835 		if (decreased_max) {
3836 			buffer[limit] = 0;
3837 			/* Add 1 for the extra NUL that we set at buffer[limit],
3838 			 * pretending that it was copied as part of the string. */
3839 			*bytes_copied = limit + 1;
3840 		}
3841 		else {
3842 			*bytes_copied = limit;
3843 		}
3844 		return -1;
3845 	}
3846 
3847 	/* Copy the string to buffer */
3848 	tvb_memcpy(tvb, buffer, abs_offset, stringlen + 1);
3849 	*bytes_copied = stringlen + 1;
3850 	return stringlen;
3851 }
3852 
3853 /* Looks for a stringz (NUL-terminated string) in tvbuff and copies
3854  * no more than bufsize number of bytes, including terminating NUL, to buffer.
3855  * Returns length of string (not including terminating NUL), or -1 if the string was
3856  * truncated in the buffer due to not having reached the terminating NUL.
3857  * In this way, it acts like g_snprintf().
3858  *
3859  * When processing a packet where the remaining number of bytes is less
3860  * than bufsize, an exception is not thrown if the end of the packet
3861  * is reached before the NUL is found. If no NUL is found before reaching
3862  * the end of the short packet, -1 is still returned, and the string
3863  * is truncated with a NUL, albeit not at buffer[bufsize - 1], but
3864  * at the correct spot, terminating the string.
3865  */
3866 gint
tvb_get_nstringz(tvbuff_t * tvb,const gint offset,const guint bufsize,guint8 * buffer)3867 tvb_get_nstringz(tvbuff_t *tvb, const gint offset, const guint bufsize, guint8 *buffer)
3868 {
3869 	gint bytes_copied;
3870 
3871 	DISSECTOR_ASSERT(tvb && tvb->initialized);
3872 
3873 	return _tvb_get_nstringz(tvb, offset, bufsize, buffer, &bytes_copied);
3874 }
3875 
3876 /* Like tvb_get_nstringz(), but never returns -1. The string is guaranteed to
3877  * have a terminating NUL. If the string was truncated when copied into buffer,
3878  * a NUL is placed at the end of buffer to terminate it.
3879  */
3880 gint
tvb_get_nstringz0(tvbuff_t * tvb,const gint offset,const guint bufsize,guint8 * buffer)3881 tvb_get_nstringz0(tvbuff_t *tvb, const gint offset, const guint bufsize, guint8* buffer)
3882 {
3883 	gint	len, bytes_copied;
3884 
3885 	DISSECTOR_ASSERT(tvb && tvb->initialized);
3886 
3887 	len = _tvb_get_nstringz(tvb, offset, bufsize, buffer, &bytes_copied);
3888 
3889 	if (len == -1) {
3890 		buffer[bufsize - 1] = 0;
3891 		return bytes_copied - 1;
3892 	}
3893 	else {
3894 		return len;
3895 	}
3896 }
3897 
3898 /*
3899  * Given a tvbuff, an offset into the tvbuff, a buffer, and a buffer size,
3900  * extract as many raw bytes from the tvbuff, starting at the offset,
3901  * as 1) are available in the tvbuff and 2) will fit in the buffer, leaving
3902  * room for a terminating NUL.
3903  */
3904 gint
tvb_get_raw_bytes_as_string(tvbuff_t * tvb,const gint offset,char * buffer,size_t bufsize)3905 tvb_get_raw_bytes_as_string(tvbuff_t *tvb, const gint offset, char *buffer, size_t bufsize)
3906 {
3907 	gint     len = 0;
3908 
3909 	DISSECTOR_ASSERT(tvb && tvb->initialized);
3910 
3911 	/* There must be room for the string and the terminating NUL. */
3912 	DISSECTOR_ASSERT(bufsize > 0);
3913 
3914 	DISSECTOR_ASSERT(bufsize - 1 < G_MAXINT);
3915 
3916 	len = tvb_captured_length_remaining(tvb, offset);
3917 	if (len <= 0) {
3918 		buffer[0] = '\0';
3919 		return 0;
3920 	}
3921 	if (len > (gint)(bufsize - 1))
3922 		len = (gint)(bufsize - 1);
3923 
3924 	/* Copy the string to buffer */
3925 	tvb_memcpy(tvb, buffer, offset, len);
3926 	buffer[len] = '\0';
3927 	return len;
3928 }
3929 
tvb_ascii_isprint(tvbuff_t * tvb,const gint offset,const gint length)3930 gboolean tvb_ascii_isprint(tvbuff_t *tvb, const gint offset, const gint length)
3931 {
3932 	const guint8* buf = tvb_get_ptr(tvb, offset, length);
3933 
3934 	for (int i = 0; i < length; i++, buf++)
3935 		if (!g_ascii_isprint(*buf))
3936 			return FALSE;
3937 
3938 	return TRUE;
3939 }
3940 
3941 
3942 static ws_mempbrk_pattern pbrk_crlf;
3943 /*
3944  * Given a tvbuff, an offset into the tvbuff, and a length that starts
3945  * at that offset (which may be -1 for "all the way to the end of the
3946  * tvbuff"), find the end of the (putative) line that starts at the
3947  * specified offset in the tvbuff, going no further than the specified
3948  * length.
3949  *
3950  * Return the length of the line (not counting the line terminator at
3951  * the end), or, if we don't find a line terminator:
3952  *
3953  * if "desegment" is true, return -1;
3954  *
3955  * if "desegment" is false, return the amount of data remaining in
3956  * the buffer.
3957  *
3958  * If "next_offset" is not NULL, set "*next_offset" to the offset of the
3959  * character past the line terminator, or past the end of the buffer if
3960  * we don't find a line terminator.  (It's not set if we return -1.)
3961  */
3962 gint
tvb_find_line_end(tvbuff_t * tvb,const gint offset,int len,gint * next_offset,const gboolean desegment)3963 tvb_find_line_end(tvbuff_t *tvb, const gint offset, int len, gint *next_offset, const gboolean desegment)
3964 {
3965 	gint   eob_offset;
3966 	gint   eol_offset;
3967 	int    linelen;
3968 	guchar found_needle = 0;
3969 	static gboolean compiled = FALSE;
3970 
3971 	DISSECTOR_ASSERT(tvb && tvb->initialized);
3972 
3973 	if (len == -1) {
3974 		len = _tvb_captured_length_remaining(tvb, offset);
3975 		/* if offset is past the end of the tvbuff, len is now 0 */
3976 	}
3977 
3978 	eob_offset = offset + len;
3979 
3980 	if (!compiled) {
3981 		ws_mempbrk_compile(&pbrk_crlf, "\r\n");
3982 		compiled = TRUE;
3983 	}
3984 
3985 	/*
3986 	 * Look either for a CR or an LF.
3987 	 */
3988 	eol_offset = tvb_ws_mempbrk_pattern_guint8(tvb, offset, len, &pbrk_crlf, &found_needle);
3989 	if (eol_offset == -1) {
3990 		/*
3991 		 * No CR or LF - line is presumably continued in next packet.
3992 		 */
3993 		if (desegment) {
3994 			/*
3995 			 * Tell our caller we saw no EOL, so they can
3996 			 * try to desegment and get the entire line
3997 			 * into one tvbuff.
3998 			 */
3999 			return -1;
4000 		} else {
4001 			/*
4002 			 * Pretend the line runs to the end of the tvbuff.
4003 			 */
4004 			linelen = eob_offset - offset;
4005 			if (next_offset)
4006 				*next_offset = eob_offset;
4007 		}
4008 	} else {
4009 		/*
4010 		 * Find the number of bytes between the starting offset
4011 		 * and the CR or LF.
4012 		 */
4013 		linelen = eol_offset - offset;
4014 
4015 		/*
4016 		 * Is it a CR?
4017 		 */
4018 		if (found_needle == '\r') {
4019 			/*
4020 			 * Yes - is it followed by an LF?
4021 			 */
4022 			if (eol_offset + 1 >= eob_offset) {
4023 				/*
4024 				 * Dunno - the next byte isn't in this
4025 				 * tvbuff.
4026 				 */
4027 				if (desegment) {
4028 					/*
4029 					 * We'll return -1, although that
4030 					 * runs the risk that if the line
4031 					 * really *is* terminated with a CR,
4032 					 * we won't properly dissect this
4033 					 * tvbuff.
4034 					 *
4035 					 * It's probably more likely that
4036 					 * the line ends with CR-LF than
4037 					 * that it ends with CR by itself.
4038 					 */
4039 					return -1;
4040 				}
4041 			} else {
4042 				/*
4043 				 * Well, we can at least look at the next
4044 				 * byte.
4045 				 */
4046 				if (tvb_get_guint8(tvb, eol_offset + 1) == '\n') {
4047 					/*
4048 					 * It's an LF; skip over the CR.
4049 					 */
4050 					eol_offset++;
4051 				}
4052 			}
4053 		}
4054 
4055 		/*
4056 		 * Return the offset of the character after the last
4057 		 * character in the line, skipping over the last character
4058 		 * in the line terminator.
4059 		 */
4060 		if (next_offset)
4061 			*next_offset = eol_offset + 1;
4062 	}
4063 	return linelen;
4064 }
4065 
4066 static ws_mempbrk_pattern pbrk_crlf_dquote;
4067 /*
4068  * Given a tvbuff, an offset into the tvbuff, and a length that starts
4069  * at that offset (which may be -1 for "all the way to the end of the
4070  * tvbuff"), find the end of the (putative) line that starts at the
4071  * specified offset in the tvbuff, going no further than the specified
4072  * length.
4073  *
4074  * However, treat quoted strings inside the buffer specially - don't
4075  * treat newlines in quoted strings as line terminators.
4076  *
4077  * Return the length of the line (not counting the line terminator at
4078  * the end), or the amount of data remaining in the buffer if we don't
4079  * find a line terminator.
4080  *
4081  * If "next_offset" is not NULL, set "*next_offset" to the offset of the
4082  * character past the line terminator, or past the end of the buffer if
4083  * we don't find a line terminator.
4084  */
4085 gint
tvb_find_line_end_unquoted(tvbuff_t * tvb,const gint offset,int len,gint * next_offset)4086 tvb_find_line_end_unquoted(tvbuff_t *tvb, const gint offset, int len, gint *next_offset)
4087 {
4088 	gint     cur_offset, char_offset;
4089 	gboolean is_quoted;
4090 	guchar   c = 0;
4091 	gint     eob_offset;
4092 	int      linelen;
4093 	static gboolean compiled = FALSE;
4094 
4095 	DISSECTOR_ASSERT(tvb && tvb->initialized);
4096 
4097 	if (len == -1)
4098 		len = _tvb_captured_length_remaining(tvb, offset);
4099 
4100 	if (!compiled) {
4101 		ws_mempbrk_compile(&pbrk_crlf_dquote, "\r\n\"");
4102 		compiled = TRUE;
4103 	}
4104 
4105 	/*
4106 	 * XXX - what if "len" is still -1, meaning "offset is past the
4107 	 * end of the tvbuff"?
4108 	 */
4109 	eob_offset = offset + len;
4110 
4111 	cur_offset = offset;
4112 	is_quoted  = FALSE;
4113 	for (;;) {
4114 			/*
4115 		 * Is this part of the string quoted?
4116 		 */
4117 		if (is_quoted) {
4118 			/*
4119 			 * Yes - look only for the terminating quote.
4120 			 */
4121 			char_offset = tvb_find_guint8(tvb, cur_offset, len,
4122 				'"');
4123 		} else {
4124 			/*
4125 			 * Look either for a CR, an LF, or a '"'.
4126 			 */
4127 			char_offset = tvb_ws_mempbrk_pattern_guint8(tvb, cur_offset, len, &pbrk_crlf_dquote, &c);
4128 		}
4129 		if (char_offset == -1) {
4130 			/*
4131 			 * Not found - line is presumably continued in
4132 			 * next packet.
4133 			 * We pretend the line runs to the end of the tvbuff.
4134 			 */
4135 			linelen = eob_offset - offset;
4136 			if (next_offset)
4137 				*next_offset = eob_offset;
4138 			break;
4139 		}
4140 
4141 		if (is_quoted) {
4142 			/*
4143 			 * We're processing a quoted string.
4144 			 * We only looked for ", so we know it's a ";
4145 			 * as we're processing a quoted string, it's a
4146 			 * closing quote.
4147 			 */
4148 			is_quoted = FALSE;
4149 		} else {
4150 			/*
4151 			 * OK, what is it?
4152 			 */
4153 			if (c == '"') {
4154 				/*
4155 				 * Un-quoted "; it begins a quoted
4156 				 * string.
4157 				 */
4158 				is_quoted = TRUE;
4159 			} else {
4160 				/*
4161 				 * It's a CR or LF; we've found a line
4162 				 * terminator.
4163 				 *
4164 				 * Find the number of bytes between the
4165 				 * starting offset and the CR or LF.
4166 				 */
4167 				linelen = char_offset - offset;
4168 
4169 				/*
4170 				 * Is it a CR?
4171 				 */
4172 				if (c == '\r') {
4173 					/*
4174 					 * Yes; is it followed by an LF?
4175 					 */
4176 					if (char_offset + 1 < eob_offset &&
4177 						tvb_get_guint8(tvb, char_offset + 1)
4178 						  == '\n') {
4179 						/*
4180 						 * Yes; skip over the CR.
4181 						 */
4182 						char_offset++;
4183 					}
4184 				}
4185 
4186 				/*
4187 				 * Return the offset of the character after
4188 				 * the last character in the line, skipping
4189 				 * over the last character in the line
4190 				 * terminator, and quit.
4191 				 */
4192 				if (next_offset)
4193 					*next_offset = char_offset + 1;
4194 				break;
4195 			}
4196 		}
4197 
4198 		/*
4199 		 * Step past the character we found.
4200 		 */
4201 		cur_offset = char_offset + 1;
4202 		if (cur_offset >= eob_offset) {
4203 			/*
4204 			 * The character we found was the last character
4205 			 * in the tvbuff - line is presumably continued in
4206 			 * next packet.
4207 			 * We pretend the line runs to the end of the tvbuff.
4208 			 */
4209 			linelen = eob_offset - offset;
4210 			if (next_offset)
4211 				*next_offset = eob_offset;
4212 			break;
4213 		}
4214 	}
4215 	return linelen;
4216 }
4217 
4218 /*
4219  * Copied from the mgcp dissector. (This function should be moved to /epan )
4220  * tvb_skip_wsp - Returns the position in tvb of the first non-whitespace
4221  *				  character following offset or offset + maxlength -1 whichever
4222  *				  is smaller.
4223  *
4224  * Parameters:
4225  * tvb - The tvbuff in which we are skipping whitespace.
4226  * offset - The offset in tvb from which we begin trying to skip whitespace.
4227  * maxlength - The maximum distance from offset that we may try to skip
4228  * whitespace.
4229  *
4230  * Returns: The position in tvb of the first non-whitespace
4231  *			character following offset or offset + maxlength -1 whichever
4232  *			is smaller.
4233  */
4234 gint
tvb_skip_wsp(tvbuff_t * tvb,const gint offset,const gint maxlength)4235 tvb_skip_wsp(tvbuff_t *tvb, const gint offset, const gint maxlength)
4236 {
4237 	gint   counter = offset;
4238 	gint   end, tvb_len;
4239 	guint8 tempchar;
4240 
4241 	DISSECTOR_ASSERT(tvb && tvb->initialized);
4242 
4243 	/* Get the length remaining */
4244 	/*tvb_len = tvb_captured_length(tvb);*/
4245 	tvb_len = tvb->length;
4246 
4247 	end     = offset + maxlength;
4248 	if (end >= tvb_len)
4249 	{
4250 		end = tvb_len;
4251 	}
4252 
4253 	/* Skip past spaces, tabs, CRs and LFs until run out or meet something else */
4254 	for (counter = offset;
4255 		 counter < end &&
4256 		  ((tempchar = tvb_get_guint8(tvb,counter)) == ' ' ||
4257 		  tempchar == '\t' || tempchar == '\r' || tempchar == '\n');
4258 		 counter++);
4259 
4260 	return (counter);
4261 }
4262 
4263 gint
tvb_skip_wsp_return(tvbuff_t * tvb,const gint offset)4264 tvb_skip_wsp_return(tvbuff_t *tvb, const gint offset)
4265 {
4266 	gint   counter = offset;
4267 	guint8 tempchar;
4268 
4269 	DISSECTOR_ASSERT(tvb && tvb->initialized);
4270 
4271 	for (counter = offset; counter > 0 &&
4272 		((tempchar = tvb_get_guint8(tvb,counter)) == ' ' ||
4273 		tempchar == '\t' || tempchar == '\n' || tempchar == '\r'); counter--);
4274 	counter++;
4275 
4276 	return (counter);
4277 }
4278 
4279 int
tvb_skip_guint8(tvbuff_t * tvb,int offset,const int maxlength,const guint8 ch)4280 tvb_skip_guint8(tvbuff_t *tvb, int offset, const int maxlength, const guint8 ch)
4281 {
4282 	int end, tvb_len;
4283 
4284 	DISSECTOR_ASSERT(tvb && tvb->initialized);
4285 
4286 	/* Get the length remaining */
4287 	/*tvb_len = tvb_captured_length(tvb);*/
4288 	tvb_len = tvb->length;
4289 
4290 	end     = offset + maxlength;
4291 	if (end >= tvb_len)
4292 		end = tvb_len;
4293 
4294 	while (offset < end) {
4295 		guint8 tempch = tvb_get_guint8(tvb, offset);
4296 
4297 		if (tempch != ch)
4298 			break;
4299 		offset++;
4300 	}
4301 
4302 	return offset;
4303 }
4304 
4305 static ws_mempbrk_pattern pbrk_whitespace;
4306 
tvb_get_token_len(tvbuff_t * tvb,const gint offset,int len,gint * next_offset,const gboolean desegment)4307 int tvb_get_token_len(tvbuff_t *tvb, const gint offset, int len, gint *next_offset, const gboolean desegment)
4308 {
4309 	gint   eob_offset;
4310 	gint   eot_offset;
4311 	int    tokenlen;
4312 	guchar found_needle = 0;
4313 	static gboolean compiled = FALSE;
4314 
4315 	DISSECTOR_ASSERT(tvb && tvb->initialized);
4316 
4317 	if (len == -1) {
4318 		len = _tvb_captured_length_remaining(tvb, offset);
4319 		/* if offset is past the end of the tvbuff, len is now 0 */
4320 	}
4321 
4322 	eob_offset = offset + len;
4323 
4324 	if (!compiled) {
4325 		ws_mempbrk_compile(&pbrk_whitespace, " \r\n");
4326 		compiled = TRUE;
4327 	}
4328 
4329 	/*
4330 	* Look either for a space, CR, or LF.
4331 	*/
4332 	eot_offset = tvb_ws_mempbrk_pattern_guint8(tvb, offset, len, &pbrk_whitespace, &found_needle);
4333 	if (eot_offset == -1) {
4334 		/*
4335 		* No space, CR or LF - token is presumably continued in next packet.
4336 		*/
4337 		if (desegment) {
4338 			/*
4339 			* Tell our caller we saw no whitespace, so they can
4340 			* try to desegment and get the entire line
4341 			* into one tvbuff.
4342 			*/
4343 			return -1;
4344 		}
4345 		else {
4346 			/*
4347 			* Pretend the token runs to the end of the tvbuff.
4348 			*/
4349 			tokenlen = eob_offset - offset;
4350 			if (next_offset)
4351 				*next_offset = eob_offset;
4352 		}
4353 	}
4354 	else {
4355 		/*
4356 		* Find the number of bytes between the starting offset
4357 		* and the space, CR or LF.
4358 		*/
4359 		tokenlen = eot_offset - offset;
4360 
4361 		/*
4362 		* Return the offset of the character after the last
4363 		* character in the line, skipping over the last character
4364 		* in the line terminator.
4365 		*/
4366 		if (next_offset)
4367 			*next_offset = eot_offset + 1;
4368 	}
4369 	return tokenlen;
4370 }
4371 
4372 /*
4373  * Format a bunch of data from a tvbuff as bytes, returning a pointer
4374  * to the string with the formatted data, with "punct" as a byte
4375  * separator.
4376  */
4377 gchar *
tvb_bytes_to_str_punct(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,const gint len,const gchar punct)4378 tvb_bytes_to_str_punct(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, const gint len, const gchar punct)
4379 {
4380 	return bytes_to_str_punct(scope, ensure_contiguous(tvb, offset, len), len, punct);
4381 }
4382 
4383 /*
4384  * Given a wmem scope, a tvbuff, an offset, a length, an input digit
4385  * set, and a boolean indicator, fetch BCD-encoded digits from a
4386  * tvbuff starting from either the low or high half byte of the
4387  * first byte depending on the boolean indicator (TRUE means "start
4388  * with the high half byte, ignoring the low half byte", and FALSE
4389  * means "start with the low half byte and proceed to the high half
4390  * byte), formating the digits into characters according to the
4391  * input digit set, and return a pointer to a UTF-8 string, allocated
4392  * using the wmem scope.  A high-order nibble of 0xf is considered a
4393  * 'filler' and will end the conversion. Similarrily if odd is set the last
4394  * high nibble will be omitted.
4395  */
4396 gchar *
tvb_get_bcd_string(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,gint len,const dgt_set_t * dgt,gboolean skip_first,gboolean odd,gboolean bigendian)4397 tvb_get_bcd_string(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, gint len, const dgt_set_t *dgt, gboolean skip_first, gboolean odd, gboolean bigendian)
4398 {
4399 	const guint8 *ptr;
4400 	int           i = 0;
4401 	char         *digit_str;
4402 	guint8        octet;
4403 
4404 	DISSECTOR_ASSERT(tvb && tvb->initialized);
4405 
4406 	if (len == -1) {
4407 		/*
4408 		 * Run to the end of the captured data.
4409 		 *
4410 		 * XXX - captured, or total?
4411 		 */
4412 		/*length = tvb_captured_length(tvb);*/
4413 		len = tvb->length;
4414 		if (len < offset) {
4415 			return (char *)"";
4416 		}
4417 		len -= offset;
4418 	}
4419 
4420 	ptr = ensure_contiguous(tvb, offset, len);
4421 
4422 	/*
4423 	 * XXX - map illegal digits (digits that map to 0) to REPLACEMENT
4424 	 * CHARACTER, and have all the tables in epan/tvbuff.c use 0 rather
4425 	 * than '?'?
4426 	 */
4427 	digit_str = (char *)wmem_alloc(scope, len*2 + 1);
4428 
4429 	while (len > 0) {
4430 		octet = *ptr;
4431 		if (!skip_first) {
4432 			if (bigendian) {
4433 				digit_str[i] = dgt->out[(octet >> 4) & 0x0f];
4434 			} else {
4435 				digit_str[i] = dgt->out[octet & 0x0f];
4436 			}
4437 			i++;
4438 		}
4439 		skip_first = FALSE;
4440 
4441 		/*
4442 		 * unpack second value in byte
4443 		 */
4444 		if (!bigendian) {
4445 			octet = octet >> 4;
4446 		}
4447 
4448 		if (octet == 0x0f) {
4449 			/*
4450 			 * This is the stop digit or a filler digit.  Ignore
4451 			 * it.
4452 			 */
4453 			break;
4454 		}
4455 		if ((len == 1) && (odd == TRUE )){
4456 			/* Last octet, skipp last high nibble incase of odd number of digits*/
4457 			break;
4458 		}
4459 		digit_str[i] = dgt->out[octet & 0x0f];
4460 		i++;
4461 
4462 		ptr++;
4463 		len--;
4464 	}
4465 	digit_str[i] = '\0';
4466 	return digit_str;
4467 }
4468 
4469 /* XXXX Fix me - needs odd indicator added */
4470 const gchar *
tvb_bcd_dig_to_str(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,const gint len,const dgt_set_t * dgt,gboolean skip_first)4471 tvb_bcd_dig_to_str(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, const gint len, const dgt_set_t *dgt, gboolean skip_first)
4472 {
4473 	if (!dgt)
4474 		dgt = &Dgt0_9_bcd;
4475 
4476 	return tvb_get_bcd_string(scope, tvb, offset, len, dgt, skip_first, FALSE, FALSE);
4477 }
4478 
4479 const gchar *
tvb_bcd_dig_to_str_be(wmem_allocator_t * scope,tvbuff_t * tvb,const gint offset,const gint len,const dgt_set_t * dgt,gboolean skip_first)4480 tvb_bcd_dig_to_str_be(wmem_allocator_t *scope, tvbuff_t *tvb, const gint offset, const gint len, const dgt_set_t *dgt, gboolean skip_first)
4481 {
4482 	if (!dgt)
4483 		dgt = &Dgt0_9_bcd;
4484 
4485 	return tvb_get_bcd_string(scope, tvb, offset, len, dgt, skip_first, FALSE, TRUE);
4486 }
4487 
4488 /*
4489  * Format a bunch of data from a tvbuff as bytes, returning a pointer
4490  * to the string with the formatted data.
4491  */
tvb_bytes_to_str(wmem_allocator_t * allocator,tvbuff_t * tvb,const gint offset,const gint len)4492 gchar *tvb_bytes_to_str(wmem_allocator_t *allocator, tvbuff_t *tvb,
4493     const gint offset, const gint len)
4494 {
4495 	DISSECTOR_ASSERT(len > 0);
4496 	return bytes_to_str(allocator, ensure_contiguous(tvb, offset, len), len);
4497 }
4498 
4499 /* Find a needle tvbuff within a haystack tvbuff. */
4500 gint
tvb_find_tvb(tvbuff_t * haystack_tvb,tvbuff_t * needle_tvb,const gint haystack_offset)4501 tvb_find_tvb(tvbuff_t *haystack_tvb, tvbuff_t *needle_tvb, const gint haystack_offset)
4502 {
4503 	guint	      haystack_abs_offset = 0, haystack_abs_length = 0;
4504 	const guint8 *haystack_data;
4505 	const guint8 *needle_data;
4506 	const guint   needle_len = needle_tvb->length;
4507 	const guint8 *location;
4508 
4509 	DISSECTOR_ASSERT(haystack_tvb && haystack_tvb->initialized);
4510 
4511 	if (haystack_tvb->length < 1 || needle_tvb->length < 1) {
4512 		return -1;
4513 	}
4514 
4515 	/* Get pointers to the tvbuffs' data. */
4516 	haystack_data = ensure_contiguous(haystack_tvb, 0, -1);
4517 	needle_data   = ensure_contiguous(needle_tvb, 0, -1);
4518 
4519 	check_offset_length(haystack_tvb, haystack_offset, -1,
4520 			&haystack_abs_offset, &haystack_abs_length);
4521 
4522 	location = epan_memmem(haystack_data + haystack_abs_offset, haystack_abs_length,
4523 			needle_data, needle_len);
4524 
4525 	if (location) {
4526 		return (gint) (location - haystack_data);
4527 	}
4528 
4529 	return -1;
4530 }
4531 
4532 gint
tvb_raw_offset(tvbuff_t * tvb)4533 tvb_raw_offset(tvbuff_t *tvb)
4534 {
4535 	return ((tvb->raw_offset==-1) ? (tvb->raw_offset = tvb_offset_from_real_beginning(tvb)) : tvb->raw_offset);
4536 }
4537 
4538 void
tvb_set_fragment(tvbuff_t * tvb)4539 tvb_set_fragment(tvbuff_t *tvb)
4540 {
4541 	tvb->flags |= TVBUFF_FRAGMENT;
4542 }
4543 
4544 struct tvbuff *
tvb_get_ds_tvb(tvbuff_t * tvb)4545 tvb_get_ds_tvb(tvbuff_t *tvb)
4546 {
4547 	return(tvb->ds_tvb);
4548 }
4549 
4550 guint
tvb_get_varint(tvbuff_t * tvb,guint offset,guint maxlen,guint64 * value,const guint encoding)4551 tvb_get_varint(tvbuff_t *tvb, guint offset, guint maxlen, guint64 *value, const guint encoding)
4552 {
4553 	*value = 0;
4554 
4555 	if (encoding & ENC_VARINT_PROTOBUF) {
4556 		guint i;
4557 		guint64 b; /* current byte */
4558 
4559 		for (i = 0; ((i < FT_VARINT_MAX_LEN) && (i < maxlen)); ++i) {
4560 			b = tvb_get_guint8(tvb, offset++);
4561 			*value |= ((b & 0x7F) << (i * 7)); /* add lower 7 bits to val */
4562 
4563 			if (b < 0x80) {
4564 				/* end successfully becauseof last byte's msb(most significant bit) is zero */
4565 				return i + 1;
4566 			}
4567 		}
4568 	} else if (encoding & ENC_VARINT_ZIGZAG) {
4569 		guint i;
4570 		guint64 b; /* current byte */
4571 
4572 		for (i = 0; ((i < FT_VARINT_MAX_LEN) && (i < maxlen)); ++i) {
4573 			b = tvb_get_guint8(tvb, offset++);
4574 			*value |= ((b & 0x7F) << (i * 7)); /* add lower 7 bits to val */
4575 
4576 			if (b < 0x80) {
4577 				/* end successfully becauseof last byte's msb(most significant bit) is zero */
4578 				*value = (*value >> 1) ^ ((*value & 1) ? -1 : 0);
4579 				return i + 1;
4580 			}
4581 		}
4582 	}
4583 	else if (encoding & ENC_VARINT_QUIC) {
4584 
4585 		/* calculate variable length */
4586 		*value = tvb_get_guint8(tvb, offset);
4587 		switch((*value) >> 6) {
4588 		case 0: /* 0b00 => 1 byte length (6 bits Usable) */
4589 			(*value) &= 0x3F;
4590 			return 1;
4591 		case 1: /* 0b01 => 2 bytes length (14 bits Usable) */
4592 			*value = tvb_get_ntohs(tvb, offset) & 0x3FFF;
4593 			return 2;
4594 		case 2: /* 0b10 => 4 bytes length (30 bits Usable) */
4595 			*value = tvb_get_ntohl(tvb, offset) & 0x3FFFFFFF;
4596 			return 4;
4597 		case 3: /* 0b11 => 8 bytes length (62 bits Usable) */
4598 			*value = tvb_get_ntoh64(tvb, offset) & G_GUINT64_CONSTANT(0x3FFFFFFFFFFFFFFF);
4599 			return 8;
4600 		default: /* No Possible */
4601 			ws_assert_not_reached();
4602 			break;
4603 		}
4604 
4605 	}
4606 
4607 	return 0; /* 10 bytes scanned, but no bytes' msb is zero */
4608 }
4609 
4610 /*
4611  * Editor modelines  -  https://www.wireshark.org/tools/modelines.html
4612  *
4613  * Local variables:
4614  * c-basic-offset: 8
4615  * tab-width: 8
4616  * indent-tabs-mode: t
4617  * End:
4618  *
4619  * vi: set shiftwidth=8 tabstop=8 noexpandtab:
4620  * :indentSize=8:tabSize=8:noTabs=false:
4621  */
4622