1 #include <assert.h>
2 
3 #include "ducet.h"
4 #include "udb.h"
5 
6 #ifdef NU_WITH_DUCET
7 
8 #include "gen/_ducet.c"
9 
/* Contractions support is optional: gen/_ducet_switch.c is included unless
 * NU_DISABLE_CONTRACTIONS is defined. */
#ifndef NU_DISABLE_CONTRACTIONS
#	include "gen/_ducet_switch.c"
#else
	/* Contractions are compiled out: define the contractions count as 0 so
	 * that _nu_ducet_weights_count() still builds and reduces to
	 * NU_DUCET_G_SIZE. (Presumably gen/_ducet_switch.c supplies this symbol
	 * in the other branch - TODO confirm.) */
	const size_t _NU_DUCET_CONTRACTIONS = 0;
#endif
15 
_nu_ducet_weights_count()16 static size_t _nu_ducet_weights_count() {
17 	return NU_DUCET_G_SIZE + _NU_DUCET_CONTRACTIONS;
18 }
19 
nu_ducet_weight(uint32_t codepoint,int32_t * weight,void * context)20 int32_t nu_ducet_weight(uint32_t codepoint, int32_t *weight, void *context) {
21 	(void)(weight);
22 	(void)(context);
23 
24 	assert(_nu_ducet_weights_count() < 0x7FFFFFFF - 0x10FFFF);
25 
26 #ifndef NU_DISABLE_CONTRACTIONS
27 	int32_t switch_value = _nu_ducet_weight_switch(codepoint, weight, context);
28 	/* weight switch should return weight (if any) and fill value of *weight
29 	 * with fallback (if needed). returned value of 0 is impossible result - this
30 	 * special case is already handled above, this return value indicates that switch
31 	 * couldn't find weight for a codepoint */
32 	if (switch_value != 0) {
33 		return switch_value;
34 	}
35 #endif
36 
37 	/* special case switch after contractions switch
38 	 * to let state-machine figure out its state on abort */
39 	if (codepoint == 0) {
40 		return 0;
41 	}
42 
43 	uint32_t mph_value = nu_udb_lookup_value(codepoint, NU_DUCET_G, NU_DUCET_G_SIZE,
44 		NU_DUCET_VALUES_C, NU_DUCET_VALUES_I);
45 
46 	return (mph_value != 0
47 		? (int32_t)(mph_value)
48 		: (int32_t)(codepoint + _nu_ducet_weights_count()));
49 
50 	/* ISO/IEC 14651 requests that codepoints with undefined weight should be
51 	 * sorted before max weight in collation table. This way all codepoints
52 	 * defined in ducet would have weight under a value of _nu_ducet_weights_count(),
53 	 * all undefined codepoints would have weight under
54 	 * 0x10FFFF + _nu_ducet_weights_count() - 1, max weight will be
55 	 * 0x10FFFF + _nu_ducet_weights_count() */
56 
57 	/* Regarding integer overflow:
58 	 *
59 	 * int32_t can hold 0xFFFFFFFF / 2 = 0x7FFFFFFF positive numbers, this
60 	 * function can safely offset codepoint value up to +2146369536 without
61 	 * risk of overflow. Thus max collation table size supported is
62 	 * 2146369536 (0x7FFFFFFF - 0x10FFFF) */
63 }
64 
65 #endif /* NU_WITH_DUCET */
66