1 #include <assert.h>
2
3 #include "ducet.h"
4 #include "udb.h"
5
6 #ifdef NU_WITH_DUCET
7
8 #include "gen/_ducet.c"
9
10 #ifndef NU_DISABLE_CONTRACTIONS
11 # include "gen/_ducet_switch.c"
12 #else
/* Fallback used when NU_DISABLE_CONTRACTIONS is defined and the generated
 * contractions switch is therefore not included: there are no contractions,
 * so they add nothing to _nu_ducet_weights_count() */
const size_t _NU_DUCET_CONTRACTIONS = 0;
14 #endif
15
_nu_ducet_weights_count()16 static size_t _nu_ducet_weights_count() {
17 return NU_DUCET_G_SIZE + _NU_DUCET_CONTRACTIONS;
18 }
19
nu_ducet_weight(uint32_t codepoint,int32_t * weight,void * context)20 int32_t nu_ducet_weight(uint32_t codepoint, int32_t *weight, void *context) {
21 (void)(weight);
22 (void)(context);
23
24 assert(_nu_ducet_weights_count() < 0x7FFFFFFF - 0x10FFFF);
25
26 #ifndef NU_DISABLE_CONTRACTIONS
27 int32_t switch_value = _nu_ducet_weight_switch(codepoint, weight, context);
28 /* weight switch should return weight (if any) and fill value of *weight
29 * with fallback (if needed). returned value of 0 is impossible result - this
30 * special case is already handled above, this return value indicates that switch
31 * couldn't find weight for a codepoint */
32 if (switch_value != 0) {
33 return switch_value;
34 }
35 #endif
36
37 /* special case switch after contractions switch
38 * to let state-machine figure out its state on abort */
39 if (codepoint == 0) {
40 return 0;
41 }
42
43 uint32_t mph_value = nu_udb_lookup_value(codepoint, NU_DUCET_G, NU_DUCET_G_SIZE,
44 NU_DUCET_VALUES_C, NU_DUCET_VALUES_I);
45
46 return (mph_value != 0
47 ? (int32_t)(mph_value)
48 : (int32_t)(codepoint + _nu_ducet_weights_count()));
49
50 /* ISO/IEC 14651 requests that codepoints with undefined weight should be
51 * sorted before max weight in collation table. This way all codepoints
52 * defined in ducet would have weight under a value of _nu_ducet_weights_count(),
53 * all undefined codepoints would have weight under
54 * 0x10FFFF + _nu_ducet_weights_count() - 1, max weight will be
55 * 0x10FFFF + _nu_ducet_weights_count() */
56
57 /* Regarding integer overflow:
58 *
59 * int32_t can hold 0xFFFFFFFF / 2 = 0x7FFFFFFF positive numbers, this
60 * function can safely offset codepoint value up to +2146369536 without
61 * risk of overflow. Thus max collation table size supported is
62 * 2146369536 (0x7FFFFFFF - 0x10FFFF) */
63 }
64
65 #endif /* NU_WITH_DUCET */
66