1 /* 2 * Heap string representation. 3 * 4 * Strings are byte sequences ordinarily stored in extended UTF-8 format, 5 * allowing values larger than the official UTF-8 range (used internally) 6 * and also allowing UTF-8 encoding of surrogate pairs (CESU-8 format). 7 * Strings may also be invalid UTF-8 altogether which is the case e.g. with 8 * strings used as internal property names and raw buffers converted to 9 * strings. In such cases the 'clen' field contains an inaccurate value. 10 * 11 * ECMAScript requires support for 32-bit long strings. However, since each 12 * 16-bit codepoint can take 3 bytes in CESU-8, this representation can only 13 * support about 1.4G codepoint long strings in extreme cases. This is not 14 * really a practical issue. 15 */ 16 17 #if !defined(DUK_HSTRING_H_INCLUDED) 18 #define DUK_HSTRING_H_INCLUDED 19 20 /* Impose a maximum string length for now. Restricted artificially to 21 * ensure adding a heap header length won't overflow size_t. The limit 22 * should be synchronized with DUK_HBUFFER_MAX_BYTELEN. 23 * 24 * E5.1 makes provisions to support strings longer than 4G characters. 25 * This limit should be eliminated on 64-bit platforms (and increased 26 * closer to maximum support on 32-bit platforms). 27 */ 28 29 #if defined(DUK_USE_STRLEN16) 30 #define DUK_HSTRING_MAX_BYTELEN (0x0000ffffUL) 31 #else 32 #define DUK_HSTRING_MAX_BYTELEN (0x7fffffffUL) 33 #endif 34 35 /* XXX: could add flags for "is valid CESU-8" (ECMAScript compatible strings), 36 * "is valid UTF-8", "is valid extended UTF-8" (internal strings are not, 37 * regexp bytecode is), and "contains non-BMP characters". These are not 38 * needed right now. 39 */ 40 41 /* With lowmem builds the high 16 bits of duk_heaphdr are used for other 42 * purposes, so this leaves 7 duk_heaphdr flags and 9 duk_hstring flags. 43 */ 44 #define DUK_HSTRING_FLAG_ASCII DUK_HEAPHDR_USER_FLAG(0) /* string is ASCII, clen == blen */ 45 #define DUK_HSTRING_FLAG_ARRIDX DUK_HEAPHDR_USER_FLAG(1) /* string is a valid array index */ 46 #define DUK_HSTRING_FLAG_SYMBOL DUK_HEAPHDR_USER_FLAG(2) /* string is a symbol (invalid utf-8) */ 47 #define DUK_HSTRING_FLAG_HIDDEN DUK_HEAPHDR_USER_FLAG(3) /* string is a hidden symbol (implies symbol, Duktape 1.x internal string) */ 48 #define DUK_HSTRING_FLAG_RESERVED_WORD DUK_HEAPHDR_USER_FLAG(4) /* string is a reserved word (non-strict) */ 49 #define DUK_HSTRING_FLAG_STRICT_RESERVED_WORD DUK_HEAPHDR_USER_FLAG(5) /* string is a reserved word (strict) */ 50 #define DUK_HSTRING_FLAG_EVAL_OR_ARGUMENTS DUK_HEAPHDR_USER_FLAG(6) /* string is 'eval' or 'arguments' */ 51 #define DUK_HSTRING_FLAG_EXTDATA DUK_HEAPHDR_USER_FLAG(7) /* string data is external (duk_hstring_external) */ 52 #define DUK_HSTRING_FLAG_PINNED_LITERAL DUK_HEAPHDR_USER_FLAG(8) /* string is a literal, and pinned */ 53 54 #define DUK_HSTRING_HAS_ASCII(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ASCII) 55 #define DUK_HSTRING_HAS_ARRIDX(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ARRIDX) 56 #define DUK_HSTRING_HAS_SYMBOL(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_SYMBOL) 57 #define DUK_HSTRING_HAS_HIDDEN(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_HIDDEN) 58 #define DUK_HSTRING_HAS_RESERVED_WORD(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_RESERVED_WORD) 59 #define DUK_HSTRING_HAS_STRICT_RESERVED_WORD(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_STRICT_RESERVED_WORD) 60 #define DUK_HSTRING_HAS_EVAL_OR_ARGUMENTS(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EVAL_OR_ARGUMENTS) 61 #define DUK_HSTRING_HAS_EXTDATA(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EXTDATA) 62 #define DUK_HSTRING_HAS_PINNED_LITERAL(x) DUK_HEAPHDR_CHECK_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_PINNED_LITERAL) 63 64 #define DUK_HSTRING_SET_ASCII(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ASCII) 65 #define DUK_HSTRING_SET_ARRIDX(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ARRIDX) 66 #define DUK_HSTRING_SET_SYMBOL(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_SYMBOL) 67 #define DUK_HSTRING_SET_HIDDEN(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_HIDDEN) 68 #define DUK_HSTRING_SET_RESERVED_WORD(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_RESERVED_WORD) 69 #define DUK_HSTRING_SET_STRICT_RESERVED_WORD(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_STRICT_RESERVED_WORD) 70 #define DUK_HSTRING_SET_EVAL_OR_ARGUMENTS(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EVAL_OR_ARGUMENTS) 71 #define DUK_HSTRING_SET_EXTDATA(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EXTDATA) 72 #define DUK_HSTRING_SET_PINNED_LITERAL(x) DUK_HEAPHDR_SET_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_PINNED_LITERAL) 73 74 #define DUK_HSTRING_CLEAR_ASCII(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ASCII) 75 #define DUK_HSTRING_CLEAR_ARRIDX(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_ARRIDX) 76 #define DUK_HSTRING_CLEAR_SYMBOL(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_SYMBOL) 77 #define DUK_HSTRING_CLEAR_HIDDEN(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_HIDDEN) 78 #define DUK_HSTRING_CLEAR_RESERVED_WORD(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_RESERVED_WORD) 79 #define DUK_HSTRING_CLEAR_STRICT_RESERVED_WORD(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_STRICT_RESERVED_WORD) 80 #define DUK_HSTRING_CLEAR_EVAL_OR_ARGUMENTS(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EVAL_OR_ARGUMENTS) 81 #define DUK_HSTRING_CLEAR_EXTDATA(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_EXTDATA) 82 #define DUK_HSTRING_CLEAR_PINNED_LITERAL(x) DUK_HEAPHDR_CLEAR_FLAG_BITS(&(x)->hdr, DUK_HSTRING_FLAG_PINNED_LITERAL) 83 84 #if 0 /* Slightly smaller code without explicit flag, but explicit flag 85 * is very useful when 'clen' is dropped. 86 */ 87 #define DUK_HSTRING_IS_ASCII(x) (DUK_HSTRING_GET_BYTELEN((x)) == DUK_HSTRING_GET_CHARLEN((x))) 88 #endif 89 #define DUK_HSTRING_IS_ASCII(x) DUK_HSTRING_HAS_ASCII((x)) /* lazily set! */ 90 #define DUK_HSTRING_IS_EMPTY(x) (DUK_HSTRING_GET_BYTELEN((x)) == 0) 91 92 #if defined(DUK_USE_STRHASH16) 93 #define DUK_HSTRING_GET_HASH(x) ((x)->hdr.h_flags >> 16) 94 #define DUK_HSTRING_SET_HASH(x,v) do { \ 95 (x)->hdr.h_flags = ((x)->hdr.h_flags & 0x0000ffffUL) | ((v) << 16); \ 96 } while (0) 97 #else 98 #define DUK_HSTRING_GET_HASH(x) ((x)->hash) 99 #define DUK_HSTRING_SET_HASH(x,v) do { \ 100 (x)->hash = (v); \ 101 } while (0) 102 #endif 103 104 #if defined(DUK_USE_STRLEN16) 105 #define DUK_HSTRING_GET_BYTELEN(x) ((x)->hdr.h_strextra16) 106 #define DUK_HSTRING_SET_BYTELEN(x,v) do { \ 107 (x)->hdr.h_strextra16 = (v); \ 108 } while (0) 109 #if defined(DUK_USE_HSTRING_CLEN) 110 #define DUK_HSTRING_GET_CHARLEN(x) duk_hstring_get_charlen((x)) 111 #define DUK_HSTRING_SET_CHARLEN(x,v) do { \ 112 (x)->clen16 = (v); \ 113 } while (0) 114 #else 115 #define DUK_HSTRING_GET_CHARLEN(x) duk_hstring_get_charlen((x)) 116 #define DUK_HSTRING_SET_CHARLEN(x,v) do { \ 117 DUK_ASSERT(0); /* should never be called */ \ 118 } while (0) 119 #endif 120 #else 121 #define DUK_HSTRING_GET_BYTELEN(x) ((x)->blen) 122 #define DUK_HSTRING_SET_BYTELEN(x,v) do { \ 123 (x)->blen = (v); \ 124 } while (0) 125 #define DUK_HSTRING_GET_CHARLEN(x) duk_hstring_get_charlen((x)) 126 #define DUK_HSTRING_SET_CHARLEN(x,v) do { \ 127 (x)->clen = (v); \ 128 } while (0) 129 #endif 130 131 #if defined(DUK_USE_HSTRING_EXTDATA) 132 #define DUK_HSTRING_GET_EXTDATA(x) \ 133 ((x)->extdata) 134 #define DUK_HSTRING_GET_DATA(x) \ 135 (DUK_HSTRING_HAS_EXTDATA((x)) ? \ 136 DUK_HSTRING_GET_EXTDATA((const duk_hstring_external *) (x)) : ((const duk_uint8_t *) ((x) + 1))) 137 #else 138 #define DUK_HSTRING_GET_DATA(x) \ 139 ((const duk_uint8_t *) ((x) + 1)) 140 #endif 141 142 #define DUK_HSTRING_GET_DATA_END(x) \ 143 (DUK_HSTRING_GET_DATA((x)) + (x)->blen) 144 145 /* Marker value; in E5 2^32-1 is not a valid array index (2^32-2 is highest 146 * valid). 147 */ 148 #define DUK_HSTRING_NO_ARRAY_INDEX (0xffffffffUL) 149 150 #if defined(DUK_USE_HSTRING_ARRIDX) 151 #define DUK_HSTRING_GET_ARRIDX_FAST(h) ((h)->arridx) 152 #define DUK_HSTRING_GET_ARRIDX_SLOW(h) ((h)->arridx) 153 #else 154 /* Get array index related to string (or return DUK_HSTRING_NO_ARRAY_INDEX); 155 * avoids helper call if string has no array index value. 156 */ 157 #define DUK_HSTRING_GET_ARRIDX_FAST(h) \ 158 (DUK_HSTRING_HAS_ARRIDX((h)) ? duk_js_to_arrayindex_hstring_fast_known((h)) : DUK_HSTRING_NO_ARRAY_INDEX) 159 160 /* Slower but more compact variant. */ 161 #define DUK_HSTRING_GET_ARRIDX_SLOW(h) \ 162 (duk_js_to_arrayindex_hstring_fast((h))) 163 #endif 164 165 /* XXX: these actually fit into duk_hstring */ 166 #define DUK_SYMBOL_TYPE_HIDDEN 0 167 #define DUK_SYMBOL_TYPE_GLOBAL 1 168 #define DUK_SYMBOL_TYPE_LOCAL 2 169 #define DUK_SYMBOL_TYPE_WELLKNOWN 3 170 171 /* Assertion for duk_hstring validity. */ 172 #if defined(DUK_USE_ASSERTIONS) 173 DUK_INTERNAL_DECL void duk_hstring_assert_valid(duk_hstring *h); 174 #define DUK_HSTRING_ASSERT_VALID(h) do { duk_hstring_assert_valid((h)); } while (0) 175 #else 176 #define DUK_HSTRING_ASSERT_VALID(h) do {} while (0) 177 #endif 178 179 /* 180 * Misc 181 */ 182 183 struct duk_hstring { 184 /* Smaller heaphdr than for other objects, because strings are held 185 * in string intern table which requires no link pointers. Much of 186 * the 32-bit flags field is unused by flags, so we can stuff a 16-bit 187 * field in there. 188 */ 189 duk_heaphdr_string hdr; 190 191 /* String hash. */ 192 #if defined(DUK_USE_STRHASH16) 193 /* If 16-bit hash is in use, stuff it into duk_heaphdr_string flags. */ 194 #else 195 duk_uint32_t hash; 196 #endif 197 198 /* Precomputed array index (or DUK_HSTRING_NO_ARRAY_INDEX). */ 199 #if defined(DUK_USE_HSTRING_ARRIDX) 200 duk_uarridx_t arridx; 201 #endif 202 203 /* Length in bytes (not counting NUL term). */ 204 #if defined(DUK_USE_STRLEN16) 205 /* placed in duk_heaphdr_string */ 206 #else 207 duk_uint32_t blen; 208 #endif 209 210 /* Length in codepoints (must be E5 compatible). */ 211 #if defined(DUK_USE_STRLEN16) 212 #if defined(DUK_USE_HSTRING_CLEN) 213 duk_uint16_t clen16; 214 #else 215 /* computed live */ 216 #endif 217 #else 218 duk_uint32_t clen; 219 #endif 220 221 /* 222 * String data of 'blen+1' bytes follows (+1 for NUL termination 223 * convenience for C API). No alignment needs to be guaranteed 224 * for strings, but fields above should guarantee alignment-by-4 225 * (but not alignment-by-8). 226 */ 227 }; 228 229 /* The external string struct is defined even when the feature is inactive. */ 230 struct duk_hstring_external { 231 duk_hstring str; 232 233 /* 234 * For an external string, the NUL-terminated string data is stored 235 * externally. The user must guarantee that data behind this pointer 236 * doesn't change while it's used. 237 */ 238 239 const duk_uint8_t *extdata; 240 }; 241 242 /* 243 * Prototypes 244 */ 245 246 DUK_INTERNAL_DECL duk_ucodepoint_t duk_hstring_char_code_at_raw(duk_hthread *thr, duk_hstring *h, duk_uint_t pos, duk_bool_t surrogate_aware); 247 DUK_INTERNAL_DECL duk_bool_t duk_hstring_equals_ascii_cstring(duk_hstring *h, const char *cstr); 248 DUK_INTERNAL_DECL duk_size_t duk_hstring_get_charlen(duk_hstring *h); 249 #if !defined(DUK_USE_HSTRING_LAZY_CLEN) 250 DUK_INTERNAL_DECL void duk_hstring_init_charlen(duk_hstring *h); 251 #endif 252 253 #endif /* DUK_HSTRING_H_INCLUDED */ 254