1 /*
2 * %CopyrightBegin%
3 *
4 * Copyright Ericsson AB 1996-2020. All Rights Reserved.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * %CopyrightEnd%
19 */
20
21 #ifdef HAVE_CONFIG_H
22 # include "config.h"
23 #endif
24
25 #include "sys.h"
26 #include "erl_sys_driver.h"
27 #include "erl_vm.h"
28 #include "global.h"
29 #include "hash.h"
30 #include "atom.h"
31
32
33 #define ATOM_SIZE 3000
34
35 IndexTable erts_atom_table; /* The index table */
36
37 static erts_rwmtx_t atom_table_lock;
38
39 #define atom_read_lock() erts_rwmtx_rlock(&atom_table_lock)
40 #define atom_read_unlock() erts_rwmtx_runlock(&atom_table_lock)
41 #define atom_write_lock() erts_rwmtx_rwlock(&atom_table_lock)
42 #define atom_write_unlock() erts_rwmtx_rwunlock(&atom_table_lock)
43
44 #if 0
45 #define ERTS_ATOM_PUT_OPS_STAT
46 #endif
47 #ifdef ERTS_ATOM_PUT_OPS_STAT
48 static erts_atomic_t atom_put_ops;
49 #endif
50
51 /* Functions for allocating space for the ext of atoms. We do not
52 * use malloc for each atom to prevent excessive memory fragmentation
53 */
54
55 typedef struct _atom_text {
56 struct _atom_text* next;
57 unsigned char text[ATOM_TEXT_SIZE];
58 } AtomText;
59
60 static AtomText* text_list; /* List of text buffers */
61 static byte *atom_text_pos;
62 static byte *atom_text_end;
63 static Uint reserved_atom_space; /* Total amount of atom text space */
64 static Uint atom_space; /* Amount of atom text space used */
65
66 /*
67 * Print info about atom tables
68 */
atom_info(fmtfn_t to,void * to_arg)69 void atom_info(fmtfn_t to, void *to_arg)
70 {
71 int lock = !ERTS_IS_CRASH_DUMPING;
72 if (lock)
73 atom_read_lock();
74 index_info(to, to_arg, &erts_atom_table);
75 #ifdef ERTS_ATOM_PUT_OPS_STAT
76 erts_print(to, to_arg, "atom_put_ops: %ld\n",
77 erts_atomic_read_nob(&atom_put_ops));
78 #endif
79
80 if (lock)
81 atom_read_unlock();
82 }
83
84 /*
85 * Allocate an atom text segment.
86 */
87 static void
more_atom_space(void)88 more_atom_space(void)
89 {
90 AtomText* ptr;
91
92 ptr = (AtomText*) erts_alloc(ERTS_ALC_T_ATOM_TXT, sizeof(AtomText));
93
94 ptr->next = text_list;
95 text_list = ptr;
96
97 atom_text_pos = ptr->text;
98 atom_text_end = atom_text_pos + ATOM_TEXT_SIZE;
99 reserved_atom_space += sizeof(AtomText);
100
101 VERBOSE(DEBUG_SYSTEM,("Allocated %d atom space\n",ATOM_TEXT_SIZE));
102 }
103
104 /*
105 * Allocate string space within an atom text segment.
106 */
107
108 static byte*
atom_text_alloc(int bytes)109 atom_text_alloc(int bytes)
110 {
111 byte *res;
112
113 ASSERT(bytes <= MAX_ATOM_SZ_LIMIT);
114 if (atom_text_pos + bytes >= atom_text_end) {
115 more_atom_space();
116 }
117 res = atom_text_pos;
118 atom_text_pos += bytes;
119 atom_space += bytes;
120 return res;
121 }
122
123 /*
124 * Calculate atom hash value (using the hash algorithm
125 * hashpjw from the Dragon Book).
126 */
127
128 static HashValue
atom_hash(Atom * obj)129 atom_hash(Atom* obj)
130 {
131 byte* p = obj->name;
132 int len = obj->len;
133 HashValue h = 0, g;
134 byte v;
135
136 while(len--) {
137 v = *p++;
138 /* latin1 clutch for r16 */
139 if (len && (v & 0xFE) == 0xC2 && (*p & 0xC0) == 0x80) {
140 v = (v << 6) | (*p & 0x3F);
141 p++; len--;
142 }
143 /* normal hashpjw follows for v */
144 h = (h << 4) + v;
145 if ((g = h & 0xf0000000)) {
146 h ^= (g >> 24);
147 h ^= g;
148 }
149 }
150 return h;
151 }
152
153
154 static int
atom_cmp(Atom * tmpl,Atom * obj)155 atom_cmp(Atom* tmpl, Atom* obj)
156 {
157 if (tmpl->len == obj->len &&
158 sys_memcmp(tmpl->name, obj->name, tmpl->len) == 0)
159 return 0;
160 return 1;
161 }
162
163
164 static Atom*
atom_alloc(Atom * tmpl)165 atom_alloc(Atom* tmpl)
166 {
167 Atom* obj = (Atom*) erts_alloc(ERTS_ALC_T_ATOM, sizeof(Atom));
168
169 obj->name = atom_text_alloc(tmpl->len);
170 sys_memcpy(obj->name, tmpl->name, tmpl->len);
171 obj->len = tmpl->len;
172 obj->latin1_chars = tmpl->latin1_chars;
173 obj->slot.index = -1;
174
175 /*
176 * Precompute ordinal value of first 3 bytes + 7 bits.
177 * This is used by erl_utils.h:erts_cmp_atoms().
178 * We cannot use the full 32 bits of the first 4 bytes,
179 * since we use the sign of the difference between two
180 * ordinal values to represent their relative order.
181 */
182 {
183 unsigned char c[4];
184 int i;
185 int j;
186
187 j = (tmpl->len < 4) ? tmpl->len : 4;
188 for(i = 0; i < j; ++i)
189 c[i] = tmpl->name[i];
190 for(; i < 4; ++i)
191 c[i] = '\0';
192 obj->ord0 = (c[0] << 23) + (c[1] << 15) + (c[2] << 7) + (c[3] >> 1);
193 }
194 return obj;
195 }
196
197 static void
atom_free(Atom * obj)198 atom_free(Atom* obj)
199 {
200 ASSERT(obj->slot.index == atom_val(am_ErtsSecretAtom));
201 }
202
/*
 * Convert a Latin-1 name to UTF-8 if it needs converting.
 *
 * conv_buf: scratch buffer that receives the converted text.
 * buf_sz:   size of conv_buf; must be at least MAX_ATOM_SZ_FROM_LATIN1.
 * srcp:     in: the Latin-1 text; out: conv_buf if a conversion was made.
 * lenp:     in: number of Latin-1 bytes (at most MAX_ATOM_CHARACTERS);
 *           out: number of UTF-8 bytes if a conversion was made.
 *
 * When no byte has its high bit set, the text is already valid UTF-8 and
 * neither *srcp, *lenp nor conv_buf is touched.
 */
static void latin1_to_utf8(byte* conv_buf, Uint buf_sz,
                           const byte** srcp, Uint* lenp)
{
    const byte* in = *srcp;
    const Uint in_len = *lenp;
    Uint first_high = 0;
    Uint k;
    byte* out;

    ASSERT(in_len <= MAX_ATOM_CHARACTERS);
    ASSERT(buf_sz >= MAX_ATOM_SZ_FROM_LATIN1);

    /* Find the first byte above 0x7F, if there is one. */
    while (first_high < in_len && !(in[first_high] & 0x80)) {
        ++first_high;
    }
    if (first_high == in_len) {
        return;
    }

    /* The leading 7-bit part is copied verbatim. */
    sys_memcpy(conv_buf, in, first_high);
    out = conv_buf + first_high;

    for (k = first_high; k < in_len; ++k) {
        unsigned char chr = in[k];

        if (chr & 0x80) {
            /* 128..255 becomes the two-byte sequence 110000xx 10xxxxxx. */
            *out++ = 0xC0 | (chr >> 6);
            *out++ = 0x80 | (chr & 0x3F);
        }
        else {
            *out++ = chr;
        }
    }

    *srcp = conv_buf;
    *lenp = out - conv_buf;
}
236
237 /*
238 * erts_atom_put_index() may fail. Returns negative indexes for errors.
239 */
240 int
erts_atom_put_index(const byte * name,Sint len,ErtsAtomEncoding enc,int trunc)241 erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc)
242 {
243 byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1];
244 const byte *text = name;
245 Uint tlen;
246 Sint no_latin1_chars;
247 Atom a;
248 int aix;
249
250 #ifdef ERTS_ATOM_PUT_OPS_STAT
251 erts_atomic_inc_nob(&atom_put_ops);
252 #endif
253
254 if (len < 0) {
255 if (trunc) {
256 len = 0;
257 } else {
258 return ATOM_MAX_CHARS_ERROR;
259 }
260 }
261
262 tlen = len;
263
264 switch (enc) {
265 case ERTS_ATOM_ENC_7BIT_ASCII:
266 if (tlen > MAX_ATOM_CHARACTERS) {
267 if (trunc)
268 tlen = MAX_ATOM_CHARACTERS;
269 else
270 return ATOM_MAX_CHARS_ERROR;
271 }
272 #ifdef DEBUG
273 for (aix = 0; aix < len; aix++) {
274 ASSERT((name[aix] & 0x80) == 0);
275 }
276 #endif
277 no_latin1_chars = tlen;
278 break;
279 case ERTS_ATOM_ENC_LATIN1:
280 if (tlen > MAX_ATOM_CHARACTERS) {
281 if (trunc)
282 tlen = MAX_ATOM_CHARACTERS;
283 else
284 return ATOM_MAX_CHARS_ERROR;
285 }
286 no_latin1_chars = tlen;
287 latin1_to_utf8(utf8_copy, sizeof(utf8_copy), &text, &tlen);
288 break;
289 case ERTS_ATOM_ENC_UTF8:
290 /* First sanity check; need to verify later */
291 if (tlen > MAX_ATOM_SZ_LIMIT && !trunc)
292 return ATOM_MAX_CHARS_ERROR;
293 break;
294 }
295
296 a.len = tlen;
297 a.name = (byte *) text;
298 atom_read_lock();
299 aix = index_get(&erts_atom_table, (void*) &a);
300 atom_read_unlock();
301 if (aix >= 0) {
302 /* Already in table no need to verify it */
303 return aix;
304 }
305
306 if (enc == ERTS_ATOM_ENC_UTF8) {
307 /* Need to verify encoding and length */
308 byte *err_pos;
309 Uint no_chars;
310 switch (erts_analyze_utf8_x((byte *) text,
311 (Uint) tlen,
312 &err_pos,
313 &no_chars, NULL,
314 &no_latin1_chars,
315 MAX_ATOM_CHARACTERS)) {
316 case ERTS_UTF8_OK:
317 ASSERT(no_chars <= MAX_ATOM_CHARACTERS);
318 break;
319 case ERTS_UTF8_OK_MAX_CHARS:
320 /* Truncated... */
321 if (!trunc)
322 return ATOM_MAX_CHARS_ERROR;
323 ASSERT(no_chars == MAX_ATOM_CHARACTERS);
324 tlen = err_pos - text;
325 break;
326 default:
327 /* Bad utf8... */
328 return ATOM_BAD_ENCODING_ERROR;
329 }
330 }
331
332 ASSERT(tlen <= MAX_ATOM_SZ_LIMIT);
333 ASSERT(-1 <= no_latin1_chars && no_latin1_chars <= MAX_ATOM_CHARACTERS);
334
335 a.len = tlen;
336 a.latin1_chars = (Sint16) no_latin1_chars;
337 a.name = (byte *) text;
338 atom_write_lock();
339 aix = index_put(&erts_atom_table, (void*) &a);
340 atom_write_unlock();
341 return aix;
342 }
343
344 /*
345 * erts_atom_put() may fail. If it fails THE_NON_VALUE is returned!
346 */
347 Eterm
erts_atom_put(const byte * name,Sint len,ErtsAtomEncoding enc,int trunc)348 erts_atom_put(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc)
349 {
350 int aix = erts_atom_put_index(name, len, enc, trunc);
351 if (aix >= 0)
352 return make_atom(aix);
353 else
354 return THE_NON_VALUE;
355 }
356
357 Eterm
am_atom_put(const char * name,Sint len)358 am_atom_put(const char* name, Sint len)
359 {
360 /* Assumes 7-bit ascii; use erts_atom_put() for other encodings... */
361 return erts_atom_put((byte *) name, len, ERTS_ATOM_ENC_7BIT_ASCII, 1);
362 }
363
atom_table_size(void)364 int atom_table_size(void)
365 {
366 int ret;
367 int lock = !ERTS_IS_CRASH_DUMPING;
368 if (lock)
369 atom_read_lock();
370 ret = erts_atom_table.entries;
371 if (lock)
372 atom_read_unlock();
373 return ret;
374 }
375
atom_table_sz(void)376 int atom_table_sz(void)
377 {
378 int ret;
379 int lock = !ERTS_IS_CRASH_DUMPING;
380 if (lock)
381 atom_read_lock();
382 ret = index_table_sz(&erts_atom_table);
383 if (lock)
384 atom_read_unlock();
385 return ret;
386 }
387
388 int
erts_atom_get(const char * name,Uint len,Eterm * ap,ErtsAtomEncoding enc)389 erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc)
390 {
391 byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1];
392 Atom a;
393 int i;
394 int res;
395
396 switch (enc) {
397 case ERTS_ATOM_ENC_LATIN1:
398 if (len > MAX_ATOM_CHARACTERS) {
399 return 0;
400 }
401
402 latin1_to_utf8(utf8_copy, sizeof(utf8_copy), (const byte**)&name, &len);
403
404 a.name = (byte*)name;
405 a.len = (Sint16)len;
406 break;
407 case ERTS_ATOM_ENC_7BIT_ASCII:
408 if (len > MAX_ATOM_CHARACTERS) {
409 return 0;
410 }
411
412 for (i = 0; i < len; i++) {
413 if (name[i] & 0x80) {
414 return 0;
415 }
416 }
417
418 a.len = (Sint16)len;
419 a.name = (byte*)name;
420 break;
421 case ERTS_ATOM_ENC_UTF8:
422 if (len > MAX_ATOM_SZ_LIMIT) {
423 return 0;
424 }
425
426 /* We don't need to check whether the encoding is legal as all atom
427 * names are stored as UTF-8 and we know a lookup with a badly encoded
428 * name will fail. */
429
430 a.len = (Sint16)len;
431 a.name = (byte*)name;
432 break;
433 }
434
435 atom_read_lock();
436 i = index_get(&erts_atom_table, (void*) &a);
437 res = i < 0 ? 0 : (*ap = make_atom(i), 1);
438 atom_read_unlock();
439
440 return res;
441 }
442
443 void
erts_atom_get_text_space_sizes(Uint * reserved,Uint * used)444 erts_atom_get_text_space_sizes(Uint *reserved, Uint *used)
445 {
446 int lock = !ERTS_IS_CRASH_DUMPING;
447 if (lock)
448 atom_read_lock();
449 if (reserved)
450 *reserved = reserved_atom_space;
451 if (used)
452 *used = atom_space;
453 if (lock)
454 atom_read_unlock();
455 }
456
457 void
init_atom_table(void)458 init_atom_table(void)
459 {
460 HashFunctions f;
461 int i;
462 Atom a;
463 erts_rwmtx_opt_t rwmtx_opt = ERTS_RWMTX_OPT_DEFAULT_INITER;
464
465 rwmtx_opt.type = ERTS_RWMTX_TYPE_FREQUENT_READ;
466 rwmtx_opt.lived = ERTS_RWMTX_LONG_LIVED;
467
468 #ifdef ERTS_ATOM_PUT_OPS_STAT
469 erts_atomic_init_nob(&atom_put_ops, 0);
470 #endif
471
472 erts_rwmtx_init_opt(&atom_table_lock, &rwmtx_opt, "atom_tab", NIL,
473 ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
474
475 f.hash = (H_FUN) atom_hash;
476 f.cmp = (HCMP_FUN) atom_cmp;
477 f.alloc = (HALLOC_FUN) atom_alloc;
478 f.free = (HFREE_FUN) atom_free;
479 f.meta_alloc = (HMALLOC_FUN) erts_alloc;
480 f.meta_free = (HMFREE_FUN) erts_free;
481 f.meta_print = (HMPRINT_FUN) erts_print;
482
483 atom_text_pos = NULL;
484 atom_text_end = NULL;
485 reserved_atom_space = 0;
486 atom_space = 0;
487 text_list = NULL;
488
489 erts_index_init(ERTS_ALC_T_ATOM_TABLE, &erts_atom_table,
490 "atom_tab", ATOM_SIZE, erts_atom_table_size, f);
491 more_atom_space();
492
493 /* Ordinary atoms */
494 for (i = 0; erl_atom_names[i] != 0; i++) {
495 int ix;
496 a.len = sys_strlen(erl_atom_names[i]);
497 a.latin1_chars = a.len;
498 a.name = (byte*)erl_atom_names[i];
499 a.slot.index = i;
500 #ifdef DEBUG
501 /* Verify 7-bit ascii */
502 for (ix = 0; ix < a.len; ix++) {
503 ASSERT((a.name[ix] & 0x80) == 0);
504 }
505 #endif
506 ix = index_put(&erts_atom_table, (void*) &a);
507 atom_text_pos -= a.len;
508 atom_space -= a.len;
509 atom_tab(ix)->name = (byte*)erl_atom_names[i];
510 }
511
512 /* Hide am_ErtsSecretAtom */
513 hash_erase(&erts_atom_table.htable, atom_tab(atom_val(am_ErtsSecretAtom)));
514 }
515
516 void
dump_atoms(fmtfn_t to,void * to_arg)517 dump_atoms(fmtfn_t to, void *to_arg)
518 {
519 int i = erts_atom_table.entries;
520
521 /*
522 * Print out the atom table starting from the end.
523 */
524 while (--i >= 0) {
525 if (erts_index_lookup(&erts_atom_table, i)) {
526 erts_print(to, to_arg, "%T\n", make_atom(i));
527 }
528 }
529 }
530
531 Uint
erts_get_atom_limit(void)532 erts_get_atom_limit(void)
533 {
534 return erts_atom_table.limit;
535 }
536