1 /*
2  * %CopyrightBegin%
3  *
4  * Copyright Ericsson AB 1996-2020. All Rights Reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * %CopyrightEnd%
19  */
20 
21 #ifdef HAVE_CONFIG_H
22 #  include "config.h"
23 #endif
24 
25 #include "sys.h"
26 #include "erl_sys_driver.h"
27 #include "erl_vm.h"
28 #include "global.h"
29 #include "hash.h"
30 #include "atom.h"
31 
32 
33 #define ATOM_SIZE  3000
34 
35 IndexTable erts_atom_table;	/* The index table */
36 
37 static erts_rwmtx_t atom_table_lock;
38 
39 #define atom_read_lock()	erts_rwmtx_rlock(&atom_table_lock)
40 #define atom_read_unlock()	erts_rwmtx_runlock(&atom_table_lock)
41 #define atom_write_lock()	erts_rwmtx_rwlock(&atom_table_lock)
42 #define atom_write_unlock()	erts_rwmtx_rwunlock(&atom_table_lock)
43 
44 #if 0
45 #define ERTS_ATOM_PUT_OPS_STAT
46 #endif
47 #ifdef ERTS_ATOM_PUT_OPS_STAT
48 static erts_atomic_t atom_put_ops;
49 #endif
50 
51 /* Functions for allocating space for the ext of atoms. We do not
52  * use malloc for each atom to prevent excessive memory fragmentation
53  */
54 
55 typedef struct _atom_text {
56     struct _atom_text* next;
57     unsigned char text[ATOM_TEXT_SIZE];
58 } AtomText;
59 
60 static AtomText* text_list;	/* List of text buffers */
61 static byte *atom_text_pos;
62 static byte *atom_text_end;
63 static Uint reserved_atom_space;	/* Total amount of atom text space */
64 static Uint atom_space;		/* Amount of atom text space used */
65 
66 /*
67  * Print info about atom tables
68  */
atom_info(fmtfn_t to,void * to_arg)69 void atom_info(fmtfn_t to, void *to_arg)
70 {
71     int lock = !ERTS_IS_CRASH_DUMPING;
72     if (lock)
73 	atom_read_lock();
74     index_info(to, to_arg, &erts_atom_table);
75 #ifdef ERTS_ATOM_PUT_OPS_STAT
76     erts_print(to, to_arg, "atom_put_ops: %ld\n",
77 	       erts_atomic_read_nob(&atom_put_ops));
78 #endif
79 
80     if (lock)
81 	atom_read_unlock();
82 }
83 
84 /*
85  * Allocate an atom text segment.
86  */
87 static void
more_atom_space(void)88 more_atom_space(void)
89 {
90     AtomText* ptr;
91 
92     ptr = (AtomText*) erts_alloc(ERTS_ALC_T_ATOM_TXT, sizeof(AtomText));
93 
94     ptr->next = text_list;
95     text_list = ptr;
96 
97     atom_text_pos = ptr->text;
98     atom_text_end = atom_text_pos + ATOM_TEXT_SIZE;
99     reserved_atom_space += sizeof(AtomText);
100 
101     VERBOSE(DEBUG_SYSTEM,("Allocated %d atom space\n",ATOM_TEXT_SIZE));
102 }
103 
104 /*
105  * Allocate string space within an atom text segment.
106  */
107 
108 static byte*
atom_text_alloc(int bytes)109 atom_text_alloc(int bytes)
110 {
111     byte *res;
112 
113     ASSERT(bytes <= MAX_ATOM_SZ_LIMIT);
114     if (atom_text_pos + bytes >= atom_text_end) {
115 	more_atom_space();
116     }
117     res = atom_text_pos;
118     atom_text_pos += bytes;
119     atom_space    += bytes;
120     return res;
121 }
122 
123 /*
124  * Calculate atom hash value (using the hash algorithm
125  * hashpjw from the Dragon Book).
126  */
127 
128 static HashValue
atom_hash(Atom * obj)129 atom_hash(Atom* obj)
130 {
131     byte* p = obj->name;
132     int len = obj->len;
133     HashValue h = 0, g;
134     byte v;
135 
136     while(len--) {
137 	v = *p++;
138 	/* latin1 clutch for r16 */
139 	if (len && (v & 0xFE) == 0xC2 && (*p & 0xC0) == 0x80) {
140 	    v = (v << 6) | (*p & 0x3F);
141 	    p++; len--;
142 	}
143 	/* normal hashpjw follows for v */
144 	h = (h << 4) + v;
145 	if ((g = h & 0xf0000000)) {
146 	    h ^= (g >> 24);
147 	    h ^= g;
148 	}
149     }
150     return h;
151 }
152 
153 
154 static int
atom_cmp(Atom * tmpl,Atom * obj)155 atom_cmp(Atom* tmpl, Atom* obj)
156 {
157     if (tmpl->len == obj->len &&
158 	sys_memcmp(tmpl->name, obj->name, tmpl->len) == 0)
159 	return 0;
160     return 1;
161 }
162 
163 
164 static Atom*
atom_alloc(Atom * tmpl)165 atom_alloc(Atom* tmpl)
166 {
167     Atom* obj = (Atom*) erts_alloc(ERTS_ALC_T_ATOM, sizeof(Atom));
168 
169     obj->name = atom_text_alloc(tmpl->len);
170     sys_memcpy(obj->name, tmpl->name, tmpl->len);
171     obj->len = tmpl->len;
172     obj->latin1_chars = tmpl->latin1_chars;
173     obj->slot.index = -1;
174 
175     /*
176      * Precompute ordinal value of first 3 bytes + 7 bits.
177      * This is used by erl_utils.h:erts_cmp_atoms().
178      * We cannot use the full 32 bits of the first 4 bytes,
179      * since we use the sign of the difference between two
180      * ordinal values to represent their relative order.
181      */
182     {
183 	unsigned char c[4];
184 	int i;
185 	int j;
186 
187 	j = (tmpl->len < 4) ? tmpl->len : 4;
188 	for(i = 0; i < j; ++i)
189 	    c[i] = tmpl->name[i];
190 	for(; i < 4; ++i)
191 	    c[i] = '\0';
192 	obj->ord0 = (c[0] << 23) + (c[1] << 15) + (c[2] << 7) + (c[3] >> 1);
193     }
194     return obj;
195 }
196 
197 static void
atom_free(Atom * obj)198 atom_free(Atom* obj)
199 {
200     ASSERT(obj->slot.index == atom_val(am_ErtsSecretAtom));
201 }
202 
/*
 * Convert the Latin-1 text (*srcp, *lenp) to UTF-8 if that is needed.
 *
 * When every byte is 7-bit, nothing is written and *srcp and *lenp are
 * left untouched. Otherwise the UTF-8 encoding is written to 'conv_buf'
 * (of size 'buf_sz'), *srcp is set to 'conv_buf' and *lenp to the encoded
 * length in bytes.
 */
static void latin1_to_utf8(byte* conv_buf, Uint buf_sz,
                           const byte** srcp, Uint* lenp)
{
    const byte* in = *srcp;
    const Uint in_len = *lenp;
    byte* out;
    Uint pos = 0;

    ASSERT(in_len <= MAX_ATOM_CHARACTERS);
    ASSERT(buf_sz >= MAX_ATOM_SZ_FROM_LATIN1);

    /* Skip the leading run of 7-bit characters. */
    while (pos < in_len && !(in[pos] & 0x80)) {
        pos++;
    }
    if (pos == in_len) {
        /* Pure 7-bit text is already valid UTF-8. */
        return;
    }

    /* Copy the 7-bit prefix unchanged, then encode the rest. */
    sys_memcpy(conv_buf, in, pos);
    out = conv_buf + pos;
    for ( ; pos < in_len; pos++) {
        const unsigned char chr = in[pos];

        if (chr & 0x80) {
            /* 0x80..0xFF becomes a two-byte sequence. */
            *out++ = 0xC0 | (chr >> 6);
            *out++ = 0x80 | (chr & 0x3F);
        } else {
            *out++ = chr;
        }
    }
    *srcp = conv_buf;
    *lenp = out - conv_buf;
}
236 
237 /*
238  * erts_atom_put_index() may fail. Returns negative indexes for errors.
239  */
240 int
erts_atom_put_index(const byte * name,Sint len,ErtsAtomEncoding enc,int trunc)241 erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc)
242 {
243     byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1];
244     const byte *text = name;
245     Uint tlen;
246     Sint no_latin1_chars;
247     Atom a;
248     int aix;
249 
250 #ifdef ERTS_ATOM_PUT_OPS_STAT
251     erts_atomic_inc_nob(&atom_put_ops);
252 #endif
253 
254     if (len < 0) {
255         if (trunc) {
256             len = 0;
257         } else {
258             return ATOM_MAX_CHARS_ERROR;
259         }
260     }
261 
262     tlen = len;
263 
264     switch (enc) {
265     case ERTS_ATOM_ENC_7BIT_ASCII:
266 	if (tlen > MAX_ATOM_CHARACTERS) {
267 	    if (trunc)
268 		tlen = MAX_ATOM_CHARACTERS;
269 	    else
270 		return ATOM_MAX_CHARS_ERROR;
271 	}
272 #ifdef DEBUG
273 	for (aix = 0; aix < len; aix++) {
274 	    ASSERT((name[aix] & 0x80) == 0);
275 	}
276 #endif
277 	no_latin1_chars = tlen;
278 	break;
279     case ERTS_ATOM_ENC_LATIN1:
280 	if (tlen > MAX_ATOM_CHARACTERS) {
281 	    if (trunc)
282 		tlen = MAX_ATOM_CHARACTERS;
283 	    else
284 		return ATOM_MAX_CHARS_ERROR;
285 	}
286 	no_latin1_chars = tlen;
287 	latin1_to_utf8(utf8_copy, sizeof(utf8_copy), &text, &tlen);
288 	break;
289     case ERTS_ATOM_ENC_UTF8:
290 	/* First sanity check; need to verify later */
291 	if (tlen > MAX_ATOM_SZ_LIMIT && !trunc)
292 	    return ATOM_MAX_CHARS_ERROR;
293 	break;
294     }
295 
296     a.len = tlen;
297     a.name = (byte *) text;
298     atom_read_lock();
299     aix = index_get(&erts_atom_table, (void*) &a);
300     atom_read_unlock();
301     if (aix >= 0) {
302 	/* Already in table no need to verify it */
303 	return aix;
304     }
305 
306     if (enc == ERTS_ATOM_ENC_UTF8) {
307 	/* Need to verify encoding and length */
308 	byte *err_pos;
309 	Uint no_chars;
310 	switch (erts_analyze_utf8_x((byte *) text,
311 				    (Uint) tlen,
312 				    &err_pos,
313 				    &no_chars, NULL,
314 				    &no_latin1_chars,
315 				    MAX_ATOM_CHARACTERS)) {
316 	case ERTS_UTF8_OK:
317 	    ASSERT(no_chars <= MAX_ATOM_CHARACTERS);
318 	    break;
319 	case ERTS_UTF8_OK_MAX_CHARS:
320 	    /* Truncated... */
321 	    if (!trunc)
322 		return ATOM_MAX_CHARS_ERROR;
323 	    ASSERT(no_chars == MAX_ATOM_CHARACTERS);
324 	    tlen = err_pos - text;
325 	    break;
326 	default:
327 	    /* Bad utf8... */
328 	    return ATOM_BAD_ENCODING_ERROR;
329 	}
330     }
331 
332     ASSERT(tlen <= MAX_ATOM_SZ_LIMIT);
333     ASSERT(-1 <= no_latin1_chars && no_latin1_chars <= MAX_ATOM_CHARACTERS);
334 
335     a.len = tlen;
336     a.latin1_chars = (Sint16) no_latin1_chars;
337     a.name = (byte *) text;
338     atom_write_lock();
339     aix = index_put(&erts_atom_table, (void*) &a);
340     atom_write_unlock();
341     return aix;
342 }
343 
344 /*
345  * erts_atom_put() may fail. If it fails THE_NON_VALUE is returned!
346  */
347 Eterm
erts_atom_put(const byte * name,Sint len,ErtsAtomEncoding enc,int trunc)348 erts_atom_put(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc)
349 {
350     int aix = erts_atom_put_index(name, len, enc, trunc);
351     if (aix >= 0)
352 	return make_atom(aix);
353     else
354 	return THE_NON_VALUE;
355 }
356 
357 Eterm
am_atom_put(const char * name,Sint len)358 am_atom_put(const char* name, Sint len)
359 {
360     /* Assumes 7-bit ascii; use erts_atom_put() for other encodings... */
361     return erts_atom_put((byte *) name, len, ERTS_ATOM_ENC_7BIT_ASCII, 1);
362 }
363 
atom_table_size(void)364 int atom_table_size(void)
365 {
366     int ret;
367     int lock = !ERTS_IS_CRASH_DUMPING;
368     if (lock)
369 	atom_read_lock();
370     ret = erts_atom_table.entries;
371     if (lock)
372 	atom_read_unlock();
373     return ret;
374 }
375 
atom_table_sz(void)376 int atom_table_sz(void)
377 {
378     int ret;
379     int lock = !ERTS_IS_CRASH_DUMPING;
380     if (lock)
381 	atom_read_lock();
382     ret = index_table_sz(&erts_atom_table);
383     if (lock)
384 	atom_read_unlock();
385     return ret;
386 }
387 
388 int
erts_atom_get(const char * name,Uint len,Eterm * ap,ErtsAtomEncoding enc)389 erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc)
390 {
391     byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1];
392     Atom a;
393     int i;
394     int res;
395 
396     switch (enc) {
397     case ERTS_ATOM_ENC_LATIN1:
398         if (len > MAX_ATOM_CHARACTERS) {
399             return 0;
400         }
401 
402         latin1_to_utf8(utf8_copy, sizeof(utf8_copy), (const byte**)&name, &len);
403 
404         a.name = (byte*)name;
405         a.len = (Sint16)len;
406         break;
407     case ERTS_ATOM_ENC_7BIT_ASCII:
408         if (len > MAX_ATOM_CHARACTERS) {
409             return 0;
410         }
411 
412         for (i = 0; i < len; i++) {
413             if (name[i] & 0x80) {
414                 return 0;
415             }
416         }
417 
418         a.len = (Sint16)len;
419         a.name = (byte*)name;
420         break;
421     case ERTS_ATOM_ENC_UTF8:
422         if (len > MAX_ATOM_SZ_LIMIT) {
423             return 0;
424         }
425 
426         /* We don't need to check whether the encoding is legal as all atom
427          * names are stored as UTF-8 and we know a lookup with a badly encoded
428          * name will fail. */
429 
430         a.len = (Sint16)len;
431         a.name = (byte*)name;
432         break;
433     }
434 
435     atom_read_lock();
436     i = index_get(&erts_atom_table, (void*) &a);
437     res = i < 0 ? 0 : (*ap = make_atom(i), 1);
438     atom_read_unlock();
439 
440     return res;
441 }
442 
443 void
erts_atom_get_text_space_sizes(Uint * reserved,Uint * used)444 erts_atom_get_text_space_sizes(Uint *reserved, Uint *used)
445 {
446     int lock = !ERTS_IS_CRASH_DUMPING;
447     if (lock)
448 	atom_read_lock();
449     if (reserved)
450 	*reserved = reserved_atom_space;
451     if (used)
452 	*used = atom_space;
453     if (lock)
454 	atom_read_unlock();
455 }
456 
457 void
init_atom_table(void)458 init_atom_table(void)
459 {
460     HashFunctions f;
461     int i;
462     Atom a;
463     erts_rwmtx_opt_t rwmtx_opt = ERTS_RWMTX_OPT_DEFAULT_INITER;
464 
465     rwmtx_opt.type = ERTS_RWMTX_TYPE_FREQUENT_READ;
466     rwmtx_opt.lived = ERTS_RWMTX_LONG_LIVED;
467 
468 #ifdef ERTS_ATOM_PUT_OPS_STAT
469     erts_atomic_init_nob(&atom_put_ops, 0);
470 #endif
471 
472     erts_rwmtx_init_opt(&atom_table_lock, &rwmtx_opt, "atom_tab", NIL,
473         ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
474 
475     f.hash = (H_FUN) atom_hash;
476     f.cmp  = (HCMP_FUN) atom_cmp;
477     f.alloc = (HALLOC_FUN) atom_alloc;
478     f.free = (HFREE_FUN) atom_free;
479     f.meta_alloc = (HMALLOC_FUN) erts_alloc;
480     f.meta_free = (HMFREE_FUN) erts_free;
481     f.meta_print = (HMPRINT_FUN) erts_print;
482 
483     atom_text_pos = NULL;
484     atom_text_end = NULL;
485     reserved_atom_space = 0;
486     atom_space = 0;
487     text_list = NULL;
488 
489     erts_index_init(ERTS_ALC_T_ATOM_TABLE, &erts_atom_table,
490 		    "atom_tab", ATOM_SIZE, erts_atom_table_size, f);
491     more_atom_space();
492 
493     /* Ordinary atoms */
494     for (i = 0; erl_atom_names[i] != 0; i++) {
495 	int ix;
496 	a.len = sys_strlen(erl_atom_names[i]);
497 	a.latin1_chars = a.len;
498 	a.name = (byte*)erl_atom_names[i];
499 	a.slot.index = i;
500 #ifdef DEBUG
501 	/* Verify 7-bit ascii */
502 	for (ix = 0; ix < a.len; ix++) {
503 	    ASSERT((a.name[ix] & 0x80) == 0);
504 	}
505 #endif
506 	ix = index_put(&erts_atom_table, (void*) &a);
507 	atom_text_pos -= a.len;
508 	atom_space -= a.len;
509 	atom_tab(ix)->name = (byte*)erl_atom_names[i];
510     }
511 
512     /* Hide am_ErtsSecretAtom */
513     hash_erase(&erts_atom_table.htable, atom_tab(atom_val(am_ErtsSecretAtom)));
514 }
515 
516 void
dump_atoms(fmtfn_t to,void * to_arg)517 dump_atoms(fmtfn_t to, void *to_arg)
518 {
519     int i = erts_atom_table.entries;
520 
521     /*
522      * Print out the atom table starting from the end.
523      */
524     while (--i >= 0) {
525 	if (erts_index_lookup(&erts_atom_table, i)) {
526 	    erts_print(to, to_arg, "%T\n", make_atom(i));
527 	}
528     }
529 }
530 
531 Uint
erts_get_atom_limit(void)532 erts_get_atom_limit(void)
533 {
534     return erts_atom_table.limit;
535 }
536