1 /*
2 * %CopyrightBegin%
3 *
4 * Copyright Ericsson AB 1996-2020. All Rights Reserved.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * %CopyrightEnd%
19 */
20
21 #ifndef __ATOM_H__
22 #define __ATOM_H__
23
24 #include "index.h"
25 #include "erl_atom_table.h"
26
/*
 * Size limits for atoms and for the atom table, followed by the negative
 * codes ATOM_BAD_ENCODING_ERROR and ATOM_MAX_CHARS_ERROR.
 */
#define MAX_ATOM_CHARACTERS     255
/* Twice the character limit; presumably because one Latin-1 character
 * needs at most two bytes in UTF-8 -- TODO confirm against users. */
#define MAX_ATOM_SZ_FROM_LATIN1 (2*MAX_ATOM_CHARACTERS)
#define MAX_ATOM_SZ_LIMIT       (4*MAX_ATOM_CHARACTERS) /* theoretical byte limit */
#define ATOM_LIMIT              (1024*1024)
#define MIN_ATOM_TABLE_SIZE     8192
/* Negative values are parenthesized so the unary minus always stays
 * attached to its operand, whatever expression the macro is used in. */
#define ATOM_BAD_ENCODING_ERROR (-1)
#define ATOM_MAX_CHARS_ERROR    (-2)
34
/*
 * Upper bound on the number of entries in the atom table.
 *
 * When ARCH_32 is not defined, the internal atom cache needs
 * MAX_ATOM_TABLE_SIZE to fit in an unsigned 32-bit integer; see
 * external.c (erts_encode_ext_dist_header_setup) for more details.
 * If MAX_ATOM_INDEX + 1 does not fit, the limit falls back to the maximum
 * signed 32-bit integer value, (1 << 31) - 1, to avoid integer overflow.
 */
#ifdef ARCH_32
#define MAX_ATOM_TABLE_SIZE (MAX_ATOM_INDEX + 1)
#else
#define MAX_ATOM_TABLE_SIZE                              \
    ((MAX_ATOM_INDEX + 1 < (UWORD_CONSTANT(1) << 32))    \
     ? MAX_ATOM_INDEX + 1                                \
     : ((UWORD_CONSTANT(1) << 31) - 1))
#endif
43
44
45 /*
46 * Atom entry.
47 */
48 typedef struct atom {
49 IndexSlot slot; /* MUST BE LOCATED AT TOP OF STRUCT!!! */
50 Sint16 len; /* length of atom name (UTF-8 encoded) */
51 Sint16 latin1_chars; /* 0-255 if atom can be encoded in latin1; otherwise, -1 */
52 int ord0; /* ordinal value of first 3 bytes + 7 bits */
53 byte* name; /* name of atom */
54 } Atom;
55
56 extern IndexTable erts_atom_table;
57
58 ERTS_GLB_INLINE Atom* atom_tab(Uint i);
59 ERTS_GLB_INLINE int erts_is_atom_utf8_bytes(byte *text, size_t len, Eterm term);
60 ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1);
61
62 #if ERTS_GLB_INLINE_INCL_FUNC_DEF
63 ERTS_GLB_INLINE Atom*
atom_tab(Uint i)64 atom_tab(Uint i)
65 {
66 return (Atom *) erts_index_lookup(&erts_atom_table, i);
67 }
68
erts_is_atom_utf8_bytes(byte * text,size_t len,Eterm term)69 ERTS_GLB_INLINE int erts_is_atom_utf8_bytes(byte *text, size_t len, Eterm term)
70 {
71 Atom *a;
72 if (!is_atom(term))
73 return 0;
74 a = atom_tab(atom_val(term));
75 return (len == (size_t) a->len
76 && sys_memcmp((void *) a->name, (void *) text, len) == 0);
77 }
78
erts_is_atom_str(const char * str,Eterm term,int is_latin1)79 ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1)
80 {
81 Atom *a;
82 int i, len;
83 const byte* aname;
84 const byte* s = (const byte*) str;
85
86 if (!is_atom(term))
87 return 0;
88 a = atom_tab(atom_val(term));
89 len = a->len;
90 aname = a->name;
91 if (is_latin1) {
92 for (i = 0; i < len; s++) {
93 if (aname[i] < 0x80) {
94 if (aname[i] != *s || *s == '\0')
95 return 0;
96 i++;
97 }
98 else {
99 if (aname[i] != (0xC0 | (*s >> 6)) ||
100 aname[i+1] != (0x80 | (*s & 0x3F))) {
101 return 0;
102 }
103 i += 2;
104 }
105 }
106 }
107 else {
108 for (i = 0; i < len; i++, s++)
109 if (aname[i] != *s || *s == '\0')
110 return 0;
111 }
112 return *s == '\0';
113 }
114
115 #endif
116
/*
 * Encoding selector passed as the enc argument of the atom put/get
 * functions declared in this header.
 */
typedef enum {
    ERTS_ATOM_ENC_7BIT_ASCII = 0,
    ERTS_ATOM_ENC_LATIN1     = 1,
    ERTS_ATOM_ENC_UTF8       = 2
} ErtsAtomEncoding;
122
/*
 * ERTS_IS_ATOM_STR(LSTR, TERM) is non-zero iff TERM is an atom whose name
 * equals LSTR.
 *
 * Note, ERTS_IS_ATOM_STR() expects the first argument to be a 7-bit ASCII
 * string literal: its length is taken with sizeof, which only yields the
 * text length for a literal (or array), not for a pointer.
 * LSTR is parenthesized in the cast so the cast applies to the whole
 * argument expression.
 */
#define ERTS_IS_ATOM_STR(LSTR, TERM) \
    (erts_is_atom_utf8_bytes((byte *) (LSTR), sizeof(LSTR) - 1, (TERM)))

/* Declare an Eterm variable AM_<S>, initialised by am_atom_put() with the
 * spelling of S. */
#define ERTS_DECL_AM(S) Eterm AM_ ## S = am_atom_put(#S, sizeof(#S) - 1)

/* Assign to an already declared AM_<S> the result of am_atom_put() for the
 * spelling of S. */
#define ERTS_INIT_AM(S) AM_ ## S = am_atom_put(#S, sizeof(#S) - 1)

/* Expression form: am_atom_put() applied to the string literal Str. */
#define ERTS_MAKE_AM(Str) am_atom_put(Str, sizeof(Str) - 1)
132
133 int atom_table_size(void); /* number of elements */
134 int atom_table_sz(void); /* table size in bytes, excluding stored objects */
135
136 Eterm am_atom_put(const char*, Sint); /* ONLY 7-bit ascii! */
137 Eterm erts_atom_put(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc);
138 int erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc);
139 void init_atom_table(void);
140 void atom_info(fmtfn_t, void *);
141 void dump_atoms(fmtfn_t, void *);
142 Uint erts_get_atom_limit(void);
143 int erts_atom_get(const char* name, Uint len, Eterm* ap, ErtsAtomEncoding enc);
144 void erts_atom_get_text_space_sizes(Uint *reserved, Uint *used);
145 #endif
146
147