1 /*
2  * %CopyrightBegin%
3  *
4  * Copyright Ericsson AB 1996-2020. All Rights Reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * %CopyrightEnd%
19  */
20 
21 #ifndef __ATOM_H__
22 #define __ATOM_H__
23 
24 #include "index.h"
25 #include "erl_atom_table.h"
26 
/*
 * Atom size limits.
 *
 * MAX_ATOM_CHARACTERS is the base limit; the two *_SZ_* values are derived
 * from it as 2x (name converted from Latin-1) and 4x (theoretical byte
 * limit) respectively.
 */
#define MAX_ATOM_CHARACTERS 255
#define MAX_ATOM_SZ_FROM_LATIN1 (2*MAX_ATOM_CHARACTERS)
#define MAX_ATOM_SZ_LIMIT (4*MAX_ATOM_CHARACTERS) /* theoretical byte limit */

/* NOTE(review): presumably the default atom limit and the smallest
   allowed atom table size -- confirm against the users of these macros. */
#define ATOM_LIMIT (1024*1024)
#define MIN_ATOM_TABLE_SIZE 8192

/*
 * Negative error codes. The replacement lists are parenthesized so that
 * each macro always expands to one self-contained expression, whatever
 * tokens surround the point of use.
 */
#define ATOM_BAD_ENCODING_ERROR (-1)
#define ATOM_MAX_CHARS_ERROR (-2)
34 
/*
 * Upper bound on the atom table size.
 *
 * When ARCH_32 is not defined, the internal atom cache needs
 * MAX_ATOM_TABLE_SIZE to fit in an unsigned 32-bit integer; see
 * erts_encode_ext_dist_header_setup() in external.c for more details.
 * If MAX_ATOM_INDEX + 1 is not below 2^32, the maximum signed 32-bit
 * integer value is used instead, to avoid integer overflow.
 */
#ifdef ARCH_32
#  define MAX_ATOM_TABLE_SIZE (MAX_ATOM_INDEX + 1)
#else
#  define MAX_ATOM_TABLE_SIZE                                   \
    ((MAX_ATOM_INDEX + 1 < (UWORD_CONSTANT(1) << 32))           \
     ? MAX_ATOM_INDEX + 1                                       \
     : ((UWORD_CONSTANT(1) << 31) - 1))
#endif
43 
44 
45 /*
46  * Atom entry.
47  */
48 typedef struct atom {
49     IndexSlot slot;  /* MUST BE LOCATED AT TOP OF STRUCT!!! */
50     Sint16 len;      /* length of atom name (UTF-8 encoded) */
51     Sint16 latin1_chars; /* 0-255 if atom can be encoded in latin1; otherwise, -1 */
52     int ord0;        /* ordinal value of first 3 bytes + 7 bits */
53     byte* name;      /* name of atom */
54 } Atom;
55 
56 extern IndexTable erts_atom_table;
57 
58 ERTS_GLB_INLINE Atom* atom_tab(Uint i);
59 ERTS_GLB_INLINE int erts_is_atom_utf8_bytes(byte *text, size_t len, Eterm term);
60 ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1);
61 
62 #if ERTS_GLB_INLINE_INCL_FUNC_DEF
63 ERTS_GLB_INLINE Atom*
atom_tab(Uint i)64 atom_tab(Uint i)
65 {
66     return (Atom *) erts_index_lookup(&erts_atom_table, i);
67 }
68 
erts_is_atom_utf8_bytes(byte * text,size_t len,Eterm term)69 ERTS_GLB_INLINE int erts_is_atom_utf8_bytes(byte *text, size_t len, Eterm term)
70 {
71     Atom *a;
72     if (!is_atom(term))
73 	return 0;
74     a = atom_tab(atom_val(term));
75     return (len == (size_t) a->len
76 	    && sys_memcmp((void *) a->name, (void *) text, len) == 0);
77 }
78 
erts_is_atom_str(const char * str,Eterm term,int is_latin1)79 ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1)
80 {
81     Atom *a;
82     int i, len;
83     const byte* aname;
84     const byte* s = (const byte*) str;
85 
86     if (!is_atom(term))
87 	return 0;
88     a = atom_tab(atom_val(term));
89     len = a->len;
90     aname = a->name;
91     if (is_latin1) {
92 	for (i = 0; i < len; s++) {
93 	    if (aname[i] < 0x80) {
94 		if (aname[i] != *s || *s == '\0')
95 		    return 0;
96 		i++;
97 	    }
98 	    else {
99 		if (aname[i]   != (0xC0 | (*s >> 6)) ||
100 		    aname[i+1] != (0x80 | (*s & 0x3F))) {
101 		    return 0;
102 		}
103 		i += 2;
104 	    }
105 	}
106     }
107     else {
108 	for (i = 0; i < len; i++, s++)
109 	    if (aname[i] != *s || *s == '\0')
110 		return 0;
111     }
112     return *s == '\0';
113 }
114 
115 #endif
116 
/*
 * Encoding of atom name text, as passed in the enc argument of the
 * erts_atom_put*() and erts_atom_get() functions.
 */
typedef enum {
    ERTS_ATOM_ENC_7BIT_ASCII = 0,
    ERTS_ATOM_ENC_LATIN1 = 1,
    ERTS_ATOM_ENC_UTF8 = 2
} ErtsAtomEncoding;
122 
/*
 * ERTS_IS_ATOM_STR(LSTR, TERM)
 *   Non-zero iff TERM is the atom named LSTR.
 *   Note, ERTS_IS_ATOM_STR() expects the first argument to be a
 *   7-bit ASCII string literal: its length is taken as sizeof(LSTR) - 1.
 */
#define ERTS_IS_ATOM_STR(LSTR, TERM)                                    \
    (erts_is_atom_utf8_bytes((byte *) LSTR, sizeof(LSTR) - 1, (TERM)))

/*
 * ERTS_DECL_AM(S) declares an Eterm variable AM_<S> initialized with the
 * result of am_atom_put() on the stringified S.
 * ERTS_INIT_AM(S) assigns the same value to an already declared AM_<S>.
 */
#define ERTS_DECL_AM(S) Eterm AM_ ## S = am_atom_put(#S, sizeof(#S) - 1)
#define ERTS_INIT_AM(S) AM_ ## S = am_atom_put(#S, sizeof(#S) - 1)

/*
 * ERTS_MAKE_AM(Str) calls am_atom_put() on Str, using sizeof(Str) - 1 as
 * the length, so Str has to be a string literal (or char array).
 */
#define ERTS_MAKE_AM(Str) am_atom_put(Str, sizeof(Str) - 1)
132 
133 int atom_table_size(void);	/* number of elements */
134 int atom_table_sz(void);	/* table size in bytes, excluding stored objects */
135 
136 Eterm am_atom_put(const char*, Sint); /* ONLY 7-bit ascii! */
137 Eterm erts_atom_put(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc);
138 int erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc);
139 void init_atom_table(void);
140 void atom_info(fmtfn_t, void *);
141 void dump_atoms(fmtfn_t, void *);
142 Uint erts_get_atom_limit(void);
143 int erts_atom_get(const char* name, Uint len, Eterm* ap, ErtsAtomEncoding enc);
144 void erts_atom_get_text_space_sizes(Uint *reserved, Uint *used);
145 #endif
146 
147