1 /*
2 * %CopyrightBegin%
3 *
4 * Copyright Ericsson AB 1998-2017. All Rights Reserved.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * %CopyrightEnd%
19 */
20 #include <string.h>
21 #include <limits.h>
22 #include "eidef.h"
23 #include "eiext.h"
24 #include "putget.h"
25
26
27 static int verify_ascii_atom(const char* src, int slen);
28 static int verify_utf8_atom(const char* src, int slen);
29
ei_encode_atom(char * buf,int * index,const char * p)30 int ei_encode_atom(char *buf, int *index, const char *p)
31 {
32 size_t len = strlen(p);
33
34 if (len >= MAXATOMLEN)
35 len = MAXATOMLEN - 1;
36 return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, 0);
37 }
38
ei_encode_atom_len(char * buf,int * index,const char * p,int len)39 int ei_encode_atom_len(char *buf, int *index, const char *p, int len)
40 {
41 /* This function is documented to truncate at MAXATOMLEN (256) */
42 if (len >= MAXATOMLEN)
43 len = MAXATOMLEN - 1;
44 return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, 0);
45 }
46
/*
 * Encode the NUL-terminated string 'p' as an atom.
 *
 * The length is taken with strlen() and everything else is forwarded as-is
 * to ei_encode_atom_len_as(); its return value is passed back to the caller.
 */
int ei_encode_atom_as(char *buf, int *index, const char *p,
                      erlang_char_encoding from_enc,
                      erlang_char_encoding to_enc)
{
    const size_t name_len = strlen(p);

    return ei_encode_atom_len_as(buf, index, p, name_len, from_enc, to_enc);
}
53
/*
 * Encode an atom name of 'len' bytes, given in encoding 'from_enc', at
 * offset *index of 'buf'.
 *
 *   buf       Destination buffer, or NULL to only compute the encoded size.
 *   index     In/out offset into buf; on success it is advanced by the size
 *             of the encoded atom (header plus name bytes).
 *   p         Atom name; exactly 'len' bytes are read from it.
 *   len       Length of the name in bytes; must not be negative.
 *   from_enc  ERLANG_LATIN1, ERLANG_ASCII or ERLANG_UTF8.
 *   to_enc    Ignored, see below.
 *
 * Returns 0 on success. Returns -1, leaving *index untouched, when 'len' is
 * negative, when a Latin-1/ASCII name is MAXATOMLEN bytes or longer, when
 * the name fails verification or conversion, or when 'from_enc' is not one
 * of the three encodings above.
 */
int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len,
                          erlang_char_encoding from_enc,
                          erlang_char_encoding to_enc)
{
    char *s = buf + *index;
    char *s0 = s;
    int offs;

    /* A negative length would become a huge size_t in memcpy() below. */
    if (len < 0) {
        return -1;
    }

    if (len >= MAXATOMLEN && (from_enc & (ERLANG_LATIN1|ERLANG_ASCII))) {
        return -1;
    }

    /*
     * Since OTP 20 we totally ignore 'to_enc'
     * and always encode as UTF8.
     */

    /* Header size: tag byte + one length byte; grown to two length bytes
     * below when the name may not fit in a single length byte. */
    offs = 1 + 1;
    switch (from_enc) {
    case ERLANG_LATIN1:
        /* Presumably each Latin-1 byte can expand to two UTF-8 bytes
         * (confirm against latin1_to_utf8), so the wide length field is
         * reserved from 128 source bytes on. */
        if (len >= 256/2) {
            offs++;
        }
        len = latin1_to_utf8((buf ? s+offs : NULL), p, len, MAXATOMLEN_UTF8-1, NULL);
        /* Treat a negative result as a conversion failure instead of
         * moving *index backwards and reporting success. */
        if (len < 0) {
            return -1;
        }
        break;
    case ERLANG_ASCII:
        if (verify_ascii_atom(p, len) < 0) {
            return -1;
        }
        if (buf) {
            memcpy(s+offs, p, len);
        }
        break;
    case ERLANG_UTF8:
        if (len >= 256) {
            offs++;
        }
        if (verify_utf8_atom(p, len) < 0) {
            return -1;
        }
        if (buf) {
            memcpy(s+offs, p, len);
        }
        break;
    default:
        return -1;
    }

    /* The name bytes are already in place after the header; now write the
     * header itself. put8()/put16be() are expected to advance 's' past what
     * they write, mirroring the 's += offs' of the size-only path. */
    if (buf) {
        if (offs == 2) {
            put8(s, ERL_SMALL_ATOM_UTF8_EXT);
            put8(s, len);
        }
        else {
            put8(s, ERL_ATOM_UTF8_EXT);
            put16be(s, len);
        }
    }
    else {
        s += offs;
    }
    s += len;

    *index += s-s0;

    return 0;
}
107
108 int
ei_internal_put_atom(char ** bufp,const char * p,int slen,erlang_char_encoding to_enc)109 ei_internal_put_atom(char** bufp, const char* p, int slen,
110 erlang_char_encoding to_enc)
111 {
112 int ix = 0;
113 if (ei_encode_atom_len_as(*bufp, &ix, p, slen, ERLANG_UTF8, to_enc) < 0)
114 return -1;
115 *bufp += ix;
116 return 0;
117 }
118
119
/*
 * Check that the first 'slen' bytes of 'src' are all 7-bit ASCII.
 *
 * Returns 0 when no byte has its high bit set (also for slen <= 0, where
 * nothing is inspected) and -1 as soon as such a byte is found.
 */
static int verify_ascii_atom(const char* src, int slen)
{
    int i;

    for (i = 0; i < slen; i++) {
        if (src[i] & 0x80) {
            return -1;
        }
    }
    return 0;
}
129
verify_utf8_atom(const char * src,int slen)130 static int verify_utf8_atom(const char* src, int slen)
131 {
132 int num_chars = 0;
133
134 while (slen > 0) {
135 if (++num_chars >= MAXATOMLEN) return -1;
136 if ((src[0] & 0x80) != 0) {
137 if ((src[0] & 0xE0) == 0xC0) {
138 if (slen < 2 || (src[1] & 0xC0) != 0x80) return -1;
139 src++;
140 slen--;
141 }
142 else if ((src[0] & 0xF0) == 0xE0) {
143 if (slen < 3 || (src[1] & 0xC0) != 0x80 || (src[2] & 0xC0) != 0x80) return -1;
144 src += 2;
145 slen -= 2;
146 }
147 else if ((src[0] & 0xF8) == 0xF0) {
148 if (slen < 4 || (src[1] & 0xC0) != 0x80 || (src[2] & 0xC0) != 0x80 || (src[3] & 0xC0) != 0x80) return -1;
149 src += 3;
150 slen -= 3;
151 }
152 else return -1;
153 }
154 src++;
155 slen--;
156 }
157 return 0;
158 }
159
160