1 /*
2  * %CopyrightBegin%
3  *
4  * Copyright Ericsson AB 1998-2017. All Rights Reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * %CopyrightEnd%
19  */
20 #include <string.h>
21 #include <limits.h>
22 #include "eidef.h"
23 #include "eiext.h"
24 #include "putget.h"
25 
26 
/*
 * Input validators used by ei_encode_atom_len_as() before the atom bytes
 * are copied. Both return 0 when the bytes are acceptable and -1 otherwise.
 */
static int verify_ascii_atom(const char* src, int slen);
static int verify_utf8_atom(const char* src, int slen);
29 
ei_encode_atom(char * buf,int * index,const char * p)30 int ei_encode_atom(char *buf, int *index, const char *p)
31 {
32     size_t len = strlen(p);
33 
34     if (len >= MAXATOMLEN)
35 	len = MAXATOMLEN - 1;
36     return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, 0);
37 }
38 
ei_encode_atom_len(char * buf,int * index,const char * p,int len)39 int ei_encode_atom_len(char *buf, int *index, const char *p, int len)
40 {
41     /* This function is documented to truncate at MAXATOMLEN (256) */
42     if (len >= MAXATOMLEN)
43 	len = MAXATOMLEN - 1;
44     return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, 0);
45 }
46 
/*
 * Encode the NUL-terminated string 'p' as an atom, interpreting its bytes
 * as 'from_enc'.
 *
 * The length is taken from strlen(p) and passed on as is (no truncation
 * happens here). All work is delegated to ei_encode_atom_len_as(), whose
 * result is returned unchanged.
 */
int ei_encode_atom_as(char *buf, int *index, const char *p,
                      erlang_char_encoding from_enc,
                      erlang_char_encoding to_enc)
{
    int name_len = (int)strlen(p);

    return ei_encode_atom_len_as(buf, index, p, name_len, from_enc, to_enc);
}
53 
/*
 * Encode 'len' bytes from 'p' (interpreted as 'from_enc') as an atom at
 * buf + *index, and advance *index past the encoded term.
 *
 * Layout written: a tag byte, then either a one-byte length
 * (ERL_SMALL_ATOM_UTF8_EXT) or a two-byte length written with put16be
 * (ERL_ATOM_UTF8_EXT), then the atom bytes.
 *
 * buf may be NULL. In that case nothing is written and *index is only
 * advanced by the number of bytes the encoding would occupy.
 *
 * 'to_enc' is kept for interface compatibility but is ignored.
 *
 * Returns 0 on success, -1 if:
 *   - 'len' is negative,
 *   - the source is Latin-1 or ASCII and 'len' is MAXATOMLEN or more,
 *   - 'from_enc' is not ERLANG_LATIN1, ERLANG_ASCII or ERLANG_UTF8,
 *   - the source bytes fail the ASCII / UTF-8 verification.
 * On failure *index is left untouched.
 */
int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len,
                          erlang_char_encoding from_enc,
                          erlang_char_encoding to_enc)
{
    char *s;
    int offs = 1 + 1;   /* tag byte + one-byte length; bumped to 3 below */

    /*
     * A negative length would pass the verifiers (they treat slen <= 0 as
     * "nothing to check") and then be converted to a huge size_t by
     * memcpy(), or move *index backwards. Reject it up front.
     */
    if (len < 0) {
        return -1;
    }

    if (len >= MAXATOMLEN && (from_enc & (ERLANG_LATIN1|ERLANG_ASCII))) {
        return -1;
    }

    /* Only form a write pointer when there is a buffer to write into. */
    s = buf ? buf + *index : NULL;

    /*
     * Since OTP 20 we totally ignore 'to_enc'
     * and always encode as UTF8.
     */
    switch (from_enc) {
    case ERLANG_LATIN1:
        /*
         * A Latin-1 byte may need two UTF-8 bytes, so from 128 source
         * bytes on the result may not fit a one-byte length field.
         */
        if (len >= 256/2) offs++;
        /* NOTE(review): latin1_to_utf8 is defined elsewhere; its return
         * value is used here as the converted byte count - confirm. */
        len = latin1_to_utf8((s ? s+offs : NULL), p, len, MAXATOMLEN_UTF8-1, NULL);
        break;
    case ERLANG_ASCII:
        if (verify_ascii_atom(p, len) < 0) return -1;
        if (s) memcpy(s+offs, p, len);
        break;
    case ERLANG_UTF8:
        if (len >= 256) offs++;
        if (verify_utf8_atom(p, len) < 0) return -1;
        if (s) memcpy(s+offs, p, len);
        break;
    default:
        return -1;
    }

    /* The header goes in last because the Latin-1 path only knows the
     * final length after conversion. */
    if (s) {
        if (offs == 2) {
            put8(s, ERL_SMALL_ATOM_UTF8_EXT);
            put8(s, len);
        }
        else {
            put8(s, ERL_ATOM_UTF8_EXT);
            put16be(s, len);
        }
    }

    /* Header plus payload; computed on integers so that the sizing pass
     * (buf == NULL) never does arithmetic on a null pointer. */
    *index += offs + len;

    return 0;
}
107 
108 int
ei_internal_put_atom(char ** bufp,const char * p,int slen,erlang_char_encoding to_enc)109 ei_internal_put_atom(char** bufp, const char* p, int slen,
110 		     erlang_char_encoding to_enc)
111 {
112     int ix = 0;
113     if (ei_encode_atom_len_as(*bufp, &ix, p, slen, ERLANG_UTF8, to_enc) < 0)
114 	return -1;
115     *bufp += ix;
116     return 0;
117 }
118 
119 
/*
 * Check that the first 'slen' bytes of 'src' are all 7-bit ASCII.
 *
 * Returns 0 if no byte has its high bit set (also when slen <= 0, since
 * nothing is inspected then), and -1 at the first byte that does.
 */
static int verify_ascii_atom(const char* src, int slen)
{
    int i;

    for (i = 0; i < slen; i++) {
        if (src[i] & 0x80) {
            return -1;
        }
    }
    return 0;
}
129 
verify_utf8_atom(const char * src,int slen)130 static int verify_utf8_atom(const char* src, int slen)
131 {
132     int num_chars = 0;
133 
134     while (slen > 0) {
135 	if (++num_chars >= MAXATOMLEN) return -1;
136 	if ((src[0] & 0x80) != 0) {
137 	    if ((src[0] & 0xE0) == 0xC0) {
138 		if (slen < 2 || (src[1] & 0xC0) != 0x80) return -1;
139 		src++;
140 		slen--;
141 	    }
142 	    else if ((src[0] & 0xF0) == 0xE0) {
143 		if (slen < 3 || (src[1] & 0xC0) != 0x80 || (src[2] & 0xC0) != 0x80) return -1;
144 		src += 2;
145 		slen -= 2;
146 	    }
147 	    else if ((src[0] & 0xF8) == 0xF0) {
148 		if (slen < 4 || (src[1] & 0xC0) != 0x80 || (src[2] & 0xC0) != 0x80 || (src[3] & 0xC0) != 0x80) return -1;
149 		src += 3;
150 		slen -= 3;
151 	    }
152 	    else return -1;
153 	}
154 	src++;
155 	slen--;
156     }
157     return 0;
158 }
159 
160