1* $Id: json.sno,v 1.6 2020-10-31 05:25:26 phil Exp $
2* A simple SNOBOL4 JSON package.
3*	(in very unstylish code)
4*=pea
5*=sect NAME
6*JSON Encoding and Decoding
7*=sect SYNOPSYS
8*=code
9*B<-INCLUDE 'json.sno'>
10*      string = B<JSON_ENCODE(>I<data>B<)>
11*      data = B<JSON_DECODE(>I<string>B<)>
12*=ecode
13*=sect DESCRIPTION
14*B<JSON_ENCODE(>I<data>B<)> accepts a subset of SNOBOL4 data types:
15*=bull B<STRING>
16*=bull B<INTEGER>
17*=bull B<REAL>
18*=bull B<ARRAY>
19*MUST be singly-dimensioned and zero-based.
20*=bull B<TABLE>
21*
22*B<JSON_DECODE(>I<json_string>B<)> accepts a proper subset of JSON
23*with the following limits and omissions:
24*=bull arrays must have at least one item.
25*=bull \u escapes can only be used for ASCII (00-7F) characters.
26*=bull B<true> and B<false> are not accepted.
27*
28*NOTE!!! JSON is defined to use UTF-8 encoding, but this is not enforced
29*by this library.  To exchange binary data with other software which
30*may enforce use of only UTF-8 sequences, the binary strings should
31*be encoded (eg with base64) to ASCII sequences.
32*=sect COMPATIBILITY
33*L<https://tools.ietf.org/html/rfc8259>
34*=sect SEE ALSO
35*B<snobol4>(1),
36*B<snobol4base64>(3)
37*=sect AUTHORS
38*Philip L. Budne
39*=cut
40-include 'utf.sno'
41
42	json.nul = CHAR(0)
43	json.bs = CHAR(8)
44	json.ht = CHAR(9)
45	json.lf = CHAR(10)
46	json.ff = CHAR(12)
47	json.cr = CHAR(13)
48	json.ctrl = SUBSTR(&ALPHABET,1,32)
49	json.ws = SPAN(' ' json.ht json.lf json.cr)
50
51	DATA("json.token(json.tokval)")
52	DATA("json.cons(json.car,json.cdr)")
53
54* hex alphabet
55	json.ha = '0123456789abcdef'
56
57**************** json.hi
58* convert hex string to integer
59* (HI available in logic module)
60
61	DEFINE("json.hi(xx)i,x")			:(e.json.hi)
62json.hi	xx = REPLACE(xx, 'ABCDEF', 'abcdef')
63	json.hi = 0
64json.hi.loop
65+	IDENT(xx)					:S(RETURN)
66	json.ha POS(0) ARB SUBSTR(xx, 1, 1) @i		:F(FRETURN)
67	json.hi = json.hi * 16 + i - 1
68	xx = SUBSTR(xx, 2)				:(json.hi.loop)
69e.json.hi
70
71**************** json.hex
72* convert 8 bit number to two hex digits (for control characters)
73
74	DEFINE("json.hex(n)")				:(e.json.hex)
75json.hex
76+	json.hex = SUBSTR(json.ha, n / 16 + 1, 1)
77+		   SUBSTR(json.ha, remdr(n,16) + 1, 1)
78+	:(RETURN)
79e.json.hex
80**************** json.descape
81* de-escape backslash escaped string
82
83* for decode:
84* single character escape codes
85	json.escapes = '\"btnfr'
86* escape sequence:
87	json.eseq = '\' ANY(json.escapes)
88* match string not containing an escape sequence:
89	json.escbreak = BREAK('\"')
90
91	json.hd = ANY('0123456789abcdefABCDEF')
92* only allow ASCII in w/ \u escapes (avoid UTF-16 decode)
93	json.hexesc = '\u00' (ANY('01234567') json.hd) . x
94	json.hexesc2 = '\U00' (('0' | '1') json.hd json.hd json.hd json.hd json.hd) . x
95
96	json.q2char = TABLE()
97	json.q2char['\b'] = json.bs
98	json.q2char['\t'] = json.ht
99	json.q2char['\n'] = json.lf
100	json.q2char['\f'] = json.ff
101	json.q2char['\r'] = json.cr
102	json.q2char['\\'] = '\'
103	json.q2char['\"'] = '"'
104	FREEZE(json.q2char)
105
106	DEFINE("json.descape(in)x")			:(e.json.descape)
107* append a marker must be something that can't appear in a quoted string
108* (bare ctrl char, or an un-escaped quote)
109json.descape
110+	in = in '"'
111json.descape.loop
112+	IDENT(in, '"')					:S(RETURN)
113	in POS(0) json.eseq . x =			:S(json.descape.esc)
114	in POS(0) json.hexesc =				:S(json.descape.hex)
115	in POS(0) json.hexesc2 =			:S(json.descape.hex2)
116	in POS(0) BREAK('\"') . x (ANY('\"') REM) . in	:S(json.descape.str)
117	:(freturn)
118json.descape.esc
119+	x = json.q2char[x]				:(json.descape.str)
120json.descape.hex2
121+	x = RUNE(json.hi(x))			:F(FRETURN)S(json.descape.str)
122json.descape.hex
123+	x = CHAR(json.hi(x))
124json.descape.str
125	json.descape = json.descape x			:(json.descape.loop)
126e.json.descape
127
128**************** json.gtok ****************
129* returns simple values as SNOBOL4 values,
130* other elements returned as JSON.TOKEN('X')
131
132	json.int = ('-' | '') SPAN('123456789') (SPAN(&DIGITS) | '')
133	json.number = json.int
134+		(('.' (SPAN(&DIGITS) | '')) | '')
135+		((('e' | 'E') json.int) | '')
136
137	json.string = '"' ARBNO(json.escbreak |
138+				json.eseq |
139+				json.hexesc |
140+				json.hexesc2) . json.gtok '"'
141
142	DEFINE("json.gtok()x")				:(e.json.gtok)
143* note x local for json.hexesc[2] patterns
144json.gtok
145+	json.subject POS(0) json.ws =			:S(json.gtok)
146	IDENT(json.subject)				:S(FRETURN)
147	json.subject POS(0) json.number . json.gtok =	:S(json.gtok.num)
148	json.subject POS(0) ANY('[],{}:') . json.gtok =	:S(json.gtok.tok)
149	json.subject POS(0) json.string =		:S(json.gtok.str)
150	json.subject POS(0) 'null' =			:S(RETURN)F(FRETURN)
151* here with string to de-escape
152json.gtok.str
153+	json.gtok = json.descape(json.gtok)		:S(RETURN)F(FRETURN)
154*
155* here with number
156json.gtok.num
157+	json.gtok = json.gtok + 0
158+	:(RETURN)
159*
160* here with a terminal syntax element
161json.gtok.tok
162+	json.gtok = json.token(json.gtok)
163+	:(RETURN)
164e.json.gtok
165**************** json.istok ****************
166* predicate: only true if tok is a JSON.TOKEN instance (not a value)
167* if val non-empty token must match
168	DEFINE("json.istok(tok,val)")			:(e.json.istok)
169json.istok
170+	IDENT(datatype(tok),.json.token)		:F(FRETURN)
171	IDENT(val)					:S(RETURN)
172	IDENT(json.tokval(tok), val)			:S(RETURN)F(FRETURN)
173e.json.istok
174**************** json.value ****************
175* inner worker, parses a json.value from json.subject
176* called from JSON_DECODE, and from self
177	DEFINE("json.value()tok,n,list")		:(e.json.value)
178json.value
179+	json.value = json.gtok()			:F(FRETURN)
180	json.istok(json.value)				:F(RETURN)
181* here with a json.token
182	json.istok(json.value, '[')			:S(json.array)
183	json.istok(json.value, '{')			:S(json.object)
184	:(FRETURN)
185****
186* here when '[' seen; return as array (MUST have at least one datum)
187json.array
188+	list =
189	n = -1
190* loop creating LIFO list of items, keeping (zero-based) count
191json.array.loop
192+	list = json.cons(json.value(),list)		:F(FRETURN)
193	n = n + 1
194	tok = json.gtok()				:F(FRETURN)
195	json.istok(tok,',')				:S(json.array.loop)
196	json.istok(tok,']')				:F(FRETURN)
197* here at end of array with reverse order list
198* create ARRAY, and fill in entries from last to first!
199	json.value = ARRAY('0:' n)
200json.array.loop2
201+	json.value[n] = json.car(list)
202	list = json.cdr(list)
203	n = n - 1
204	DIFFER(list)					:S(json.array.loop2)
205	:(RETURN)
206****
207* here when '{' seen
208json.object
209+	json.value = TABLE()
210json.object.loop
211+	n = json.gtok()					:F(FRETURN)
212* allow empty table, trailing comma
213	json.istok(n,'}')				:S(RETURN)
214*	key must be string
215	IDENT(DATATYPE(n),'STRING')			:F(FRETURN)
216	json.istok(json.gtok(),':')			:F(FRETURN)
217	json.value[n] = json.value()			:F(FRETURN)
218*	OUTPUT = n '=>' json.value[n]
219	tok = json.gtok()				:F(FRETURN)
220	json.istok(tok)					:F(FRETURN)
221	json.istok(tok, ',')				:S(json.object.loop)
222	json.istok(tok, '}')				:F(FRETURN)
223	:(RETURN)
224e.json.value
225**************** json.loads (decoder entry point)
226* enter here with subject string in json.subject
227* (establishes context for json.value, and calls it)
228	define("JSON_DECODE(json.subject)")		:(e.json.loads)
229JSON_DECODE
230+	JSON_DECODE = json.value()			:F(FRETURN)
231* XXX just check if only whitespace left in json.subject?
232* any additional tokens are error:
233	json.gtok()					:S(FRETURN)
234	:(RETURN)
235
236e.json.loads
237
238****************************************************************
239**************** encode
240
241	json.ord2q = ARRAY('0:255')
242	i = 0
243json.init.loop
244+	json.ord2q[i] = '\u00' json.hex(i)
245	i = i + 1
246	le(i, 31)					:S(json.init.loop)
247	json.ord2q[ORD(json.bs)] = '\b'
248	json.ord2q[ORD(json.ht)] = '\t'
249	json.ord2q[ORD(json.lf)] = '\n'
250	json.ord2q[ORD(json.ff)] = '\f'
251	json.ord2q[ORD(json.cr)] = '\r'
252	json.ord2q[ORD('\')] = '\\'
253	json.ord2q[ORD('"')] = '\"'
254
255	json.escaped = '\"' json.ctrl
256	json.brk.esc.p0 = POS(0) BREAK(json.escaped)
257	json.any.esc.p0 = POS(0) ANY(json.escaped)
258	json.any.esc.rem = ANY(json.escaped) REM
259
260* helper for JSON_ENCODE
261* encode string, escaping TAB, CR, LF, \, " and ctrl chars
262	define("json.escape(s)x")			:(e.json.escape)
263* appends a marker, could be any quoted byte
264json.escape
265+	s = s json.nul
266json.escape.loop
267+	IDENT(s, json.nul)				:S(json.escape.done)
268	s json.any.esc.p0 . x =				:S(json.escape.escape)
269	s json.brk.esc.p0 . x json.any.esc.rem . s	:S(json.escape.append)
270	:(FRETURN)
271json.escape.escape
272+	x = json.ord2q[ORD(x)]
273	IDENT(x)					:S(FRETURN)
274json.escape.append
275+	json.escape = json.escape x			:(json.escape.loop)
276
277json.escape.done
278+	json.escape = '"' json.escape '"'		:(RETURN)
279e.json.escape
280
281****************
282	define("JSON_ENCODE(j)d,a,i,k,v,s")		:(e.JSON_ENCODE)
283JSON_ENCODE
284+	d = DATATYPE(j)
285	IDENT(d, 'STRING')				:S(json.encode.str)
286	IDENT(d, 'INTEGER')				:S(json.encode.num)
287	IDENT(d, 'REAL')				:S(json.encode.num)
288	IDENT(d, 'TABLE')				:S(json.encode.tab)
289	IDENT(d, 'ARRAY')				:S(json.encode.arr)
290	TERMINAL = 'json.encode: illegal datatype ' d
291+	:(FRETURN)
292
293* XXX quoting!!!
294json.encode.str
295+	JSON_ENCODE = json.escape(j)			:(RETURN)
296
297json.encode.num
298+	JSON_ENCODE = CONVERT(j, 'STRING')		:(RETURN)
299
300json.encode.tab
301+	a = convert(j, 'ARRAY')				:F(json.encode.tab.mt)
302	s = '{'
303	i = 1
304json.encode.tab.loop
305+	k = a[i,1]					:F(json.encode.tab.done)
306	v = a[i,2]					:F(json.encode.tab.done)
307	JSON_ENCODE = JSON_ENCODE s json.escape(k) ':' JSON_ENCODE(v)
308+		 					:F(FRETURN)
309	i = i + 1
310	s = ','						:(json.encode.tab.loop)
311json.encode.tab.mt
312+	JSON_ENCODE = '{'
313json.encode.tab.done
314+	JSON_ENCODE = JSON_ENCODE '}'
315+	:(RETURN)
316
317+
318json.encode.arr
319+	a = PROTOTYPE(j)
320	a pos(0) break(',') ','				:S(json.encode.arr.bad)
321	a pos(0) '0:'					:F(json.encode.arr.bad)
322	i = 0
323	s = '['
324json.encode.arr.loop
325+	a = j[i]					:F(json.encode.arr.done)
326	JSON_ENCODE = JSON_ENCODE s JSON_ENCODE(a)	:F(FRETURN)
327	i = i + 1
328	s = ','						:(json.encode.arr.loop)
329
330json.encode.arr.done
331+	JSON_ENCODE = JSON_ENCODE ']'
332+	:(RETURN)
333
334json.encode.arr.bad
335+	TERMINAL = 'json.encode: bad array prototype ' a
336+	:(FRETURN)
337
338e.JSON_ENCODE
339