%% The MIT License

%% Copyright (c) 2010-2013 Alisdair Sullivan <alisdairsullivan@yahoo.ca>

%% Permission is hereby granted, free of charge, to any person obtaining a copy
%% of this software and associated documentation files (the "Software"), to deal
%% in the Software without restriction, including without limitation the rights
%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
%% copies of the Software, and to permit persons to whom the Software is
%% furnished to do so, subject to the following conditions:

%% The above copyright notice and this permission notice shall be included in
%% all copies or substantial portions of the Software.

%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
%% THE SOFTWARE.


-module(jsx_parser).

-export([parser/3, resume/5]).
-export([init/1, handle_event/2]).


-spec parser(Handler::module(), State::any(), Config::list()) -> jsx:parser().

%% returns a fun that consumes a list of tokens. nothing is evaluated until the
%% fun is applied: each application initializes the handler with
%% `Handler:init(State)`, parses `Config` with `jsx_config:parse_config/1` and
%% starts parsing in the `value` state with an empty stack
parser(Handler, State, Config) ->
    fun(Tokens) ->
        InitialHandler = {Handler, Handler:init(State)},
        ParsedConfig = jsx_config:parse_config(Config),
        value(Tokens, InitialHandler, [], ParsedConfig)
    end.


%% resume allows continuation from interrupted decoding without having to explicitly export
%% all states
-spec resume(
        Rest::jsx:token(),
        State::atom(),
        Handler::{atom(), any()},
        Stack::list(atom()),
        Config::jsx:config()
    ) -> jsx:parser() | {incomplete, jsx:parser()}.
%% re-enters the parser in the named state. `State` must be one of `value`,
%% `object`, `array`, `maybe_done` or `done`; any other term fails the `case`
resume(Rest, State, Handler, Stack, Config) ->
    case State of
        value ->
            value(Rest, Handler, Stack, Config);
        object ->
            object(Rest, Handler, Stack, Config);
        array ->
            array(Rest, Handler, Stack, Config);
        maybe_done ->
            maybe_done(Rest, Handler, Stack, Config);
        done ->
            done(Rest, Handler, Stack, Config)
    end.


-include("jsx_config.hrl").


%% error, incomplete and event macros
%% `?error` raises `badarg` unless the config carries an `error_handler` fun, in
%% which case that fun is called with the offending terms, the parser's internal
%% state and the config as a list, and its result becomes the parser's result
-ifndef(error).
-define(error(State, Terms, Handler, Stack, Config),
    case Config#config.error_handler of
        false -> erlang:error(badarg);
        F -> F(Terms, {parser, State, Handler, Stack}, jsx_config:config_to_list(Config))
    end
).
-endif.


%% called when the parser runs out of tokens in `State`
%%   * `stream=false`: running out of tokens is an error
%%   * no `incomplete_handler`: return `{incomplete, Fun}`. applying `Fun` to
%%     `end_stream` or `end_json` resumes with `[end_json]` and treats a further
%%     `incomplete` as an error; applying it to anything else resumes with
%%     that term as the next input
%%   * otherwise: delegate to the configured `incomplete_handler` fun
incomplete(State, Handler, Stack, #config{stream=false} = Config) ->
    ?error(State, [], Handler, Stack, Config);
incomplete(State, Handler, Stack, #config{incomplete_handler=false} = Config) ->
    Continue = fun
        (End) when End == end_stream; End == end_json ->
            case resume([end_json], State, Handler, Stack, Config) of
                {incomplete, _} -> ?error(State, [], Handler, Stack, Config);
                Result -> Result
            end;
        (Tokens) ->
            resume(Tokens, State, Handler, Stack, Config)
    end,
    {incomplete, Continue};
incomplete(State, Handler, Stack, #config{incomplete_handler=F} = Config) ->
    F([], {parser, State, Handler, Stack}, jsx_config:config_to_list(Config)).


%% forwards `Event` to the callback module and pairs the module with the state
%% it returns. the config argument is unused
handle_event(Event, {Handler, State}, _Config) ->
    NewState = Handler:handle_event(Event, State),
    {Handler, NewState}.
%% `value` state: consumes exactly one json value from the head of the token
%% list, emits the matching event(s) to the handler and moves on to
%% `maybe_done` (scalars) or to `object`/`array` (containers, after pushing the
%% container type onto the stack)
%%
%% besides plain terms (binaries, numbers, `true`/`false`/`null`, other atoms)
%% the following wrapped forms are accepted and re-dispatched as plain tokens:
%% `{literal, _}`, `{integer, _}`, `{float, _}`, `{number, _}`, `{string, _}`,
%% `{raw, Json}`, erlang timestamps and `{Date, Time}` datetime tuples
%%
%% anything else is reported through `?error` with state `value`
value([String|Tokens], Handler, Stack, Config) when is_binary(String) ->
    %% only `badarg` raised while cleaning the string is turned into a parser
    %% error; the `of` branch is deliberately outside the protected section
    try clean_string(String, Config) of Clean ->
        maybe_done(Tokens, handle_event({string, Clean}, Handler, Config), Stack, Config)
    catch error:badarg ->
        ?error(value, [{string, String}|Tokens], Handler, Stack, Config)
    end;
value([true|Tokens], Handler, Stack, Config) ->
    maybe_done(Tokens, handle_event({literal, true}, Handler, Config), Stack, Config);
value([false|Tokens], Handler, Stack, Config) ->
    maybe_done(Tokens, handle_event({literal, false}, Handler, Config), Stack, Config);
value([null|Tokens], Handler, Stack, Config) ->
    maybe_done(Tokens, handle_event({literal, null}, Handler, Config), Stack, Config);
value([start_object|Tokens], Handler, Stack, Config) ->
    object(Tokens, handle_event(start_object, Handler, Config), [object|Stack], Config);
value([start_array|Tokens], Handler, Stack, Config) ->
    array(Tokens, handle_event(start_array, Handler, Config), [array|Stack], Config);
value([Number|Tokens], Handler, Stack, Config) when is_integer(Number) ->
    maybe_done(Tokens, handle_event({integer, Number}, Handler, Config), Stack, Config);
value([Number|Tokens], Handler, Stack, Config) when is_float(Number) ->
    maybe_done(Tokens, handle_event({float, Number}, Handler, Config), Stack, Config);
%% raw json is decoded with this module as the handler (see `init/1` and
%% `handle_event/2`), which yields a token list that is spliced in place
value([{raw, Raw}|Tokens], Handler, Stack, Config) when is_binary(Raw) ->
    value((jsx:decoder(?MODULE, [], []))(Raw) ++ Tokens, Handler, Stack, Config);
%% erlang timestamp. the integer guards keep arbitrary 3-tuples out of
%% `calendar:now_to_datetime/1`, where they would crash with `badarith` and
%% bypass the configured error handler; such tuples now reach the bad token
%% clause below. the resulting datetime (integer seconds) is formatted by the
%% next clause
value([{MegaSecs, Secs, MicroSecs}=Timestamp|Tokens], Handler, Stack, Config)
when is_integer(MegaSecs), is_integer(Secs), is_integer(MicroSecs) ->
    value([calendar:now_to_datetime(Timestamp)|Tokens], Handler, Stack, Config);
%% datetime with integer seconds, rendered as `YYYY-MM-DDThh:mm:ssZ`
value([{{Year, Month, Day}, {Hour, Min, Sec}}|Tokens], Handler, Stack, Config)
when is_integer(Year), is_integer(Month), is_integer(Day), is_integer(Hour), is_integer(Min), is_integer(Sec) ->
    value([{string, unicode:characters_to_binary(io_lib:format(
                "~4.10.0B-~2.10.0B-~2.10.0BT~2.10.0B:~2.10.0B:~2.10.0BZ",
                [Year, Month, Day, Hour, Min, Sec]
            ))}|Tokens],
        Handler,
        Stack,
        Config
    );
%% datetime with float seconds, rendered with six fractional digits
value([{{Year, Month, Day}, {Hour, Min, Sec}}|Tokens], Handler, Stack, Config)
when is_integer(Year), is_integer(Month), is_integer(Day), is_integer(Hour), is_integer(Min), is_float(Sec) ->
    value([{string, unicode:characters_to_binary(io_lib:format(
                "~4.10.0B-~2.10.0B-~2.10.0BT~2.10.0B:~2.10.0B:~9.6.0fZ",
                [Year, Month, Day, Hour, Min, Sec]
            ))}|Tokens],
        Handler,
        Stack,
        Config
    );
value([{literal, Value}|Tokens], Handler, Stack, Config)
when Value == true; Value == false; Value == null ->
    value([Value|Tokens], Handler, Stack, Config);
value([{integer, Value}|Tokens], Handler, Stack, Config)
when is_integer(Value) ->
    value([Value|Tokens], Handler, Stack, Config);
value([{float, Value}|Tokens], Handler, Stack, Config)
when is_float(Value) ->
    value([Value|Tokens], Handler, Stack, Config);
%% NOTE(review): an atom is re-dispatched as a bare token, so `{string, true}`
%% is emitted as `{literal, true}` rather than as a string -- confirm this is
%% intended
value([{string, Value}|Tokens], Handler, Stack, Config)
when is_binary(Value); is_atom(Value) ->
    value([Value|Tokens], Handler, Stack, Config);
value([{number, Value}|Tokens], Handler, Stack, Config)
when is_float(Value); is_integer(Value) ->
    value([Value|Tokens], Handler, Stack, Config);
%% any atom not matched above is treated as a string
value([String|Tokens], Handler, Stack, Config) when is_atom(String) ->
    value([{string, atom_to_binary(String, utf8)}|Tokens], Handler, Stack, Config);
value([], Handler, Stack, Config) ->
    incomplete(value, Handler, Stack, Config);
value(BadTokens, Handler, Stack, Config) when is_list(BadTokens) ->
    ?error(value, BadTokens, Handler, Stack, Config);
%% a single token passed outside of a list
value(Token, Handler, Stack, Config) ->
    value([Token], Handler, Stack, Config).
%% `object` state: expects either `end_object` or a key. keys may be atoms,
%% binaries or integers, bare or wrapped as `{key, Key}`; they are converted to
%% binaries with `fix_key/1` and cleaned like any other string before the
%% `{key, K}` event is emitted and parsing continues in the `value` state
object([end_object|Tokens], Handler, [object|Stack], Config) ->
    maybe_done(Tokens, handle_event(end_object, Handler, Config), Stack, Config);
object([{key, Key}|Tokens], Handler, Stack, Config)
when is_atom(Key); is_binary(Key); is_integer(Key) ->
    object([Key|Tokens], Handler, Stack, Config);
object([Key|Tokens], Handler, [object|_]=Stack, Config)
when is_atom(Key); is_binary(Key); is_integer(Key) ->
    try clean_string(fix_key(Key), Config)
    of K ->
        value(
            Tokens,
            handle_event({key, K}, Handler, Config),
            Stack,
            Config
        )
    catch error:badarg ->
        %% report the full stack, including the current `object` frame, so
        %% that the `{parser, object, Handler, Stack}` term handed to an error
        %% handler describes a state `resume/5` can actually re-enter
        ?error(object, [{string, Key}|Tokens], Handler, Stack, Config)
    end;
object([], Handler, Stack, Config) ->
    incomplete(object, Handler, Stack, Config);
%% a token list that none of the clauses above can consume is an error. without
%% this clause the catch-all below would wrap the list in another list and
%% recurse forever
object(BadTokens, Handler, Stack, Config) when is_list(BadTokens) ->
    ?error(object, BadTokens, Handler, Stack, Config);
%% a single token passed outside of a list
object(Token, Handler, Stack, Config) ->
    object([Token], Handler, Stack, Config).


%% `array` state: `end_array` closes the array on top of the stack, any other
%% token list is handed to the `value` state
array([end_array|Tokens], Handler, [array|Stack], Config) ->
    maybe_done(Tokens, handle_event(end_array, Handler, Config), Stack, Config);
array([], Handler, Stack, Config) ->
    incomplete(array, Handler, Stack, Config);
array(Tokens, Handler, Stack, Config) when is_list(Tokens) ->
    value(Tokens, Handler, Stack, Config);
%% a single token passed outside of a list
array(Token, Handler, Stack, Config) ->
    array([Token], Handler, Stack, Config).


%% `maybe_done` state: entered after every complete value. with an empty stack
%% only `end_json` may follow; otherwise parsing continues in the state named
%% by the top of the stack
maybe_done([end_json], Handler, [], Config) ->
    done([end_json], Handler, [], Config);
maybe_done(Tokens, Handler, [object|_] = Stack, Config) when is_list(Tokens) ->
    object(Tokens, Handler, Stack, Config);
maybe_done(Tokens, Handler, [array|_] = Stack, Config) when is_list(Tokens) ->
    array(Tokens, Handler, Stack, Config);
maybe_done([], Handler, Stack, Config) ->
    incomplete(maybe_done, Handler, Stack, Config);
maybe_done(BadTokens, Handler, Stack, Config) when is_list(BadTokens) ->
    ?error(maybe_done, BadTokens, Handler, Stack, Config);
%% a single token passed outside of a list
maybe_done(Token, Handler, Stack, Config) ->
    maybe_done([Token], Handler, Stack, Config).
%% `done` state: the top level value is complete and the stack is empty
%%   * no tokens left while streaming: report `incomplete`
%%   * `[end_json]` or no tokens: emit `end_json` and return the handler's
%%     state (the handler module itself is dropped from the result)
%%   * any other token list is an error
done([], Handler, [], Config=#config{stream=true}) ->
    incomplete(done, Handler, [], Config);
done(Tokens, Handler, [], Config) when Tokens == [end_json]; Tokens == [] ->
    {_, State} = handle_event(end_json, Handler, Config),
    State;
done(BadTokens, Handler, Stack, Config) when is_list(BadTokens) ->
    ?error(done, BadTokens, Handler, Stack, Config);
%% a single token passed outside of a list
done(Token, Handler, Stack, Config) ->
    done([Token], Handler, Stack, Config).


%% converts an object key to a binary: atoms are encoded as utf8, integers are
%% rendered in decimal and binaries are returned as they are
fix_key(Key) when is_atom(Key) -> atom_to_binary(Key, utf8);
fix_key(Key) when is_integer(Key) -> list_to_binary(integer_to_list(Key));
fix_key(Key) when is_binary(Key) -> Key.


%% with `dirty_strings` set the binary is returned untouched, otherwise it is
%% passed through `clean/3`
clean_string(Bin, #config{dirty_strings=true}) -> Bin;
clean_string(Bin, Config) -> clean(Bin, [], Config).


%% walks the binary and builds an iolist in `Acc` which is flattened to a
%% binary once the input is exhausted
%%   * control characters (0-31), `"` (34), `/` (47) and `\` (92) are handed to
%%     `maybe_replace/2` one byte at a time
%%   * with `uescape` set, codepoints of 16#80 and above are replaced by their
%%     `\uXXXX` escape sequences
%%   * u+2028 and u+2029 are handed to `maybe_replace/2`
%%   * runs of other well formed utf8 are copied in one piece (`start_count/3`)
%%   * surrogates and malformed sequences are handed to `maybe_replace/2` as
%%     the atoms `surrogate` and `badutf`
%% clause order is significant: the `uescape` clause has to precede the
%% u+2028/u+2029 clauses and every valid utf8 clause has to precede the clauses
%% that deal with invalid bytes
%% unroll the control characters
clean(<<0, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(0, Config)], Config);
clean(<<1, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(1, Config)], Config);
clean(<<2, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(2, Config)], Config);
clean(<<3, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(3, Config)], Config);
clean(<<4, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(4, Config)], Config);
clean(<<5, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(5, Config)], Config);
clean(<<6, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(6, Config)], Config);
clean(<<7, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(7, Config)], Config);
clean(<<8, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(8, Config)], Config);
clean(<<9, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(9, Config)], Config);
clean(<<10, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(10, Config)], Config);
clean(<<11, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(11, Config)], Config);
clean(<<12, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(12, Config)], Config);
clean(<<13, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(13, Config)], Config);
clean(<<14, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(14, Config)], Config);
clean(<<15, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(15, Config)], Config);
clean(<<16, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(16, Config)], Config);
clean(<<17, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(17, Config)], Config);
clean(<<18, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(18, Config)], Config);
clean(<<19, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(19, Config)], Config);
clean(<<20, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(20, Config)], Config);
clean(<<21, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(21, Config)], Config);
clean(<<22, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(22, Config)], Config);
clean(<<23, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(23, Config)], Config);
clean(<<24, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(24, Config)], Config);
clean(<<25, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(25, Config)], Config);
clean(<<26, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(26, Config)], Config);
clean(<<27, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(27, Config)], Config);
clean(<<28, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(28, Config)], Config);
clean(<<29, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(29, Config)], Config);
clean(<<30, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(30, Config)], Config);
clean(<<31, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(31, Config)], Config);
clean(<<34, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(34, Config)], Config);
clean(<<47, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(47, Config)], Config);
clean(<<92, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(92, Config)], Config);
clean(<<X/utf8, Rest/binary>> = Bin, Acc, Config=#config{uescape=true}) ->
    case X of
        X when X < 16#80 -> start_count(Bin, Acc, Config);
        _ -> clean(Rest, [Acc, json_escape_sequence(X)], Config)
    end;
%% u+2028
clean(<<226, 128, 168, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(16#2028, Config)], Config);
%% u+2029
clean(<<226, 128, 169, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(16#2029, Config)], Config);
clean(<<_/utf8, _/binary>> = Bin, Acc, Config) -> start_count(Bin, Acc, Config);
%% surrogates
clean(<<237, X, _, Rest/binary>>, Acc, Config) when X >= 160 ->
    clean(Rest, [Acc, maybe_replace(surrogate, Config)], Config);
%% overlong encodings and missing continuations of a 2 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 192, X =< 223 ->
    clean(strip_continuations(Rest, 1), [Acc, maybe_replace(badutf, Config)], Config);
%% overlong encodings and missing continuations of a 3 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 224, X =< 239 ->
    clean(strip_continuations(Rest, 2), [Acc, maybe_replace(badutf, Config)], Config);
%% overlong encodings and missing continuations of a 4 byte sequence
clean(<<X, Rest/binary>>, Acc, Config) when X >= 240, X =< 247 ->
    clean(strip_continuations(Rest, 3), [Acc, maybe_replace(badutf, Config)], Config);
%% any other byte is invalid on its own
clean(<<_, Rest/binary>>, Acc, Config) ->
    clean(Rest, [Acc, maybe_replace(badutf, Config)], Config);
clean(<<>>, Acc, _) -> iolist_to_binary(Acc).


%% copies the longest prefix of `Bin` that needs no replacement into the
%% accumulator in a single step and resumes cleaning after it
start_count(Bin, Acc, Config) ->
    Size = count(Bin, 0, Config),
    <<Clean:Size/binary, Rest/binary>> = Bin,
    clean(Rest, [Acc, Clean], Config).


%% returns the size in bytes of the prefix of the binary that can be copied
%% verbatim. counting stops at control characters, `"`, `/`, `\`, u+2028,
%% u+2029, any byte that does not start a well formed utf8 sequence, the end of
%% the binary and, with `uescape` set, the first non ascii codepoint
%% again, unrolling ascii makes a huge difference. sadly
count(<<0, _/binary>>, N, _) -> N;
count(<<1, _/binary>>, N, _) -> N;
count(<<2, _/binary>>, N, _) -> N;
count(<<3, _/binary>>, N, _) -> N;
count(<<4, _/binary>>, N, _) -> N;
count(<<5, _/binary>>, N, _) -> N;
count(<<6, _/binary>>, N, _) -> N;
count(<<7, _/binary>>, N, _) -> N;
count(<<8, _/binary>>, N, _) -> N;
count(<<9, _/binary>>, N, _) -> N;
count(<<10, _/binary>>, N, _) -> N;
count(<<11, _/binary>>, N, _) -> N;
count(<<12, _/binary>>, N, _) -> N;
count(<<13, _/binary>>, N, _) -> N;
count(<<14, _/binary>>, N, _) -> N;
count(<<15, _/binary>>, N, _) -> N;
count(<<16, _/binary>>, N, _) -> N;
count(<<17, _/binary>>, N, _) -> N;
count(<<18, _/binary>>, N, _) -> N;
count(<<19, _/binary>>, N, _) -> N;
count(<<20, _/binary>>, N, _) -> N;
count(<<21, _/binary>>, N, _) -> N;
count(<<22, _/binary>>, N, _) -> N;
count(<<23, _/binary>>, N, _) -> N;
count(<<24, _/binary>>, N, _) -> N;
count(<<25, _/binary>>, N, _) -> N;
count(<<26, _/binary>>, N, _) -> N;
count(<<27, _/binary>>, N, _) -> N;
count(<<28, _/binary>>, N, _) -> N;
count(<<29, _/binary>>, N, _) -> N;
count(<<30, _/binary>>, N, _) -> N;
count(<<31, _/binary>>, N, _) -> N;
count(<<32, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<33, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<34, _/binary>>, N, _) -> N;
count(<<35, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<36, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<37, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<38, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<39, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<40, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<41, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<42, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<43, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<44, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<45, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<46, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<47, _/binary>>, N, _) -> N;
count(<<48, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<49, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<50, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<51, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<52, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<53, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<54, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<55, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<56, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<57, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<58, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<59, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<60, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<61, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<62, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<63, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<64, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<65, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<66, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<67, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<68, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<69, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<70, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<71, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<72, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<73, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<74, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<75, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<76, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<77, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<78, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<79, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<80, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<81, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<82, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<83, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<84, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<85, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<86, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<87, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<88, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<89, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<90, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<91, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<92, _/binary>>, N, _) -> N;
count(<<93, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<94, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<95, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<96, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<97, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<98, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<99, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<100, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<101, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<102, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<103, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<104, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<105, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<106, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<107, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<108, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<109, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<110, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<111, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<112, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<113, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<114, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<115, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<116, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<117, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<118, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<119, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<120, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<121, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<122, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<123, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<124, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<125, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<126, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
count(<<127, Rest/binary>>, N, Config) ->
    count(Rest, N + 1, Config);
%% with `uescape` every non ascii codepoint is escaped by `clean/3`
count(<<_/utf8, _/binary>>, N, #config{uescape=true}) -> N;
%% advance by the encoded size of the codepoint: 2, 3 or 4 bytes
count(<<X/utf8, Rest/binary>>, N, Config) ->
    case X of
        X when X < 16#800 -> count(Rest, N + 2, Config);
        16#2028 -> N;
        16#2029 -> N;
        X when X < 16#10000 -> count(Rest, N + 3, Config);
        _ -> count(Rest, N + 4, Config)
    end;
%% a byte that does not start a well formed utf8 sequence
count(<<_, _/binary>>, N, _) -> N;
count(<<>>, N, _) -> N.


%% drops up to `N` leading continuation bytes (128-191) from the binary and
%% stops early at the first byte that is not a continuation byte
strip_continuations(Bin, 0) -> Bin;
strip_continuations(<<X, Rest/binary>>, N) when X >= 128, X =< 191 ->
    strip_continuations(Rest, N - 1);
%% not a continuation byte
strip_continuations(Bin, _) -> Bin.
%% maps a codepoint, or one of the atoms `surrogate` and `badutf`, to the bytes
%% that should appear in the cleaned string
%%   * with `escaped_strings` set, the characters that have a short json escape
%%     are replaced by it, the remaining control characters and u+2028/u+2029
%%     by `\uXXXX` sequences. `/` is only escaped when
%%     `escaped_forward_slashes` is set and u+2028/u+2029 are left alone when
%%     `unescaped_jsonp` is set
%%   * with `strict_utf8` set, `surrogate` and `badutf` raise `badarg`,
%%     otherwise they become the replacement character u+fffd
%%   * everything else is encoded as utf8 unchanged
maybe_replace($\b, #config{escaped_strings=true}) -> <<"\\b">>;
maybe_replace($\t, #config{escaped_strings=true}) -> <<"\\t">>;
maybe_replace($\n, #config{escaped_strings=true}) -> <<"\\n">>;
maybe_replace($\f, #config{escaped_strings=true}) -> <<"\\f">>;
maybe_replace($\r, #config{escaped_strings=true}) -> <<"\\r">>;
maybe_replace($\", #config{escaped_strings=true}) -> <<"\\\"">>;
maybe_replace($/, #config{escaped_strings=true} = Config) ->
    case Config#config.escaped_forward_slashes of
        true -> <<"\\/">>;
        false -> <<"/">>
    end;
maybe_replace($\\, #config{escaped_strings=true}) -> <<"\\\\">>;
maybe_replace(Control, #config{escaped_strings=true}) when Control < 32 ->
    json_escape_sequence(Control);
maybe_replace(Separator, #config{escaped_strings=true} = Config)
when Separator == 16#2028; Separator == 16#2029 ->
    case Config#config.unescaped_jsonp of
        true -> <<Separator/utf8>>;
        false -> json_escape_sequence(Separator)
    end;
maybe_replace(Atom, #config{strict_utf8=true}) when is_atom(Atom) ->
    erlang:error(badarg);
maybe_replace(surrogate, _Config) ->
    <<16#fffd/utf8>>;
maybe_replace(badutf, _Config) ->
    <<16#fffd/utf8>>;
maybe_replace(Codepoint, _Config) ->
    <<Codepoint/utf8>>.


%% convert a codepoint to its \uXXXX equiv. codepoints of 16#10000 and above
%% are returned as a list holding the escapes of the high and low surrogate
json_escape_sequence(X) when X < 65536 ->
    <<N1:4, N2:4, N3:4, N4:4>> = <<X:16>>,
    <<"\\u", (to_hex(N1)), (to_hex(N2)), (to_hex(N3)), (to_hex(N4))>>;
json_escape_sequence(X) ->
    <<High:10, Low:10>> = <<(X - 16#10000):20>>,
    [json_escape_sequence(High + 16#d800), json_escape_sequence(Low + 16#dc00)].


%% 10 to 15 map to the lowercase letters `a` to `f`, anything else is offset
%% from ascii "0" ("1" is [49], "2" is [50], etc...)
to_hex(X) when is_integer(X), X >= 10, X =< 15 -> X - 10 + $a;
to_hex(X) -> X + $0.


%% for raw input
-spec init(proplists:proplist()) -> list().

%% the accumulator for raw input starts out as an empty list of events
init([]) -> [].


-spec handle_event(Event::any(), Acc::list()) -> list().
%% events are pushed onto the front of the accumulator and put back into
%% arrival order when `end_json` is seen
handle_event(end_json, State) -> lists:reverse(State);
handle_event(Event, State) -> [Event | State].



-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").


%% runs the token list through the parser with the `jsx` module as handler
parse(Events, Config) ->
    ParsedConfig = jsx_config:parse_config(Config),
    value(Events, {jsx, []}, [], ParsedConfig).


error_test_() ->
    [
        {"value error", ?_assertError(
            badarg,
            parse([self()], [])
        )},
        {"maybe_done error", ?_assertError(
            badarg,
            parse([start_array, end_array, start_array, end_json], [])
        )},
        {"done error", ?_assertError(
            badarg,
            parse([{string, <<"">>}, {literal, true}, end_json], [])
        )},
        {"string error", ?_assertError(
            badarg,
            parse([{string, <<237, 160, 128>>}, end_json], [strict])
        )}
    ].


custom_error_handler_test_() ->
    %% report the state the parser failed in and the tokens it failed on
    Report = fun(Rest, {_, State, _, _}, _) -> {State, Rest} end,
    Config = [{error_handler, Report}],
    [
        {"value error", ?_assertEqual(
            {value, [self()]},
            parse([self()], Config)
        )},
        {"maybe_done error", ?_assertEqual(
            {maybe_done, [start_array, end_json]},
            parse([start_array, end_array, start_array, end_json], Config)
        )},
        {"done error", ?_assertEqual(
            {maybe_done, [{literal, true}, end_json]},
            parse([{string, <<"">>}, {literal, true}, end_json], Config)
        )},
        {"string error", ?_assertEqual(
            {value, [{string, <<237, 160, 128>>}, end_json]},
            parse([{string, <<237, 160, 128>>}, end_json], Config ++ [strict])
        )}
    ].


incomplete_test_() ->
    %% without `stream` every truncated token list has to raise `badarg`
    [
        {"incomplete value", ?_assertError(badarg, parse([], []))},
        {"incomplete object", ?_assertError(badarg, parse([start_object], []))},
        {"incomplete array", ?_assertError(badarg, parse([start_array], []))},
        {"incomplete maybe_done", ?_assertError(badarg, parse([start_array, end_array], []))}
    ].


custom_incomplete_handler_test_() ->
    Raise = fun(_, _, _) -> erlang:error(badarg) end,
    [
        {"custom incomplete handler", ?_assertError(
            badarg,
            parse([], [{incomplete_handler, Raise}])
        )}
    ].
raw_test_() ->
    %% parse with this module as the handler so the result is the event list
    Parse = fun(Events, Config) ->
        Parser = parser(?MODULE, [], Config),
        Parser(Events ++ [end_json])
    end,
    [
        {"raw empty list", ?_assertEqual(
            [start_array, end_array],
            Parse([{raw, <<"[]">>}], [])
        )},
        {"raw empty object", ?_assertEqual(
            [start_object, end_object],
            Parse([{raw, <<"{}">>}], [])
        )},
        {"raw chunk inside stream", ?_assertEqual(
            [start_object, {key, <<"key">>}, start_array, {literal, true}, end_array, end_object],
            Parse([start_object, {key, <<"key">>}, {raw, <<"[true]">>}, end_object], [])
        )}
    ].


%% erlang refuses to encode certain codepoints, so fake them
%% the utf8 byte layout is assembled by hand, with no validity check on `N`
to_fake_utf8(N) when N < 16#0080 -> <<N:8>>;
to_fake_utf8(N) when N < 16#0800 ->
    <<0:5, Hi:5, Lo:6>> = <<N:16>>,
    <<2#110:3, Hi:5, 2#10:2, Lo:6>>;
to_fake_utf8(N) when N < 16#10000 ->
    <<Hi:4, Mid:6, Lo:6>> = <<N:16>>,
    <<2#1110:4, Hi:4, 2#10:2, Mid:6, 2#10:2, Lo:6>>;
to_fake_utf8(N) ->
    <<0:3, Top:3, Hi:6, Mid:6, Lo:6>> = <<N:24>>,
    <<2#11110:5, Top:3, 2#10:2, Hi:6, 2#10:2, Mid:6, 2#10:2, Lo:6>>.


%% every codepoint up to u+ffff except the control characters, `"`, `/`, `\`,
%% u+2028, u+2029 and the surrogates
codepoints() ->
    Ranges = [
        [32, 33],
        lists:seq(35, 46),
        lists:seq(48, 91),
        lists:seq(93, 16#2027),
        lists:seq(16#202a, 16#d7ff),
        lists:seq(16#e000, 16#ffff)
    ],
    unicode:characters_to_binary(lists:append(Ranges)).


%% all of plane 1 plus the first codepoint of each of the planes 2 to 16
extended_codepoints() ->
    PlaneOne = lists:seq(16#10000, 16#1ffff),
    PlaneStarts = lists:seq(16#20000, 16#100000, 16#10000),
    unicode:characters_to_binary(PlaneOne ++ PlaneStarts).


%% hand encoded utf8 for every surrogate codepoint
surrogates() -> lists:map(fun to_fake_utf8/1, lists:seq(16#d800, 16#dfff)).


%% cleans with `strict_utf8` and turns the resulting `badarg` into a value
clean_string_helper(String) ->
    try
        clean_string(String, #config{strict_utf8=true})
    catch
        error:badarg -> {error, badarg}
    end.
%% clean_string/2 must pass the sample codepoints through untouched, with or
%% without escaped_strings; surrogates are an error under strict_utf8 and
%% become u+fffd otherwise
clean_string_test_() ->
    Default = #config{},
    Escaping = #config{escaped_strings=true},
    [
        {"clean codepoints", ?_assertEqual(
            codepoints(),
            clean_string(codepoints(), Default)
        )},
        {"clean extended codepoints", ?_assertEqual(
            extended_codepoints(),
            clean_string(extended_codepoints(), Default)
        )},
        {"escape path codepoints", ?_assertEqual(
            codepoints(),
            clean_string(codepoints(), Escaping)
        )},
        {"escape path extended codepoints", ?_assertEqual(
            extended_codepoints(),
            clean_string(extended_codepoints(), Escaping)
        )},
        {"error surrogates", ?_assertEqual(
            lists:duplicate(length(surrogates()), {error, badarg}),
            [clean_string_helper(Surrogate) || Surrogate <- surrogates()]
        )},
        {"clean surrogates", ?_assertEqual(
            lists:duplicate(length(surrogates()), <<16#fffd/utf8>>),
            [clean_string(Surrogate, Default) || Surrogate <- surrogates()]
        )}
    ].


%% table driven: each row is {Title, Expected, Codepoint, Config} and asserts
%% that cleaning the utf8 encoding of Codepoint under Config yields Expected
escape_test_() ->
    Escaped = #config{escaped_strings=true},
    Jsonp = Escaped#config{unescaped_jsonp=true},
    Named = [
        {"maybe_escape backspace", <<"\\b">>, 16#0008, Escaped},
        {"don't escape backspace", <<"\b">>, 16#0008, #config{}},
        {"maybe_escape tab", <<"\\t">>, 16#0009, Escaped},
        {"maybe_escape newline", <<"\\n">>, 16#000a, Escaped},
        {"maybe_escape formfeed", <<"\\f">>, 16#000c, Escaped},
        {"maybe_escape carriage return", <<"\\r">>, 16#000d, Escaped},
        {"maybe_escape quote", <<"\\\"">>, 16#0022, Escaped},
        {"maybe_escape forward slash", <<"\\/">>, 16#002f,
            Escaped#config{escaped_forward_slashes=true}},
        {"do not maybe_escape forward slash", <<"/">>, 16#002f, Escaped},
        {"maybe_escape backslash", <<"\\\\">>, 16#005c, Escaped},
        {"maybe_escape jsonp (u2028)", <<"\\u2028">>, 16#2028, Escaped},
        {"do not maybe_escape jsonp (u2028)", <<16#2028/utf8>>, 16#2028, Jsonp},
        {"maybe_escape jsonp (u2029)", <<"\\u2029">>, 16#2029, Escaped},
        {"do not maybe_escape jsonp (u2029)", <<16#2029/utf8>>, 16#2029, Jsonp}
    ],
    %% control characters without a short escape are expected as \u00xx
    %% (four lowercase hex digits)
    Controls = lists:seq(16#0000, 16#0007) ++ [16#000b] ++ lists:seq(16#000e, 16#001f),
    Generated = [
        {
            lists:flatten(io_lib:format("maybe_escape u~4.16.0b", [Control])),
            iolist_to_binary(io_lib:format("\\u~4.16.0b", [Control])),
            Control,
            Escaped
        }
        || Control <- Controls
    ],
    [
        {Title, ?_assertEqual(
            Expected,
            clean_string(<<Codepoint/utf8>>, Config)
        )}
        || {Title, Expected, Codepoint, Config} <- Named ++ Generated
    ].


%% table driven: each row is {Title, Input, Replaced}. every row yields two
%% tests: with strict_utf8 the input must raise badarg, and with the default
%% config it must come back as Replaced
bad_utf8_test_() ->
    Replacement = <<16#fffd/utf8>>,
    AllContinuations = lists:seq(16#0080, 16#00bf),
    Cases = [
        {"orphan continuation byte u+0080", <<16#0080>>, Replacement},
        {"orphan continuation byte u+00bf", <<16#00bf>>, Replacement}
    ] ++ [
        {
            integer_to_list(Count) ++ " continuation bytes",
            binary:copy(<<16#0080>>, Count),
            binary:copy(Replacement, Count)
        }
        || Count <- lists:seq(2, 6)
    ] ++ [
        {"all continuation bytes",
            list_to_binary(AllContinuations),
            binary:copy(Replacement, length(AllContinuations))},
        {"lonely start byte", <<16#00c0>>, Replacement},
        {"lonely start bytes (2 byte)",
            <<16#00c0, 32, 16#00df>>,
            <<16#fffd/utf8, 32, 16#fffd/utf8>>},
        {"lonely start bytes (3 byte)",
            <<16#00e0, 32, 16#00ef>>,
            <<16#fffd/utf8, 32, 16#fffd/utf8>>},
        {"lonely start bytes (4 byte)",
            <<16#00f0, 32, 16#00f7>>,
            <<16#fffd/utf8, 32, 16#fffd/utf8>>},
        {"missing continuation byte (3 byte)",
            <<224, 160, 32>>,
            <<16#fffd/utf8, 32>>},
        {"missing continuation byte (4 byte missing one)",
            <<240, 144, 128, 32>>,
            <<16#fffd/utf8, 32>>},
        {"missing continuation byte (4 byte missing two)",
            <<240, 144, 32>>,
            <<16#fffd/utf8, 32>>},
        {"overlong encoding of u+002f (2 byte)",
            <<16#c0, 16#af, 32>>,
            <<16#fffd/utf8, 32>>},
        {"overlong encoding of u+002f (3 byte)",
            <<16#e0, 16#80, 16#af, 32>>,
            <<16#fffd/utf8, 32>>},
        {"overlong encoding of u+002f (4 byte)",
            <<16#f0, 16#80, 16#80, 16#af, 32>>,
            <<16#fffd/utf8, 32>>},
        {"highest overlong 2 byte sequence",
            <<16#c1, 16#bf, 32>>,
            <<16#fffd/utf8, 32>>},
        {"highest overlong 3 byte sequence",
            <<16#e0, 16#9f, 16#bf, 32>>,
            <<16#fffd/utf8, 32>>},
        {"highest overlong 4 byte sequence",
            <<16#f0, 16#8f, 16#bf, 16#bf, 32>>,
            <<16#fffd/utf8, 32>>}
    ],
    lists:append([
        [
            {Title, ?_assertError(
                badarg,
                clean_string(Input, #config{strict_utf8=true})
            )},
            {Title ++ " replaced", ?_assertEqual(
                Replaced,
                clean_string(Input, #config{})
            )}
        ]
        || {Title, Input, Replaced} <- Cases
    ]).


%% json_escape_sequence/1 is expected to return a backslash-u escape with four
%% lowercase, zero padded hex digits
json_escape_sequence_test_() ->
    Cases = [
        {"16#0000", 16#0000, <<"\\u0000"/utf8>>},
        {"16#abc", 16#abc, <<"\\u0abc"/utf8>>},
        {"16#def", 16#def, <<"\\u0def"/utf8>>}
    ],
    [
        {"json escape sequence test - " ++ Label, ?_assertEqual(
            Expected,
            json_escape_sequence(Codepoint)
        )}
        || {Label, Codepoint, Expected} <- Cases
    ].
%% with uescape enabled, non ascii codepoints are expected back as \uxxxx
%% escapes; codepoints above u+ffff are expected as a pair of surrogate escapes
uescape_test_() ->
    Uescape = #config{uescape=true},
    [
        {"\"\\u0080\"", ?_assertEqual(
            <<"\\u0080">>,
            clean_string(<<128/utf8>>, Uescape)
        )},
        {"\"\\u8ca8\\u5481\\u3002\\u0091\\u0091\"", ?_assertEqual(
            <<"\\u8ca8\\u5481\\u3002\\u0091\\u0091">>,
            clean_string(
                <<232,178,168,229,146,129,227,128,130,194,145,194,145>>,
                Uescape
            )
        )},
        {"\"\\ud834\\udd1e\"", ?_assertEqual(
            <<"\\ud834\\udd1e">>,
            clean_string(<<240, 157, 132, 158>>, Uescape)
        )},
        {"\"\\ud83d\\ude0a\"", ?_assertEqual(
            <<"\\ud83d\\ude0a">>,
            clean_string(<<240, 159, 152, 138>>, Uescape)
        )}
    ].


%% fix_key/1 is expected to turn binary, atom and integer keys into binaries.
%% the expected value goes first, as ?_assertEqual(Expect, Expr) requires, so
%% that a failure labels the expected and actual values correctly
fix_key_test_() ->
    [
        {"binary key", ?_assertEqual(<<"foo">>, fix_key(<<"foo">>))},
        {"atom key", ?_assertEqual(<<"foo">>, fix_key(foo))},
        {"integer key", ?_assertEqual(<<"123">>, fix_key(123))}
    ].


%% `{Date, Time}` tuples in the token stream are expected to come out as
%% `{string, Iso8601}` events, with integer or float seconds. each case has
%% its own title so a failure identifies which one broke
datetime_test_() ->
    [
        {"datetime", ?_assertEqual(
            [start_array, {string, <<"2014-08-13T23:12:34Z">>}, end_array, end_json],
            parse([start_array, {{2014,08,13},{23,12,34}}, end_array, end_json], [])
        )},
        {"datetime with fractional seconds", ?_assertEqual(
            [start_array, {string, <<"2014-08-13T23:12:34.363369Z">>}, end_array, end_json],
            parse([start_array, {{2014,08,13},{23,12,34.363369}}, end_array, end_json], [])
        )}
    ].


%% a three integer timestamp tuple is expected to come out as a `{string, _}`
%% event holding the matching utc time (the third element does not appear in
%% the expected output)
timestamp_test_() ->
    [
        {"timestamp", ?_assertEqual(
            [start_array, {string, <<"2016-01-15T18:19:28Z">>}, end_array, end_json],
            parse([start_array, {1452,881968,111772}, end_array, end_json], [])
        )}
    ].


%% a `{Key, Value}` pair of binaries where a value is expected must raise
%% badarg, both inside an object and inside an array
rogue_tuple_test_() ->
    RoguePair = {<<"key">>, <<"value">>},
    [
        {"kv in value position of object", ?_assertError(
            badarg,
            parse([start_object, <<"key">>, RoguePair, end_object, end_json], [])
        )},
        {"kv in value position of list", ?_assertError(
            badarg,
            parse([start_array, RoguePair, end_array, end_json], [])
        )}
    ].
1212 1213 1214-endif. 1215