%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2003-2016. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%

%%% Description : Utility module for handling XML trees.
%%%----------------------------------------------------------------------

-module(xmerl_lib).

-export([normalize_content/1, normalize_content/3, expand_content/1,
         expand_content/3, normalize_element/1, normalize_element/3,
         expand_element/1, expand_element/3, expand_attributes/1,
         expand_attributes/3, export_text/1, flatten_text/1,
         export_attribute/1, markup/2, markup/3, simplify_element/1,
         simplify_content/1, start_tag/1, start_tag/2, end_tag/1,
         empty_tag/1, empty_tag/2,is_empty_data/1, find_attribute/2,
         remove_whitespace/1,to_lower/1]).

-export([is_letter/1,is_namechar/1,is_ncname/1,
         detect_charset/1,detect_charset/2,is_name/1,is_char/1]).


-export([mapxml/2, foldxml/3, mapfoldxml/3]).

%% exports for XSD
-export([is_facet/1,is_builtin_simple_type/1,is_xsd_string/1]).

-include("xmerl.hrl").
-include("xmerl_xsd.hrl").


%% Escape special characters `<' and `&', flattening the text.
%% Also escapes `>', just for symmetry.
%%
%% export_text(T) is the public entry point: it hands T to export_text/2
%% together with an empty continuation list.

export_text(T) ->
    export_text(T, []).

%% export_text(Text, Cont) -> string()
%%
%% Worker for export_text/1. `Text' is the part of the (possibly deep)
%% text currently being walked and `Cont' is a stack of tails still to be
%% processed once `Text' is exhausted, so the input never has to be
%% flattened up front.
%%
%% `<', `>' and `&' are replaced by the predefined entity references
%% `&lt;', `&gt;' and `&amp;'; every other integer is copied unchanged.
%% A term that is neither a list cell nor [] is taken to be a binary and
%% is converted with binary_to_list/1.

export_text([$< | T], Cont) ->
    "&lt;" ++ export_text(T, Cont);
export_text([$> | T], Cont) ->
    "&gt;" ++ export_text(T, Cont);
export_text([$& | T], Cont) ->
    "&amp;" ++ export_text(T, Cont);
export_text([C | T], Cont) when is_integer(C) ->
    [C | export_text(T, Cont)];
export_text([T | T1], Cont) ->
    %% Nested element: descend into it, remember the tail for later.
    export_text(T, [T1 | Cont]);
export_text([], [T | Cont]) ->
    %% Current level exhausted: resume with the most recently saved tail.
    export_text(T, Cont);
export_text([], []) ->
    [];
export_text(Bin, Cont) ->
    export_text(binary_to_list(Bin), Cont).


%% Only flatten text.
%%
%% flatten_text(T) -> string()
%% Turns deep text (nested lists of integers and binaries) into a flat
%% list of integers. No character is escaped or otherwise changed.

flatten_text(T) ->
    flatten_text(T, []).

%% Same traversal scheme as export_text/2: `Cont' is the stack of saved
%% tails.
flatten_text([C | T], Cont) when is_integer(C) ->
    [C | flatten_text(T, Cont)];
flatten_text([T | T1], Cont) ->
    flatten_text(T, [T1 | Cont]);
flatten_text([], [T | Cont]) ->
    flatten_text(T, Cont);
flatten_text([], []) ->
    [];
flatten_text(Bin, Cont) ->
    flatten_text(binary_to_list(Bin), Cont).

%% Convert attribute value to a flat string, escaping characters `"',
%% `<' and `&'. (Note that single-quote characters are not escaped; the
%% markup-generating functions (`start_tag', `end_tag', ...) always use
%% `"' to delimit the attribute values.)
%%
%% Integers are rendered with integer_to_list/1 and atoms with
%% atom_to_list/1 (the atom's text is then escaped like any other string).

export_attribute(I) when is_integer(I) ->
    integer_to_list(I);
export_attribute(A) when is_atom(A) ->
    export_attribute(atom_to_list(A), []);
export_attribute(S) ->
    export_attribute(S, []).

%% Worker for export_attribute/1; `Cont' is the stack of saved tails.
%% `<', `&' and `"' become `&lt;', `&amp;' and `&quot;'. `>' is left
%% alone.
export_attribute([$< | T], Cont) ->
    "&lt;" ++ export_attribute(T, Cont);
export_attribute([$& | T], Cont) ->
    "&amp;" ++ export_attribute(T, Cont);
export_attribute([$" | T], Cont) ->
    "&quot;" ++ export_attribute(T, Cont);
export_attribute([C | T], Cont) when is_integer(C) ->
    [C | export_attribute(T, Cont)];
export_attribute([T | T1], Cont) ->
    export_attribute(T, [T1 | Cont]);
export_attribute([], [T | Cont]) ->
    export_attribute(T, Cont);
export_attribute([], []) ->
    [];
export_attribute(Bin, Cont) ->
    export_attribute(binary_to_list(Bin), Cont).


%% Grammar of the "simple form" accepted by the expand/normalize
%% functions:
%%
%% SimpleContent: [SimpleElement]
%% SimpleElement: #xml...{} | String | {atom(), [Attr], SimpleContent}
%%                | {atom(), SimpleContent} | atom()
%% Attr: {atom(), Value} | #xmlAttribute{}
%% Value: atom() | integer() | String
%% String: [char() | binary() | String]
%%
%% Strings are allowed to be deep, so content lists are not: permitting
%% both would make traversal of the simple representation too
%% complicated and expensive. A simple content list is therefore always
%% a flat list of simple elements.

%% TODO: namespace-qualified tags in simple-form? /RC

%% 'normalize' does what 'expand' does and additionally flattens the
%% value of every text element into a flat string.

%% normalize_element(Elem): normalize a root element (position 1, no
%% parents).
normalize_element(Elem) ->
    expand_element(Elem, 1, [], true).

%% normalize_element(Elem, Position, Ancestors): normalize an element
%% located at `Position' below the given `Ancestors' list.
normalize_element(Elem, Position, Ancestors) ->
    expand_element(Elem, Position, Ancestors, true).

%% 'expand' turns simple-form elements into normal XML element records.
%% Every attribute value (including those already stored in
%% #xmlAttribute records) ends up as a flat string so that plain string
%% comparison works on them. Text element values are left as they are.

%% expand_element(Elem): expand a root element (position 1, no parents).
expand_element(Elem) ->
    expand_element(Elem, 1, [], false).

%% expand_element(Elem, Position, Ancestors): expand an element located
%% at `Position' below the given `Ancestors' list.
expand_element(Elem, Position, Ancestors) ->
    expand_element(Elem, Position, Ancestors, false).

%% expand_element(Node, Pos, Parents, Norm) -> Record
%%
%% Node    - an #xmlElement, #xmlText, #xmlPI, #xmlComment or #xmlDecl
%%           record, or a simple-form element: {Tag, Attrs, Content},
%%           {Tag, Content}, a bare tag atom, or a string (text).
%% Pos     - stored in the `pos' field of the result.
%% Parents - stored in the `parents' field of the result; children get
%%           [{Tag, Pos} | Parents].
%% Norm    - true: text values are flattened; false: left untouched.
%%
%% #xmlDecl records only get their attributes expanded; their position
%% and parents are not touched.
expand_element(#xmlElement{name = Name,
                           content = Content0,
                           attributes = Attrs0} = Elem,
               Pos, Parents, Norm) ->
    Ancestors = [{Name, Pos} | Parents],
    %% Content is expanded before the attributes; the order is kept
    %% because both lists may contain caller-supplied generator funs.
    Content = expand_content(Content0, 1, Ancestors, Norm),
    Attrs = expand_attributes(Attrs0, 1, Ancestors),
    Elem#xmlElement{pos = Pos,
                    parents = Parents,
                    attributes = Attrs,
                    content = Content};
expand_element(#xmlText{value = Value} = Text, Pos, Parents, Norm) ->
    Text#xmlText{pos = Pos,
                 parents = Parents,
                 value = expand_text(Value, Norm)};
expand_element(#xmlPI{value = Value} = PI, Pos, Parents, Norm) ->
    PI#xmlPI{pos = Pos,
             parents = Parents,
             value = expand_text(Value, Norm)};
expand_element(#xmlComment{value = Value} = Comment, Pos, Parents, Norm) ->
    Comment#xmlComment{pos = Pos,
                       parents = Parents,
                       value = expand_text(Value, Norm)};
expand_element(#xmlDecl{attributes = Attrs} = Decl, _Pos, _Parents, _Norm) ->
    Decl#xmlDecl{attributes = expand_attributes(Attrs, 1, [])};
expand_element({Tag, Attrs, Content}, Pos, Parents, Norm) when is_atom(Tag) ->
    Ancestors = [{Tag, Pos} | Parents],
    #xmlElement{name = Tag,
                pos = Pos,
                parents = Parents,
                attributes = expand_attributes(Attrs, 1, Ancestors),
                content = expand_content(Content, 1, Ancestors, Norm)};
expand_element({Tag, Content}, Pos, Parents, Norm) when is_atom(Tag) ->
    %% No attribute list given: same as an empty one.
    expand_element({Tag, [], Content}, Pos, Parents, Norm);
expand_element(Tag, Pos, Parents, Norm) when is_atom(Tag) ->
    %% Bare tag: an element with neither attributes nor content.
    expand_element({Tag, [], []}, Pos, Parents, Norm);
expand_element(String, Pos, Parents, Norm) when is_list(String) ->
    #xmlText{pos = Pos,
             parents = Parents,
             value = expand_text(String, Norm)}.

%% expand_text(Text, Norm): flatten Text when normalizing, otherwise
%% return it unchanged.
expand_text(Text, true) -> flatten_text(Text);
expand_text(Text, false) -> Text.

%% Content must be a flat list of elements.

%% normalize_content(Content): normalize top-level content (first
%% position 1, no parents).
normalize_content(Content) ->
    expand_content(Content, 1, [], true).

%% normalize_content(Content, Pos, Parents): as expand_content/3, but
%% text values are also flattened.
normalize_content(Content, Pos, Parents) ->
    expand_content(Content, Pos, Parents, true).

%% expand_content(Content): expand top-level content (first position 1,
%% no parents).
expand_content(Content) ->
    expand_content(Content, 1, [], false).

%% expand_content(Content, Pos, Parents): expand a content list whose
%% first element gets position `Pos'.
expand_content(Content, Pos, Parents) ->
    expand_content(Content, Pos, Parents, false).

%% expand_content(Content, Pos, Parents, Norm) -> [Record]
%%
%% Besides ordinary elements the list may contain:
%%   {List}       - a one-tuple wrapping a list that is spliced in
%%                  front of the remaining content;
%%   {Fun, State} - a generator: Fun(State) returns `done' or
%%                  {Element, NewState}. The generator is put back in
%%                  front of the element it just produced, so it is
%%                  called again before that element is expanded.
%% Each expanded element gets the next position, starting at `Pos'.
expand_content([], _Pos, _Parents, _Norm) ->
    [];
expand_content([{Nested} | Rest], Pos, Parents, Norm) ->
    expand_content(Nested ++ Rest, Pos, Parents, Norm);
expand_content([{Gen, State} | Rest], Pos, Parents, Norm)
  when is_function(Gen) ->
    Next = case Gen(State) of
               done -> Rest;
               {Item, State1} -> [{Gen, State1}, Item | Rest]
           end,
    expand_content(Next, Pos, Parents, Norm);
expand_content([Item | Rest], Pos, Parents, Norm) ->
    [expand_element(Item, Pos, Parents, Norm)
     | expand_content(Rest, Pos + 1, Parents, Norm)].

%% expand_attributes(Attrs): expand a top-level attribute list (first
%% position 1, no parents).
expand_attributes(Attrs) ->
    expand_attributes(Attrs, 1, []).

%% Expanding always turns all attribute values into flat strings.
%%
%% expand_attributes(Attrs, Pos, Parents) -> [#xmlAttribute{}]
%% Accepts #xmlAttribute records (only `pos' and `value' are updated),
%% {Key, Value} pairs (a new record is built, including `parents') and
%% {Fun, State} generators that work as in expand_content/4.
expand_attributes([], _Pos, _Parents) ->
    [];
expand_attributes([#xmlAttribute{value = Value} = Attr | Rest],
                  Pos, Parents) ->
    [Attr#xmlAttribute{pos = Pos,
                       value = expand_value(Value)}
     | expand_attributes(Rest, Pos + 1, Parents)];
expand_attributes([{Gen, State} | Rest], Pos, Parents)
  when is_function(Gen) ->
    Next = case Gen(State) of
               done -> Rest;
               {Attr, State1} -> [{Gen, State1}, Attr | Rest]
           end,
    expand_attributes(Next, Pos, Parents);
expand_attributes([{Key, Value} | Rest], Pos, Parents) ->
    [#xmlAttribute{name = Key,
                   pos = Pos,
                   parents = Parents,
                   value = expand_value(Value)}
     | expand_attributes(Rest, Pos + 1, Parents)].

%% expand_value(Value): render an attribute value as a flat string.
%% Atoms and integers are converted; anything else is treated as
%% (possibly deep) text and flattened.
expand_value(Value) when is_atom(Value) ->
    atom_to_list(Value);
expand_value(Value) when is_integer(Value) ->
    integer_to_list(Value);
expand_value(Value) ->
    flatten_text(Value).

%% We want simplification to yield a normal form, so we always generate
%% three-tuples for elements.
%% PI, Comment and Decl elements are
%% discarded from content lists. Attribute values become flat
%% strings. Text elements are not flattened.

%% simplify_element(Node) -> {Tag, Attrs, Content} | Text
%% For an #xmlElement the tag is the `name' field when `expanded_name'
%% is [], otherwise the `expanded_name' field. Simple-form input is
%% accepted as well and padded to a three-tuple.
simplify_element(#xmlElement{expanded_name = ExpandedName,
                             name = Name,
                             attributes = Attrs,
                             content = Content}) ->
    Tag = case ExpandedName of
              [] -> Name;
              _ -> ExpandedName
          end,
    {Tag, simplify_attributes(Attrs), simplify_content(Content)};
simplify_element(#xmlText{value = Value}) ->
    Value;
simplify_element({Tag, Attrs, Content}) when is_atom(Tag) ->
    {Tag, simplify_attributes(Attrs), simplify_content(Content)};
simplify_element({Tag, Content}) when is_atom(Tag) ->
    {Tag, [], simplify_content(Content)};
simplify_element(Tag) when is_atom(Tag) ->
    {Tag, [], []};
simplify_element(String) when is_list(String) ->
    String.

%% simplify_content(Content): simplify every element of a content list,
%% dropping #xmlPI, #xmlComment and #xmlDecl records.
simplify_content([]) ->
    [];
simplify_content([Node | Rest]) ->
    case is_discarded(Node) of
        true ->
            simplify_content(Rest);
        false ->
            [simplify_element(Node) | simplify_content(Rest)]
    end.

%% Nodes that have no simple-form representation.
is_discarded(#xmlPI{}) -> true;
is_discarded(#xmlComment{}) -> true;
is_discarded(#xmlDecl{}) -> true;
is_discarded(_) -> false.

%% simplify_attributes(Attrs) -> [{Key, Value}]
%% #xmlAttribute records become {Name, FlatValue}; {Key, Value} pairs
%% with an atom key are passed through as they are.
simplify_attributes([]) ->
    [];
simplify_attributes([#xmlAttribute{name = Key, value = Value} | Rest])
  when is_atom(Key) ->
    [{Key, expand_value(Value)} | simplify_attributes(Rest)];
simplify_attributes([{Key, _} = Pair | Rest]) when is_atom(Key) ->
    [Pair | simplify_attributes(Rest)].

%% Looking up an attribute value
%%
%% find_attribute(Name, Attrs) -> {value, Value} | false
%% `Attrs' is searched on the `name' field of #xmlAttribute.

find_attribute(Name, Attrs) ->
    case lists:keysearch(Name, #xmlAttribute.name, Attrs) of
        {value, #xmlAttribute{value = Value}} ->
            {value, Value};
        false ->
            false
    end.


%% markup(Tag, Data): markup/3 without attributes.
markup(Tag, Data) ->
    markup(Tag, [], Data).

%% markup(Tag, Attrs, Data) -> iolist
%% Empty data ([]) gives an empty-element tag; otherwise `Data' is
%% wrapped between a start tag and an end tag.
markup(Tag, Attrs, []) ->
    empty_tag(Tag, Attrs);
markup(Tag, Attrs, Data) ->
    [start_tag(Tag, Attrs), Data, end_tag(Tag)].

%% start_tag(Tag): start tag without attributes.
start_tag(Tag) ->
    start_tag(Tag, []).

%% start_tag(Tag, Attrs) -> iolist
%% `Tag' is an atom or a string; `Attrs' is a list of #xmlAttribute
%% records.
start_tag(Tag, Attrs) when is_atom(Tag) ->
    start_tag(atom_to_list(Tag), Attrs);
start_tag(Name, []) ->
    ["<", Name, ">"];
start_tag(Name, Attrs) ->
    ["<", Name, attributes(Attrs), ">"].

%% empty_tag(Tag): empty-element tag without attributes.
empty_tag(Tag) ->
    empty_tag(Tag, []).

%% empty_tag(Tag, Attrs) -> iolist
%% Like start_tag/2 but closes the element immediately ("/>").
empty_tag(Tag, Attrs) when is_atom(Tag) ->
    empty_tag(atom_to_list(Tag), Attrs);
empty_tag(Name, []) ->
    ["<", Name, "/>"];
empty_tag(Name, Attrs) ->
    ["<", Name, attributes(Attrs), "/>"].

%% end_tag(Tag) -> iolist
end_tag(Tag) when is_atom(Tag) ->
    end_tag(atom_to_list(Tag));
end_tag(Name) ->
    ["</", Name, ">"].

%% attributes(Attrs): render each attribute, in order.
attributes(Attrs) ->
    lists:map(fun attr_string/1, Attrs).

%% attr_string(Attr): ` name="value"' with the value escaped by
%% export_attribute/1. The attribute name must be an atom.
attr_string(#xmlAttribute{name = Name, value = Value}) ->
    [" ", atom_to_list(Name), "=\"", export_attribute(Value), "\""].

%% is_empty_data(Data) -> boolean()
%% True for [] and for lists (however deeply nested) that contain
%% nothing but empty lists; false for everything else.
is_empty_data([]) ->
    true;
is_empty_data([First | Rest]) ->
    is_empty_data(First) andalso is_empty_data(Rest);
is_empty_data(_) ->
    false.


%% Removing normalised whitespace-only text segments.
%%
%% Only #xmlText nodes whose value is exactly " " are dropped; the
%% content of #xmlElement nodes is cleaned recursively.

remove_whitespace([]) ->
    [];
remove_whitespace([#xmlText{value = " "} | Rest]) ->
    remove_whitespace(Rest);
remove_whitespace([#xmlElement{content = Content} = Elem | Rest]) ->
    [Elem#xmlElement{content = remove_whitespace(Content)}
     | remove_whitespace(Rest)];
remove_whitespace([Node | Rest]) ->
    [Node | remove_whitespace(Rest)].


%%% ----------------------------------------------------------------------------
%%% funs traversing the xmerl tree left-right and top-down

%% mapxml
%% Fun is fun(Old#xmlElement) -> New#xmlElement
%%
%% An element is transformed first; the (flattened) content of the
%% transformed element is then mapped recursively. Lists are mapped
%% element by element and any other node is passed straight to Fun.
mapxml(Fun, #xmlElement{} = Elem) ->
    Mapped = Fun(Elem),
    Children = lists:flatten(Mapped#xmlElement.content),
    Mapped#xmlElement{content = mapxml(Fun, Children)};
mapxml(Fun, Nodes) when is_list(Nodes) ->
    [mapxml(Fun, Node) || Node <- Nodes];
mapxml(Fun, Node) ->
    Fun(Node).


%% foldxml
%% Fun is fun(#xmlElement, OldAccu) -> NewAccu
%%
%% An element is folded before its content (top-down, left to right).
%% Lists are folded element by element and any other node is passed
%% straight to Fun.
foldxml(Fun, Acc0, #xmlElement{content = Children} = Elem) ->
    foldxml(Fun, Fun(Elem, Acc0), Children);
foldxml(Fun, Acc0, Nodes) when is_list(Nodes) ->
    lists:foldl(fun(Node, Acc) -> foldxml(Fun, Acc, Node) end, Acc0, Nodes);
foldxml(Fun, Acc, Node) ->
    Fun(Node, Acc).


%% mapfoldxml
%% Fun is fun(Old#xmlElement, OldAccu) -> {New#xmlElement, NewAccu}
%%
%% Combination of mapxml/2 and foldxml/3: an element is transformed
%% first, then the (flattened) content of the transformed element is
%% processed with the updated accumulator.
mapfoldxml(Fun, Acc0, #xmlElement{} = Elem) ->
    {Mapped, Acc1} = Fun(Elem, Acc0),
    Children = lists:flatten(Mapped#xmlElement.content),
    {NewChildren, Acc2} = mapfoldxml(Fun, Acc1, Children),
    {Mapped#xmlElement{content = NewChildren}, Acc2};
mapfoldxml(Fun, Acc0, Nodes) when is_list(Nodes) ->
    lists:mapfoldl(fun(Node, Acc) -> mapfoldxml(Fun, Acc, Node) end,
                   Acc0, Nodes);
mapfoldxml(Fun, Acc, Node) ->
    Fun(Node, Acc).


%%% @spec detect_charset(T::list()) -> charset_info()
%%% @equiv detect_charset(undefined,T)
detect_charset(Content) ->
    detect_charset(undefined, Content).

%%% FIXME! Whatabout aliases etc? Shouldn't transforming with ucs be optional?
%%% @spec detect_charset(ExtCharset::atom(),T::list()) -> charset_info()
%%% @doc Automatically decides character set used in XML document.
%%%  charset_info() is
%%%  <table>
%%%    <tr><td><code>{auto,'iso-10646-utf-1',Content} |</code></td></tr>
%%%    <tr><td><code>{external,'iso-10646-utf-1',Content} |</code></td></tr>
%%%    <tr><td><code>{undefined,undefined,Content} |</code></td></tr>
%%%    <tr><td><code>{external,ExtCharset,Content}</code></td></tr>
%%%  </table>
%%%   ExtCharset is any externally declared character set (e.g. in HTTP
%%%   Content-Type header) and Content is an XML Document.
%%%
detect_charset(ExtCharset, Content) when is_list(ExtCharset) ->
    %% FIXME! Don't allow both atom and list for character set names
    %% NOTE(review): list_to_atom/1 creates a new atom for every distinct
    %% charset string; if the name can come from untrusted input this may
    %% fill the atom table.
    detect_charset(list_to_atom(ExtCharset), Content);
detect_charset(ExtCharset, Content) ->
    charset_info(autodetect(ExtCharset, Content), ExtCharset, Content).

%% Translate the result of autodetect/2 into a charset_info() triple.
%% When nothing was detected the original, untransformed content is
%% returned.
charset_info({auto, Decoded}, _ExtCharset, _Content) ->
    {auto, 'iso-10646-utf-1', Decoded};
charset_info({external, Decoded}, _ExtCharset, _Content) ->
    {external, 'iso-10646-utf-1', Decoded};
charset_info({undefined, _}, _ExtCharset, Content) ->
    {undefined, undefined, Content};
charset_info({ExtCharset, Content}, ExtCharset, Content) ->
    {external, ExtCharset, Content}.

%%------------------------------------------------------------------------------
%% Auto detect what kind of character set we are dealing with and transform
%% to Erlang integer Unicode format if found.
%% Appendix F, Page 56-57, XML 1.0 W3C Recommendation 6 October 2000
%% (http://www.w3.org/TR/REC-xml)
%% 00 00 00 3C ( "<" in UCS-4 big-endian)
%% 3C 00 00 00 ( "<" in UCS-4 little-endian)
%% FE FF (UTF-16 - big-endian Mark)
%% FF FE (UTF-16 - little-endian Mark)
%% 00 3C 00 3F ( "<?" in UTF-16 big-endian)
%% 3C 00 3F 00 ( "<?" in UTF-16 little-endian)
%% 3C 3F (7-bit,8-bit or mixed width encoding)
%% 4C 6F A7 94 (EBCDIC) - Not Implemented!!!!

%% Check byte-order mark and transform to Unicode, Erlang integer
%%
%% autodetect(ExtCharset, Content) ->
%%     {auto, Decoded} | {external, Decoded} | {ExtCharset, Content}
%%
%% `auto' is returned when no external charset was given (undefined)
%% and the leading bytes identify the encoding; `external' when the
%% given charset is accepted together with the leading bytes. In both
%% cases the content is decoded with xmerl_ucs. If no clause applies the
%% arguments are returned untouched.
%%% --- With byte-order mark
autodetect(undefined,[0,0,16#fe,16#ff | Input]) ->
    {auto, xmerl_ucs:from_ucs4be(Input)};
autodetect('iso-10646-utf-1',[0,0,16#fe,16#ff | Input]) ->
    {external, xmerl_ucs:from_ucs4be(Input)};
autodetect(undefined,[16#ff,16#fe,0,0 | Input]) ->
    {auto, xmerl_ucs:from_ucs4le(Input)};
autodetect('iso-10646-utf-1',[16#ff,16#fe,0,0 | Input]) ->
    {external, xmerl_ucs:from_ucs4le(Input)};

autodetect(undefined,[16#fe,16#ff | Input]) ->
    {auto, xmerl_ucs:from_utf16be(Input)};
autodetect('utf-16be',[16#fe,16#ff | Input]) ->
    {external, xmerl_ucs:from_utf16be(Input)};
autodetect(undefined,[16#ff,16#fe | Input]) ->
    {auto, xmerl_ucs:from_utf16le(Input)};
autodetect('utf-16le',[16#ff,16#fe | Input]) ->
    {external, xmerl_ucs:from_utf16le(Input)};

autodetect(undefined,[16#ef,16#bb,16#bf | Input]) ->
    {auto, xmerl_ucs:from_utf8(Input)};
autodetect('utf-8',[16#ef,16#bb,16#bf | Input]) ->
    {external, xmerl_ucs:from_utf8(Input)};
%% Declared as utf-8 but carrying a UTF-16 byte-order mark: the mark
%% decides which decoder is used.
autodetect('utf-8',[16#ff,16#fe | Input]) ->
    {external, xmerl_ucs:from_utf16le(Input)};
autodetect('utf-8',[16#fe,16#ff | Input]) ->
    {external, xmerl_ucs:from_utf16be(Input)};

%%% --- Without byte-order mark
%% The matched bytes are part of the document, so the whole content is
%% handed to the decoder.
autodetect(undefined,[0,0,0,16#3c|_] = Content) ->
    {auto, xmerl_ucs:from_ucs4be(Content)};
autodetect('iso-10646-utf-1',[0,0,0,16#3c|_] = Content) ->
    {external, xmerl_ucs:from_ucs4be(Content)};
autodetect(undefined,[16#3c,0,0,0|_] = Content) ->
    {auto, xmerl_ucs:from_ucs4le(Content)};
autodetect('iso-10646-utf-1',[16#3c,0,0,0|_] = Content) ->
    {external, xmerl_ucs:from_ucs4le(Content)};

autodetect(undefined,[0,16#3c,0,16#3f | _] = Content) ->
    {auto, xmerl_ucs:from_utf16be(Content)};
autodetect('utf-16be',[0,16#3c,0,16#3f | _] = Content) ->
    {external, xmerl_ucs:from_utf16be(Content)};
autodetect(undefined,[16#3c,0,16#3f,0 | _] = Content) ->
    {auto, xmerl_ucs:from_utf16le(Content)};
autodetect('utf-16le',[16#3c,0,16#3f,0 | _] = Content) ->
    {external, xmerl_ucs:from_utf16le(Content)};

autodetect(ExtCharset,Content) ->
    {ExtCharset, Content}.


%% is_ncname(Name) -> boolean()
%% True when Name (atom or string) is a non-colonized name: it starts
%% with `_' or a letter, continues with name characters only and
%% contains no `:'. The empty name is not an NCName.
is_ncname(A) when is_atom(A) ->
    is_ncname(atom_to_list(A));
is_ncname([]) ->
    false;
is_ncname([$_|T]) ->
    is_ncname1(T);
is_ncname([H|T]) ->
    case is_letter(H) of
        true ->
            is_ncname1(T);
        _ -> false
    end.

%% Tail of an NCName: name characters only, and no colon (is_namechar/1
%% accepts `:', so it has to be excluded separately).
is_ncname1(T) ->
    is_name1(T) andalso not lists:member($:, T).

%% is_name(Name) -> boolean()
%% True when Name (atom or string) starts with `_', `:' or a letter and
%% continues with name characters only. The empty name is not a Name.
is_name(A) when is_atom(A) ->
    is_name(atom_to_list(A));
is_name([]) ->
    false;
is_name([$_|T]) ->
    is_name1(T);
is_name([$:|T]) ->
    is_name1(T);
is_name([H|T]) ->
    case is_letter(H) of
        true ->
            is_name1(T);
        _ -> false
    end.

%% is_name1(Chars): true when every character is a name character.
is_name1([]) ->
    true;
is_name1([H|T]) ->
    case is_namechar(H) of
        true ->
            is_name1(T);
        _ -> false
    end.



% =======
%%% UNICODE character definitions

%%%%%%%% [2] Char

is_char(16#09) -> true;
is_char(16#0A) -> true;
is_char(16#0D) -> true;
is_char(X) when X >= 16#20, X =< 16#D7FF -> true;
is_char(X) when X >= 16#E000, X =< 16#FFFD -> true;
is_char(X) when X >= 16#10000, X =< 16#10FFFF -> true;
is_char(_) -> false.

%% 0 - not classified,
%% 1 - base_char or ideographic,
%% 2 - combining_char or digit or extender,
%% 3 - $.
%%     or $- or $_ or $:
%%
%% ?SMALL is a 255-element lookup tuple: element N holds the class
%% (0, 1, 2 or 3) of the character with code N.
-define(SMALL, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,2,2,2,2,2,2,2,2,2,2,3,0,
                0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,0,0,0,0,3,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,2,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1}).

%% [4] NameChar
%%
%% Characters covered by ?SMALL are answered from the table (any class
%% other than 0 is a name character). When the lookup fails, i.e. X is
%% not a valid index into ?SMALL, the individual character class
%% predicates are consulted instead.
is_namechar(X) ->
    try element(X, ?SMALL) > 0
    catch _:_ ->
            is_letter(X)
                orelse is_digit(X)
                orelse is_combining_char(X)
                orelse is_extender(X)
    end.

%% [84] Letter
%%
%% Table lookup for characters covered by ?SMALL (class 1 means
%% letter); otherwise a letter is a base character or an ideographic
%% character.
is_letter(X) ->
    try element(X, ?SMALL) =:= 1
    catch _:_ ->
            is_base_char(X) orelse is_ideographic(X)
    end.

%% [85] BaseChar
%%
%% is_base_char(X) -> boolean()
%% True when X lies in one of the ranges, or equals one of the single
%% code points, listed below; false for everything else. The clauses are
%% kept in ascending code point order.
is_base_char(X) when X >= 16#0041, X =< 16#005A -> true;
is_base_char(X) when X >= 16#0061, X =< 16#007A -> true;
is_base_char(X) when X >= 16#00C0, X =< 16#00D6 -> true;
is_base_char(X) when X >= 16#00D8, X =< 16#00F6 -> true;
is_base_char(X) when X >= 16#00F8, X =< 16#00FF -> true;
is_base_char(X) when X >= 16#0100, X =< 16#0131 -> true;
is_base_char(X) when X >= 16#0134, X =< 16#013E -> true;
is_base_char(X) when X >= 16#0141, X =< 16#0148 -> true;
is_base_char(X) when X >= 16#014A, X =< 16#017E -> true;
is_base_char(X) when X >= 16#0180, X =< 16#01C3 -> true;
is_base_char(X) when X >= 16#01CD, X =< 16#01F0 -> true;
is_base_char(X) when X >= 16#01F4, X =< 16#01F5 -> true;
is_base_char(X) when X >= 16#01FA, X =< 16#0217 -> true;
is_base_char(X) when X >= 16#0250, X =< 16#02A8 -> true;
is_base_char(X) when X >= 16#02BB, X =< 16#02C1 -> true;
is_base_char(16#0386) -> true;
is_base_char(X) when X >= 16#0388, X =< 16#038A -> true;
is_base_char(16#038C) -> true;
is_base_char(X) when X >= 16#038E, X =< 16#03A1 -> true;
is_base_char(X) when X >= 16#03A3, X =< 16#03CE -> true;
is_base_char(X) when X >= 16#03D0, X =< 16#03D6 -> true;
is_base_char(16#03DA) -> true;
is_base_char(16#03DC) -> true;
is_base_char(16#03DE) -> true;
is_base_char(16#03E0) -> true;
is_base_char(X) when X >= 16#03E2, X =< 16#03F3 -> true;
is_base_char(X) when X >= 16#0401, X =< 16#040C -> true;
is_base_char(X) when X >= 16#040E, X =< 16#044F -> true;
is_base_char(X) when X >= 16#0451, X =< 16#045C -> true;
is_base_char(X) when X >= 16#045E, X =< 16#0481 -> true;
is_base_char(X) when X >= 16#0490, X =< 16#04C4 -> true;
is_base_char(X) when X >= 16#04C7, X =< 16#04C8 -> true;
is_base_char(X) when X >= 16#04CB, X =< 16#04CC -> true;
is_base_char(X) when X >= 16#04D0, X =< 16#04EB -> true;
is_base_char(X) when X >= 16#04EE, X =< 16#04F5 -> true;
is_base_char(X) when X >= 16#04F8, X =< 16#04F9 -> true;
is_base_char(X) when X >= 16#0531, X =< 16#0556 -> true;
is_base_char(16#0559) -> true;
is_base_char(X) when X >= 16#0561, X =< 16#0586 -> true;
is_base_char(X) when X >= 16#05D0, X =< 16#05EA -> true;
is_base_char(X) when X >= 16#05F0, X =< 16#05F2 -> true;
is_base_char(X) when X >= 16#0621, X =< 16#063A -> true;
is_base_char(X) when X >= 16#0641, X =< 16#064A -> true;
is_base_char(X) when X >= 16#0671, X =< 16#06B7 -> true;
is_base_char(X) when X >= 16#06BA, X =< 16#06BE -> true;
is_base_char(X) when X >= 16#06C0, X =< 16#06CE -> true;
is_base_char(X) when X >= 16#06D0, X =< 16#06D3 -> true;
is_base_char(16#06D5) -> true;
is_base_char(X) when X >= 16#06E5, X =< 16#06E6 -> true;
is_base_char(X) when X >= 16#0905, X =< 16#0939 -> true;
is_base_char(16#093D) -> true;
is_base_char(X) when X >= 16#0958, X =< 16#0961 -> true;
is_base_char(X) when X >= 16#0985, X =< 16#098C -> true;
is_base_char(X) when X >= 16#098F, X =< 16#0990 -> true;
is_base_char(X) when X >= 16#0993, X =< 16#09A8 -> true;
is_base_char(X) when X >= 16#09AA, X =< 16#09B0 -> true;
is_base_char(16#09B2) -> true;
is_base_char(X) when X >= 16#09B6, X =< 16#09B9 -> true;
is_base_char(X) when X >= 16#09DC, X =< 16#09DD -> true;
is_base_char(X) when X >= 16#09DF, X =< 16#09E1 -> true;
is_base_char(X) when X >= 16#09F0, X =< 16#09F1 -> true;
is_base_char(X) when X >= 16#0A05, X =< 16#0A0A -> true;
is_base_char(X) when X >= 16#0A0F, X =< 16#0A10 -> true;
is_base_char(X) when X >= 16#0A13, X =< 16#0A28 -> true;
is_base_char(X) when X >= 16#0A2A, X =< 16#0A30 -> true;
is_base_char(X) when X >= 16#0A32, X =< 16#0A33 -> true;
is_base_char(X) when X >= 16#0A35, X =< 16#0A36 -> true;
is_base_char(X) when X >= 16#0A38, X =< 16#0A39 -> true;
is_base_char(X) when X >= 16#0A59, X =< 16#0A5C -> true;
is_base_char(16#0A5E) -> true;
is_base_char(X) when X >= 16#0A72, X =< 16#0A74 -> true;
is_base_char(X) when X >= 16#0A85, X =< 16#0A8B -> true;
is_base_char(16#0A8D) -> true;
is_base_char(X) when X >= 16#0A8F, X =< 16#0A91 -> true;
is_base_char(X) when X >= 16#0A93, X =< 16#0AA8 -> true;
is_base_char(X) when X >= 16#0AAA, X =< 16#0AB0 -> true;
is_base_char(X) when X >= 16#0AB2, X =< 16#0AB3 -> true;
is_base_char(X) when X >= 16#0AB5, X =< 16#0AB9 -> true;
is_base_char(16#0ABD) -> true;
is_base_char(16#0AE0) -> true;
is_base_char(X) when X >= 16#0B05, X =< 16#0B0C -> true;
is_base_char(X) when X >= 16#0B0F, X =< 16#0B10 -> true;
is_base_char(X) when X >= 16#0B13, X =< 16#0B28 -> true;
is_base_char(X) when X >= 16#0B2A, X =< 16#0B30 -> true;
is_base_char(X) when X >= 16#0B32, X =< 16#0B33 -> true;
is_base_char(X) when X >= 16#0B36, X =< 16#0B39 -> true;
is_base_char(16#0B3D) -> true;
is_base_char(X) when X >= 16#0B5C, X =< 16#0B5D -> true;
is_base_char(X) when X >= 16#0B5F, X =< 16#0B61 -> true;
is_base_char(X) when X >= 16#0B85, X =< 16#0B8A -> true;
is_base_char(X) when X >= 16#0B8E, X =< 16#0B90 -> true;
is_base_char(X) when X >= 16#0B92, X =< 16#0B95 -> true;
is_base_char(X) when X >= 16#0B99, X =< 16#0B9A -> true;
is_base_char(16#0B9C) -> true;
is_base_char(X) when X >= 16#0B9E, X =< 16#0B9F -> true;
is_base_char(X) when X >= 16#0BA3, X =< 16#0BA4 -> true;
is_base_char(X) when X >= 16#0BA8, X =< 16#0BAA -> true;
is_base_char(X) when X >= 16#0BAE, X =< 16#0BB5 -> true;
is_base_char(X) when X >= 16#0BB7, X =< 16#0BB9 -> true;
is_base_char(X) when X >= 16#0C05, X =< 16#0C0C -> true;
is_base_char(X) when X >= 16#0C0E, X =< 16#0C10 -> true;
is_base_char(X) when X >= 16#0C12, X =< 16#0C28 -> true;
is_base_char(X) when X >= 16#0C2A, X =< 16#0C33 -> true;
is_base_char(X) when X >= 16#0C35, X =< 16#0C39 -> true;
is_base_char(X) when X >= 16#0C60, X =< 16#0C61 -> true;
is_base_char(X) when X >= 16#0C85, X =< 16#0C8C -> true;
is_base_char(X) when X >= 16#0C8E, X =< 16#0C90 -> true;
is_base_char(X) when X >= 16#0C92, X =< 16#0CA8 -> true;
is_base_char(X) when X >= 16#0CAA, X =< 16#0CB3 -> true;
is_base_char(X) when X >= 16#0CB5, X =< 16#0CB9 -> true;
is_base_char(16#0CDE) -> true;
is_base_char(X) when X >= 16#0CE0, X =< 16#0CE1 -> true;
is_base_char(X) when X >= 16#0D05, X =< 16#0D0C -> true;
is_base_char(X) when X >= 16#0D0E, X =< 16#0D10 -> true;
is_base_char(X) when X >= 16#0D12, X =< 16#0D28 -> true;
is_base_char(X) when X >= 16#0D2A, X =< 16#0D39 -> true;
is_base_char(X) when X >= 16#0D60, X =< 16#0D61 -> true;
is_base_char(X) when X >= 16#0E01, X =< 16#0E2E -> true;
is_base_char(16#0E30) -> true;
is_base_char(X) when X >= 16#0E32, X =< 16#0E33 -> true;
is_base_char(X) when X >= 16#0E40, X =< 16#0E45 -> true;
is_base_char(X) when X >= 16#0E81, X =< 16#0E82 -> true;
is_base_char(16#0E84) -> true;
is_base_char(X) when X >= 16#0E87, X =< 16#0E88 -> true;
is_base_char(16#0E8A) -> true;
is_base_char(16#0E8D) -> true;
is_base_char(X) when X >= 16#0E94, X =< 16#0E97 -> true;
is_base_char(X) when X >= 16#0E99, X =< 16#0E9F -> true;
is_base_char(X) when X >= 16#0EA1, X =< 16#0EA3 -> true;
is_base_char(16#0EA5) -> true;
is_base_char(16#0EA7) -> true;
is_base_char(X) when X >= 16#0EAA, X =< 16#0EAB -> true;
is_base_char(X) when X >= 16#0EAD, X =< 16#0EAE -> true;
is_base_char(16#0EB0) -> true;
is_base_char(X) when X >= 16#0EB2, X =< 16#0EB3 -> true;
is_base_char(16#0EBD) -> true;
is_base_char(X) when X >= 16#0EC0, X =< 16#0EC4 -> true;
is_base_char(X) when X >= 16#0F40, X =< 16#0F47 -> true;
is_base_char(X) when X >= 16#0F49, X =< 16#0F69 -> true;
is_base_char(X) when X >= 16#10A0, X =< 16#10C5 -> true;
is_base_char(X) when X >= 16#10D0, X =< 16#10F6 -> true;
is_base_char(16#1100) -> true;
is_base_char(X) when X >= 16#1102, X =< 16#1103 -> true;
is_base_char(X) when X >= 16#1105, X =< 16#1107 -> true;
is_base_char(16#1109) -> true;
is_base_char(X) when X >= 16#110B, X =< 16#110C -> true;
is_base_char(X) when X >= 16#110E, X =< 16#1112 -> true;
is_base_char(16#113C) -> true;
is_base_char(16#113E) -> true;
is_base_char(16#1140) -> true;
is_base_char(16#114C) -> true;
is_base_char(16#114E) -> true;
is_base_char(16#1150) -> true;
is_base_char(X) when X >= 16#1154, X =< 16#1155 -> true;
is_base_char(16#1159) -> true;
is_base_char(X) when X >= 16#115F, X =< 16#1161 -> true;
is_base_char(16#1163) -> true;
is_base_char(16#1165) -> true;
is_base_char(16#1167) -> true;
is_base_char(16#1169) -> true;
is_base_char(X) when X >= 16#116D, X =< 16#116E -> true;
is_base_char(X) when X >= 16#1172, X =< 16#1173 -> true;
is_base_char(16#1175) -> true;
is_base_char(16#119E) -> true;
is_base_char(16#11A8) -> true;
is_base_char(16#11AB) -> true;
is_base_char(X) when X >= 16#11AE, X =< 16#11AF -> true;
is_base_char(X) when X >= 16#11B7, X =< 16#11B8 -> true;
is_base_char(16#11BA) -> true;
is_base_char(X) when X >= 16#11BC, X =< 16#11C2 -> true;
is_base_char(16#11EB) -> true;
is_base_char(16#11F0) -> true;
is_base_char(16#11F9) -> true;
is_base_char(X) when X >= 16#1E00, X =< 16#1E9B -> true;
is_base_char(X) when X >= 16#1EA0, X =< 16#1EF9 -> true;
is_base_char(X) when X >= 16#1F00, X =< 16#1F15 -> true;
is_base_char(X) when X >= 16#1F18, X =< 16#1F1D -> true;
is_base_char(X) when X >= 16#1F20, X =< 16#1F45 -> true;
is_base_char(X) when X >= 16#1F48, X =< 16#1F4D -> true;
is_base_char(X) when X >= 16#1F50, X =< 16#1F57 -> true;
is_base_char(16#1F59) -> true;
is_base_char(16#1F5B) -> true;
is_base_char(16#1F5D) -> true;
is_base_char(X) when X >= 16#1F5F, X =< 16#1F7D -> true;
is_base_char(X) when X >= 16#1F80, X =< 16#1FB4 -> true;
is_base_char(X) when X >= 16#1FB6, X =< 16#1FBC -> true;
is_base_char(16#1FBE) -> true;
is_base_char(X) when X >= 16#1FC2, X =< 16#1FC4 -> true;
is_base_char(X) when X >= 16#1FC6, X =< 16#1FCC -> true;
is_base_char(X) when X >= 16#1FD0, X =< 16#1FD3 -> true;
is_base_char(X) when X >= 16#1FD6, X =< 16#1FDB -> true;
is_base_char(X) when X >= 16#1FE0, X =< 16#1FEC -> true;
is_base_char(X) when X >= 16#1FF2, X =< 16#1FF4 -> true;
is_base_char(X) when X >= 16#1FF6, X =< 16#1FFC -> true;
is_base_char(16#2126) -> true;
is_base_char(X) when X >= 16#212A, X =< 16#212B -> true;
is_base_char(16#212E) -> true;
is_base_char(X) when X >= 16#2180, X =< 16#2182 -> true;
is_base_char(X) when X >= 16#3041, X =< 16#3094 -> true;
is_base_char(X) when X >= 16#30A1, X =< 16#30FA -> true;
is_base_char(X) when X >= 16#3105, X =< 16#312C -> true;
is_base_char(X) when X >= 16#ac00, X =< 16#d7a3 -> true;
is_base_char(_) ->
    false.

%% [86] Ideographic
%%
%% is_ideographic(X) -> boolean()
%% True for 16#4E00-16#9FA5, 16#3007 and 16#3021-16#3029.
is_ideographic(X) when X >= 16#4e00, X =< 16#9fa5 -> true;
is_ideographic(16#3007) -> true;
is_ideographic(X) when X >= 16#3021, X =< 16#3029 -> true;
is_ideographic(_) ->
    false.
%% [87] CombiningChar
%%
%% Predicate over a character code: answers `true' when X falls inside one
%% of the code-point ranges (or equals one of the single code points) listed
%% below, and `false' for everything else.  The table is split into one
%% clause per neighbourhood of code points purely for readability; within a
%% clause the `;' separated guards are alternatives.
%% NOTE(review): the "[87]" tag presumably refers to the CombiningChar
%% production of the XML 1.0 recommendation -- confirm against the spec
%% before editing any bound.

%% 16#0300 .. 16#05c4
is_combining_char(X) when
      X >= 16#0300, X =< 16#0345;
      X >= 16#0360, X =< 16#0361;
      X >= 16#0483, X =< 16#0486;
      X >= 16#0591, X =< 16#05a1;
      X >= 16#05a3, X =< 16#05b9;
      X >= 16#05bb, X =< 16#05bd;
      X =:= 16#05bf;
      X >= 16#05c1, X =< 16#05c2;
      X =:= 16#05c4 ->
    true;
%% 16#064b .. 16#06ed
is_combining_char(X) when
      X >= 16#064b, X =< 16#0652;
      X =:= 16#0670;
      X >= 16#06d6, X =< 16#06dc;
      X >= 16#06dd, X =< 16#06df;
      X >= 16#06e0, X =< 16#06e4;
      X >= 16#06e7, X =< 16#06e8;
      X >= 16#06ea, X =< 16#06ed ->
    true;
%% 16#0901 .. 16#09e3
is_combining_char(X) when
      X >= 16#0901, X =< 16#0903;
      X =:= 16#093c;
      X >= 16#093e, X =< 16#094c;
      X =:= 16#094d;
      X >= 16#0951, X =< 16#0954;
      X >= 16#0962, X =< 16#0963;
      X >= 16#0981, X =< 16#0983;
      X =:= 16#09bc;
      X =:= 16#09be;
      X =:= 16#09bf;
      X >= 16#09c0, X =< 16#09c4;
      X >= 16#09c7, X =< 16#09c8;
      X >= 16#09cb, X =< 16#09cd;
      X =:= 16#09d7;
      X >= 16#09e2, X =< 16#09e3 ->
    true;
%% 16#0a02 .. 16#0acd
is_combining_char(X) when
      X =:= 16#0a02;
      X =:= 16#0a3c;
      X =:= 16#0a3e;
      X =:= 16#0a3f;
      X >= 16#0a40, X =< 16#0a42;
      X >= 16#0a47, X =< 16#0a48;
      X >= 16#0a4b, X =< 16#0a4d;
      X >= 16#0a70, X =< 16#0a71;
      X >= 16#0a81, X =< 16#0a83;
      X =:= 16#0abc;
      X >= 16#0abe, X =< 16#0ac5;
      X >= 16#0ac7, X =< 16#0ac9;
      X >= 16#0acb, X =< 16#0acd ->
    true;
%% 16#0b01 .. 16#0bd7
is_combining_char(X) when
      X >= 16#0b01, X =< 16#0b03;
      X =:= 16#0b3c;
      X >= 16#0b3e, X =< 16#0b43;
      X >= 16#0b47, X =< 16#0b48;
      X >= 16#0b4b, X =< 16#0b4d;
      X >= 16#0b56, X =< 16#0b57;
      X >= 16#0b82, X =< 16#0b83;
      X >= 16#0bbe, X =< 16#0bc2;
      X >= 16#0bc6, X =< 16#0bc8;
      X >= 16#0bca, X =< 16#0bcd;
      X =:= 16#0bd7 ->
    true;
%% 16#0c01 .. 16#0cd6
is_combining_char(X) when
      X >= 16#0c01, X =< 16#0c03;
      X >= 16#0c3e, X =< 16#0c44;
      X >= 16#0c46, X =< 16#0c48;
      X >= 16#0c4a, X =< 16#0c4d;
      X >= 16#0c55, X =< 16#0c56;
      X >= 16#0c82, X =< 16#0c83;
      X >= 16#0cbe, X =< 16#0cc4;
      X >= 16#0cc6, X =< 16#0cc8;
      X >= 16#0cca, X =< 16#0ccd;
      X >= 16#0cd5, X =< 16#0cd6 ->
    true;
%% 16#0d02 .. 16#0d57
is_combining_char(X) when
      X >= 16#0d02, X =< 16#0d03;
      X >= 16#0d3e, X =< 16#0d43;
      X >= 16#0d46, X =< 16#0d48;
      X >= 16#0d4a, X =< 16#0d4d;
      X =:= 16#0d57 ->
    true;
%% 16#0e31 .. 16#0ecd
is_combining_char(X) when
      X =:= 16#0e31;
      X >= 16#0e34, X =< 16#0e3a;
      X >= 16#0e47, X =< 16#0e4e;
      X =:= 16#0eb1;
      X >= 16#0eb4, X =< 16#0eb9;
      X >= 16#0ebb, X =< 16#0ebc;
      X >= 16#0ec8, X =< 16#0ecd ->
    true;
%% 16#0f18 .. 16#0fb9
is_combining_char(X) when
      X >= 16#0f18, X =< 16#0f19;
      X =:= 16#0f35;
      X =:= 16#0f37;
      X =:= 16#0f39;
      X =:= 16#0f3e;
      X =:= 16#0f3f;
      X >= 16#0f71, X =< 16#0f84;
      X >= 16#0f86, X =< 16#0f8b;
      X >= 16#0f90, X =< 16#0f95;
      X =:= 16#0f97;
      X >= 16#0f99, X =< 16#0fad;
      X >= 16#0fb1, X =< 16#0fb7;
      X =:= 16#0fb9 ->
    true;
%% 16#20d0 .. 16#309a
is_combining_char(X) when
      X >= 16#20d0, X =< 16#20dc;
      X =:= 16#20e1;
      X >= 16#302a, X =< 16#302f;
      X =:= 16#3099;
      X =:= 16#309a ->
    true;
%% Anything not listed above is not a combining character.
is_combining_char(_) ->
    false.
%% [88] Digit
%%
%% Answers `true' when X lies in one of the fifteen ten-wide (nine-wide for
%% 16#0be7..16#0bef) code-point ranges below, `false' otherwise.
is_digit(X) when
      X >= 16#0030, X =< 16#0039;
      X >= 16#0660, X =< 16#0669;
      X >= 16#06f0, X =< 16#06f9;
      X >= 16#0966, X =< 16#096f;
      X >= 16#09e6, X =< 16#09ef;
      X >= 16#0a66, X =< 16#0a6f;
      X >= 16#0ae6, X =< 16#0aef;
      X >= 16#0b66, X =< 16#0b6f;
      X >= 16#0be7, X =< 16#0bef;
      X >= 16#0c66, X =< 16#0c6f;
      X >= 16#0ce6, X =< 16#0cef;
      X >= 16#0d66, X =< 16#0d6f;
      X >= 16#0e50, X =< 16#0e59;
      X >= 16#0ed0, X =< 16#0ed9;
      X >= 16#0f20, X =< 16#0f29 ->
    true;
is_digit(_) ->
    false.

%% [89] Extender
%%
%% Answers `true' for the eight single code points and three short ranges
%% listed below, `false' for any other term.
is_extender(X) when
      X =:= 16#00b7;
      X =:= 16#02d0;
      X =:= 16#02d1;
      X =:= 16#0387;
      X =:= 16#0640;
      X =:= 16#0e46;
      X =:= 16#0ec6;
      X =:= 16#3005;
      X >= 16#3031, X =< 16#3035;
      X >= 16#309d, X =< 16#309e;
      X >= 16#30fc, X =< 16#30fe ->
    true;
is_extender(_) ->
    false.

%% Returns a copy of the flat list Str in which every element in the range
%% $A..$Z has been shifted to the corresponding element in $a..$z.  All
%% other elements are copied through untouched.
to_lower(Str) ->
    to_lower(Str, []).

%% Worker for to_lower/1.  Done holds the elements processed so far in
%% reverse order; it is reversed once when the input is exhausted.
to_lower([], Done) ->
    lists:reverse(Done);
to_lower([Upper | Rest], Done) when Upper >= $A, Upper =< $Z ->
    Lower = Upper + ($a - $A),
    to_lower(Rest, [Lower | Done]);
to_lower([Other | Rest], Done) ->
    to_lower(Rest, [Other | Done]).
%%% XSD helpers

%% Answers `true' when Facet is one of the twelve facet names below, given
%% as an atom; every other term yields `false'.
is_facet(Facet) ->
    lists:member(Facet, [length, minLength, maxLength,
                         pattern, enumeration, whiteSpace,
                         maxInclusive, maxExclusive,
                         minInclusive, minExclusive,
                         totalDigits, fractionDigits]).


%% Answers `true' when the argument names one of the simple types listed in
%% the final clause.  Accepted shapes:
%%   * {Type, _, ?XSD_NAMESPACE} -- Type may be an atom (converted with
%%     atom_to_list/1) or any other term, which is checked as-is;
%%   * any other 3-tuple         -- always `false';
%%   * a string                  -- compared against the name table.
%% A bare atom is not converted and therefore yields `false'.
%% NOTE(review): ?XSD_NAMESPACE is not defined in the visible part of this
%% file; presumably it comes from the included xmerl_xsd.hrl.
is_builtin_simple_type({Type, _, ?XSD_NAMESPACE}) ->
    Name = case is_atom(Type) of
               true -> atom_to_list(Type);
               false -> Type
           end,
    is_builtin_simple_type(Name);
is_builtin_simple_type({_, _, _}) ->
    false;
is_builtin_simple_type(Name) ->
    lists:member(Name,
                 ["string", "normalizedString", "token",
                  "base64Binary", "hexBinary",
                  "integer", "positiveInteger", "negativeInteger",
                  "nonNegativeInteger", "nonPositiveInteger",
                  "long", "unsignedLong", "int", "unsignedInt",
                  "short", "unsignedShort",
                  "decimal", "float", "double", "boolean",
                  "duration", "dateTime", "date", "time",
                  "gYear", "gYearMonth", "gMonth", "gMonthDay", "gDay",
                  "Name", "QName", "NCName", "anyURI", "language",
                  "ID", "IDREF", "IDREFS", "ENTITY", "ENTITIES",
                  "NOTATION", "NMTOKEN", "NMTOKENS",
                  "byte", "unsignedByte"]).

%% Answers `true' when the argument names one of the types listed in the
%% final clause.  Accepted shapes:
%%   * {Type, _, ?XSD_NAMESPACE} -- the check is repeated on Type;
%%   * any other 3-tuple         -- always `false';
%%   * an atom                   -- converted with atom_to_list/1 first;
%%   * a string                  -- compared against the name table.
is_xsd_string({Type, _, ?XSD_NAMESPACE}) ->
    is_xsd_string(Type);
is_xsd_string({_, _, _}) ->
    false;
is_xsd_string(Atom) when is_atom(Atom) ->
    is_xsd_string(atom_to_list(Atom));
is_xsd_string(Name) ->
    lists:member(Name,
                 ["string", "normalizedString", "token", "language",
                  "Name", "NMTOKEN", "NMTOKENS", "NCName",
                  "ID", "IDREF", "IDREFS", "ENTITY", "ENTITIES"]).