1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 2003-2016. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20
21%%% Description  : Utility module for handling XML trees.
22%%%----------------------------------------------------------------------
23
24-module(xmerl_lib).
25
26-export([normalize_content/1, normalize_content/3, expand_content/1,
27	 expand_content/3, normalize_element/1, normalize_element/3,
28	 expand_element/1, expand_element/3, expand_attributes/1,
29	 expand_attributes/3, export_text/1, flatten_text/1,
30	 export_attribute/1, markup/2, markup/3, simplify_element/1,
31	 simplify_content/1, start_tag/1, start_tag/2, end_tag/1,
32	 empty_tag/1, empty_tag/2,is_empty_data/1, find_attribute/2,
33	 remove_whitespace/1,to_lower/1]).
34
35-export([is_letter/1,is_namechar/1,is_ncname/1,
36	 detect_charset/1,detect_charset/2,is_name/1,is_char/1]).
37
38
39-export([mapxml/2, foldxml/3, mapfoldxml/3]).
40
41%% exports for XSD
42-export([is_facet/1,is_builtin_simple_type/1,is_xsd_string/1]).
43
44-include("xmerl.hrl").
45-include("xmerl_xsd.hrl").
46
47
%% Flattens deep text (nested lists of characters and binaries) into a
%% plain string, replacing the characters `<', `>' and `&' by the
%% entity references `&lt;', `&gt;' and `&amp;'. (`>' is escaped only
%% for symmetry with `<'.)

export_text(Text) ->
    export_text(Text, []).

%% export_text(Text, Pending): Pending is a stack holding the tails
%% that remain to be processed once the current (sub)list is exhausted.
export_text([], []) ->
    [];
export_text([], [Next | Pending]) ->
    export_text(Next, Pending);
export_text([Char | Rest], Pending) when is_integer(Char) ->
    case Char of
        $< -> "&lt;" ++ export_text(Rest, Pending);
        $> -> "&gt;" ++ export_text(Rest, Pending);
        $& -> "&amp;" ++ export_text(Rest, Pending);
        _ -> [Char | export_text(Rest, Pending)]
    end;
export_text([Nested | Rest], Pending) ->
    %% Descend into the nested element first; remember the tail.
    export_text(Nested, [Rest | Pending]);
export_text(Bin, Pending) ->
    %% Anything that is not a list is handed to binary_to_list/1.
    export_text(binary_to_list(Bin), Pending).
70
71
%% Flattens deep text (nested lists of characters and binaries) into a
%% plain string. No characters are escaped.

flatten_text(Text) ->
    flatten_text(Text, []).

%% flatten_text(Text, Pending): Pending is a stack holding the tails
%% that remain to be processed once the current (sub)list is exhausted.
flatten_text([], []) ->
    [];
flatten_text([], [Next | Pending]) ->
    flatten_text(Next, Pending);
flatten_text([Char | Rest], Pending) when is_integer(Char) ->
    [Char | flatten_text(Rest, Pending)];
flatten_text([Nested | Rest], Pending) ->
    %% Descend into the nested element first; remember the tail.
    flatten_text(Nested, [Rest | Pending]);
flatten_text(Bin, Pending) ->
    %% Anything that is not a list is handed to binary_to_list/1.
    flatten_text(binary_to_list(Bin), Pending).
87
%% Convert an attribute value to a flat string, escaping the characters
%% `"', `<' and `&'. (Note that single-quote characters are not escaped;
%% the markup-generating functions (`start_tag', `end_tag', ...) always
%% use `"' to delimit the attribute values.)
%%
%% Tab, line feed and carriage return are written as the character
%% references `&#9;', `&#10;' and `&#13;'. A parser performing
%% attribute-value normalization replaces the literal characters by
%% spaces, so emitting them unescaped would change the value on a
%% round trip.
%%
%% The value may be an integer, an atom, or deep text (nested lists of
%% characters and binaries).

export_attribute(I) when is_integer(I) ->
    integer_to_list(I);
export_attribute(A) when is_atom(A) ->
    export_attribute(atom_to_list(A), []);
export_attribute(S) ->
    export_attribute(S, []).

%% export_attribute(Text, Cont): Cont is a stack holding the tails that
%% remain to be processed once the current (sub)list is exhausted.
export_attribute([$< | T], Cont) ->
    "&lt;" ++ export_attribute(T, Cont);
export_attribute([$& | T], Cont) ->
    "&amp;" ++ export_attribute(T, Cont);
export_attribute([$" | T], Cont) ->
    "&quot;" ++ export_attribute(T, Cont);
export_attribute([$\t | T], Cont) ->
    "&#9;" ++ export_attribute(T, Cont);
export_attribute([$\n | T], Cont) ->
    "&#10;" ++ export_attribute(T, Cont);
export_attribute([$\r | T], Cont) ->
    "&#13;" ++ export_attribute(T, Cont);
export_attribute([C | T], Cont) when is_integer(C) ->
    [C | export_attribute(T, Cont)];
export_attribute([T | T1], Cont) ->
    export_attribute(T, [T1 | Cont]);
export_attribute([], [T | Cont]) ->
    export_attribute(T, Cont);
export_attribute([], []) ->
    [];
export_attribute(Bin, Cont) ->
    %% Anything that is not a list is handed to binary_to_list/1.
    export_attribute(binary_to_list(Bin), Cont).
116
117
118%% SimpleContent: [SimpleElement]
119%% SimpleElement: #xml...{} | String | {atom(), [Attr], SimpleContent}
120%%                | {atom(), SimpleContent} | atom()
121%% Attr: {atom(), Value} | #xmlAttribute{}
122%% Value: atom() | integer() | String
123%% String: [char() | binary() | String]
124%%
125%% Because strings can be deep, we do not allow content lists to also be
126%% deep; otherwise, traversal of the simple representation becomes too
127%% complicated and expensive. Simple content lists are thus flat lists
128%% of simple elements.
129
130%% TODO: namespace-qualified tags in simple-form? /RC
131
%% `normalize' performs the same expansion as `expand', with the
%% normalisation flag switched on so that text values are also turned
%% into flat strings.

%% Normalizes Element as a root node: position 1, no parents.
normalize_element(Element) ->
    normalize_element(Element, 1, []).

%% Normalizes Element located at position Pos below Parents.
normalize_element(Element, Pos, Parents) ->
    Flatten = true,
    expand_element(Element, Pos, Parents, Flatten).
140
%% `expand' turns simple-form elements into the corresponding XML
%% records. Attribute values (including those already held in
%% #xmlAttribute records) always end up as flat strings, so that they
%% can be compared as strings. Text values are left as they are unless
%% normalisation is requested.

%% Expands Element as a root node: position 1, no parents.
expand_element(Element) ->
    expand_element(Element, 1, []).

%% Expands Element located at position Pos below Parents.
expand_element(Element, Pos, Parents) ->
    expand_element(Element, Pos, Parents, false).

%% expand_element(Element, Pos, Parents, Norm): Norm =:= true also
%% flattens the values of text, PI and comment nodes.
expand_element(#xmlElement{name = Name,
                           attributes = Attrs0,
                           content = Content0} = Elem,
               Pos, Parents, Norm) ->
    %% Children and attributes see this element as their nearest parent.
    Path = [{Name, Pos} | Parents],
    Content = expand_content(Content0, 1, Path, Norm),
    Attrs = expand_attributes(Attrs0, 1, Path),
    Elem#xmlElement{pos = Pos,
                    parents = Parents,
                    attributes = Attrs,
                    content = Content};
expand_element(#xmlText{value = Value} = Text, Pos, Parents, Norm) ->
    Text#xmlText{pos = Pos,
                 parents = Parents,
                 value = expand_text(Value, Norm)};
expand_element(#xmlPI{value = Value} = PI, Pos, Parents, Norm) ->
    PI#xmlPI{pos = Pos,
             parents = Parents,
             value = expand_text(Value, Norm)};
expand_element(#xmlComment{value = Value} = Comment, Pos, Parents, Norm) ->
    Comment#xmlComment{pos = Pos,
                       parents = Parents,
                       value = expand_text(Value, Norm)};
expand_element(#xmlDecl{attributes = Attrs0} = Decl, _Pos, _Parents, _Norm) ->
    %% A declaration keeps its position; only its attributes are expanded.
    Decl#xmlDecl{attributes = expand_attributes(Attrs0, 1, [])};
expand_element({Tag, Attrs0, Content0}, Pos, Parents, Norm) when is_atom(Tag) ->
    Path = [{Tag, Pos} | Parents],
    Attrs = expand_attributes(Attrs0, 1, Path),
    Content = expand_content(Content0, 1, Path, Norm),
    #xmlElement{name = Tag,
                pos = Pos,
                parents = Parents,
                attributes = Attrs,
                content = Content};
expand_element({Tag, Content0}, Pos, Parents, Norm) when is_atom(Tag) ->
    Path = [{Tag, Pos} | Parents],
    Content = expand_content(Content0, 1, Path, Norm),
    #xmlElement{name = Tag,
                pos = Pos,
                parents = Parents,
                attributes = [],
                content = Content};
expand_element(Tag, Pos, Parents, _Norm) when is_atom(Tag) ->
    %% A bare atom is an element without attributes and content.
    #xmlElement{name = Tag,
                pos = Pos,
                parents = Parents,
                attributes = [],
                content = []};
expand_element(String, Pos, Parents, Norm) when is_list(String) ->
    %% Any other list is taken to be text.
    #xmlText{pos = Pos,
             parents = Parents,
             value = expand_text(String, Norm)}.
199
%% expand_text(Text, Normalize): flattens Text when Normalize is true,
%% otherwise returns it untouched.
expand_text(Text, true) -> flatten_text(Text);
expand_text(Text, false) -> Text.
202
%% Content must be a flat list of elements.

%% Like expand_content/1, with text values flattened as well.
normalize_content(Content) ->
    normalize_content(Content, 1, []).

%% Like expand_content/3, with text values flattened as well.
normalize_content(Content, Pos, Parents) ->
    expand_content(Content, Pos, Parents, true).

%% Expands a top-level content list: first position 1, no parents.
expand_content(Content) ->
    expand_content(Content, 1, []).

%% Expands a content list whose first element has position Pos.
expand_content(Content, Pos, Parents) ->
    expand_content(Content, Pos, Parents, false).

%% Besides ordinary elements, a content list may hold
%%   {List}         - List is spliced in at that point, and
%%   {Fun, State}   - Fun(State) returns `done' or {Element, NewState};
%%                    each produced Element is expanded in turn.
expand_content([], _Pos, _Parents, _Norm) ->
    [];
expand_content([Item | Rest], Pos, Parents, Norm) ->
    case Item of
        {Nested} ->
            expand_content(Nested ++ Rest, Pos, Parents, Norm);
        {Gen, State} when is_function(Gen) ->
            expand_generated(Gen(State), Gen, Rest, Pos, Parents, Norm);
        _ ->
            [expand_element(Item, Pos, Parents, Norm)
             | expand_content(Rest, Pos + 1, Parents, Norm)]
    end.

%% Continues expansion after one step of a generator fun: either the
%% generator is exhausted, or the produced element is queued behind
%% the generator with its new state.
expand_generated(done, _Gen, Rest, Pos, Parents, Norm) ->
    expand_content(Rest, Pos, Parents, Norm);
expand_generated({Produced, NextState}, Gen, Rest, Pos, Parents, Norm) ->
    expand_content([{Gen, NextState}, Produced | Rest], Pos, Parents, Norm).
229
%% Expands a top-level attribute list: first position 1, no parents.
expand_attributes(Attrs) ->
    expand_attributes(Attrs, 1, []).

%% Expanding always turns all attribute values into flat strings.
%%
%% An attribute list may hold #xmlAttribute records, {Name, Value}
%% pairs, and {Fun, State} generators where Fun(State) returns `done'
%% or {Attribute, NewState}. Records get a new position and a flat
%% value; their `parents' field is left as it was.
expand_attributes([], _Pos, _Parents) ->
    [];
expand_attributes([Item | Rest], Pos, Parents) ->
    case Item of
        #xmlAttribute{value = Value} ->
            [Item#xmlAttribute{pos = Pos,
                               value = expand_value(Value)}
             | expand_attributes(Rest, Pos + 1, Parents)];
        {Gen, State} when is_function(Gen) ->
            case Gen(State) of
                done ->
                    expand_attributes(Rest, Pos, Parents);
                {Produced, NextState} ->
                    expand_attributes([{Gen, NextState}, Produced | Rest],
                                      Pos, Parents)
            end;
        {Key, Value} ->
            [#xmlAttribute{name = Key,
                           pos = Pos,
                           parents = Parents,
                           value = expand_value(Value)}
             | expand_attributes(Rest, Pos + 1, Parents)]
    end.
254
%% Turns an attribute value (atom, integer or deep text) into a flat
%% string.
expand_value(Value) ->
    if
        is_atom(Value) -> atom_to_list(Value);
        is_integer(Value) -> integer_to_list(Value);
        true -> flatten_text(Value)
    end.
261
%% Simplification is meant to yield a normal form, so elements are
%% always returned as three-tuples {Name, Attributes, Content}. PI,
%% Comment and Decl nodes are dropped from content lists. Text is
%% returned as it is, without flattening.
%%
%% For #xmlElement records the expanded name is used as the tuple tag
%% when it is set (i.e. not []); otherwise the plain name is used.

simplify_element(#xmlElement{expanded_name = Expanded,
                             name = Tag,
                             attributes = Attrs,
                             content = Content}) ->
    Name = case Expanded of
               [] -> Tag;
               _ -> Expanded
           end,
    {Name, simplify_attributes(Attrs), simplify_content(Content)};
simplify_element(#xmlText{value = Value}) ->
    Value;
simplify_element({Tag, Attrs, Content}) when is_atom(Tag) ->
    {Tag, simplify_attributes(Attrs), simplify_content(Content)};
simplify_element({Tag, Content}) when is_atom(Tag) ->
    {Tag, [], simplify_content(Content)};
simplify_element(Tag) when is_atom(Tag) ->
    {Tag, [], []};
simplify_element(Text) when is_list(Text) ->
    %% Any other list is taken to be text.
    Text.
283
%% Simplifies every element of a content list, dropping PI, comment
%% and declaration nodes.
simplify_content([]) ->
    [];
simplify_content([Node | Rest]) ->
    case Node of
        #xmlPI{} ->
            simplify_content(Rest);
        #xmlComment{} ->
            simplify_content(Rest);
        #xmlDecl{} ->
            simplify_content(Rest);
        _ ->
            [simplify_element(Node) | simplify_content(Rest)]
    end.
294
%% Turns an attribute list into a list of {Name, Value} pairs.
%% #xmlAttribute records get their value flattened into a string;
%% {Name, Value} pairs are passed through unchanged. Names must be
%% atoms.
simplify_attributes([]) ->
    [];
simplify_attributes([#xmlAttribute{name = Key, value = Value} | Rest])
  when is_atom(Key) ->
    [{Key, expand_value(Value)} | simplify_attributes(Rest)];
simplify_attributes([{Key, _} = Pair | Rest]) when is_atom(Key) ->
    [Pair | simplify_attributes(Rest)].
302
%% Looks up the attribute called Name in a list of #xmlAttribute
%% records. Returns {value, Value} for the first match, or `false'
%% when there is none.

find_attribute(Name, Attrs) ->
    case lists:keyfind(Name, #xmlAttribute.name, Attrs) of
        #xmlAttribute{value = Value} ->
            {value, Value};
        false ->
            false
    end.
312
313
%% Wraps Data in an element Tag without attributes.
markup(Tag, Data) ->
    markup(Tag, [], Data).

%% Wraps Data in start and end tags for Tag. When Data is exactly []
%% an empty-element tag is produced instead.
markup(Tag, Attrs, Data) ->
    case Data of
        [] ->
            empty_tag(Tag, Attrs);
        _ ->
            [start_tag(Tag, Attrs), Data, end_tag(Tag)]
    end.
321
%% Start tag without attributes.
start_tag(TagStr) ->
    start_tag(TagStr, []).

%% Start tag for Tag (atom or string) with the given attribute list,
%% returned as deep text.
start_tag(Tag, Attrs) when is_atom(Tag) ->
    start_tag(atom_to_list(Tag), Attrs);
start_tag(TagStr, Attrs) ->
    case Attrs of
        [] ->
            ["<", TagStr, ">"];
        _ ->
            ["<", TagStr, attributes(Attrs), ">"]
    end.
331
%% Empty-element tag without attributes.
empty_tag(Tag) ->
    empty_tag(Tag, []).

%% Empty-element tag for Tag (atom or string) with the given attribute
%% list, returned as deep text.
empty_tag(Tag, Attrs) when is_atom(Tag) ->
    empty_tag(atom_to_list(Tag), Attrs);
empty_tag(TagStr, Attrs) ->
    case Attrs of
        [] ->
            ["<", TagStr, "/>"];
        _ ->
            ["<", TagStr, attributes(Attrs), "/>"]
    end.
341
%% End tag for Tag (atom or string), returned as deep text.
end_tag(Tag) ->
    Name = if
               is_atom(Tag) -> atom_to_list(Tag);
               true -> Tag
           end,
    ["</", Name, ">"].
346
%% Renders a list of #xmlAttribute records as deep text, one
%% ` name="value"' fragment per attribute.
attributes(Attrs) ->
    lists:map(fun attr_string/1, Attrs).

%% Renders a single attribute; the name must be an atom and the value
%% is escaped by export_attribute/1.
attr_string(#xmlAttribute{name = Key, value = Value}) ->
    Name = atom_to_list(Key),
    [" ", Name, "=\"", export_attribute(Value), "\""].
352
%% Returns true when Data is [] or a (possibly nested) list consisting
%% only of empty lists; any other term yields false.
is_empty_data([]) ->
    true;
is_empty_data([Head | Tail]) ->
    is_empty_data(Head) andalso is_empty_data(Tail);
is_empty_data(_) ->
    false.
364
365
%% Removes normalised whitespace-only text segments, i.e. #xmlText
%% nodes whose value is exactly " ", from a content list and,
%% recursively, from the content of every #xmlElement in it.

remove_whitespace([]) ->
    [];
remove_whitespace([Node | Rest]) ->
    case Node of
        #xmlText{value = " "} ->
            remove_whitespace(Rest);
        #xmlElement{content = Children} ->
            [Node#xmlElement{content = remove_whitespace(Children)}
             | remove_whitespace(Rest)];
        _ ->
            [Node | remove_whitespace(Rest)]
    end.
377
378
379%%% ----------------------------------------------------------------------------
380%%% funs traversing the xmerl tree left-right and top-down
381
%% mapxml(Fun, Tree)
%% Fun is fun(OldNode) -> NewNode. It is applied to an #xmlElement
%% before its children are visited; the children visited are those of
%% the element returned by Fun, with the content list flattened first.
mapxml(Fun, #xmlElement{} = Elem) ->
    Mapped = Fun(Elem),
    Children = lists:flatten(Mapped#xmlElement.content),
    Mapped#xmlElement{content = mapxml(Fun, Children)};
mapxml(Fun, Nodes) when is_list(Nodes) ->
    [mapxml(Fun, Node) || Node <- Nodes];
mapxml(Fun, Node) ->
    Fun(Node).
393
394
%% foldxml(Fun, Accu, Tree)
%% Fun is fun(Node, OldAccu) -> NewAccu. An #xmlElement is passed to
%% Fun before its content is folded, left to right.
foldxml(Fun, Acc0, #xmlElement{content = Children} = Elem) ->
    foldxml(Fun, Fun(Elem, Acc0), Children);
foldxml(Fun, Acc0, Nodes) when is_list(Nodes) ->
    lists:foldl(fun(Node, Acc) -> foldxml(Fun, Acc, Node) end, Acc0, Nodes);
foldxml(Fun, Acc, Node) ->
    Fun(Node, Acc).
405
406
%% mapfoldxml(Fun, Accu, Tree)
%% Fun is fun(OldNode, OldAccu) -> {NewNode, NewAccu}. It is applied
%% to an #xmlElement before its children are visited; the children
%% visited are those of the element returned by Fun, with the content
%% list flattened first.
mapfoldxml(Fun, Acc0, #xmlElement{} = Elem) ->
    {Mapped, Acc1} = Fun(Elem, Acc0),
    Children = lists:flatten(Mapped#xmlElement.content),
    {NewChildren, Acc2} = mapfoldxml(Fun, Acc1, Children),
    {Mapped#xmlElement{content = NewChildren}, Acc2};
mapfoldxml(Fun, Acc0, Nodes) when is_list(Nodes) ->
    lists:mapfoldl(fun(Node, Acc) -> mapfoldxml(Fun, Acc, Node) end, Acc0, Nodes);
mapfoldxml(Fun, Acc, Node) ->
    Fun(Node, Acc).
418
419
%%% @spec detect_charset(T::list()) -> charset_info()
%%% @equiv detect_charset(undefined,T)
detect_charset(Content) ->
    detect_charset(undefined, Content).

%%% FIXME! What about aliases etc.? Shouldn't transforming with ucs be
%%% optional?
%%% @spec detect_charset(ExtCharset::atom(),T::list()) -> charset_info()
%%% @doc Automatically decides which character set an XML document uses.
%%%  charset_info() is
%%%  <table>
%%%    <tr><td><code>{auto,'iso-10646-utf-1',Content} |</code></td></tr>
%%%    <tr><td><code>{external,'iso-10646-utf-1',Content} |</code></td></tr>
%%%    <tr><td><code>{undefined,undefined,Content} |</code></td></tr>
%%%    <tr><td><code>{external,ExtCharset,Content}</code></td></tr>
%%%  </table>
%%%   ExtCharset is any externally declared character set (e.g. in an
%%%   HTTP Content-Type header) and Content is an XML document. In the
%%%   first two cases the returned Content has been converted by
%%%   autodetect/2; in the last two it is the input, unchanged.
%%%
detect_charset(ExtCharset, Content) when is_list(ExtCharset) ->
    %% FIXME! Don't allow both atom and list for character set names
    %% NOTE(review): list_to_atom/1 creates an atom for every distinct
    %% name; confirm that callers never pass unbounded untrusted names.
    detect_charset(list_to_atom(ExtCharset), Content);
detect_charset(ExtCharset, Content) ->
    Detected = autodetect(ExtCharset, Content),
    case Detected of
        {auto, Converted} ->
            {auto, 'iso-10646-utf-1', Converted};
        {external, Converted} ->
            {external, 'iso-10646-utf-1', Converted};
        {undefined, _} ->
            %% Nothing declared and nothing detected.
            {undefined, undefined, Content};
        {ExtCharset, Content} ->
            %% Nothing detected; trust the external declaration.
            {external, ExtCharset, Content}
    end.
452
%%------------------------------------------------------------------------------
%% Auto detect what kind of character set we are dealing with and transform
%% to Erlang integer Unicode format if found.
%% Appendix F, Page 56-57, XML 1.0 W3C Recommendation 6 October 2000
%% (http://www.w3.org/TR/REC-xml)
%% 00 00 00 3C ( "<" in UCS-4 big-endian)
%% 3C 00 00 00 ( "<" in UCS-4 little-endian)
%% FE FF (UTF-16 - big-endian Mark)
%% FF FE (UTF-16 - little-endian Mark)
%% 00 3C 00 3F ( "<?" in UTF-16 big-endian)
%% 3C 00 3F 00 ( "<?" in UTF-16 little-endian)
%% 3C 3F (7-bit,8-bit or mixed width encoding)
%% 4C 6F A7 94 (EBCDIC) - Not Implemented!!!!
%%
%% autodetect(ExtCharset, Content) returns
%%   {auto, Converted}      - encoding recognised, nothing declared,
%%   {external, Converted}  - encoding recognised and compatible with
%%                            the declared ExtCharset,
%%   {ExtCharset, Content}  - nothing recognised; input returned as is.
%% Clause order matters: the four-byte marks must be tried before the
%% two-byte marks that are prefixes of them.

%%% --- With byte-order mark (the mark itself is dropped)
autodetect(Charset, [0,0,16#fe,16#ff | Input])
  when Charset =:= undefined; Charset =:= 'iso-10646-utf-1' ->
    {autodetect_origin(Charset), xmerl_ucs:from_ucs4be(Input)};
autodetect(Charset, [16#ff,16#fe,0,0 | Input])
  when Charset =:= undefined; Charset =:= 'iso-10646-utf-1' ->
    {autodetect_origin(Charset), xmerl_ucs:from_ucs4le(Input)};

autodetect(Charset, [16#fe,16#ff | Input])
  when Charset =:= undefined; Charset =:= 'utf-16be' ->
    {autodetect_origin(Charset), xmerl_ucs:from_utf16be(Input)};
autodetect(Charset, [16#ff,16#fe | Input])
  when Charset =:= undefined; Charset =:= 'utf-16le' ->
    {autodetect_origin(Charset), xmerl_ucs:from_utf16le(Input)};

autodetect(Charset, [16#ef,16#bb,16#bf | Input])
  when Charset =:= undefined; Charset =:= 'utf-8' ->
    {autodetect_origin(Charset), xmerl_ucs:from_utf8(Input)};
%% Declared as UTF-8 but carrying a UTF-16 mark: the mark wins.
autodetect('utf-8', [16#ff,16#fe | Input]) ->
    {external, xmerl_ucs:from_utf16le(Input)};
autodetect('utf-8', [16#fe,16#ff | Input]) ->
    {external, xmerl_ucs:from_utf16be(Input)};

%%% --- Without byte-order mark (the whole input is converted)
autodetect(Charset, [0,0,0,16#3c | _] = Content)
  when Charset =:= undefined; Charset =:= 'iso-10646-utf-1' ->
    {autodetect_origin(Charset), xmerl_ucs:from_ucs4be(Content)};
autodetect(Charset, [16#3c,0,0,0 | _] = Content)
  when Charset =:= undefined; Charset =:= 'iso-10646-utf-1' ->
    {autodetect_origin(Charset), xmerl_ucs:from_ucs4le(Content)};

autodetect(Charset, [0,16#3c,0,16#3f | _] = Content)
  when Charset =:= undefined; Charset =:= 'utf-16be' ->
    {autodetect_origin(Charset), xmerl_ucs:from_utf16be(Content)};
autodetect(Charset, [16#3c,0,16#3f,0 | _] = Content)
  when Charset =:= undefined; Charset =:= 'utf-16le' ->
    {autodetect_origin(Charset), xmerl_ucs:from_utf16le(Content)};

autodetect(ExtCharset, Content) ->
    {ExtCharset, Content}.

%% Tag for a recognised encoding: `auto' when no charset was declared,
%% `external' when the declared charset was used.
autodetect_origin(undefined) -> auto;
autodetect_origin(_Declared) -> external.
517
518
%% Returns true when the atom or string is an NCName, i.e. a name that
%% contains no colon: it starts with `_' or a letter and continues
%% with name characters other than `:'. The empty name is not an
%% NCName.
is_ncname(A) when is_atom(A) ->
    is_ncname(atom_to_list(A));
is_ncname([]) ->
    false;
is_ncname([$_ | T]) ->
    is_ncname_tail(T);
is_ncname([H | T]) ->
    case is_letter(H) of
        true ->
            is_ncname_tail(T);
        _ ->
            false
    end.

%% Checks the characters after the first one. `:' has to be rejected
%% explicitly because is_namechar/1 accepts it.
is_ncname_tail([]) ->
    true;
is_ncname_tail([$: | _]) ->
    false;
is_ncname_tail([H | T]) ->
    case is_namechar(H) of
        true ->
            is_ncname_tail(T);
        _ ->
            false
    end.
529
%% Returns true when the atom or string is a Name: it starts with `_',
%% `:' or a letter and continues with name characters. The empty name
%% is not a Name.
is_name(A) when is_atom(A) ->
    is_name(atom_to_list(A));
is_name([]) ->
    false;
is_name([$_ | T]) ->
    is_name1(T);
is_name([$: | T]) ->
    is_name1(T);
is_name([H | T]) ->
    case is_letter(H) of
        true ->
            is_name1(T);
        _ ->
            false
    end.
542
%% Returns true when every character of the list is a name character
%% according to is_namechar/1 (trivially true for the empty list).
is_name1([]) ->
    true;
is_name1([Char | Rest]) ->
    is_namechar(Char) =:= true andalso is_name1(Rest).
551
552
553
554% =======
555%%% UNICODE character definitions
556
%%%%%%%% [2] Char
%% Accepts tab, line feed, carriage return and the ranges
%% 16#20-16#D7FF, 16#E000-16#FFFD and 16#10000-16#10FFFF.

is_char(X) ->
    X =:= 16#09
        orelse X =:= 16#0A
        orelse X =:= 16#0D
        orelse (X >= 16#20 andalso X =< 16#D7FF)
        orelse (X >= 16#E000 andalso X =< 16#FFFD)
        orelse (X >= 16#10000 andalso X =< 16#10FFFF).
566
%% Classification table for the code points 1..255, indexed directly
%% by code point with element/2 (code point 0 has no entry). It serves
%% as a fast path in is_namechar/1 and is_letter/1. Entry values:
%% 0 - not classified,
%% 1 - base_char or ideographic,
%% 2 - combining_char or digit or extender,
%% 3 - $. or $- or $_ or $:
-define(SMALL, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,2,2,2,2,2,2,2,2,2,2,3,0,
                0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,0,0,0,0,3,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,2,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1}).
580
%% [4] NameChar
%% Code points covered by the ?SMALL table are answered from it (any
%% non-zero class is a name character). Everything else is a name
%% character when it is a letter, digit, combining character or
%% extender.
is_namechar(X) when is_integer(X), X >= 1, X =< tuple_size(?SMALL) ->
    element(X, ?SMALL) > 0;
is_namechar(X) ->
    is_letter(X)
        orelse is_digit(X)
        orelse is_combining_char(X)
        orelse is_extender(X).
599
%% [84] Letter
%% Code points covered by the ?SMALL table are answered from it (class
%% 1 means letter). Everything else is a letter when it is a base
%% character or an ideographic character.
is_letter(X) when is_integer(X), X >= 1, X =< tuple_size(?SMALL) ->
    element(X, ?SMALL) =:= 1;
is_letter(X) ->
    is_base_char(X) orelse is_ideographic(X).
611
%% [85] BaseChar
%% One clause per range (bounds inclusive) or single code point;
%% anything not listed is not a base character.
is_base_char(X) when 16#0041 =< X, X =< 16#005A -> true;
is_base_char(X) when 16#0061 =< X, X =< 16#007A -> true;
is_base_char(X) when 16#00C0 =< X, X =< 16#00D6 -> true;
is_base_char(X) when 16#00D8 =< X, X =< 16#00F6 -> true;
is_base_char(X) when 16#00F8 =< X, X =< 16#00FF -> true;
is_base_char(X) when 16#0100 =< X, X =< 16#0131 -> true;
is_base_char(X) when 16#0134 =< X, X =< 16#013E -> true;
is_base_char(X) when 16#0141 =< X, X =< 16#0148 -> true;
is_base_char(X) when 16#014A =< X, X =< 16#017E -> true;
is_base_char(X) when 16#0180 =< X, X =< 16#01C3 -> true;
is_base_char(X) when 16#01CD =< X, X =< 16#01F0 -> true;
is_base_char(X) when 16#01F4 =< X, X =< 16#01F5 -> true;
is_base_char(X) when 16#01FA =< X, X =< 16#0217 -> true;
is_base_char(X) when 16#0250 =< X, X =< 16#02A8 -> true;
is_base_char(X) when 16#02BB =< X, X =< 16#02C1 -> true;
is_base_char(16#0386) -> true;
is_base_char(X) when 16#0388 =< X, X =< 16#038A -> true;
is_base_char(16#038C) -> true;
is_base_char(X) when 16#038E =< X, X =< 16#03A1 -> true;
is_base_char(X) when 16#03A3 =< X, X =< 16#03CE -> true;
is_base_char(X) when 16#03D0 =< X, X =< 16#03D6 -> true;
is_base_char(16#03DA) -> true;
is_base_char(16#03DC) -> true;
is_base_char(16#03DE) -> true;
is_base_char(16#03E0) -> true;
is_base_char(X) when 16#03E2 =< X, X =< 16#03F3 -> true;
is_base_char(X) when 16#0401 =< X, X =< 16#040C -> true;
is_base_char(X) when 16#040E =< X, X =< 16#044F -> true;
is_base_char(X) when 16#0451 =< X, X =< 16#045C -> true;
is_base_char(X) when 16#045E =< X, X =< 16#0481 -> true;
is_base_char(X) when 16#0490 =< X, X =< 16#04C4 -> true;
is_base_char(X) when 16#04C7 =< X, X =< 16#04C8 -> true;
is_base_char(X) when 16#04CB =< X, X =< 16#04CC -> true;
is_base_char(X) when 16#04D0 =< X, X =< 16#04EB -> true;
is_base_char(X) when 16#04EE =< X, X =< 16#04F5 -> true;
is_base_char(X) when 16#04F8 =< X, X =< 16#04F9 -> true;
is_base_char(X) when 16#0531 =< X, X =< 16#0556 -> true;
is_base_char(16#0559) -> true;
is_base_char(X) when 16#0561 =< X, X =< 16#0586 -> true;
is_base_char(X) when 16#05D0 =< X, X =< 16#05EA -> true;
is_base_char(X) when 16#05F0 =< X, X =< 16#05F2 -> true;
is_base_char(X) when 16#0621 =< X, X =< 16#063A -> true;
is_base_char(X) when 16#0641 =< X, X =< 16#064A -> true;
is_base_char(X) when 16#0671 =< X, X =< 16#06B7 -> true;
is_base_char(X) when 16#06BA =< X, X =< 16#06BE -> true;
is_base_char(X) when 16#06C0 =< X, X =< 16#06CE -> true;
is_base_char(X) when 16#06D0 =< X, X =< 16#06D3 -> true;
is_base_char(16#06D5) -> true;
is_base_char(X) when 16#06E5 =< X, X =< 16#06E6 -> true;
is_base_char(X) when 16#0905 =< X, X =< 16#0939 -> true;
is_base_char(16#093D) -> true;
is_base_char(X) when 16#0958 =< X, X =< 16#0961 -> true;
is_base_char(X) when 16#0985 =< X, X =< 16#098C -> true;
is_base_char(X) when 16#098F =< X, X =< 16#0990 -> true;
is_base_char(X) when 16#0993 =< X, X =< 16#09A8 -> true;
is_base_char(X) when 16#09AA =< X, X =< 16#09B0 -> true;
is_base_char(16#09B2) -> true;
is_base_char(X) when 16#09B6 =< X, X =< 16#09B9 -> true;
is_base_char(X) when 16#09DC =< X, X =< 16#09DD -> true;
is_base_char(X) when 16#09DF =< X, X =< 16#09E1 -> true;
is_base_char(X) when 16#09F0 =< X, X =< 16#09F1 -> true;
is_base_char(X) when 16#0A05 =< X, X =< 16#0A0A -> true;
is_base_char(X) when 16#0A0F =< X, X =< 16#0A10 -> true;
is_base_char(X) when 16#0A13 =< X, X =< 16#0A28 -> true;
is_base_char(X) when 16#0A2A =< X, X =< 16#0A30 -> true;
is_base_char(X) when 16#0A32 =< X, X =< 16#0A33 -> true;
is_base_char(X) when 16#0A35 =< X, X =< 16#0A36 -> true;
is_base_char(X) when 16#0A38 =< X, X =< 16#0A39 -> true;
is_base_char(X) when 16#0A59 =< X, X =< 16#0A5C -> true;
is_base_char(16#0A5E) -> true;
is_base_char(X) when 16#0A72 =< X, X =< 16#0A74 -> true;
is_base_char(X) when 16#0A85 =< X, X =< 16#0A8B -> true;
is_base_char(16#0A8D) -> true;
is_base_char(X) when 16#0A8F =< X, X =< 16#0A91 -> true;
is_base_char(X) when 16#0A93 =< X, X =< 16#0AA8 -> true;
is_base_char(X) when 16#0AAA =< X, X =< 16#0AB0 -> true;
is_base_char(X) when 16#0AB2 =< X, X =< 16#0AB3 -> true;
is_base_char(X) when 16#0AB5 =< X, X =< 16#0AB9 -> true;
is_base_char(16#0ABD) -> true;
is_base_char(16#0AE0) -> true;
is_base_char(X) when 16#0B05 =< X, X =< 16#0B0C -> true;
is_base_char(X) when 16#0B0F =< X, X =< 16#0B10 -> true;
is_base_char(X) when 16#0B13 =< X, X =< 16#0B28 -> true;
is_base_char(X) when 16#0B2A =< X, X =< 16#0B30 -> true;
is_base_char(X) when 16#0B32 =< X, X =< 16#0B33 -> true;
is_base_char(X) when 16#0B36 =< X, X =< 16#0B39 -> true;
is_base_char(16#0B3D) -> true;
is_base_char(X) when 16#0B5C =< X, X =< 16#0B5D -> true;
is_base_char(X) when 16#0B5F =< X, X =< 16#0B61 -> true;
is_base_char(X) when 16#0B85 =< X, X =< 16#0B8A -> true;
is_base_char(X) when 16#0B8E =< X, X =< 16#0B90 -> true;
is_base_char(X) when 16#0B92 =< X, X =< 16#0B95 -> true;
is_base_char(X) when 16#0B99 =< X, X =< 16#0B9A -> true;
is_base_char(16#0B9C) -> true;
is_base_char(X) when 16#0B9E =< X, X =< 16#0B9F -> true;
is_base_char(X) when 16#0BA3 =< X, X =< 16#0BA4 -> true;
is_base_char(X) when 16#0BA8 =< X, X =< 16#0BAA -> true;
is_base_char(X) when 16#0BAE =< X, X =< 16#0BB5 -> true;
is_base_char(X) when 16#0BB7 =< X, X =< 16#0BB9 -> true;
is_base_char(X) when 16#0C05 =< X, X =< 16#0C0C -> true;
is_base_char(X) when 16#0C0E =< X, X =< 16#0C10 -> true;
is_base_char(X) when 16#0C12 =< X, X =< 16#0C28 -> true;
is_base_char(X) when 16#0C2A =< X, X =< 16#0C33 -> true;
is_base_char(X) when 16#0C35 =< X, X =< 16#0C39 -> true;
is_base_char(X) when 16#0C60 =< X, X =< 16#0C61 -> true;
is_base_char(X) when 16#0C85 =< X, X =< 16#0C8C -> true;
is_base_char(X) when 16#0C8E =< X, X =< 16#0C90 -> true;
is_base_char(X) when 16#0C92 =< X, X =< 16#0CA8 -> true;
is_base_char(X) when 16#0CAA =< X, X =< 16#0CB3 -> true;
is_base_char(X) when 16#0CB5 =< X, X =< 16#0CB9 -> true;
is_base_char(16#0CDE) -> true;
is_base_char(X) when 16#0CE0 =< X, X =< 16#0CE1 -> true;
is_base_char(X) when 16#0D05 =< X, X =< 16#0D0C -> true;
is_base_char(X) when 16#0D0E =< X, X =< 16#0D10 -> true;
is_base_char(X) when 16#0D12 =< X, X =< 16#0D28 -> true;
is_base_char(X) when 16#0D2A =< X, X =< 16#0D39 -> true;
is_base_char(X) when 16#0D60 =< X, X =< 16#0D61 -> true;
is_base_char(X) when 16#0E01 =< X, X =< 16#0E2E -> true;
is_base_char(16#0E30) -> true;
is_base_char(X) when 16#0E32 =< X, X =< 16#0E33 -> true;
is_base_char(X) when 16#0E40 =< X, X =< 16#0E45 -> true;
is_base_char(X) when 16#0E81 =< X, X =< 16#0E82 -> true;
is_base_char(16#0E84) -> true;
is_base_char(X) when 16#0E87 =< X, X =< 16#0E88 -> true;
is_base_char(16#0E8A) -> true;
is_base_char(16#0E8D) -> true;
is_base_char(X) when 16#0E94 =< X, X =< 16#0E97 -> true;
is_base_char(X) when 16#0E99 =< X, X =< 16#0E9F -> true;
is_base_char(X) when 16#0EA1 =< X, X =< 16#0EA3 -> true;
is_base_char(16#0EA5) -> true;
is_base_char(16#0EA7) -> true;
is_base_char(X) when 16#0EAA =< X, X =< 16#0EAB -> true;
is_base_char(X) when 16#0EAD =< X, X =< 16#0EAE -> true;
is_base_char(16#0EB0) -> true;
is_base_char(X) when 16#0EB2 =< X, X =< 16#0EB3 -> true;
is_base_char(16#0EBD) -> true;
is_base_char(X) when 16#0EC0 =< X, X =< 16#0EC4 -> true;
is_base_char(X) when 16#0F40 =< X, X =< 16#0F47 -> true;
is_base_char(X) when 16#0F49 =< X, X =< 16#0F69 -> true;
is_base_char(X) when 16#10A0 =< X, X =< 16#10C5 -> true;
is_base_char(X) when 16#10D0 =< X, X =< 16#10F6 -> true;
is_base_char(16#1100) -> true;
is_base_char(X) when 16#1102 =< X, X =< 16#1103 -> true;
is_base_char(X) when 16#1105 =< X, X =< 16#1107 -> true;
is_base_char(16#1109) -> true;
is_base_char(X) when 16#110B =< X, X =< 16#110C -> true;
is_base_char(X) when 16#110E =< X, X =< 16#1112 -> true;
is_base_char(16#113C) -> true;
is_base_char(16#113E) -> true;
is_base_char(16#1140) -> true;
is_base_char(16#114C) -> true;
is_base_char(16#114E) -> true;
is_base_char(16#1150) -> true;
is_base_char(X) when 16#1154 =< X, X =< 16#1155 -> true;
is_base_char(16#1159) -> true;
is_base_char(X) when 16#115F =< X, X =< 16#1161 -> true;
is_base_char(16#1163) -> true;
is_base_char(16#1165) -> true;
is_base_char(16#1167) -> true;
is_base_char(16#1169) -> true;
is_base_char(X) when 16#116D =< X, X =< 16#116E -> true;
is_base_char(X) when 16#1172 =< X, X =< 16#1173 -> true;
is_base_char(16#1175) -> true;
is_base_char(16#119E) -> true;
is_base_char(16#11A8) -> true;
is_base_char(16#11AB) -> true;
is_base_char(X) when 16#11AE =< X, X =< 16#11AF -> true;
is_base_char(X) when 16#11B7 =< X, X =< 16#11B8 -> true;
is_base_char(16#11BA) -> true;
is_base_char(X) when 16#11BC =< X, X =< 16#11C2 -> true;
is_base_char(16#11EB) -> true;
is_base_char(16#11F0) -> true;
is_base_char(16#11F9) -> true;
is_base_char(X) when 16#1E00 =< X, X =< 16#1E9B -> true;
is_base_char(X) when 16#1EA0 =< X, X =< 16#1EF9 -> true;
is_base_char(X) when 16#1F00 =< X, X =< 16#1F15 -> true;
is_base_char(X) when 16#1F18 =< X, X =< 16#1F1D -> true;
is_base_char(X) when 16#1F20 =< X, X =< 16#1F45 -> true;
is_base_char(X) when 16#1F48 =< X, X =< 16#1F4D -> true;
is_base_char(X) when 16#1F50 =< X, X =< 16#1F57 -> true;
is_base_char(16#1F59) -> true;
is_base_char(16#1F5B) -> true;
is_base_char(16#1F5D) -> true;
is_base_char(X) when 16#1F5F =< X, X =< 16#1F7D -> true;
is_base_char(X) when 16#1F80 =< X, X =< 16#1FB4 -> true;
is_base_char(X) when 16#1FB6 =< X, X =< 16#1FBC -> true;
is_base_char(16#1FBE) -> true;
is_base_char(X) when 16#1FC2 =< X, X =< 16#1FC4 -> true;
is_base_char(X) when 16#1FC6 =< X, X =< 16#1FCC -> true;
is_base_char(X) when 16#1FD0 =< X, X =< 16#1FD3 -> true;
is_base_char(X) when 16#1FD6 =< X, X =< 16#1FDB -> true;
is_base_char(X) when 16#1FE0 =< X, X =< 16#1FEC -> true;
is_base_char(X) when 16#1FF2 =< X, X =< 16#1FF4 -> true;
is_base_char(X) when 16#1FF6 =< X, X =< 16#1FFC -> true;
is_base_char(16#2126) -> true;
is_base_char(X) when 16#212A =< X, X =< 16#212B -> true;
is_base_char(16#212E) -> true;
is_base_char(X) when 16#2180 =< X, X =< 16#2182 -> true;
is_base_char(X) when 16#3041 =< X, X =< 16#3094 -> true;
is_base_char(X) when 16#30A1 =< X, X =< 16#30FA -> true;
is_base_char(X) when 16#3105 =< X, X =< 16#312C -> true;
is_base_char(X) when 16#ac00 =< X, X =< 16#d7a3 -> true;
is_base_char(_) ->
    false.
817
%% [86] Ideographic
%% Returns true when X is 16#3007 or lies in one of the two inclusive
%% ranges below, and false for every other term.
is_ideographic(X) ->
    (X >= 16#4e00 andalso X =< 16#9fa5)
        orelse X =:= 16#3007
        orelse (X >= 16#3021 andalso X =< 16#3029).
824
%% [87] CombiningChar
%% Membership test for production [87]. Each guard alternative is one
%% inclusive range or one single code point; alternatives are listed in
%% ascending order and grouped into clauses by code point region.
%% Single code points are compared with =:= so only that exact integer
%% is accepted. Any term not covered by an alternative yields false.

%% 16#0300 .. 16#06ed
is_combining_char(C)
  when 16#0300 =< C, C =< 16#0345;
       16#0360 =< C, C =< 16#0361;
       16#0483 =< C, C =< 16#0486;
       16#0591 =< C, C =< 16#05a1;
       16#05a3 =< C, C =< 16#05b9;
       16#05bb =< C, C =< 16#05bd;
       C =:= 16#05bf;
       16#05c1 =< C, C =< 16#05c2;
       C =:= 16#05c4;
       16#064b =< C, C =< 16#0652;
       C =:= 16#0670;
       16#06d6 =< C, C =< 16#06dc;
       16#06dd =< C, C =< 16#06df;
       16#06e0 =< C, C =< 16#06e4;
       16#06e7 =< C, C =< 16#06e8;
       16#06ea =< C, C =< 16#06ed ->
    true;
%% 16#0901 .. 16#09e3
is_combining_char(C)
  when 16#0901 =< C, C =< 16#0903;
       C =:= 16#093c;
       16#093e =< C, C =< 16#094c;
       C =:= 16#094d;
       16#0951 =< C, C =< 16#0954;
       16#0962 =< C, C =< 16#0963;
       16#0981 =< C, C =< 16#0983;
       C =:= 16#09bc;
       C =:= 16#09be;
       C =:= 16#09bf;
       16#09c0 =< C, C =< 16#09c4;
       16#09c7 =< C, C =< 16#09c8;
       16#09cb =< C, C =< 16#09cd;
       C =:= 16#09d7;
       16#09e2 =< C, C =< 16#09e3 ->
    true;
%% 16#0a02 .. 16#0acd
is_combining_char(C)
  when C =:= 16#0a02;
       C =:= 16#0a3c;
       C =:= 16#0a3e;
       C =:= 16#0a3f;
       16#0a40 =< C, C =< 16#0a42;
       16#0a47 =< C, C =< 16#0a48;
       16#0a4b =< C, C =< 16#0a4d;
       16#0a70 =< C, C =< 16#0a71;
       16#0a81 =< C, C =< 16#0a83;
       C =:= 16#0abc;
       16#0abe =< C, C =< 16#0ac5;
       16#0ac7 =< C, C =< 16#0ac9;
       16#0acb =< C, C =< 16#0acd ->
    true;
%% 16#0b01 .. 16#0bd7
is_combining_char(C)
  when 16#0b01 =< C, C =< 16#0b03;
       C =:= 16#0b3c;
       16#0b3e =< C, C =< 16#0b43;
       16#0b47 =< C, C =< 16#0b48;
       16#0b4b =< C, C =< 16#0b4d;
       16#0b56 =< C, C =< 16#0b57;
       16#0b82 =< C, C =< 16#0b83;
       16#0bbe =< C, C =< 16#0bc2;
       16#0bc6 =< C, C =< 16#0bc8;
       16#0bca =< C, C =< 16#0bcd;
       C =:= 16#0bd7 ->
    true;
%% 16#0c01 .. 16#0cd6
is_combining_char(C)
  when 16#0c01 =< C, C =< 16#0c03;
       16#0c3e =< C, C =< 16#0c44;
       16#0c46 =< C, C =< 16#0c48;
       16#0c4a =< C, C =< 16#0c4d;
       16#0c55 =< C, C =< 16#0c56;
       16#0c82 =< C, C =< 16#0c83;
       16#0cbe =< C, C =< 16#0cc4;
       16#0cc6 =< C, C =< 16#0cc8;
       16#0cca =< C, C =< 16#0ccd;
       16#0cd5 =< C, C =< 16#0cd6 ->
    true;
%% 16#0d02 .. 16#0ecd
is_combining_char(C)
  when 16#0d02 =< C, C =< 16#0d03;
       16#0d3e =< C, C =< 16#0d43;
       16#0d46 =< C, C =< 16#0d48;
       16#0d4a =< C, C =< 16#0d4d;
       C =:= 16#0d57;
       C =:= 16#0e31;
       16#0e34 =< C, C =< 16#0e3a;
       16#0e47 =< C, C =< 16#0e4e;
       C =:= 16#0eb1;
       16#0eb4 =< C, C =< 16#0eb9;
       16#0ebb =< C, C =< 16#0ebc;
       16#0ec8 =< C, C =< 16#0ecd ->
    true;
%% 16#0f18 .. 16#0fb9
is_combining_char(C)
  when 16#0f18 =< C, C =< 16#0f19;
       C =:= 16#0f35;
       C =:= 16#0f37;
       C =:= 16#0f39;
       C =:= 16#0f3e;
       C =:= 16#0f3f;
       16#0f71 =< C, C =< 16#0f84;
       16#0f86 =< C, C =< 16#0f8b;
       16#0f90 =< C, C =< 16#0f95;
       C =:= 16#0f97;
       16#0f99 =< C, C =< 16#0fad;
       16#0fb1 =< C, C =< 16#0fb7;
       C =:= 16#0fb9 ->
    true;
%% 16#20d0 .. 16#309a
is_combining_char(C)
  when 16#20d0 =< C, C =< 16#20dc;
       C =:= 16#20e1;
       16#302a =< C, C =< 16#302f;
       C =:= 16#3099;
       C =:= 16#309a ->
    true;
is_combining_char(_) ->
    false.
922
%% [88] Digit
%% Returns true when C falls inside any of the inclusive ranges below
%% (one guard alternative per range, ascending order); false otherwise.
is_digit(C)
  when 16#0030 =< C, C =< 16#0039;
       16#0660 =< C, C =< 16#0669;
       16#06F0 =< C, C =< 16#06F9;
       16#0966 =< C, C =< 16#096f;
       16#09e6 =< C, C =< 16#09ef;
       16#0a66 =< C, C =< 16#0a6f;
       16#0ae6 =< C, C =< 16#0aef;
       16#0b66 =< C, C =< 16#0b6f;
       16#0be7 =< C, C =< 16#0bef;
       16#0c66 =< C, C =< 16#0c6f;
       16#0ce6 =< C, C =< 16#0cef;
       16#0d66 =< C, C =< 16#0d6f;
       16#0e50 =< C, C =< 16#0e59;
       16#0ed0 =< C, C =< 16#0ed9;
       16#0f20 =< C, C =< 16#0f29 ->
    true;
is_digit(_) ->
    false.
940
%% [89] Extender
%% Returns true for the eight single code points and three inclusive
%% ranges below; false for anything else. Single code points are tested
%% with =:= so only the exact integer is accepted.
is_extender(C)
  when C =:= 16#00b7;
       C =:= 16#02d0;
       C =:= 16#02d1;
       C =:= 16#0387;
       C =:= 16#0640;
       C =:= 16#0e46;
       C =:= 16#0ec6;
       C =:= 16#3005;
       16#3031 =< C, C =< 16#3035;
       16#309d =< C, C =< 16#309e;
       16#30fc =< C, C =< 16#30fe ->
    true;
is_extender(_) ->
    false.
954
%% Converts the letters $A..$Z in Str to $a..$z. Every other element is
%% copied through unchanged, so only this ASCII range is case-folded.
to_lower(Str) ->
    to_lower(Str, []).

%% Walks the list once, collecting the converted elements in reverse
%% order in Lowered and restoring the order when the input is exhausted.
to_lower([], Lowered) ->
    lists:reverse(Lowered);
to_lower([Ch | Rest], Lowered) ->
    Converted = if
                    Ch >= $A, Ch =< $Z -> Ch + ($a - $A);
                    true -> Ch
                end,
    to_lower(Rest, [Converted | Lowered]).
963
964%%% XSD helpers
965
%% Returns true when Name is one of the twelve facet atoms listed
%% below and false for any other term.
is_facet(Name) ->
    lists:member(Name,
                 [length, minLength, maxLength,
                  pattern, enumeration, whiteSpace,
                  maxInclusive, maxExclusive,
                  minInclusive, minExclusive,
                  totalDigits, fractionDigits]).
979
980
%% Tells whether the argument names one of the simple types listed
%% below.
%%
%% A 3-tuple is only considered when its third element is
%% ?XSD_NAMESPACE; its first element is then checked, after conversion
%% to a string if it is an atom. Any other 3-tuple yields false.
%% Every remaining argument is compared against the list of type name
%% strings, so anything that is not one of those strings yields false.
is_builtin_simple_type({Type, _, ?XSD_NAMESPACE}) ->
    Name = case is_atom(Type) of
               true -> atom_to_list(Type);
               false -> Type
           end,
    is_builtin_simple_type(Name);
is_builtin_simple_type({_, _, _}) ->
    false;
is_builtin_simple_type(Name) ->
    lists:member(Name,
                 ["string", "normalizedString", "token",
                  "base64Binary", "hexBinary",
                  "integer", "positiveInteger", "negativeInteger",
                  "nonNegativeInteger", "nonPositiveInteger",
                  "long", "unsignedLong", "int", "unsignedInt",
                  "short", "unsignedShort",
                  "decimal", "float", "double", "boolean",
                  "duration", "dateTime", "date", "time",
                  "gYear", "gYearMonth", "gMonth", "gMonthDay", "gDay",
                  "Name", "QName", "NCName", "anyURI", "language",
                  "ID", "IDREF", "IDREFS", "ENTITY", "ENTITIES",
                  "NOTATION", "NMTOKEN", "NMTOKENS",
                  "byte", "unsignedByte"]).
1032
%% Tells whether the argument names one of the string-like types
%% listed below.
%%
%% A 3-tuple is only considered when its third element is
%% ?XSD_NAMESPACE, in which case its first element is checked; any
%% other 3-tuple yields false. An atom is converted to a string before
%% the lookup. Everything else is compared against the list of type
%% name strings.
is_xsd_string({Type, _, ?XSD_NAMESPACE}) ->
    is_xsd_string(Type);
is_xsd_string({_, _, _}) ->
    false;
is_xsd_string(Atom) when is_atom(Atom) ->
    is_xsd_string(atom_to_list(Atom));
is_xsd_string(Name) ->
    lists:member(Name,
                 ["string", "normalizedString", "token", "language",
                  "Name", "NMTOKEN", "NMTOKENS", "NCName",
                  "ID", "IDREF", "IDREFS",
                  "ENTITY", "ENTITIES"]).
1069