1/*  Part of SWI-Prolog
2
3    Author:        Jan Wielemaker
4    E-mail:        J.Wielemaker@vu.nl
5    WWW:           http://www.swi-prolog.org
6    Copyright (c)  2006-2011, University of Amsterdam
7    All rights reserved.
8
9    Redistribution and use in source and binary forms, with or without
10    modification, are permitted provided that the following conditions
11    are met:
12
13    1. Redistributions of source code must retain the above copyright
14       notice, this list of conditions and the following disclaimer.
15
16    2. Redistributions in binary form must reproduce the above copyright
17       notice, this list of conditions and the following disclaimer in
18       the documentation and/or other materials provided with the
19       distribution.
20
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32    POSSIBILITY OF SUCH DAMAGE.
33*/
34
35:- module(unicode_derived_core_properties,
36	  [ unicode_derived_core_property/2,	% ?Code, ?Prop
37	    unicode_property/3			% +File, ?Code, ?Prop
38	  ]).
39:- use_module(library(debug), [debug/3]).
40:- use_module(library(lists), [member/2, numlist/3]).
41:- use_module(library(readutil), [read_line_to_codes/2]).
42
%	Cache of parsed data. Both predicates are filled at runtime:
%
%	  - derived_property(Code, Class, File) holds one fact per code
%	    point and property read from File.
%	  - loaded(File) is added once File has been parsed completely.

:- dynamic derived_property/3.
:- dynamic loaded/1.
46
%	unicode_derived_core_property(?CodePoint, ?Property)
%
%	True when Property is recorded for CodePoint in the file
%	'DerivedCoreProperties.txt'. This is unicode_property/3 with the
%	file name fixed.

unicode_derived_core_property(CodePoint, Property) :-
	unicode_property('DerivedCoreProperties.txt', CodePoint, Property).
53
%	unicode_property(+File, ?Code, ?Prop)
%
%	True when Prop is a property recorded for code point Code in
%	File. If File is not yet marked by loaded/1 it is parsed first
%	and its content is stored as derived_property/3 facts; later
%	calls for the same File only consult those facts.

unicode_property(File, Code, Prop) :-
	(   loaded(File)
	->  true
	;   % Clear any facts already stored for File before parsing it.
	    retractall(derived_property(_, _, File)),
	    process_file(File),
	    assertz(loaded(File))
	),
	derived_property(Code, Prop, File).
62
63
%	process_file(+File)
%
%	Open File for reading and pass the stream to process_stream/2.
%
%	setup_call_cleanup/3 makes opening the stream and registering
%	close/1 a single protected step. With open/3 followed by a
%	separate call_cleanup/2, an interrupt arriving between the two
%	goals would leave the stream open. The stream is closed whether
%	processing succeeds, fails or raises an exception.

process_file(File) :-
	setup_call_cleanup(
	    open(File, read, In),
	    process_stream(In, File),
	    close(In)).
67
%	process_stream(+In, +File)
%
%	Read the stream In line by line until end of file, handing each
%	line (as a list of character codes) to process_line/2. File is
%	passed along unchanged.

process_stream(In, File) :-
	read_line_to_codes(In, Codes),
	(   Codes \== end_of_file
	->  process_line(Codes, File),
	    process_stream(In, File)
	;   true
	).
75
%	process_line(+Line, +File)
%
%	Parse Line (a list of character codes) with line//2 and store a
%	derived_property/3 fact for every code point it covers. Comment
%	and blank lines yield no code points and thus store nothing. A
%	line that cannot be parsed is reported on the current output and
%	processing is stopped through abort/0.

process_line(Line, File) :-
	debug(unicode_data, 'Line "~s"', [Line]),
	(   phrase(line(CodePoints, Property), Line)
	->  forall(member(CodePoint, CodePoints),
		   assertz(derived_property(CodePoint, Property, File)))
	;   format('ERROR: Could not parse "~s"~n', [Line]),
	    abort
	).
84
85
86
%	line(-Codes, -Class)//
%
%	Parse one line of a Unicode property data file.
%
%	  - A line holding only a comment (optional white space, then
%	    "#") or only white space yields Codes = [] and Class = (-).
%	  - A data line has the form
%
%	        XXXX[..YYYY] ; Property [# comment]
%
%	    where XXXX and YYYY are hexadecimal code points. Codes is the
%	    list of all code points from XXXX to YYYY (or just [XXXX])
%	    and Class is the property name in lower case.
%
%	The trailing comment on a data line is optional: a line that
%	ends after the property name (and optional white space) is
%	accepted rather than rejected.

line([], -) -->
	ws, "#", skip_rest, !.
line([], -) -->
	ws.
line(Codes, Class) -->
	ucc(First),
	(   ".."
	->  ucc(Last),
	    { numlist(First, Last, Codes) }
	;   { Codes = [First] }
	),
	ws, ";", ws,
	class(Class),
	ws,
	(   "#"				% comment runs to end of line
	->  skip_rest
	;   []				% no comment: line must end here
	).
103
%	class(-Class)//
%
%	Parse a property name as an identifier and return it as an atom
%	converted to lower case.

class(Class) -->
	identifier(Name),
	{ downcase_atom(Name, Class) }.
107
%	identifier(-Word)//
%
%	Parse a C-style identifier: one character of type csymf followed
%	by as many csym characters as possible. Word is the resulting
%	atom.

identifier(Word) -->
	[First],
	{ code_type(First, csymf) },
	csyms(Rest),
	{ atom_codes(Word, [First|Rest]) }.
112
%	csyms(-Codes)//
%
%	Collect the longest run of leading characters of type csym.
%	Succeeds with Codes = [] when the next character is not a csym
%	or the input is exhausted.

csyms([Code|Codes]) -->
	[Code],
	{ code_type(Code, csym) },
	!,
	csyms(Codes).
csyms([]) -->
	[].
118
%	ucc(-Val)//
%
%	Parse a code point written as four or more hexadecimal digits
%	and return its integer value. The first four digits are
%	mandatory; xucc//2 consumes any further digits.

ucc(Val) -->
	hex_digit(N3),
	hex_digit(N2),
	hex_digit(N1),
	hex_digit(N0),
	{ First4 is (N3<<12) \/ (N2<<8) \/ (N1<<4) \/ N0 },
	xucc(First4, Val).
126
%	xucc(+Val0, -Val)//
%
%	Extend the value Val0 with every further hexadecimal digit that
%	follows, shifting in four bits per digit. Val is Val0 when no
%	digit follows.

xucc(Acc, Val) -->
	(   hex_digit(Digit)
	->  { Next is (Acc<<4) \/ Digit },
	    xucc(Next, Val)
	;   { Val = Acc }
	).
133
%	hex_digit(-Weight)//
%
%	Consume one hexadecimal digit character; Weight is its numeric
%	value as given by code_type/2 with xdigit(Weight).

hex_digit(Weight) -->
	[Code],
	{ code_type(Code, xdigit(Weight)) }.
137
%	w//
%
%	Consume exactly one character of type white.

w -->
	[Code],
	{ code_type(Code, white) }.
141
%	ws//
%
%	Skip zero or more white space characters as recognised by w//0.
%	Always succeeds and commits to the longest run.

ws -->
	(   w
	->  ws
	;   []
	).
147
%	skip_rest//
%
%	Consume whatever input is left. Written directly in
%	difference-list form: any input list is accepted and the
%	remainder is the empty list.

skip_rest(_Input, Rest) :-
	Rest = [].
149