1%
2% routines to manipulate distributions
3%
4
5:- module(clpbn_dist,
6	  [
7	   dist/1,
8	   dist/4,
9	   dists/1,
10	   dist_new_table/2,
11	   get_dist/4,
12	   get_dist_matrix/5,
13	   get_possibly_deterministic_dist_matrix/5,
14	   get_dist_domain/2,
15	   get_dist_domain_size/2,
16	   get_dist_params/2,
17	   get_dist_key/2,
18	   get_evidence_position/3,
19	   get_evidence_from_position/3,
20	   dist_to_term/2,
21	   empty_dist/2,
22	   all_dist_ids/1,
23	   randomise_all_dists/0,
24	   randomise_dist/1,
25	   uniformise_all_dists/0,
26	   uniformise_dist/1,
27	   reset_all_dists/0,
28	   additive_dists/6
29	]).
30
31:- use_module(library(lists),[nth0/3,append/3]).
32
33:- use_module(library(matrix),
34	      [matrix_new/4,
35	       matrix_new/3,
36	       matrix_to_list/2,
37	       matrix_to_logs/1]).
38
39:- use_module(library('clpbn/matrix_cpt_utils'),
40	      [random_CPT/2,
41	       uniform_CPT/2]).
42
43/*
44:- mode dist(+, -).
45
46:- mode get_dist(+, -, -, -).
47
48:- mode get_dist_params(+, -).
49
50:- mode get_dist_domain_size(+, -).
51
52:- mode get_dist_domain(+, -).
53
54:- mode get_dist_nparams(+, -).
55
56:- mode dist(?).
57
58:- mode dist_to_term(+,-).
59*/
60
61/*******************************************
62
63store stuff in a DB of the form:
64	db(Id, Key, CPT, Type, Domain, CPTSize, DSize)
65
66where Id is the id,
67  Key is a skeleton of the key(main functor only)
68  cptsize is the table size or -1,
69  DSize is the domain size,
70  Type is
71    tab for tabular
72    avg for average
73    max for maximum
74    min for minimum
75    trans  for HMMs
76    continuous
77  Domain is
78    a list of values
79    bool for [t,f]
80    aminoacids for [a,c,d,e,f,g,h,i,k,l,m,n,p,q,r,s,t,v,w,y]
81    dna for [a,c,g,t]
82    rna for [a,c,g,u]
83    reals
84
85
86********************************************/
87
88 :- dynamic id/1.
89
90id(1).
91
92new_id(Id) :-
93	retract(id(Id)),
94	Id1 is Id+1,
95	assert(id(Id1)).
96
97reset_id :-
98	retract(id(_)),
99	assert(id(1)).
100
101dists(X) :- id(X1), X is X1-1.
102
103dist(V, Id, Key, Parents) :-
104	dist_unbound(V, Culprit), !,
105	when(Culprit, dist(V, Id, Key, Parents)).
106dist(V, Id, Key, Parents) :-
107	var(Key), !,
108	when(Key, dist(V, Id, Key, Parents)).
109dist(avg(Domain, Parents), avg(Domain), _, Parents).
110dist(ip(Domain, Tabs, Parents), ip(Domain,Tabs), _, Parents).
111dist(max(Domain, Parents), max(Domain), _, Parents).
112dist(min(Domain, Parents), min(Domain), _, Parents).
113dist(cons(Domain, Parent), cons(Domain), _, Parent).
114dist(p(Type, CPT), Id, Key, FParents) :-
115	copy_structure(Key, Key0),
116	distribution(Type, CPT, Id, Key0, [], FParents).
117dist(p(Type, CPT, Parents), Id, Key, FParents) :-
118	copy_structure(Key, Key0),
119	distribution(Type, CPT, Id, Key0, Parents, FParents).
120
121dist_unbound(V, ground(V)) :-
122	var(V), !.
123dist_unbound(p(Type,_), ground(Type)) :-
124	\+ ground(Type), !.
125dist_unbound(p(_,CPT), ground(CPT)) :-
126	\+ ground(CPT).
127dist_unbound(p(Type,_,_), ground(Type)) :-
128	\+ ground(Type), !.
129dist_unbound(p(_,CPT,_), ground(CPT)) :-
130	\+ ground(CPT).
131
132distribution(bool, trans(CPT), Id, Key, Parents, FParents) :-
133	is_list(CPT), !,
134	compress_hmm_table(CPT, Parents, Tab, FParents),
135	add_dist([t,f], trans, Tab, Parents, Key, Id).
136distribution(bool, CPT, Id, Key, Parents, Parents) :-
137	is_list(CPT), !,
138	add_dist([t,f], tab, CPT, Parents, Key, Id).
139distribution(aminoacids, trans(CPT), Id, Key, Parents, FParents) :-
140	is_list(CPT), !,
141	compress_hmm_table(CPT, Parents, Tab, FParents),
142	add_dist([a,c,d,e,f,g,h,i,k,l,m,n,p,q,r,s,t,v,w,y], trans, Tab, FParents, Key, Id).
143distribution(aminoacids, CPT, Id, Key, Parents, Parents) :-
144	is_list(CPT), !,
145	add_dist([a,c,d,e,f,g,h,i,k,l,m,n,p,q,r,s,t,v,w,y], tab, CPT, Parents, Key, Id).
146distribution(dna, trans(CPT), Key, Id, Parents, FParents) :-
147	is_list(CPT), !,
148	compress_hmm_table(CPT, Parents, Tab, FParents),
149	add_dist([a,c,g,t], trans, Tab, FParents, Key, Id).
150distribution(dna, CPT, Id, Key, Parents, Parents) :-
151	is_list(CPT), !,
152	add_dist([a,c,g,t], tab, CPT, Key, Id).
153distribution(rna, trans(CPT), Id, Key, Parents, FParents) :-
154	is_list(CPT), !,
155	compress_hmm_table(CPT, Parents, Tab, FParents, FParents),
156	add_dist([a,c,g,u], trans, Tab, Key, Id).
157distribution(rna, CPT, Id, Key, Parents, Parents) :-
158	is_list(CPT), !,
159	add_dist([a,c,g,u], tab, CPT, Parents, Key, Id).
160distribution(Domain, trans(CPT), Id, Key, Parents, FParents) :-
161	is_list(Domain),
162	is_list(CPT), !,
163	compress_hmm_table(CPT, Parents, Tab, FParents),
164	add_dist(Domain, trans, Tab, FParents, Key, Id).
165distribution(Domain, CPT, Id, Key, Parents, Parents) :-
166	is_list(Domain),
167	is_list(CPT), !,
168	add_dist(Domain, tab, CPT, Parents, Key, Id).
169
170add_dist(Domain, Type, CPT, _, Key, Id) :-
171	recorded(clpbn_dist_db, db(Id, Key, CPT, Type, Domain, _, _), _), !.
172add_dist(Domain, Type, CPT, Parents, Key, Id) :-
173	length(CPT, CPTSize),
174	length(Domain, DSize),
175	new_id(Id),
176	record_parent_sizes(Parents, Id, PSizes, [DSize|PSizes]),
177	recordz(clpbn_dist_db,db(Id, Key, CPT, Type, Domain, CPTSize, DSize),_).
178
179
180record_parent_sizes([], Id, [], DSizes) :-
181	recordz(clpbn_dist_psizes,db(Id, DSizes),_).
182record_parent_sizes([P|Parents], Id, [Size|Sizes], DSizes) :-
183	clpbn:get_atts(P,dist(Dist,_)), !,
184	get_dist_domain_size(Dist, Size),
185	record_parent_sizes(Parents, Id, Sizes, DSizes).
186record_parent_sizes([_|_], _, _, _).
187
188%
189% Often, * is used to code empty in HMMs.
190%
191compress_hmm_table([], [], [], []).
192compress_hmm_table([*|L],[_|Parents],NL,NParents) :- !,
193	compress_hmm_table(L,Parents,NL,NParents).
194compress_hmm_table([Prob|L],[P|Parents],[Prob|NL],[P|NParents]) :-
195	compress_hmm_table(L,Parents,NL,NParents).
196
197dist(Id) :-
198	recorded(clpbn_dist_db, db(Id, _, _, _, _, _, _), _).
199
200get_dist(Id, Type, Domain, Tab) :-
201	recorded(clpbn_dist_db, db(Id, _, Tab, Type, Domain, _, _), _).
202
203get_dist_matrix(Id, Parents, Type, Domain, Mat) :-
204	recorded(clpbn_dist_db, db(Id, _, Tab, Type, Domain, _, DomainSize), _),
205	get_dsizes(Parents, Sizes, []),
206	matrix_new(floats, [DomainSize|Sizes], Tab, Mat),
207	matrix_to_logs(Mat).
208
209get_possibly_deterministic_dist_matrix(Id, Parents, Type, Domain, Mat) :-
210	recorded(clpbn_dist_db, db(Id, _, Tab, Type, Domain, _, DomainSize), _),
211	get_dsizes(Parents, Sizes, []),
212	matrix_new(floats, [DomainSize|Sizes], Tab, Mat).
213
214get_dsizes([], Sizes, Sizes).
215get_dsizes([P|Parents], [Sz|Sizes], Sizes0) :-
216	clpbn:get_atts(P,dist(Dist,_)),
217	get_dist_domain_size(Dist, Sz),
218	get_dsizes(Parents, Sizes, Sizes0).
219
220get_dist_params(Id, Parms) :-
221	recorded(clpbn_dist_db, db(Id, _, Parms, _, _, _, _), _).
222
223get_dist_domain_size(avg(D,_), DSize) :- !,
224	length(D, DSize).
225get_dist_domain_size(ip(D,_,_), DSize) :- !,
226	length(D, DSize).
227get_dist_domain_size(Id, DSize) :-
228	recorded(clpbn_dist_db, db(Id, _, _, _, _, _, DSize), _).
229
230get_dist_domain(Id, Domain) :-
231	recorded(clpbn_dist_db, db(Id, _, _, _, Domain, _, _), _).
232
233get_dist_key(Id, Key) :-
234	recorded(clpbn_dist_db, db(Id, Key, _, _, _, _, _), _).
235
236get_dist_nparams(Id, NParms) :-
237	recorded(clpbn_dist_db, db(Id, _, _, _, _, NParms, _), _).
238
239get_evidence_position(El, ip(Domain,_,_), Pos) :- !,
240	nth0(Pos, Domain, El), !.
241get_evidence_position(El, avg(Domain), Pos) :- !,
242	nth0(Pos, Domain, El), !.
243get_evidence_position(El, Id, Pos) :-
244	recorded(clpbn_dist_db, db(Id, _, _, _, Domain, _, _), _),
245	nth0(Pos, Domain, El), !.
246get_evidence_position(El, Id, Pos) :-
247	recorded(clpbn_dist_db, db(Id, _,  _, _, _, _, _), _), !,
248	throw(error(domain_error(evidence,Id),get_evidence_position(El, Id, Pos))).
249get_evidence_position(El, Id, Pos) :-
250	throw(error(domain_error(no_distribution,Id),get_evidence_position(El, Id, Pos))).
251
252get_evidence_from_position(El, Id, Pos) :-
253	recorded(clpbn_dist_db, db(Id, _, _, _, Domain, _, _), _),
254	nth0(Pos, Domain, El), !.
255get_evidence_from_position(El, Id, Pos) :-
256	recorded(clpbn_dist_db, db(Id, _, _, _, _, _, _), _), !,
257	throw(error(domain_error(evidence,Id),get_evidence_from_position(El, Id, Pos))).
258get_evidence_from_position(El, Id, Pos) :-
259	throw(error(domain_error(no_distribution,Id),get_evidence_from_position(El, Id, Pos))).
260
261dist_to_term(_Id,_Term).
262
263empty_dist(Dist, TAB) :-
264	recorded(clpbn_dist_psizes,db(Dist, DSizes),_), !,
265	matrix_new(floats, DSizes, TAB).
266empty_dist(Dist, TAB) :-
267	throw(error(domain_error(no_distribution,Dist),empty_dist(Dist,TAB))).
268
269dist_new_table(Id, NewMat) :-
270	matrix_to_list(NewMat, List),
271	recorded(clpbn_dist_db, db(Id, Key, _, A, B, C, D), R),
272	erase(R),
273	recorda(clpbn_dist_db, db(Id, Key, List, A, B, C, D), _),
274	fail.
275dist_new_table(_, _).
276
277copy_structure(V, _) :- attvar(V), !.
278copy_structure(V, V) :- var(V), !.
279copy_structure(V, _) :- primitive(V), !.
280copy_structure(Key, Key0) :-
281	Key =.. [A|LKey],
282	copy_Lstructure(LKey, LKey0),
283	Key0 =.. [A|LKey0].
284
285copy_Lstructure([], []).
286copy_Lstructure([H|LKey], [NH|LKey0]) :-
287	copy_structure(H, NH),
288	copy_Lstructure(LKey, LKey0).
289
290randomise_all_dists :-
291	randomise_dist(_),
292	fail.
293randomise_all_dists.
294
295randomise_dist(Dist) :-
296	recorded(clpbn_dist_psizes, db(Dist,DSizes), _),
297	random_CPT(DSizes, NewCPT),
298	dist_new_table(Dist, NewCPT).
299
300uniformise_all_dists :-
301	uniformise_dist(_),
302	fail.
303uniformise_all_dists.
304
305uniformise_dist(Dist) :-
306	recorded(clpbn_dist_psizes, db(Dist,DSizes), _),
307	uniform_CPT(DSizes, NewCPT),
308	dist_new_table(Dist, NewCPT).
309
310
311reset_all_dists :-
312	recorded(clpbn_dist_psizes, _, R),
313	erase(R),
314	fail.
315reset_all_dists :-
316	recorded(clpbn_dist_db, _, R),
317	erase(R),
318	fail.
319reset_all_dists :-
320	reset_id,
321	fail.
322reset_all_dists.
323
324
325additive_dists(ip(Domain,Tabs1), ip(Domain,Tabs2), Parents1, Parents2, ip(Domain,Tabs), Parents) :-
326	append(Tabs1, Tabs2, Tabs),
327	append(Parents1, Parents2, Parents).
328