1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2%
3% Relational database to ontology.
4% Matching:
5%     -nbA: number of attributes to match.
6%     -attribute_domains: possible matches for each attribute
7%     -match_cost[a,b]: cost of matching a to value b.
8%     -matching is bijective
9%
10% Treeness:
11%     -matches selected by the matching problem must appear
12%         int the tree
13%     -the tree must be connected accyclic
14%     -therefore, higher nodes are used to connect the
15%     -matched nodes
16%     -dnodes are the nodes that *can* get matched
17%     -cnodes are nodes above, used to connect dnodes
18%     -ws is the cost of each edge
19%
20% Objective:
21%     -minimize the cost of the matching and the tree.
22%
23% To compile:
24% mzn2fzn model.mzn alignment.dzn X.integration.dzn
25% alignment.dzn contains data for the tree part
26% X.integration.dzn constains data for the Xth instance of
27%    the matching
28%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
29%-----------------------------------------------------------------------------%
30% Includes
31
32include "alldifferent.mzn";
33include "arg_sort.mzn";
34
35%-----------------------------------------------------------------------------%
36% Parameters
37
38    % Graph parameters
39    %
40int: nbV;
41int: nbE;
42set of int: nodes = 1..nbV;
43set of int: edges = 1..nbE;
44set of int: cnodes;
45set of int: dnodes;
46set of int: anodes;  %Is always empty on these data files.
47array[nodes,edges] of bool: adjacent;
48array[edges] of nodes: heads;
49array[edges] of nodes: tails;
50array[1..2,edges] of int: endpairs = array2d(1..2,edges,tails++heads);
51array[edges] of int: ws;
52array[edges] of int: sorted_edges = arg_sort(ws);
53
54% Bounds for the edge cost variable
55int: w_lb = sum(e in edges)(min(0, ws[e]));
56int: w_ub = sum(e in edges)(max(0, ws[e]));
57
58    % Attribute parameters
59    %
60int: nbA;
61set of int: atts = 1..nbA;
62array[atts] of set of int: attribute_domains;
63array[atts] of int: attribute_names; % just for printing
64
65array[atts,int] of int: match_costs;
66array[atts,int] of int: match_costs_sorted;
67
68% Bounds for the matching variables
69int: match_lb = min([min(attribute_domains[a]) | a in atts]);
70int: match_ub = max([max(attribute_domains[a]) | a in atts]);
71
72% Bounds for the matching cost variable
73int: wm_lb = sum(a in atts)(min(i in attribute_domains[a])(match_costs[a,i]));
74int: wm_ub = sum(a in atts)(max(i in attribute_domains[a])(match_costs[a,i]));
75
76%-----------------------------------------------------------------------------%
77% Variables
78
79array[nodes] of var bool: vs;
80array[edges] of var bool: es;
81array[atts] of var match_lb..match_ub: match;
82var w_lb..w_ub: w;
83var wm_lb..wm_ub: wm;
84var w_lb+wm_lb..w_ub+wm_ub: objective;
85
86%-----------------------------------------------------------------------------%
87% Functions
88
89function int: other(array[1..2,int] of int: endpairs, int: snd, int: r) =
90	if endpairs[1,snd] == r then endpairs[2,snd] else endpairs[1,snd] endif;
91
92%-----------------------------------------------------------------------------%
93% Predicates
94
95predicate treep(
96    array[int] of var bool: vs, array[int] of var bool: es,
97    array [int,int] of bool: adjacent, array[1..2,int] of int: endpairs
98) =
99	forall(e in index_set(es))(
100        es[e] -> (vs[endpairs[1,e]] /\ vs[endpairs[2,e]])
101    )
102/\  let {
103		% Parent node:
104		array[index_set(vs)] of var index_set(vs): p;
105		% A variable root for the tree
106		var index_set(vs): root ;
107		% Adjacency matrix: node->node->edge
108		array[index_set(vs),index_set(vs)] of int: m =
109	        array2d(index_set(vs),index_set(vs),
110                [ sum(
111                    % the intersect will contain only one element or 0 elements.
112                    (   { e| e in index_set(es) where adjacent[i,e]}
113                        intersect { e| e in index_set(es) where adjacent[j,e]}
114                    ) union {0}
115                  )
116                | i,j in index_set(vs)]
117            );
118    } in
119	    vs[root]
120    /\  p[root] = root
121	    % Parent must be adjacent
122    /\  forall(a in index_set(vs) where a != root /\ vs[a])(
123            p[a] in {other(endpairs,b,a) | b in index_set(es) where adjacent[a,b]}
124        )
125	    % Edge between child and parent, force parent in, avoid cyclic
126        % relationships
127    /\  forall(a in index_set(vs) where a != root /\ vs[a])(
128            es[m[a,p[a]]] /\ vs[p[a]] /\ p[p[a]] != a
129        )
130	    % Set out-nodes to a value.
131    /\  forall(a in index_set(vs) where not(vs[a]))(
132            p[a] = a
133        )
134	    % Redundant:
135    /\  redundant_constraint(
136            sum(i in index_set(es))(bool2int(es[i]))
137                = sum(i in index_set(vs))(bool2int(vs[i])) - 1
138        )
139    ;
140
141%-----------------------------------------------------------------------------%
142% Constraints
143
144% Attrbiutes are in the tree (empty constraint for these data files)
145constraint forall(att in anodes)( vs[att] == true );
146
147% Enforce the treeness
148constraint treep(vs,es,adjacent,endpairs);
149
150% Matching problem:
151constraint forall(a in atts)( match[a] in attribute_domains[a] );
152constraint forall(a in atts)( vs[match[a]] );
153constraint alldifferent(match);
154
155% Matched node is in the tree.
156constraint forall(d in dnodes)( exists(a in atts)(match[a] == d) <-> vs[d] );
157% Onlly one edge comming out of the match.
158constraint forall(d in dnodes)( vs[d] <-> sum(e in edges where adjacent[d,e])(es[e]) == 1 );
159
160% Objective related constraints
161constraint w = sum(i in edges)( bool2int(es[i]) * ws[i] ); % Weight of the tree
162constraint wm = sum(a in atts)( match_costs[a, match[a]] );
163constraint objective = w + wm;
164
165
166%-----------------------------------------------------------------------------%
167% Search strategies
168
169
170ann: cheap_matches = int_search(
171    [ match[a] == match_costs_sorted[a,i] |a in atts, i in index_set_2of2(match_costs_sorted) ],
172    input_order, indomain_max, complete
173);
174
175ann: naive_e  = int_search(es, input_order, indomain_min, complete);
176ann: kruskal  = int_search([es[sorted_edges[e]] | e in edges], input_order, indomain_max, complete);
177ann: kruskal2 = int_search([es[sorted_edges[nbE + 1 - e]] | e in edges], input_order, indomain_min, complete);
178
179
180solve
181    ::seq_search([
182        %cheap_matches,
183        int_search(match, first_fail, indomain_min, complete),
184        kruskal2,
185    ])
186    minimize objective;
187
188output [
189    "es = \(es);\n",
190    "vs = \(vs);\n",
191    "match = \(match);\n",
192    "objective = \(objective);\n"
193];
194
195%output ["digraph {\n"]++
196%[ "  \(n-1)"++ (if n in cnodes then "[fillcolor=\"#59d0a0\",style=filled];" else if n in dnodes then "[fillcolor=gold,style=filled];" else "" endif endif)++"\n" | n in nodes  where fix(vs[n])] ++
197%[ "  "++if fix(heads[e]) in cnodes then "\(heads[e]-1) -> \(tails[e]-1)" else "\(tails[e]-1) -> \(heads[e]-1)" endif ++" [label=\"w=\(ws[e]/10000)\"];\n" | e in edges  where fix(es[e])] ++
198%[ "  \(attribute_names[a]) [shape=hexagon];\n" | a in atts] ++
199%[ "  \(match[a]-1) -> \(attribute_names[a]) [label=\"w=\(match_costs[a,fix(match[a])]/10000)\"];\n" | a in atts]++
200%["}\nobjective = \(objective)\nwm = \(wm)\nw =\(w)"];
201
202