1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2% 3% Relational database to ontology. 4% Matching: 5% -nbA: number of attributes to match. 6% -attribute_domains: possible matches for each attribute 7% -match_cost[a,b]: cost of matching a to value b. 8% -matching is bijective 9% 10% Treeness: 11% -matches selected by the matching problem must appear 12% int the tree 13% -the tree must be connected accyclic 14% -therefore, higher nodes are used to connect the 15% -matched nodes 16% -dnodes are the nodes that *can* get matched 17% -cnodes are nodes above, used to connect dnodes 18% -ws is the cost of each edge 19% 20% Objective: 21% -minimize the cost of the matching and the tree. 22% 23% To compile: 24% mzn2fzn model.mzn alignment.dzn X.integration.dzn 25% alignment.dzn contains data for the tree part 26% X.integration.dzn constains data for the Xth instance of 27% the matching 28%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 29%-----------------------------------------------------------------------------% 30% Includes 31 32include "alldifferent.mzn"; 33include "arg_sort.mzn"; 34 35%-----------------------------------------------------------------------------% 36% Parameters 37 38 % Graph parameters 39 % 40int: nbV; 41int: nbE; 42set of int: nodes = 1..nbV; 43set of int: edges = 1..nbE; 44set of int: cnodes; 45set of int: dnodes; 46set of int: anodes; %Is always empty on these data files. 47array[nodes,edges] of bool: adjacent; 48array[edges] of nodes: heads; 49array[edges] of nodes: tails; 50array[1..2,edges] of int: endpairs = array2d(1..2,edges,tails++heads); 51array[edges] of int: ws; 52array[edges] of int: sorted_edges = arg_sort(ws); 53 54% Bounds for the edge cost variable 55int: w_lb = sum(e in edges)(min(0, ws[e])); 56int: w_ub = sum(e in edges)(max(0, ws[e])); 57 58 % Attribute parameters 59 % 60int: nbA; 61set of int: atts = 1..nbA; 62array[atts] of set of int: attribute_domains; 63array[atts] of int: attribute_names; % just for printing 64 65array[atts,int] of int: match_costs; 66array[atts,int] of int: match_costs_sorted; 67 68% Bounds for the matching variables 69int: match_lb = min([min(attribute_domains[a]) | a in atts]); 70int: match_ub = max([max(attribute_domains[a]) | a in atts]); 71 72% Bounds for the matching cost variable 73int: wm_lb = sum(a in atts)(min(i in attribute_domains[a])(match_costs[a,i])); 74int: wm_ub = sum(a in atts)(max(i in attribute_domains[a])(match_costs[a,i])); 75 76%-----------------------------------------------------------------------------% 77% Variables 78 79array[nodes] of var bool: vs; 80array[edges] of var bool: es; 81array[atts] of var match_lb..match_ub: match; 82var w_lb..w_ub: w; 83var wm_lb..wm_ub: wm; 84var w_lb+wm_lb..w_ub+wm_ub: objective; 85 86%-----------------------------------------------------------------------------% 87% Functions 88 89function int: other(array[1..2,int] of int: endpairs, int: snd, int: r) = 90 if endpairs[1,snd] == r then endpairs[2,snd] else endpairs[1,snd] endif; 91 92%-----------------------------------------------------------------------------% 93% Predicates 94 95predicate treep( 96 array[int] of var bool: vs, array[int] of var bool: es, 97 array [int,int] of bool: adjacent, array[1..2,int] of int: endpairs 98) = 99 forall(e in index_set(es))( 100 es[e] -> (vs[endpairs[1,e]] /\ vs[endpairs[2,e]]) 101 ) 102/\ let { 103 % Parent node: 104 array[index_set(vs)] of var index_set(vs): p; 105 % A variable root for the tree 106 var index_set(vs): root ; 107 % Adjacency matrix: node->node->edge 108 array[index_set(vs),index_set(vs)] of int: m = 109 array2d(index_set(vs),index_set(vs), 110 [ sum( 111 % the intersect will contain only one element or 0 elements. 112 ( { e| e in index_set(es) where adjacent[i,e]} 113 intersect { e| e in index_set(es) where adjacent[j,e]} 114 ) union {0} 115 ) 116 | i,j in index_set(vs)] 117 ); 118 } in 119 vs[root] 120 /\ p[root] = root 121 % Parent must be adjacent 122 /\ forall(a in index_set(vs) where a != root /\ vs[a])( 123 p[a] in {other(endpairs,b,a) | b in index_set(es) where adjacent[a,b]} 124 ) 125 % Edge between child and parent, force parent in, avoid cyclic 126 % relationships 127 /\ forall(a in index_set(vs) where a != root /\ vs[a])( 128 es[m[a,p[a]]] /\ vs[p[a]] /\ p[p[a]] != a 129 ) 130 % Set out-nodes to a value. 131 /\ forall(a in index_set(vs) where not(vs[a]))( 132 p[a] = a 133 ) 134 % Redundant: 135 /\ redundant_constraint( 136 sum(i in index_set(es))(bool2int(es[i])) 137 = sum(i in index_set(vs))(bool2int(vs[i])) - 1 138 ) 139 ; 140 141%-----------------------------------------------------------------------------% 142% Constraints 143 144% Attrbiutes are in the tree (empty constraint for these data files) 145constraint forall(att in anodes)( vs[att] == true ); 146 147% Enforce the treeness 148constraint treep(vs,es,adjacent,endpairs); 149 150% Matching problem: 151constraint forall(a in atts)( match[a] in attribute_domains[a] ); 152constraint forall(a in atts)( vs[match[a]] ); 153constraint alldifferent(match); 154 155% Matched node is in the tree. 156constraint forall(d in dnodes)( exists(a in atts)(match[a] == d) <-> vs[d] ); 157% Onlly one edge comming out of the match. 158constraint forall(d in dnodes)( vs[d] <-> sum(e in edges where adjacent[d,e])(es[e]) == 1 ); 159 160% Objective related constraints 161constraint w = sum(i in edges)( bool2int(es[i]) * ws[i] ); % Weight of the tree 162constraint wm = sum(a in atts)( match_costs[a, match[a]] ); 163constraint objective = w + wm; 164 165 166%-----------------------------------------------------------------------------% 167% Search strategies 168 169 170ann: cheap_matches = int_search( 171 [ match[a] == match_costs_sorted[a,i] |a in atts, i in index_set_2of2(match_costs_sorted) ], 172 input_order, indomain_max, complete 173); 174 175ann: naive_e = int_search(es, input_order, indomain_min, complete); 176ann: kruskal = int_search([es[sorted_edges[e]] | e in edges], input_order, indomain_max, complete); 177ann: kruskal2 = int_search([es[sorted_edges[nbE + 1 - e]] | e in edges], input_order, indomain_min, complete); 178 179 180solve 181 ::seq_search([ 182 %cheap_matches, 183 int_search(match, first_fail, indomain_min, complete), 184 kruskal2, 185 ]) 186 minimize objective; 187 188output [ 189 "es = \(es);\n", 190 "vs = \(vs);\n", 191 "match = \(match);\n", 192 "objective = \(objective);\n" 193]; 194 195%output ["digraph {\n"]++ 196%[ " \(n-1)"++ (if n in cnodes then "[fillcolor=\"#59d0a0\",style=filled];" else if n in dnodes then "[fillcolor=gold,style=filled];" else "" endif endif)++"\n" | n in nodes where fix(vs[n])] ++ 197%[ " "++if fix(heads[e]) in cnodes then "\(heads[e]-1) -> \(tails[e]-1)" else "\(tails[e]-1) -> \(heads[e]-1)" endif ++" [label=\"w=\(ws[e]/10000)\"];\n" | e in edges where fix(es[e])] ++ 198%[ " \(attribute_names[a]) [shape=hexagon];\n" | a in atts] ++ 199%[ " \(match[a]-1) -> \(attribute_names[a]) [label=\"w=\(match_costs[a,fix(match[a])]/10000)\"];\n" | a in atts]++ 200%["}\nobjective = \(objective)\nwm = \(wm)\nw =\(w)"]; 201 202