1%%% ``Licensed under the Apache License, Version 2.0 (the "License");
2%%% you may not use this file except in compliance with the License.
3%%% You may obtain a copy of the License at
4%%%
5%%%     http://www.apache.org/licenses/LICENSE-2.0
6%%%
7%%% Unless required by applicable law or agreed to in writing, software
8%%% distributed under the License is distributed on an "AS IS" BASIS,
9%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10%%% See the License for the specific language governing permissions and
11%%% limitations under the License.
12%%%
13%%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
14%%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
15%%% AB. All Rights Reserved.''
16%%%
17%%%     $Id: mnesia_frag.erl,v 1.1 2008/12/17 09:53:38 mikpe Exp $
18%%%----------------------------------------------------------------------
19%%% Purpose : Support tables so large that they need
20%%%           to be divided into several fragments.
21%%%----------------------------------------------------------------------
22
23%header_doc_include
24
25-module(mnesia_frag).
26-behaviour(mnesia_access).
27
28%% Callback functions when accessed within an activity
29-export([
30	 lock/4,
31	 write/5, delete/5, delete_object/5,
32	 read/5, match_object/5, all_keys/4,
33	 select/5,
34	 index_match_object/6, index_read/6,
35	 foldl/6, foldr/6,
36	 table_info/4
37       ]).
38
39%header_doc_include
40
41-export([
42	 change_table_frag/2,
43	 remove_node/2,
44	 expand_cstruct/1,
45	 lookup_frag_hash/1,
46	 lookup_foreigners/1,
47	 frag_names/1,
48	 set_frag_hash/2,
49	 local_select/4,
50	 remote_select/4
51	]).
52
53-include("mnesia.hrl").
54
55-define(OLD_HASH_MOD, mnesia_frag_old_hash).
56-define(DEFAULT_HASH_MOD, mnesia_frag_hash).
57%%-define(DEFAULT_HASH_MOD, ?OLD_HASH_MOD). %%  BUGBUG: New should be default
58
59-record(frag_state,
60	{foreign_key,
61	 n_fragments,
62	 hash_module,
63	 hash_state}).
64
65%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
66%% Access functions
67
68%impl_doc_include
69
70%% Callback functions which provides transparent
71%% access of fragmented tables from any activity
72%% access context.
73
74lock(ActivityId, Opaque, {table , Tab}, LockKind) ->
75    case frag_names(Tab) of
76	[Tab] ->
77	    mnesia:lock(ActivityId, Opaque, {table, Tab}, LockKind);
78	Frags ->
79	    DeepNs = [mnesia:lock(ActivityId, Opaque, {table, F}, LockKind) ||
80			 F <- Frags],
81	    mnesia_lib:uniq(lists:append(DeepNs))
82    end;
83
84lock(ActivityId, Opaque, LockItem, LockKind) ->
85    mnesia:lock(ActivityId, Opaque, LockItem, LockKind).
86
87write(ActivityId, Opaque, Tab, Rec, LockKind) ->
88    Frag = record_to_frag_name(Tab, Rec),
89    mnesia:write(ActivityId, Opaque, Frag, Rec, LockKind).
90
91delete(ActivityId, Opaque, Tab, Key, LockKind) ->
92    Frag = key_to_frag_name(Tab, Key),
93    mnesia:delete(ActivityId, Opaque, Frag, Key, LockKind).
94
95delete_object(ActivityId, Opaque, Tab, Rec, LockKind) ->
96    Frag = record_to_frag_name(Tab, Rec),
97    mnesia:delete_object(ActivityId, Opaque, Frag, Rec, LockKind).
98
99read(ActivityId, Opaque, Tab, Key, LockKind) ->
100    Frag = key_to_frag_name(Tab, Key),
101    mnesia:read(ActivityId, Opaque, Frag, Key, LockKind).
102
103match_object(ActivityId, Opaque, Tab, HeadPat, LockKind) ->
104    MatchSpec = [{HeadPat, [], ['$_']}],
105    select(ActivityId, Opaque, Tab, MatchSpec, LockKind).
106
107select(ActivityId, Opaque, Tab, MatchSpec, LockKind) ->
108    do_select(ActivityId, Opaque, Tab, MatchSpec, LockKind).
109
110all_keys(ActivityId, Opaque, Tab, LockKind) ->
111    Match = [mnesia:all_keys(ActivityId, Opaque, Frag, LockKind)
112	     || Frag <- frag_names(Tab)],
113    lists:append(Match).
114
115index_match_object(ActivityId, Opaque, Tab, Pat, Attr, LockKind) ->
116    Match =
117	[mnesia:index_match_object(ActivityId, Opaque, Frag, Pat, Attr, LockKind)
118	 || Frag <- frag_names(Tab)],
119    lists:append(Match).
120
121index_read(ActivityId, Opaque, Tab, Key, Attr, LockKind) ->
122    Match =
123	[mnesia:index_read(ActivityId, Opaque, Frag, Key, Attr, LockKind)
124	     || Frag <- frag_names(Tab)],
125    lists:append(Match).
126
127foldl(ActivityId, Opaque, Fun, Acc, Tab, LockKind) ->
128    Fun2 = fun(Frag, A) ->
129		   mnesia:foldl(ActivityId, Opaque, Fun, A, Frag, LockKind)
130	   end,
131    lists:foldl(Fun2, Acc, frag_names(Tab)).
132
133foldr(ActivityId, Opaque, Fun, Acc, Tab, LockKind) ->
134    Fun2 = fun(Frag, A) ->
135		   mnesia:foldr(ActivityId, Opaque, Fun, A, Frag, LockKind)
136	   end,
137    lists:foldr(Fun2, Acc, frag_names(Tab)).
138
139table_info(ActivityId, Opaque, {Tab, Key}, Item) ->
140    Frag = key_to_frag_name(Tab, Key),
141    table_info2(ActivityId, Opaque, Tab, Frag, Item);
142table_info(ActivityId, Opaque, Tab, Item) ->
143    table_info2(ActivityId, Opaque, Tab, Tab, Item).
144
145table_info2(ActivityId, Opaque, Tab, Frag, Item) ->
146    case Item of
147	size ->
148	    SumFun = fun({_, Size}, Acc) -> Acc + Size end,
149	    lists:foldl(SumFun, 0, frag_size(ActivityId, Opaque, Tab));
150	memory ->
151	    SumFun = fun({_, Size}, Acc) -> Acc + Size end,
152	    lists:foldl(SumFun, 0, frag_memory(ActivityId, Opaque, Tab));
153	base_table ->
154	    lookup_prop(Tab, base_table);
155	node_pool ->
156	    lookup_prop(Tab, node_pool);
157	n_fragments ->
158	    FH = lookup_frag_hash(Tab),
159	    FH#frag_state.n_fragments;
160	foreign_key ->
161	    FH = lookup_frag_hash(Tab),
162	    FH#frag_state.foreign_key;
163	foreigners ->
164	    lookup_foreigners(Tab);
165	n_ram_copies ->
166	    length(val({Tab, ram_copies}));
167	n_disc_copies ->
168	    length(val({Tab, disc_copies}));
169	n_disc_only_copies ->
170	    length(val({Tab, disc_only_copies}));
171
172	frag_names ->
173	    frag_names(Tab);
174	frag_dist ->
175	    frag_dist(Tab);
176	frag_size ->
177	    frag_size(ActivityId, Opaque, Tab);
178	frag_memory ->
179	    frag_memory(ActivityId, Opaque, Tab);
180	_ ->
181	    mnesia:table_info(ActivityId, Opaque, Frag, Item)
182    end.
183%impl_doc_include
184
185frag_size(ActivityId, Opaque, Tab) ->
186    [{F, remote_table_info(ActivityId, Opaque, F, size)} || F <- frag_names(Tab)].
187
188frag_memory(ActivityId, Opaque, Tab) ->
189    [{F, remote_table_info(ActivityId, Opaque, F, memory)} || F <- frag_names(Tab)].
190
191
192
193remote_table_info(ActivityId, Opaque, Tab, Item) ->
194    N = val({Tab, where_to_read}),
195    case rpc:call(N, mnesia, table_info, [ActivityId, Opaque, Tab, Item]) of
196	{badrpc, _} ->
197	    mnesia:abort({no_exists, Tab, Item});
198	Info ->
199	    Info
200    end.
201
202do_select(ActivityId, Opaque, Tab, MatchSpec, LockKind) ->
203    case ?catch_val({Tab, frag_hash}) of
204	{'EXIT', _} ->
205	    mnesia:select(ActivityId, Opaque, Tab, MatchSpec, LockKind);
206	FH ->
207	    HashState = FH#frag_state.hash_state,
208	    FragNumbers =
209		case FH#frag_state.hash_module of
210		    HashMod when HashMod == ?DEFAULT_HASH_MOD ->
211			?DEFAULT_HASH_MOD:match_spec_to_frag_numbers(HashState, MatchSpec);
212		    HashMod ->
213			HashMod:match_spec_to_frag_numbers(HashState, MatchSpec)
214		end,
215	    N = FH#frag_state.n_fragments,
216	    VerifyFun = fun(F) when integer(F), F >= 1, F =< N -> false;
217			   (_F) -> true
218			end,
219	    case catch lists:filter(VerifyFun, FragNumbers) of
220		[] ->
221		    Fun = fun(Num) ->
222				  Name = n_to_frag_name(Tab, Num),
223				  Node = val({Name, where_to_read}),
224				  mnesia:lock(ActivityId, Opaque, {table, Name}, LockKind),
225				  {Name, Node}
226			  end,
227		    NameNodes = lists:map(Fun, FragNumbers),
228		    SelectAllFun =
229			fun(PatchedMatchSpec) ->
230				Match = [mnesia:dirty_select(Name, PatchedMatchSpec)
231					 || {Name, _Node} <- NameNodes],
232				lists:append(Match)
233			end,
234		    case [{Name, Node} || {Name, Node} <- NameNodes, Node /= node()] of
235			[] ->
236			    %% All fragments are local
237			    mnesia:fun_select(ActivityId, Opaque, Tab, MatchSpec, none, '_', SelectAllFun);
238			RemoteNameNodes ->
239			    SelectFun =
240				fun(PatchedMatchSpec) ->
241					Ref = make_ref(),
242					Args = [self(), Ref, RemoteNameNodes, PatchedMatchSpec],
243					Pid = spawn_link(?MODULE, local_select, Args),
244					LocalMatch = [mnesia:dirty_select(Name, PatchedMatchSpec)
245						      || {Name, Node} <- NameNodes, Node == node()],
246					OldSelectFun = fun() -> SelectAllFun(PatchedMatchSpec) end,
247					local_collect(Ref, Pid, lists:append(LocalMatch), OldSelectFun)
248				end,
249			    mnesia:fun_select(ActivityId, Opaque, Tab, MatchSpec, none, '_', SelectFun)
250		    end;
251		BadFrags ->
252		    mnesia:abort({"match_spec_to_frag_numbers: Fragment numbers out of range",
253				  BadFrags, {range, 1, N}})
254	    end
255    end.
256
257local_select(ReplyTo, Ref, RemoteNameNodes, MatchSpec) ->
258    RemoteNodes = mnesia_lib:uniq([Node || {_Name, Node} <- RemoteNameNodes]),
259    Args = [ReplyTo, Ref, RemoteNameNodes, MatchSpec],
260    {Replies, BadNodes} = rpc:multicall(RemoteNodes, ?MODULE, remote_select, Args),
261    case mnesia_lib:uniq(Replies) -- [ok] of
262	[] when BadNodes == [] ->
263	    ReplyTo ! {local_select, Ref, ok};
264	_ when BadNodes /= [] ->
265	    ReplyTo ! {local_select, Ref, {error, {node_not_running, hd(BadNodes)}}};
266	[{badrpc, {'EXIT', Reason}} | _] ->
267	    ReplyTo ! {local_select, Ref, {error, Reason}};
268	[Reason | _] ->
269	    ReplyTo ! {local_select, Ref, {error, Reason}}
270    end,
271    unlink(ReplyTo),
272    exit(normal).
273
274remote_select(ReplyTo, Ref, NameNodes, MatchSpec) ->
275    do_remote_select(ReplyTo, Ref, NameNodes, MatchSpec).
276
277do_remote_select(ReplyTo, Ref, [{Name, Node} | NameNodes], MatchSpec) ->
278    if
279	Node == node() ->
280	    Res = (catch {ok, mnesia:dirty_select(Name, MatchSpec)}),
281	    ReplyTo ! {remote_select, Ref, Node, Res},
282	    do_remote_select(ReplyTo, Ref, NameNodes, MatchSpec);
283	true ->
284	    do_remote_select(ReplyTo, Ref, NameNodes, MatchSpec)
285    end;
286do_remote_select(_ReplyTo, _Ref, [], _MatchSpec) ->
287    ok.
288
289local_collect(Ref, Pid, LocalMatch, OldSelectFun) ->
290    receive
291	{local_select, Ref, LocalRes} ->
292	    remote_collect(Ref, LocalRes, LocalMatch, OldSelectFun);
293	{'EXIT', Pid, Reason} ->
294	    remote_collect(Ref, {error, Reason}, [], OldSelectFun)
295    end.
296
297remote_collect(Ref, LocalRes = ok, Acc, OldSelectFun) ->
298    receive
299	{remote_select, Ref, Node, RemoteRes} ->
300	    case RemoteRes of
301		{ok, RemoteMatch} ->
302		    remote_collect(Ref, LocalRes, RemoteMatch ++ Acc, OldSelectFun);
303		_ ->
304		    remote_collect(Ref, {error, {node_not_running, Node}}, [], OldSelectFun)
305	    end
306    after 0 ->
307	    Acc
308    end;
309remote_collect(Ref, LocalRes = {error, Reason}, _Acc, OldSelectFun) ->
310    receive
311	{remote_select, Ref, _Node, _RemoteRes} ->
312	    remote_collect(Ref, LocalRes, [], OldSelectFun)
313    after 0 ->
314	    mnesia:abort(Reason)
315    end.
316
317%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
318%% Returns a list of cstructs
319
320expand_cstruct(Cs) ->
321    expand_cstruct(Cs, create).
322
323expand_cstruct(Cs, Mode) ->
324    Tab = Cs#cstruct.name,
325    Props = Cs#cstruct.frag_properties,
326    mnesia_schema:verify({alt, [nil, list]}, mnesia_lib:etype(Props),
327			 {badarg, Tab, Props}),
328    %% Verify keys
329    ValidKeys = [foreign_key, n_fragments, node_pool,
330		 n_ram_copies, n_disc_copies, n_disc_only_copies,
331		 hash_module, hash_state],
332    Keys = mnesia_schema:check_keys(Tab, Props, ValidKeys),
333    mnesia_schema:check_duplicates(Tab, Keys),
334
335    %% Pick fragmentation props
336    ForeignKey = mnesia_schema:pick(Tab, foreign_key, Props, undefined),
337    {ForeignKey2, N, Pool, DefaultNR, DefaultND, DefaultNDO} =
338	pick_props(Tab, Cs, ForeignKey),
339
340    %% Verify node_pool
341    BadPool = {bad_type, Tab, {node_pool, Pool}},
342    mnesia_schema:verify(list, mnesia_lib:etype(Pool), BadPool),
343    NotAtom = fun(A) when atom(A) -> false;
344		 (_A) -> true
345	      end,
346    mnesia_schema:verify([], [P || P <- Pool, NotAtom(P)], BadPool),
347
348    NR  = mnesia_schema:pick(Tab, n_ram_copies, Props, 0),
349    ND  = mnesia_schema:pick(Tab, n_disc_copies, Props, 0),
350    NDO = mnesia_schema:pick(Tab, n_disc_only_copies, Props, 0),
351
352    PosInt = fun(I) when integer(I), I >= 0 -> true;
353		(_I) -> false
354	     end,
355    mnesia_schema:verify(true, PosInt(NR),
356			 {bad_type, Tab, {n_ram_copies, NR}}),
357    mnesia_schema:verify(true, PosInt(ND),
358			 {bad_type, Tab, {n_disc_copies, ND}}),
359    mnesia_schema:verify(true, PosInt(NDO),
360			 {bad_type, Tab, {n_disc_only_copies, NDO}}),
361
362    %% Verify n_fragments
363    Cs2 = verify_n_fragments(N, Cs, Mode),
364
365    %% Verify hash callback
366    HashMod = mnesia_schema:pick(Tab, hash_module, Props, ?DEFAULT_HASH_MOD),
367    HashState = mnesia_schema:pick(Tab, hash_state, Props, undefined),
368    HashState2 = HashMod:init_state(Tab, HashState), %% BUGBUG: Catch?
369
370    FH = #frag_state{foreign_key = ForeignKey2,
371		     n_fragments = 1,
372		     hash_module = HashMod,
373		     hash_state  = HashState2},
374    if
375	NR == 0, ND == 0, NDO == 0 ->
376	    do_expand_cstruct(Cs2, FH, N, Pool, DefaultNR, DefaultND, DefaultNDO, Mode);
377	true ->
378	    do_expand_cstruct(Cs2, FH, N, Pool, NR, ND, NDO, Mode)
379    end.
380
381do_expand_cstruct(Cs, FH, N, Pool, NR, ND, NDO, Mode) ->
382    Tab = Cs#cstruct.name,
383
384    LC = Cs#cstruct.local_content,
385    mnesia_schema:verify(false, LC,
386			 {combine_error, Tab, {local_content, LC}}),
387
388    Snmp = Cs#cstruct.snmp,
389    mnesia_schema:verify([], Snmp,
390			 {combine_error, Tab, {snmp, Snmp}}),
391
392    %% Add empty fragments
393    CommonProps = [{base_table, Tab}],
394    Cs2 = Cs#cstruct{frag_properties = lists:sort(CommonProps)},
395    expand_frag_cstructs(N, NR, ND, NDO, Cs2, Pool, Pool, FH, Mode).
396
397verify_n_fragments(N, Cs, Mode) when integer(N), N >= 1 ->
398    case Mode of
399	create ->
400	    Cs#cstruct{ram_copies = [],
401		       disc_copies = [],
402		       disc_only_copies = []};
403	activate  ->
404	    Reason = {combine_error, Cs#cstruct.name, {n_fragments, N}},
405	    mnesia_schema:verify(1, N, Reason),
406	    Cs
407    end;
408verify_n_fragments(N, Cs, _Mode) ->
409    mnesia:abort({bad_type, Cs#cstruct.name, {n_fragments, N}}).
410
411pick_props(Tab, Cs, {ForeignTab, Attr}) ->
412    mnesia_schema:verify(true, ForeignTab /= Tab,
413			 {combine_error, Tab, {ForeignTab, Attr}}),
414    Props = Cs#cstruct.frag_properties,
415    Attrs = Cs#cstruct.attributes,
416
417    ForeignKey  = lookup_prop(ForeignTab, foreign_key),
418    ForeignN    = lookup_prop(ForeignTab, n_fragments),
419    ForeignPool = lookup_prop(ForeignTab, node_pool),
420    N           = mnesia_schema:pick(Tab, n_fragments, Props,  ForeignN),
421    Pool        = mnesia_schema:pick(Tab, node_pool, Props, ForeignPool),
422
423    mnesia_schema:verify(ForeignN, N,
424			 {combine_error, Tab, {n_fragments, N},
425			  ForeignTab, {n_fragments, ForeignN}}),
426
427    mnesia_schema:verify(ForeignPool, Pool,
428			 {combine_error, Tab, {node_pool, Pool},
429			  ForeignTab, {node_pool, ForeignPool}}),
430
431    mnesia_schema:verify(undefined, ForeignKey,
432			 {combine_error, Tab,
433			  "Multiple levels of foreign_key dependencies",
434			  {ForeignTab, Attr}, ForeignKey}),
435
436    Key = {ForeignTab, mnesia_schema:attr_to_pos(Attr, Attrs)},
437    DefaultNR = length(val({ForeignTab, ram_copies})),
438    DefaultND = length(val({ForeignTab, disc_copies})),
439    DefaultNDO = length(val({ForeignTab, disc_only_copies})),
440    {Key, N, Pool, DefaultNR, DefaultND, DefaultNDO};
441pick_props(Tab, Cs, undefined) ->
442    Props = Cs#cstruct.frag_properties,
443    DefaultN = 1,
444    DefaultPool = mnesia:system_info(db_nodes),
445    N    = mnesia_schema:pick(Tab, n_fragments, Props,  DefaultN),
446    Pool = mnesia_schema:pick(Tab, node_pool, Props, DefaultPool),
447    DefaultNR = 1,
448    DefaultND = 0,
449    DefaultNDO = 0,
450    {undefined, N, Pool, DefaultNR, DefaultND, DefaultNDO};
451pick_props(Tab, _Cs, BadKey) ->
452    mnesia:abort({bad_type, Tab, {foreign_key, BadKey}}).
453
454expand_frag_cstructs(N, NR, ND, NDO, CommonCs, Dist, Pool, FH, Mode)
455  when N > 1, Mode == create ->
456    Frag = n_to_frag_name(CommonCs#cstruct.name, N),
457    Cs = CommonCs#cstruct{name = Frag},
458    {Cs2, RevModDist, RestDist} = set_frag_nodes(NR, ND, NDO, Cs, Dist, []),
459    ModDist = lists:reverse(RevModDist),
460    Dist2 = rearrange_dist(Cs, ModDist, RestDist, Pool),
461    %% Adjusts backwards, but it doesn't matter.
462    {FH2, _FromFrags, _AdditionalWriteFrags} = adjust_before_split(FH),
463    CsList = expand_frag_cstructs(N - 1, NR, ND, NDO, CommonCs, Dist2, Pool, FH2, Mode),
464    [Cs2 | CsList];
465expand_frag_cstructs(1, NR, ND, NDO, CommonCs, Dist, Pool, FH, Mode) ->
466    BaseProps = CommonCs#cstruct.frag_properties ++
467	[{foreign_key, FH#frag_state.foreign_key},
468	 {hash_module, FH#frag_state.hash_module},
469	 {hash_state,  FH#frag_state.hash_state},
470	 {n_fragments, FH#frag_state.n_fragments},
471	 {node_pool, Pool}
472	],
473    BaseCs = CommonCs#cstruct{frag_properties = lists:sort(BaseProps)},
474    case Mode of
475	activate ->
476	    [BaseCs];
477	create ->
478	    {BaseCs2, _, _} = set_frag_nodes(NR, ND, NDO, BaseCs, Dist, []),
479	    [BaseCs2]
480    end.
481
482set_frag_nodes(NR, ND, NDO, Cs, [Head | Tail], Acc) when NR > 0 ->
483    Pos = #cstruct.ram_copies,
484    {Cs2, Head2} = set_frag_node(Cs, Pos, Head),
485    set_frag_nodes(NR - 1, ND, NDO, Cs2, Tail, [Head2 | Acc]);
486set_frag_nodes(NR, ND, NDO, Cs, [Head | Tail], Acc) when ND > 0 ->
487    Pos = #cstruct.disc_copies,
488    {Cs2, Head2} = set_frag_node(Cs, Pos, Head),
489    set_frag_nodes(NR, ND - 1, NDO, Cs2, Tail, [Head2 | Acc]);
490set_frag_nodes(NR, ND, NDO, Cs, [Head | Tail], Acc) when NDO > 0 ->
491    Pos = #cstruct.disc_only_copies,
492    {Cs2, Head2} = set_frag_node(Cs, Pos, Head),
493    set_frag_nodes(NR, ND, NDO - 1, Cs2, Tail, [Head2 | Acc]);
494set_frag_nodes(0, 0, 0, Cs, RestDist, ModDist) ->
495    {Cs, ModDist, RestDist};
496set_frag_nodes(_, _, _, Cs, [], _) ->
497    mnesia:abort({combine_error,  Cs#cstruct.name, "Too few nodes in node_pool"}).
498
499set_frag_node(Cs, Pos, Head) ->
500    Ns = element(Pos, Cs),
501    {Node, Count2} =
502	case Head of
503	    {N, Count} when atom(N), integer(Count), Count >= 0 ->
504		{N, Count + 1};
505	    N when atom(N) ->
506		{N, 1};
507	    BadNode ->
508		mnesia:abort({bad_type, Cs#cstruct.name, BadNode})
509	end,
510    Cs2 = setelement(Pos, Cs, [Node | Ns]),
511    {Cs2, {Node, Count2}}.
512
513rearrange_dist(Cs, [{Node, Count} | ModDist], Dist, Pool) ->
514    Dist2 = insert_dist(Cs, Node, Count, Dist, Pool),
515    rearrange_dist(Cs, ModDist, Dist2, Pool);
516rearrange_dist(_Cs, [], Dist, _) ->
517    Dist.
518
519insert_dist(Cs, Node, Count, [Head | Tail], Pool) ->
520    case Head of
521	{Node2, Count2} when atom(Node2), integer(Count2), Count2 >= 0 ->
522	    case node_diff(Node, Count, Node2, Count2, Pool) of
523		less ->
524		    [{Node, Count}, Head | Tail];
525		greater ->
526		    [Head | insert_dist(Cs, Node, Count, Tail, Pool)]
527	    end;
528	Node2 when atom(Node2) ->
529	    insert_dist(Cs, Node, Count, [{Node2, 0} | Tail], Pool);
530	BadNode ->
531	    mnesia:abort({bad_type, Cs#cstruct.name, BadNode})
532    end;
533insert_dist(_Cs, Node, Count, [], _Pool) ->
534    [{Node, Count}];
535insert_dist(_Cs, _Node, _Count, Dist, _Pool) ->
536    mnesia:abort({bad_type, Dist}).
537
538node_diff(_Node, Count, _Node2, Count2, _Pool) when Count < Count2 ->
539    less;
540node_diff(Node, Count, Node2, Count2, Pool) when Count == Count2 ->
541    Pos = list_pos(Node, Pool, 1),
542    Pos2 = list_pos(Node2, Pool, 1),
543    if
544	Pos < Pos2 ->
545	    less;
546	Pos > Pos2 ->
547	    greater
548    end;
549node_diff(_Node, Count, _Node2, Count2, _Pool) when Count > Count2 ->
550    greater.
551
552%% Returns position of element in list
553list_pos(H,  [H | _T], Pos) ->
554    Pos;
555list_pos(E,  [_H | T], Pos) ->
556    list_pos(E,  T, Pos + 1).
557
558%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
559%% Switch function for changing of table fragmentation
560%%
561%% Returns a list of lists of schema ops
562
563change_table_frag(Tab, {activate, FragProps}) ->
564    make_activate(Tab, FragProps);
565change_table_frag(Tab, deactivate) ->
566    make_deactivate(Tab);
567change_table_frag(Tab,  {add_frag, SortedNodes}) ->
568    make_multi_add_frag(Tab, SortedNodes);
569change_table_frag(Tab,  del_frag) ->
570    make_multi_del_frag(Tab);
571change_table_frag(Tab,  {add_node, Node}) ->
572    make_multi_add_node(Tab, Node);
573change_table_frag(Tab,  {del_node, Node}) ->
574    make_multi_del_node(Tab, Node);
575change_table_frag(Tab,  Change) ->
576    mnesia:abort({bad_type, Tab, Change}).
577
578%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
579%% Turn a normal table into a fragmented table
580%%
581%% The storage type must be the same on all nodes
582
583make_activate(Tab, Props) ->
584    Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
585    mnesia_schema:ensure_active(Cs),
586    case Cs#cstruct.frag_properties of
587	[] ->
588	    Cs2 = Cs#cstruct{frag_properties = Props},
589	    [Cs3] = expand_cstruct(Cs2, activate),
590	    TabDef = mnesia_schema:cs2list(Cs3),
591	    Op = {op, change_table_frag, activate, TabDef},
592	    [[Op]];
593	BadProps ->
594	    mnesia:abort({already_exists, Tab, {frag_properties, BadProps}})
595    end.
596
597%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
598%% Turn a table into a normal defragmented table
599
600make_deactivate(Tab) ->
601    Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
602    mnesia_schema:ensure_active(Cs),
603    Foreigners = lookup_foreigners(Tab),
604    BaseTab = lookup_prop(Tab, base_table),
605    FH = lookup_frag_hash(Tab),
606    if
607	BaseTab /= Tab ->
608	    mnesia:abort({combine_error, Tab, "Not a base table"});
609	Foreigners /= [] ->
610	    mnesia:abort({combine_error, Tab, "Too many foreigners", Foreigners});
611	FH#frag_state.n_fragments > 1 ->
612	    mnesia:abort({combine_error, Tab, "Too many fragments"});
613	true ->
614	    Cs2 = Cs#cstruct{frag_properties = []},
615	    TabDef = mnesia_schema:cs2list(Cs2),
616	    Op = {op, change_table_frag, deactivate, TabDef},
617	    [[Op]]
618    end.
619
620%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
621%% Add a fragment to a fragmented table  and fill it with half of
622%% the records from one of the old fragments
623
624make_multi_add_frag(Tab, SortedNs) when list(SortedNs) ->
625    verify_multi(Tab),
626    Ops = make_add_frag(Tab, SortedNs),
627
628    %% Propagate to foreigners
629    MoreOps = [make_add_frag(T, SortedNs) || T <- lookup_foreigners(Tab)],
630    [Ops | MoreOps];
631make_multi_add_frag(Tab, SortedNs) ->
632    mnesia:abort({bad_type, Tab, SortedNs}).
633
634verify_multi(Tab) ->
635    FH = lookup_frag_hash(Tab),
636    ForeignKey = FH#frag_state.foreign_key,
637    mnesia_schema:verify(undefined, ForeignKey,
638			 {combine_error, Tab,
639			  "Op only allowed via foreign table",
640			  {foreign_key, ForeignKey}}).
641
642make_frag_names_and_acquire_locks(Tab, N, FragIndecies, DoNotLockN) ->
643    mnesia_schema:get_tid_ts_and_lock(Tab, write),
644    Fun = fun(Index, FN) ->
645		  if
646		      DoNotLockN == true, Index == N ->
647			  Name = n_to_frag_name(Tab, Index),
648			  setelement(Index, FN, Name);
649		      true ->
650			  Name = n_to_frag_name(Tab, Index),
651			  mnesia_schema:get_tid_ts_and_lock(Name, write),
652			  setelement(Index , FN, Name)
653		  end
654	  end,
655    FragNames = erlang:make_tuple(N, undefined),
656    lists:foldl(Fun, FragNames, FragIndecies).
657
658make_add_frag(Tab, SortedNs) ->
659    Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
660    mnesia_schema:ensure_active(Cs),
661    FH = lookup_frag_hash(Tab),
662    {FH2, FromIndecies, WriteIndecies} = adjust_before_split(FH),
663    N = FH2#frag_state.n_fragments,
664    FragNames = make_frag_names_and_acquire_locks(Tab, N, WriteIndecies, true),
665    NewFrag = element(N, FragNames),
666
667    NR = length(Cs#cstruct.ram_copies),
668    ND = length(Cs#cstruct.disc_copies),
669    NDO = length(Cs#cstruct.disc_only_copies),
670    NewCs = Cs#cstruct{name = NewFrag,
671		       frag_properties = [{base_table, Tab}],
672		       ram_copies = [],
673		       disc_copies = [],
674		       disc_only_copies = []},
675    {NewCs2, _, _} = set_frag_nodes(NR, ND, NDO, NewCs, SortedNs, []),
676    [NewOp] = mnesia_schema:make_create_table(NewCs2),
677
678    SplitOps = split(Tab, FH2, FromIndecies, FragNames, []),
679
680    Cs2 = replace_frag_hash(Cs, FH2),
681    TabDef = mnesia_schema:cs2list(Cs2),
682    BaseOp = {op, change_table_frag, {add_frag, SortedNs}, TabDef},
683
684    [BaseOp, NewOp | SplitOps].
685
686replace_frag_hash(Cs, FH) when record(FH, frag_state) ->
687    Fun = fun(Prop) ->
688		  case Prop of
689		      {n_fragments, _} ->
690			  {true, {n_fragments, FH#frag_state.n_fragments}};
691		      {hash_module, _} ->
692			  {true, {hash_module, FH#frag_state.hash_module}};
693		      {hash_state, _} ->
694			  {true, {hash_state, FH#frag_state.hash_state}};
695		      {next_n_to_split, _} ->
696			  false;
697		      {n_doubles, _} ->
698			  false;
699		      _ ->
700			  true
701		  end
702	  end,
703    Props = lists:zf(Fun, Cs#cstruct.frag_properties),
704    Cs#cstruct{frag_properties = Props}.
705
706%% Adjust table info before split
707adjust_before_split(FH) ->
708    HashState = FH#frag_state.hash_state,
709    {HashState2, FromFrags, AdditionalWriteFrags} =
710	case FH#frag_state.hash_module of
711	    HashMod when HashMod == ?DEFAULT_HASH_MOD ->
712		?DEFAULT_HASH_MOD:add_frag(HashState);
713	    HashMod ->
714		HashMod:add_frag(HashState)
715	end,
716    N = FH#frag_state.n_fragments + 1,
717    FromFrags2 = (catch lists:sort(FromFrags)),
718    UnionFrags = (catch lists:merge(FromFrags2, lists:sort(AdditionalWriteFrags))),
719    VerifyFun = fun(F) when integer(F), F >= 1, F =< N -> false;
720		   (_F) -> true
721		end,
722    case catch lists:filter(VerifyFun, UnionFrags) of
723	[] ->
724	    FH2 = FH#frag_state{n_fragments = N,
725				hash_state  = HashState2},
726	    {FH2, FromFrags2, UnionFrags};
727	BadFrags ->
728	    mnesia:abort({"add_frag: Fragment numbers out of range",
729			  BadFrags, {range, 1, N}})
730    end.
731
732split(Tab, FH, [SplitN | SplitNs], FragNames, Ops) ->
733    SplitFrag = element(SplitN, FragNames),
734    Pat = mnesia:table_info(SplitFrag, wild_pattern),
735    {_Mod, Tid, Ts} = mnesia_schema:get_tid_ts_and_lock(Tab, none),
736    Recs = mnesia:match_object(Tid, Ts, SplitFrag, Pat, read),
737    Ops2 = do_split(FH, SplitN, FragNames, Recs, Ops),
738    split(Tab, FH, SplitNs, FragNames, Ops2);
739split(_Tab, _FH, [], _FragNames, Ops) ->
740    Ops.
741
742%% Perform the split of the table
743do_split(FH, OldN, FragNames, [Rec | Recs], Ops) ->
744    Pos = key_pos(FH),
745    HashKey = element(Pos, Rec),
746    case key_to_n(FH, HashKey) of
747	NewN when NewN == OldN ->
748	    %% Keep record in the same fragment. No need to move it.
749	    do_split(FH, OldN, FragNames, Recs, Ops);
750	NewN ->
751	    case element(NewN, FragNames) of
752		NewFrag when NewFrag /= undefined ->
753		    OldFrag = element(OldN, FragNames),
754		    Key = element(2, Rec),
755		    NewOid = {NewFrag, Key},
756		    OldOid = {OldFrag, Key},
757		    Ops2 = [{op, rec, unknown, {NewOid, [Rec], write}},
758			    {op, rec, unknown, {OldOid, [OldOid], delete}} | Ops],
759		    do_split(FH, OldN, FragNames, Recs, Ops2);
760		_NewFrag ->
761		    %% Tried to move record to fragment that not is locked
762		    mnesia:abort({"add_frag: Fragment not locked", NewN})
763	    end
764    end;
765do_split(_FH, _OldN, _FragNames, [], Ops) ->
766    Ops.
767
768%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
769%% Delete a fragment from a fragmented table
770%% and merge its records with an other fragment
771
772make_multi_del_frag(Tab) ->
773    verify_multi(Tab),
774    Ops = make_del_frag(Tab),
775
776    %% Propagate to foreigners
777    MoreOps = [make_del_frag(T) || T <- lookup_foreigners(Tab)],
778    [Ops | MoreOps].
779
780make_del_frag(Tab) ->
781    FH = lookup_frag_hash(Tab),
782    case FH#frag_state.n_fragments of
783	N when N > 1 ->
784	    Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
785	    mnesia_schema:ensure_active(Cs),
786	    {FH2, FromIndecies, WriteIndecies} = adjust_before_merge(FH),
787	    FragNames = make_frag_names_and_acquire_locks(Tab, N, WriteIndecies, false),
788
789	    MergeOps = merge(Tab, FH2, FromIndecies, FragNames, []),
790	    LastFrag = element(N, FragNames),
791	    [LastOp] = mnesia_schema:make_delete_table(LastFrag, single_frag),
792	    Cs2 = replace_frag_hash(Cs, FH2),
793	    TabDef = mnesia_schema:cs2list(Cs2),
794	    BaseOp = {op, change_table_frag, del_frag, TabDef},
795	    [BaseOp, LastOp | MergeOps];
796	_ ->
797	    %% Cannot remove the last fragment
798	    mnesia:abort({no_exists, Tab})
799    end.
800
801%% Adjust tab info before merge
802adjust_before_merge(FH) ->
803    HashState = FH#frag_state.hash_state,
804    {HashState2, FromFrags, AdditionalWriteFrags} =
805	case FH#frag_state.hash_module of
806	    HashMod when HashMod == ?DEFAULT_HASH_MOD ->
807		?DEFAULT_HASH_MOD:del_frag(HashState);
808	    HashMod ->
809		HashMod:del_frag(HashState)
810	end,
811    N = FH#frag_state.n_fragments,
812    FromFrags2 = (catch lists:sort(FromFrags)),
813    UnionFrags = (catch lists:merge(FromFrags2, lists:sort(AdditionalWriteFrags))),
814    VerifyFun = fun(F) when integer(F), F >= 1, F =< N -> false;
815		   (_F) -> true
816		end,
817    case catch lists:filter(VerifyFun, UnionFrags) of
818	[] ->
819	    case lists:member(N, FromFrags2) of
820		true ->
821		    FH2 = FH#frag_state{n_fragments = N - 1,
822					hash_state  = HashState2},
823		    {FH2, FromFrags2, UnionFrags};
824		false ->
825		    mnesia:abort({"del_frag: Last fragment number not included", N})
826	    end;
827	BadFrags ->
828	    mnesia:abort({"del_frag: Fragment numbers out of range",
829			  BadFrags, {range, 1, N}})
830    end.
831
832merge(Tab, FH, [FromN | FromNs], FragNames, Ops) ->
833    FromFrag = element(FromN, FragNames),
834    Pat = mnesia:table_info(FromFrag, wild_pattern),
835    {_Mod, Tid, Ts} = mnesia_schema:get_tid_ts_and_lock(Tab, none),
836    Recs = mnesia:match_object(Tid, Ts, FromFrag, Pat, read),
837    Ops2 = do_merge(FH, FromN, FragNames, Recs, Ops),
838    merge(Tab, FH, FromNs, FragNames, Ops2);
839merge(_Tab, _FH, [], _FragNames, Ops) ->
840    Ops.
841
842%% Perform the merge of the table
843do_merge(FH, OldN, FragNames, [Rec | Recs], Ops) ->
844    Pos = key_pos(FH),
845    LastN = FH#frag_state.n_fragments + 1,
846    HashKey = element(Pos, Rec),
847    case key_to_n(FH, HashKey) of
848	NewN when NewN == LastN ->
849	    %% Tried to leave a record in the fragment that is to be deleted
850	    mnesia:abort({"del_frag: Fragment number out of range",
851			  NewN, {range, 1, LastN}});
852	NewN when NewN == OldN ->
853	    %% Keep record in the same fragment. No need to move it.
854	    do_merge(FH, OldN, FragNames, Recs, Ops);
855	NewN when OldN == LastN ->
856	    %% Move record from the fragment that is to be deleted
857	    %% No need to create a delete op for each record.
858	    case element(NewN, FragNames) of
859		NewFrag when NewFrag /= undefined ->
860		    Key = element(2, Rec),
861		    NewOid = {NewFrag, Key},
862		    Ops2 = [{op, rec, unknown, {NewOid, [Rec], write}} | Ops],
863		    do_merge(FH, OldN, FragNames, Recs, Ops2);
864		_NewFrag ->
865		    %% Tried to move record to fragment that not is locked
866		    mnesia:abort({"del_frag: Fragment not locked", NewN})
867	    end;
868	NewN ->
869	    case element(NewN, FragNames) of
870		NewFrag when NewFrag /= undefined ->
871		    OldFrag = element(OldN, FragNames),
872		    Key = element(2, Rec),
873		    NewOid = {NewFrag, Key},
874		    OldOid = {OldFrag, Key},
875		    Ops2 = [{op, rec, unknown, {NewOid, [Rec], write}},
876			    {op, rec, unknown, {OldOid, [OldOid], delete}} | Ops],
877		    do_merge(FH, OldN, FragNames, Recs, Ops2);
878		_NewFrag ->
879		    %% Tried to move record to fragment that not is locked
880		    mnesia:abort({"del_frag: Fragment not locked", NewN})
881	    end
882    end;
883 do_merge(_FH, _OldN, _FragNames, [], Ops) ->
884   Ops.
885
886%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
887%% Add a node to the node pool of a fragmented table
888
889make_multi_add_node(Tab, Node)  ->
890    verify_multi(Tab),
891    Ops = make_add_node(Tab, Node),
892
893    %% Propagate to foreigners
894    MoreOps = [make_add_node(T, Node) || T <- lookup_foreigners(Tab)],
895    [Ops | MoreOps].
896
897make_add_node(Tab, Node) when atom(Node)  ->
898    Pool = lookup_prop(Tab, node_pool),
899    case lists:member(Node, Pool) of
900	false ->
901	    Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
902	    Pool2 = Pool ++ [Node],
903	    Props = Cs#cstruct.frag_properties,
904	    Props2 = lists:keyreplace(node_pool, 1, Props, {node_pool, Pool2}),
905	    Cs2 = Cs#cstruct{frag_properties = Props2},
906	    TabDef = mnesia_schema:cs2list(Cs2),
907	    Op = {op, change_table_frag, {add_node, Node}, TabDef},
908	    [Op];
909	true ->
910	    mnesia:abort({already_exists, Tab, Node})
911    end;
912make_add_node(Tab, Node) ->
913    mnesia:abort({bad_type, Tab, Node}).
914
915%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
916%% Delet a node from the node pool of a fragmented table
917
918make_multi_del_node(Tab, Node)  ->
919    verify_multi(Tab),
920    Ops = make_del_node(Tab, Node),
921
922    %% Propagate to foreigners
923    MoreOps = [make_del_node(T, Node) || T <- lookup_foreigners(Tab)],
924    [Ops | MoreOps].
925
926make_del_node(Tab, Node) when atom(Node) ->
927    Cs = mnesia_schema:incr_version(val({Tab, cstruct})),
928    mnesia_schema:ensure_active(Cs),
929    Pool = lookup_prop(Tab, node_pool),
930    case lists:member(Node, Pool) of
931	true ->
932	    Pool2 = Pool -- [Node],
933	    Props = lists:keyreplace(node_pool, 1, Cs#cstruct.frag_properties, {node_pool, Pool2}),
934	    Cs2 = Cs#cstruct{frag_properties = Props},
935	    TabDef = mnesia_schema:cs2list(Cs2),
936	    Op = {op, change_table_frag, {del_node, Node}, TabDef},
937	    [Op];
938	false ->
939	    mnesia:abort({no_exists, Tab, Node})
940    end;
941make_del_node(Tab, Node) ->
942    mnesia:abort({bad_type, Tab, Node}).
943
944%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
945%% Special case used to remove all references to a node during
946%% mnesia:del_table_copy(schema, Node)
947
948remove_node(Node, Cs) ->
949    Tab = Cs#cstruct.name,
950    case is_top_frag(Tab) of
951	false ->
952	    {Cs, false};
953	true ->
954	    Pool = lookup_prop(Tab, node_pool),
955	    case lists:member(Node, Pool) of
956		true ->
957		    Pool2 = Pool -- [Node],
958		    Props = lists:keyreplace(node_pool, 1,
959					     Cs#cstruct.frag_properties,
960					     {node_pool, Pool2}),
961		    {Cs#cstruct{frag_properties = Props}, true};
962		false ->
963		    {Cs, false}
964	    end
965    end.
966
967%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
968%% Helpers
969
970val(Var) ->
971    case ?catch_val(Var) of
972	{'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason);
973	Value -> Value
974    end.
975
976set_frag_hash(Tab, Props) ->
977    case props_to_frag_hash(Tab, Props) of
978	FH when record(FH, frag_state) ->
979	    mnesia_lib:set({Tab, frag_hash}, FH);
980	no_hash ->
981	    mnesia_lib:unset({Tab, frag_hash})
982    end.
983
984props_to_frag_hash(_Tab, []) ->
985    no_hash;
986props_to_frag_hash(Tab, Props) ->
987    case mnesia_schema:pick(Tab, base_table, Props, undefined) of
988	T when T == Tab ->
989	    Foreign = mnesia_schema:pick(Tab, foreign_key, Props, must),
990	    N = mnesia_schema:pick(Tab, n_fragments, Props, must),
991
992	    case mnesia_schema:pick(Tab, hash_module, Props, undefined) of
993		undefined ->
994		    Split = mnesia_schema:pick(Tab, next_n_to_split, Props, must),
995		    Doubles = mnesia_schema:pick(Tab, n_doubles, Props, must),
996		    FH = {frag_hash, Foreign, N, Split, Doubles},
997		    HashState = ?OLD_HASH_MOD:init_state(Tab, FH),
998		    #frag_state{foreign_key = Foreign,
999				n_fragments = N,
1000				hash_module = ?OLD_HASH_MOD,
1001				hash_state  = HashState};
1002		HashMod ->
1003		    HashState = mnesia_schema:pick(Tab, hash_state, Props, must),
1004		    #frag_state{foreign_key = Foreign,
1005				n_fragments = N,
1006				hash_module = HashMod,
1007				hash_state  = HashState}
1008		    %% Old style. Kept for backwards compatibility.
1009	    end;
1010	_ ->
1011	    no_hash
1012    end.
1013
1014lookup_prop(Tab, Prop) ->
1015    Props = val({Tab, frag_properties}),
1016    case lists:keysearch(Prop, 1,  Props) of
1017	{value, {Prop, Val}} ->
1018	    Val;
1019	false ->
1020	    mnesia:abort({no_exists, Tab, Prop, {frag_properties, Props}})
1021    end.
1022
1023lookup_frag_hash(Tab) ->
1024    case ?catch_val({Tab, frag_hash}) of
1025	FH when record(FH, frag_state) ->
1026	    FH;
1027	{frag_hash, K, N, _S, _D} = FH ->
1028	    %% Old style. Kept for backwards compatibility.
1029	    HashState = ?OLD_HASH_MOD:init_state(Tab, FH),
1030	    #frag_state{foreign_key = K,
1031			n_fragments = N,
1032			hash_module = ?OLD_HASH_MOD,
1033			hash_state  = HashState};
1034	{'EXIT', _} ->
1035	    mnesia:abort({no_exists, Tab, frag_properties, frag_hash})
1036    end.
1037
1038is_top_frag(Tab) ->
1039    case ?catch_val({Tab, frag_hash}) of
1040	{'EXIT', _} ->
1041	    false;
1042	_ ->
1043	    [] == lookup_foreigners(Tab)
1044    end.
1045
1046%% Returns a list of tables
1047lookup_foreigners(Tab) ->
1048    %% First field in HashPat is either frag_hash or frag_state
1049    HashPat = {'_', {Tab, '_'}, '_', '_', '_'},
1050    [T || [T] <- ?ets_match(mnesia_gvar, {{'$1', frag_hash}, HashPat})].
1051
1052%% Returns name of fragment table
1053record_to_frag_name(Tab, Rec) ->
1054    case ?catch_val({Tab, frag_hash}) of
1055	{'EXIT', _} ->
1056	    Tab;
1057	FH ->
1058	    Pos = key_pos(FH),
1059	    Key = element(Pos, Rec),
1060	    N = key_to_n(FH, Key),
1061	    n_to_frag_name(Tab, N)
1062    end.
1063
1064key_pos(FH) ->
1065    case FH#frag_state.foreign_key of
1066	undefined ->
1067	    2;
1068	{_ForeignTab, Pos} ->
1069	    Pos
1070    end.
1071
1072%% Returns name of fragment table
1073key_to_frag_name({BaseTab, _} = Tab, Key) ->
1074    N = key_to_frag_number(Tab, Key),
1075    n_to_frag_name(BaseTab, N);
1076key_to_frag_name(Tab, Key) ->
1077    N = key_to_frag_number(Tab, Key),
1078    n_to_frag_name(Tab, N).
1079
1080%% Returns name of fragment table
1081n_to_frag_name(Tab, 1) ->
1082    Tab;
1083n_to_frag_name(Tab, N) when atom(Tab), integer(N) ->
1084    list_to_atom(atom_to_list(Tab) ++ "_frag" ++ integer_to_list(N));
1085n_to_frag_name(Tab, N) ->
1086    mnesia:abort({bad_type, Tab, N}).
1087
1088%% Returns name of fragment table
1089key_to_frag_number({Tab, ForeignKey}, _Key) ->
1090    FH = val({Tab, frag_hash}),
1091    case FH#frag_state.foreign_key of
1092	{_ForeignTab, _Pos} ->
1093	    key_to_n(FH, ForeignKey);
1094	undefined ->
1095	    mnesia:abort({combine_error, Tab, frag_properties,
1096			  {foreign_key, undefined}})
1097    end;
1098key_to_frag_number(Tab, Key) ->
1099    case ?catch_val({Tab, frag_hash}) of
1100	{'EXIT', _} ->
1101	    1;
1102	FH ->
1103	    key_to_n(FH, Key)
1104    end.
1105
1106%% Returns fragment number
1107key_to_n(FH, Key) ->
1108    HashState = FH#frag_state.hash_state,
1109    N =
1110	case FH#frag_state.hash_module of
1111	    HashMod when HashMod == ?DEFAULT_HASH_MOD ->
1112		?DEFAULT_HASH_MOD:key_to_frag_number(HashState, Key);
1113	    HashMod ->
1114		HashMod:key_to_frag_number(HashState, Key)
1115	end,
1116    if
1117	integer(N), N >= 1, N =< FH#frag_state.n_fragments ->
1118	    N;
1119	true ->
1120	    mnesia:abort({"key_to_frag_number: Fragment number out of range",
1121			  N, {range, 1, FH#frag_state.n_fragments}})
1122    end.
1123
1124%% Returns a list of frament table names
1125frag_names(Tab) ->
1126    case ?catch_val({Tab, frag_hash}) of
1127	{'EXIT', _} ->
1128	    [Tab];
1129	FH ->
1130	    N = FH#frag_state.n_fragments,
1131	    frag_names(Tab, N, [])
1132    end.
1133
1134frag_names(Tab, 1, Acc) ->
1135    [Tab | Acc];
1136frag_names(Tab, N, Acc) ->
1137    Frag = n_to_frag_name(Tab, N),
1138    frag_names(Tab, N - 1, [Frag | Acc]).
1139
1140%% Returns a list of {Node, FragCount} tuples
1141%% sorted on FragCounts
1142frag_dist(Tab) ->
1143    Pool = lookup_prop(Tab, node_pool),
1144    Dist = [{good, Node, 0} || Node <- Pool],
1145    Dist2 = count_frag(frag_names(Tab), Dist),
1146    sort_dist(Dist2).
1147
1148count_frag([Frag | Frags], Dist) ->
1149    Dist2 =  incr_nodes(val({Frag, ram_copies}), Dist),
1150    Dist3 =  incr_nodes(val({Frag, disc_copies}), Dist2),
1151    Dist4 =  incr_nodes(val({Frag, disc_only_copies}), Dist3),
1152    count_frag(Frags, Dist4);
1153count_frag([], Dist) ->
1154    Dist.
1155
1156incr_nodes([Node | Nodes], Dist) ->
1157    Dist2 = incr_node(Node, Dist),
1158    incr_nodes(Nodes, Dist2);
1159incr_nodes([], Dist) ->
1160    Dist.
1161
1162incr_node(Node, [{Kind, Node, Count} | Tail]) ->
1163    [{Kind, Node, Count + 1} | Tail];
1164incr_node(Node, [Head | Tail]) ->
1165    [Head | incr_node(Node, Tail)];
1166incr_node(Node, []) ->
1167    [{bad, Node, 1}].
1168
1169%% Sorts dist according in decreasing count order
1170sort_dist(Dist) ->
1171    Dist2 = deep_dist(Dist, []),
1172    Dist3 = lists:keysort(1, Dist2),
1173    shallow_dist(Dist3).
1174
1175deep_dist([Head | Tail], Deep) ->
1176    {Kind, _Node, Count} = Head,
1177    {Tag, Same, Other} = pick_count(Kind, Count, [Head | Tail]),
1178    deep_dist(Other, [{Tag, Same} | Deep]);
1179deep_dist([], Deep) ->
1180    Deep.
1181
1182pick_count(Kind, Count, [{Kind2, Node2, Count2} | Tail]) ->
1183    Head = {Node2, Count2},
1184    {_, Same, Other} = pick_count(Kind, Count, Tail),
1185    if
1186	Kind == bad ->
1187	    {bad, [Head | Same], Other};
1188	Kind2 == bad ->
1189	    {Count, Same, [{Kind2, Node2, Count2} | Other]};
1190	Count == Count2 ->
1191	    {Count, [Head | Same], Other};
1192	true ->
1193	    {Count, Same, [{Kind2, Node2, Count2} | Other]}
1194    end;
1195pick_count(_Kind, Count, []) ->
1196    {Count, [], []}.
1197
1198shallow_dist([{_Tag, Shallow} | Deep]) ->
1199    Shallow ++ shallow_dist(Deep);
1200shallow_dist([]) ->
1201    [].
1202