1%% This Source Code Form is subject to the terms of the Mozilla Public
2%% License, v. 2.0. If a copy of the MPL was not distributed with this
3%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
4%%
5%% Copyright (c) 2018-2021 VMware, Inc. or its affiliates.  All rights reserved.
6%%
7
8%% @author The RabbitMQ team
9%% @copyright 2018-2021 VMware, Inc. or its affiliates.
10%%
11%% @doc
12%% This module offers a framework to declare capabilities a RabbitMQ node
13%% supports and therefore a way to determine if multiple RabbitMQ nodes in
14%% a cluster are compatible and can work together.
15%%
16%% == What a feature flag is ==
17%%
18%% A <strong>feature flag</strong> is a name and several properties given
19%% to a change in RabbitMQ which impacts its communication with other
20%% RabbitMQ nodes. This kind of change can be:
21%% <ul>
22%% <li>an update to an Erlang record</li>
23%% <li>a modification to a replicated Mnesia table schema</li>
24%% <li>a modification to Erlang messages exchanged between Erlang processes
25%%   which might run on remote nodes</li>
26%% </ul>
27%%
28%% A feature flag is qualified by:
29%% <ul>
30%% <li>a <strong>name</strong></li>
31%% <li>a <strong>description</strong> (optional)</li>
32%% <li>a list of other <strong>feature flags this feature flag depends on
33%%   </strong> (optional). This can be useful when the change builds up on
34%%   top of a previous change. For instance, it expands a record which was
35%%   already modified by a previous feature flag.</li>
36%% <li>a <strong>migration function</strong> (optional). If provided, this
37%%   function is called when the feature flag is enabled. It is responsible
38%%   for doing all the data conversion, if any, and confirming the feature
39%%   flag can be enabled.</li>
40%% <li>a level of stability (stable or experimental). For now, this is only
41%%   informational. But it might be used for specific purposes in the
42%%   future.</li>
43%% </ul>
44%%
45%% == How to declare a feature flag ==
46%%
47%% To define a new feature flag, you need to use the
48%% `rabbit_feature_flag()' module attribute:
49%%
50%% ```
51%% -rabbit_feature_flag(FeatureFlag).
52%% '''
53%%
54%% `FeatureFlag' is a {@type feature_flag_modattr()}.
55%%
56%% == How to enable a feature flag ==
57%%
58%% To enable a supported feature flag, you have the following solutions:
59%%
60%% <ul>
61%% <li>Using this module API:
62%% ```
63%% rabbit_feature_flags:enable(FeatureFlagName).
64%% '''
65%% </li>
66%% <li>Using the `rabbitmqctl' CLI:
67%% ```
68%% rabbitmqctl enable_feature_flag "$feature_flag_name"
69%% '''
70%% </li>
71%% </ul>
72%%
73%% == How to disable a feature flag ==
74%%
75%% Once enabled, there is <strong>currently no way to disable</strong> a
76%% feature flag.
77
78-module(rabbit_feature_flags).
79
80-export([list/0,
81         list/1,
82         list/2,
83         enable/1,
84         enable_all/0,
85         disable/1,
86         disable_all/0,
87         is_supported/1,
88         is_supported/2,
89         is_supported_locally/1,
90         is_supported_remotely/1,
91         is_supported_remotely/2,
92         is_supported_remotely/3,
93         is_enabled/1,
94         is_enabled/2,
95         is_disabled/1,
96         is_disabled/2,
97         info/0,
98         info/1,
99         init/0,
100         get_state/1,
101         get_stability/1,
102         check_node_compatibility/1,
103         check_node_compatibility/2,
104         is_node_compatible/1,
105         is_node_compatible/2,
106         sync_feature_flags_with_cluster/2,
107         sync_feature_flags_with_cluster/3,
108         refresh_feature_flags_after_app_load/1,
109         enabled_feature_flags_list_file/0
110        ]).
111
112%% RabbitMQ internal use only.
113-export([initialize_registry/0,
114         initialize_registry/1,
115         mark_as_enabled_locally/2,
116         remote_nodes/0,
117         running_remote_nodes/0,
118         does_node_support/3,
119         merge_feature_flags_from_unknown_apps/1,
120         do_sync_feature_flags_with_node/1]).
121
122-ifdef(TEST).
123-export([inject_test_feature_flags/1,
124         initialize_registry/3,
125         query_supported_feature_flags/0,
126         mark_as_enabled_remotely/2,
127         mark_as_enabled_remotely/4,
128         registry_loading_lock/0]).
129-endif.
130
131%% Default timeout for operations on remote nodes.
132-define(TIMEOUT, 60000).
133
134-define(FF_REGISTRY_LOADING_LOCK, {feature_flags_registry_loading, self()}).
135-define(FF_STATE_CHANGE_LOCK,     {feature_flags_state_change, self()}).
136
137-type feature_flag_modattr() :: {feature_name(),
138                                 feature_props()}.
%% The value of a `-rabbit_feature_flag()' module attribute used to
140%% declare a new feature flag.
141
142-type feature_name() :: atom().
143%% The feature flag's name. It is used in many places to identify a
144%% specific feature flag. In particular, this is how an end-user (or
145%% the CLI) can enable a feature flag. This is also the only bit which
%% is persisted so a node remembers which feature flags are enabled.
147
148-type feature_props() :: #{desc => string(),
149                           doc_url => string(),
150                           stability => stability(),
151                           depends_on => [feature_name()],
152                           migration_fun => migration_fun_name()}.
153%% The feature flag properties.
154%%
155%% All properties are optional.
156%%
157%% The properties are:
158%% <ul>
159%% <li>`desc': a description of the feature flag</li>
160%% <li>`doc_url': a URL pointing to more documentation about the feature
161%%   flag</li>
162%% <li>`stability': the level of stability</li>
%% <li>`depends_on': a list of names of feature flags which must be enabled
164%%   before this one</li>
165%% <li>`migration_fun': a migration function specified by its module and
166%%   function names</li>
167%% </ul>
168%%
169%% Note that the `migration_fun' is a {@type migration_fun_name()},
170%% not a {@type migration_fun()}. However, the function signature
171%% must conform to the {@type migration_fun()} signature. The reason
172%% is that we must be able to represent it as an Erlang term when
173%% we regenerate the registry module source code (using {@link
174%% erl_syntax:abstract/1}).
175
176-type feature_flags() :: #{feature_name() => feature_props_extended()}.
177%% The feature flags map as returned or accepted by several functions in
%% this module. In particular, this is what the {@link list/0} function
179%% returns.
180
181-type feature_props_extended() :: #{desc => string(),
182                                    doc_url => string(),
183                                    stability => stability(),
184                                    migration_fun => migration_fun_name(),
185                                    depends_on => [feature_name()],
186                                    provided_by => atom()}.
187%% The feature flag properties, once expanded by this module when feature
188%% flags are discovered.
189%%
190%% The new properties compared to {@type feature_props()} are:
191%% <ul>
192%% <li>`provided_by': the name of the application providing the feature flag</li>
193%% </ul>
194
195-type feature_state() :: boolean() | state_changing.
%% The state of the feature flag: `true' if enabled, `false' if disabled,
%% or `state_changing' if its state is being changed at the moment.
198
199-type feature_states() :: #{feature_name() => feature_state()}.
200
201-type stability() :: stable | experimental.
202%% The level of stability of a feature flag. Currently, only informational.
203
204-type migration_fun_name() :: {Module :: atom(), Function :: atom()}.
205%% The name of the module and function to call when changing the state of
206%% the feature flag.
207
208-type migration_fun() :: fun((feature_name(),
209                              feature_props_extended(),
210                              migration_fun_context())
211                             -> ok | {error, any()} |   % context = enable
212                                boolean() | undefined). % context = is_enabled
213%% The migration function signature.
214%%
215%% It is called with context `enable' when a feature flag is being enabled.
216%% The function is responsible for this feature-flag-specific verification
217%% and data conversion. It returns `ok' if RabbitMQ can mark the feature
%% flag as enabled and continue with the next one, if any. Otherwise, it
219%% returns `{error, any()}' if there is an error and the feature flag should
220%% remain disabled. The function must be idempotent: if the feature flag is
221%% already enabled on another node and the local node is running this function
222%% again because it is syncing its feature flags state, it should succeed.
223%%
224%% It is called with the context `is_enabled' to check if a feature flag
225%% is actually enabled. It is useful on RabbitMQ startup, just in case
226%% the previous instance failed to write the feature flags list file.
227
228-type migration_fun_context() :: enable | is_enabled.
229
230-type registry_vsn() :: term().
231
232-export_type([feature_flag_modattr/0,
233              feature_props/0,
234              feature_name/0,
235              feature_flags/0,
236              feature_props_extended/0,
237              feature_state/0,
238              feature_states/0,
239              stability/0,
240              migration_fun_name/0,
241              migration_fun/0,
242              migration_fun_context/0]).
243
244-on_load(on_load/0).
245
-spec list() -> feature_flags().
%% @doc
%% Returns every supported feature flag, whatever its state.
%%
%% This is a shortcut for `list(all)'.
%%
%% @returns A map of all supported feature flags.

list() ->
    list(all).
253
-spec list(Which :: all | enabled | disabled) -> feature_flags().
%% @doc
%% Returns the feature flags belonging to the requested group.
%%
%% The `all' and `enabled' groups are served directly by the registry.
%% The `disabled' group is computed here by keeping, among all feature
%% flags, those for which {@link is_disabled/1} returns `true'.
%%
%% @param Which The group of feature flags to return: `all', `enabled' or
%% `disabled'.
%% @returns A map of selected feature flags.

list(Which) when Which =:= all; Which =:= enabled ->
    rabbit_ff_registry:list(Which);
list(disabled) ->
    AllFlags = list(all),
    maps:filter(fun(Name, _Props) -> is_disabled(Name) end, AllFlags).
267
-spec list(all | enabled | disabled, stability()) -> feature_flags().
%% @doc
%% Returns the feature flags of the requested group which have the given
%% stability level.
%%
%% The group is first resolved with {@link list/1}, then narrowed down by
%% comparing each feature flag's stability (see {@link get_stability/1})
%% with `Stability'.
%%
%% @param Which The group of feature flags to return: `all', `enabled' or
%% `disabled'.
%% @param Stability The level of stability used to filter the map of feature
%% flags: `stable' or `experimental'.
%% @returns A map of selected feature flags.

list(Which, Stability)
  when Stability =:= stable; Stability =:= experimental ->
    Candidates = list(Which),
    HasStability = fun(_Name, Props) ->
                           get_stability(Props) =:= Stability
                   end,
    maps:filter(HasStability, Candidates).
284
-spec enable(feature_name() | [feature_name()]) -> ok |
                                                   {error, Reason :: any()}.
%% @doc
%% Enables one feature flag, or each feature flag of a list in turn.
%%
%% For a single name, nothing is done if the feature flag is already
%% enabled. Otherwise the feature flag must be supported (see
%% {@link is_supported/1}) before the actual enabling is attempted;
%% `{error, unsupported}' is returned if it is not.
%%
%% For a list, the names are processed in order and processing stops at
%% the first one which does not return `ok'.
%%
%% @param FeatureName The name or the list of names of feature flags to
%%   enable.
%% @returns `ok' if all requested feature flags are enabled, or the error
%%   returned for the first feature flag which could not be enabled (the
%%   following ones in the list are left untouched).

enable(FeatureNames) when is_list(FeatureNames) ->
    with_feature_flags(FeatureNames, fun enable/1);
enable(FeatureName) when is_atom(FeatureName) ->
    LogArgs = [FeatureName],
    rabbit_log_feature_flags:debug(
      "Feature flag `~s`: REQUEST TO ENABLE", LogArgs),
    case is_enabled(FeatureName) of
        true ->
            rabbit_log_feature_flags:debug(
              "Feature flag `~s`: already enabled", LogArgs),
            ok;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flag `~s`: not enabled, check if supported by cluster",
              LogArgs),
            %% Enabling is only attempted when the local node and every
            %% running remote node support the feature flag.
            case is_supported(FeatureName) of
                true ->
                    rabbit_log_feature_flags:info(
                      "Feature flag `~s`: supported, attempt to enable...",
                      LogArgs),
                    do_enable(FeatureName);
                false ->
                    rabbit_log_feature_flags:error(
                      "Feature flag `~s`: not supported", LogArgs),
                    {error, unsupported}
            end
    end.
328
-spec enable_all() -> ok | {error, any()}.
%% @doc
%% Enables every supported feature flag.
%%
%% The names of all feature flags returned by `list(all)' are handed to
%% {@link enable/1}, which processes them one after the other and stops
%% at the first failure.
%%
%% @returns `ok' if the feature flags were successfully enabled,
%%   or `{error, Reason}' if one feature flag could not be enabled
%%   (the remaining feature flags are left unchanged).

enable_all() ->
    AllFeatureNames = maps:keys(list(all)),
    enable(AllFeatureNames).
340
-spec disable(feature_name() | [feature_name()]) -> ok | {error, any()}.
%% @doc
%% Requests that a feature flag, or each feature flag of a list, be
%% disabled.
%%
%% Disabling a feature flag is not implemented: for a single name, this
%% function always returns `{error, unsupported}'. For a list, the names
%% are processed in order and processing stops at the first error, so
%% only an empty list yields `ok'.
%%
%% @param FeatureName The name or the list of names of feature flags to
%%   disable.
%% @returns `ok' for an empty list, `{error, unsupported}' otherwise.

disable(FeatureNames) when is_list(FeatureNames) ->
    with_feature_flags(FeatureNames, fun disable/1);
disable(FeatureName) when is_atom(FeatureName) ->
    {error, unsupported}.
356
-spec disable_all() -> ok | {error, any()}.
%% @doc
%% Requests that every supported feature flag be disabled.
%%
%% The names of all feature flags returned by `list(all)' are handed to
%% {@link disable/1}. Because disabling a single feature flag always
%% fails with `{error, unsupported}', this only returns `ok' when there
%% is no feature flag at all.
%%
%% @returns `ok' if there was nothing to disable, or `{error, Reason}'
%%   returned for the first feature flag which could not be disabled.

disable_all() ->
    AllFeatureNames = maps:keys(list(all)),
    disable(AllFeatureNames).
368
-spec with_feature_flags([feature_name()],
                         fun((feature_name()) -> ok | {error, any()})) ->
    ok | {error, any()}.
%% @private
%% Applies `Fun' to each feature flag name, in list order, as long as it
%% returns `ok'. The first other value returned by `Fun' ends the
%% iteration and becomes the result; the remaining names are not
%% visited.

with_feature_flags([], _Fun) ->
    ok;
with_feature_flags([Name | Remaining], Fun) ->
    case Fun(Name) of
        ok    -> with_feature_flags(Remaining, Fun);
        NotOk -> NotOk
    end.
381
-spec is_supported(feature_name() | [feature_name()]) -> boolean().
%% @doc
%% Tells whether a feature flag, or every feature flag of a list, is
%% supported by the entire cluster.
%%
%% The check is the logical AND of {@link is_supported_locally/1} and
%% {@link is_supported_remotely/1}; remote nodes are queried with the
%% default timeout.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if the set of feature flags is entirely supported, or
%%   `false' if one of them is not or the RPC timed out.

is_supported(FeatureNames) ->
    is_supported(FeatureNames, ?TIMEOUT).
398
-spec is_supported(feature_name() | [feature_name()], timeout()) ->
    boolean().
%% @doc
%% Tells whether a feature flag, or every feature flag of a list, is
%% supported by the entire cluster.
%%
%% The local node is checked first with {@link is_supported_locally/1}.
%% Remote nodes are only queried, with {@link is_supported_remotely/2},
%% when the local node supports the feature flag(s).
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @param Timeout Time in milliseconds after which the RPC gives up.
%% @returns `true' if the set of feature flags is entirely supported, or
%%   `false' if one of them is not or the RPC timed out.

is_supported(FeatureNames, Timeout) ->
    case is_supported_locally(FeatureNames) of
        true  -> is_supported_remotely(FeatureNames, Timeout);
        false -> false
    end.
417
-spec is_supported_locally(feature_name() | [feature_name()]) -> boolean().
%% @doc
%% Tells whether a feature flag, or every feature flag of a list, is
%% supported by the local node.
%%
%% The answer comes from the local registry; no remote node is involved.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if the set of feature flags is entirely supported, or
%%   `false' if one of them is not.

is_supported_locally(FeatureName) when is_atom(FeatureName) ->
    rabbit_ff_registry:is_supported(FeatureName);
is_supported_locally(FeatureNames) when is_list(FeatureNames) ->
    lists:all(fun rabbit_ff_registry:is_supported/1, FeatureNames).
432
-spec is_supported_remotely(feature_name() | [feature_name()]) -> boolean().
%% @doc
%% Tells whether a feature flag, or every feature flag of a list, is
%% supported by all remote nodes.
%%
%% Same as {@link is_supported_remotely/2} with the default timeout.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if the set of feature flags is entirely supported, or
%%   `false' if one of them is not or the RPC timed out.

is_supported_remotely(FeatureNames) ->
    DefaultTimeout = ?TIMEOUT,
    is_supported_remotely(FeatureNames, DefaultTimeout).
445
-spec is_supported_remotely(feature_name() | [feature_name()], timeout()) -> boolean().
%% @doc
%% Tells whether a feature flag, or every feature flag of a list, is
%% supported by all running remote nodes.
%%
%% A single name is handled as a one-element list. An empty list is
%% considered supported without querying anyone, and so is any list when
%% there is no running remote node. Otherwise the running remote nodes
%% are queried through {@link is_supported_remotely/3}.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @param Timeout Time in milliseconds after which the RPC gives up.
%% @returns `true' if the set of feature flags is entirely supported, or
%%   `false' if one of them is not or the RPC timed out.

is_supported_remotely([], _Timeout) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: skipping query for feature flags support as the "
      "given list is empty"),
    true;
is_supported_remotely(FeatureName, Timeout) when is_atom(FeatureName) ->
    is_supported_remotely([FeatureName], Timeout);
is_supported_remotely(FeatureNames, Timeout) when is_list(FeatureNames) ->
    RemoteNodes = running_remote_nodes(),
    case RemoteNodes =:= [] of
        true ->
            %% Nobody to ask: the local answer is the only one that counts.
            rabbit_log_feature_flags:debug(
              "Feature flags: isolated node; skipping remote node query "
              "=> consider `~p` supported",
              [FeatureNames]),
            true;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flags: about to query these remote nodes about "
              "support for `~p`: ~p",
              [FeatureNames, RemoteNodes]),
            is_supported_remotely(RemoteNodes, FeatureNames, Timeout)
    end.
479
-spec is_supported_remotely([node()],
                            feature_name() | [feature_name()],
                            timeout()) -> boolean().
%% @doc
%% Tells whether a feature flag, or every feature flag of a list, is
%% supported by the specified remote nodes.
%%
%% An empty list of feature flags is considered supported without
%% querying any node. Otherwise the nodes are queried one after the
%% other, in list order, and the query stops at the first node which
%% does not support the feature flag(s).
%%
%% @param RemoteNodes The list of remote nodes to query.
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @param Timeout Time in milliseconds after which the RPC gives up.
%% @returns `true' if the set of feature flags is entirely supported by
%%   all nodes, or `false' if one of them is not or the RPC timed out.

is_supported_remotely(_RemoteNodes, [], _Timeout) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: skipping query for feature flags support as the "
      "given list is empty"),
    true;
is_supported_remotely(RemoteNodes, FeatureNames, Timeout) ->
    NodeSupports = fun(Node) ->
                           does_node_support(Node, FeatureNames, Timeout)
                   end,
    %% lists:all/2 visits the nodes in order and stops at the first
    %% `false', so no further node is queried after a negative answer.
    case lists:all(NodeSupports, RemoteNodes) of
        true ->
            rabbit_log_feature_flags:debug(
              "Feature flags: all running remote nodes support `~p`",
              [FeatureNames]),
            true;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flags: stopping query for support for `~p` here",
              [FeatureNames]),
            false
    end.
514
-spec is_enabled(feature_name() | [feature_name()]) -> boolean().
%% @doc
%% Tells whether a feature flag, or every feature flag of a list, is
%% enabled.
%%
%% This is {@link is_enabled/2} in `blocking' mode: the call waits while
%% the state of a feature flag is being changed.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if the set of feature flags is enabled, or
%%   `false' if one of them is not.

is_enabled(FeatureNames) ->
    Mode = blocking,
    is_enabled(FeatureNames, Mode).
530
-spec is_enabled
(feature_name() | [feature_name()], blocking) ->
    boolean();
(feature_name() | [feature_name()], non_blocking) ->
    feature_state().
%% @doc
%% Tells whether a feature flag, or every feature flag of a list, is
%% enabled.
%%
%% In `non_blocking' mode, the current state is returned right away:
%% `true', `false', or `state_changing' if a state change is in
%% progress.
%%
%% In `blocking' mode, `state_changing' is never returned: the function
%% waits and checks again until the state is either `true' or `false'.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if the set of feature flags is enabled,
%%   `false' if one of them is not, or `state_changing' if one of them
%%   is being worked on. Note that `state_changing' has precedence over
%%   `false', so if one is `false' and another one is `state_changing',
%%   `state_changing' is returned.

is_enabled(FeatureNames, blocking) ->
    case is_enabled_nb(FeatureNames) of
        state_changing ->
            %% Take and immediately release the state change lock, then
            %% look at the state again. Presumably the lock is held by
            %% whoever is changing the state, so taking it waits for the
            %% change to complete -- confirm against the code which
            %% acquires this lock.
            Lock = ?FF_STATE_CHANGE_LOCK,
            global:set_lock(Lock),
            global:del_lock(Lock),
            is_enabled(FeatureNames, blocking);
        StableState ->
            StableState
    end;
is_enabled(FeatureNames, non_blocking) ->
    is_enabled_nb(FeatureNames).
568
%% Non-blocking state lookup, straight from the registry.
%%
%% For a list, the states are combined as follows, starting from `true':
%% once `state_changing' is seen, it is the final answer and the
%% registry is not queried for the remaining names; `false' sticks
%% unless a later feature flag is `state_changing'.

is_enabled_nb(FeatureName) when is_atom(FeatureName) ->
    rabbit_ff_registry:is_enabled(FeatureName);
is_enabled_nb(FeatureNames) when is_list(FeatureNames) ->
    Combine =
        fun
            (_Name, state_changing) ->
                state_changing;
            (Name, Acc) ->
                case rabbit_ff_registry:is_enabled(Name) of
                    state_changing           -> state_changing;
                    State when Acc =/= false -> State;
                    _                        -> Acc
                end
        end,
    lists:foldl(Combine, true, FeatureNames).
585
-spec is_disabled(feature_name() | [feature_name()]) -> boolean().
%% @doc
%% Tells whether a feature flag, or at least one feature flag of a list,
%% is disabled.
%%
%% This is {@link is_disabled/2} in `blocking' mode, i.e. the negation
%% of {@link is_enabled/1}.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if one of the feature flags is disabled, or
%%   `false' if they are all enabled.

is_disabled(FeatureNames) ->
    Mode = blocking,
    is_disabled(FeatureNames, Mode).
600
-spec is_disabled
(feature_name() | [feature_name()], blocking) ->
    boolean();
(feature_name() | [feature_name()], non_blocking) ->
    feature_state().
%% @doc
%% Tells whether a feature flag, or at least one feature flag of a list,
%% is disabled.
%%
%% The result is the negation of {@link is_enabled/2} called with the
%% same arguments, except for `state_changing' which is passed through
%% untouched.
%%
%% See {@link is_enabled/2} for a description of the `blocking' and
%% `non_blocking' modes.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if one feature flag in the set of feature flags is
%%   disabled, `false' if they are all enabled, or `state_changing' if
%%   one of them is being worked on. Note that `state_changing' has
%%   precedence over `true', so if one is `true' (i.e. disabled) and
%%   another one is `state_changing', `state_changing' is returned.
%%
%% @see is_enabled/2

is_disabled(FeatureName, Blocking) ->
    State = is_enabled(FeatureName, Blocking),
    case State =:= state_changing of
        true  -> State;
        false -> not State
    end.
631
-spec info() -> ok.
%% @doc
%% Displays a table summing up the supported feature flags, their state
%% and various pieces of information about them.
%%
%% Same as {@link info/1} called with an empty map of options.

info() ->
    NoOptions = #{},
    info(NoOptions).
639
-spec info(#{color => boolean(),
             lines => boolean(),
             verbose => non_neg_integer()}) -> ok.
%% @doc
%% Displays a table summing up the supported feature flags, their state
%% and various pieces of information about them.
%%
%% The work is delegated to `rabbit_ff_extra:info/1'.
%%
%% Supported options are:
%% <ul>
%% <li>`color': a boolean to indicate if colors should be used to
%%   highlight some elements.</li>
%% <li>`lines': a boolean to indicate if table borders should be drawn
%%   using ASCII lines instead of regular characters.</li>
%% <li>`verbose': a non-negative integer to specify the level of
%%   verbosity.</li>
%% </ul>
%%
%% @param Options A map of various options to tune the displayed table.

info(#{} = Options) ->
    rabbit_ff_extra:info(Options).
661
-spec get_state(feature_name()) -> enabled | disabled | unavailable.
%% @doc
%% Returns the state of a feature flag.
%%
%% The possible states are:
%% <ul>
%% <li>`enabled': the feature flag is enabled.</li>
%% <li>`disabled': the feature flag is supported by all nodes in the
%%   cluster but currently disabled.</li>
%% <li>`unavailable': the feature flag is unsupported by at least one
%%   node in the cluster and can not be enabled for now.</li>
%% </ul>
%%
%% Support is only looked up when the feature flag is not enabled: the
%% answer does not matter for an enabled feature flag, and
%% {@link is_supported/1} may have to query every running remote node.
%%
%% @param FeatureName The name of the feature flag to check.
%% @returns `enabled', `disabled' or `unavailable'.

get_state(FeatureName) when is_atom(FeatureName) ->
    case is_enabled(FeatureName) of
        true ->
            enabled;
        false ->
            %% Only now do we need to know if the cluster supports it.
            case is_supported(FeatureName) of
                true  -> disabled;
                false -> unavailable
            end
    end.
688
-spec get_stability(feature_name() | feature_props_extended()) ->
    stability() | undefined.
%% @doc
%% Returns the stability of a feature flag.
%%
%% The possible stability levels are:
%% <ul>
%% <li>`stable': the feature flag is stable and will not change in future
%%   releases: it can be enabled in production.</li>
%% <li>`experimental': the feature flag is experimental and may change in
%%   the future (without a guaranteed upgrade path): enabling it in
%%   production is not recommended.</li>
%% </ul>
%%
%% A feature flag which does not declare its stability is considered
%% `stable'.
%%
%% @param FeatureName The name of the feature flag to check, or its
%%   properties map.
%% @returns `stable' or `experimental', or `undefined' if a name was
%%   given and the registry does not know that feature flag.

get_stability(FeatureName) when is_atom(FeatureName) ->
    case rabbit_ff_registry:get(FeatureName) of
        undefined    -> undefined;
        FeatureProps -> get_stability(FeatureProps)
    end;
get_stability(FeatureProps) when is_map(FeatureProps) ->
    %% `stability' is an optional property: default to `stable'.
    maps:get(stability, FeatureProps, stable).
714
715%% -------------------------------------------------------------------
716%% Feature flags registry.
717%% -------------------------------------------------------------------
718
-spec init() -> ok | no_return().
%% @private

init() ->
    %% Make sure the `feature_flags` file is there once RabbitMQ has been
    %% started at least once. This module copes with a missing file, but
    %% external tools benefit from its presence. The result is ignored.
    _ = ensure_enabled_feature_flags_list_file_exists(),

    %% Listing the supported feature flags triggers the first
    %% initialization of the registry; the list itself is of no interest
    %% here.
    _ = list(),
    ok.
734
-spec initialize_registry() -> ok | {error, any()} | no_return().
%% @private
%% @doc
%% Initializes or reinitializes the registry.
%%
%% The registry is an Erlang module, {@link rabbit_ff_registry},
%% recompiled at runtime to hold the state of all supported feature
%% flags. Its initial source code simply calls this function so that it
%% gets replaced by a proper registry.
%%
%% Once replaced, the registry contains the map of all supported feature
%% flags and their state. This makes it very efficient to query a
%% feature flag state or property.
%%
%% The registry is local to each RabbitMQ node.
%%
%% Same as {@link initialize_registry/1} with no new feature flags.

initialize_registry() ->
    NoNewFeatureFlags = #{},
    initialize_registry(NoNewFeatureFlags).
755
-spec initialize_registry(feature_flags()) ->
    ok | {error, any()} | no_return().
%% @private
%% @doc
%% Initializes or reinitializes the registry.
%%
%% See {@link initialize_registry/0} for a description of the registry.
%%
%% This function takes a map of new supported feature flags (so their
%% name and extended properties) to add to the existing known feature
%% flags. The feature flag states and the "written to disk" marker are
%% carried over from the current registry, or bootstrapped if there is
%% no initialized registry yet.

initialize_registry(NewSupportedFeatureFlags) ->
    {FeatureStates, WrittenToDisk} =
        case rabbit_ff_registry:is_registry_initialized() of
            true ->
                %% The registry is about to be replaced: query the
                %% states and the "written to disk" marker it currently
                %% holds.
                %%
                %% The marker is not used for now, but it might serve to
                %% retry the write if it failed so far.
                %%
                %% TODO: Retry to write the feature flags state if the
                %% first try failed.
                {rabbit_ff_registry:states(),
                 rabbit_ff_registry:is_registry_written_to_disk()};
            false ->
                %% First initialization: the enabled feature flags are
                %% read from disk (the `feature_flags` file) and the
                %% states are considered written to disk.
                EnabledFeatureNames = read_enabled_feature_flags_list(),
                {list_of_enabled_feature_flags_to_feature_states(
                   EnabledFeatureNames),
                 true}
        end,
    initialize_registry(NewSupportedFeatureFlags,
                        FeatureStates,
                        WrittenToDisk).
799
-spec list_of_enabled_feature_flags_to_feature_states([feature_name()]) ->
    feature_states().
%% Builds a feature states map in which every given feature flag name is
%% associated with `true' (i.e. enabled).

list_of_enabled_feature_flags_to_feature_states(FeatureNames) ->
    lists:foldl(
      fun(FeatureName, States) -> States#{FeatureName => true} end,
      #{}, FeatureNames).
805
-spec initialize_registry(feature_flags(),
                          feature_states(),
                          boolean()) ->
    ok | {error, any()} | no_return().
%% @private
%% @doc
%% Initializes or reinitializes the registry.
%%
%% See {@link initialize_registry/0} for a description of the registry.
%%
%% The arguments are:
%% <ul>
%% <li>a map of new supported feature flags (their name and extended
%%   properties) to add to the feature flags already known;</li>
%% <li>a map of the new feature flag states (`true', `false' or
%%   `state_changing');</li>
%% <li>a boolean indicating if the feature flag states were recorded to
%%   disk.</li>
%% </ul>
%%
%% The actual work is done by `maybe_initialize_registry/3'. When it
%% returns `restart', the whole computation is started again with the
%% same arguments. Any other return value is returned as is.

initialize_registry(NewSupportedFeatureFlags,
                    NewFeatureStates,
                    WrittenToDisk) ->
    case maybe_initialize_registry(NewSupportedFeatureFlags,
                                   NewFeatureStates,
                                   WrittenToDisk) of
        restart ->
            %% We were asked to redo the computation from scratch.
            initialize_registry(NewSupportedFeatureFlags,
                                NewFeatureStates,
                                WrittenToDisk);
        OkOrError ->
            OkOrError
    end.
838
-spec maybe_initialize_registry(feature_flags(),
                                feature_states(),
                                boolean()) ->
    ok | restart | {error, any()} | no_return().
%% @private
%% @doc
%% Computes the new content of the registry and regenerates it if
%% something changed.
%%
%% Returns `ok' if the registry is up-to-date or was regenerated, or
%% whatever `do_initialize_registry/4' returns otherwise (this includes
%% `restart', which asks the caller to call this function again).

maybe_initialize_registry(NewSupportedFeatureFlags,
                          NewFeatureStates,
                          WrittenToDisk) ->
    %% We save the version of the current registry before computing
    %% the new one. This is used when we do the actual reload: if the
    %% current registry was reloaded in the meantime, we need to restart
    %% the computation to make sure we don't lose data.
    RegistryVsn = registry_vsn(),

    %% We take the feature flags already registered.
    RegistryInitialized = rabbit_ff_registry:is_registry_initialized(),
    KnownFeatureFlags1 = case RegistryInitialized of
                             true  -> rabbit_ff_registry:list(all);
                             false -> #{}
                         end,

    %% Query the list (it's a map to be exact) of known
    %% supported feature flags. That list comes from the
    %% `-rabbitmq_feature_flag().` module attributes exposed by all
    %% currently loaded Erlang modules.
    KnownFeatureFlags2 = query_supported_feature_flags(),

    %% We merge the feature flags we already knew about
    %% (KnownFeatureFlags1), those found in the loaded applications
    %% (KnownFeatureFlags2) and those specified in arguments
    %% (NewSupportedFeatureFlags). The latter come from remote nodes
    %% usually: for example, they can come from plugins loaded on remote
    %% node but the plugins are missing locally. In this case, we
    %% consider those feature flags supported because there is no code
    %% locally which would cause issues.
    %%
    %% It means that the list of feature flags only grows. We don't try
    %% to clean it at some point because we want to remember about the
    %% feature flags we saw (and their state). It should be fine because
    %% that list should remain small.
    KnownFeatureFlags = maps:merge(KnownFeatureFlags1,
                                   KnownFeatureFlags2),
    AllFeatureFlags = maps:merge(KnownFeatureFlags,
                                 NewSupportedFeatureFlags),

    %% Next we want to update the feature states, based on the new
    %% states passed as arguments.
    FeatureStates0 = case RegistryInitialized of
                         true ->
                             maps:merge(rabbit_ff_registry:states(),
                                        NewFeatureStates);
                         false ->
                             NewFeatureStates
                     end,
    %% Disabled feature flags are not kept in the states map: only
    %% `true' and `state_changing' entries remain.
    FeatureStates = maps:filter(
                      fun(_, true) -> true;
                         (_, state_changing) -> true;
                         (_, false) -> false
                      end, FeatureStates0),

    Proceed = does_registry_need_refresh(AllFeatureFlags,
                                         FeatureStates,
                                         WrittenToDisk),

    case Proceed of
        true ->
            rabbit_log_feature_flags:debug(
              "Feature flags: (re)initialize registry (~p)",
              [self()]),
            %% The duration is measured with the monotonic clock so
            %% that an adjustment of the system time during the
            %% regeneration can't skew (or make negative) the reported
            %% value.
            T0 = erlang:monotonic_time(microsecond),
            Ret = do_initialize_registry(RegistryVsn,
                                         AllFeatureFlags,
                                         FeatureStates,
                                         WrittenToDisk),
            T1 = erlang:monotonic_time(microsecond),
            rabbit_log_feature_flags:debug(
              "Feature flags: time to regen registry: ~p µs",
              [T1 - T0]),
            Ret;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flags: registry already up-to-date, skipping init"),
            ok
    end.
923
-spec does_registry_need_refresh(feature_flags(),
                                 feature_states(),
                                 boolean()) ->
    boolean().
%% @private
%% @doc
%% Indicates if the registry must be regenerated.
%%
%% The registry needs a refresh if it was never initialized, or if the
%% given feature flags, feature states or "written to disk" flag differ
%% from what the current registry reports. The reason is logged at the
%% debug level.

does_registry_need_refresh(AllFeatureFlags,
                           FeatureStates,
                           WrittenToDisk) ->
    case rabbit_ff_registry:is_registry_initialized() of
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flags: registry refresh needed: "
              "yes, first-time initialization"),
            true;
        true ->
            %% We compare each piece of the new content with what the
            %% current registry holds. The first difference found, in
            %% the order below, is the one reported.
            Unchanged =
            {AllFeatureFlags =:= rabbit_ff_registry:list(all),
             FeatureStates =:= rabbit_ff_registry:states(),
             WrittenToDisk =:=
             rabbit_ff_registry:is_registry_written_to_disk()},
            case Unchanged of
                {false, _, _} ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: registry refresh needed: "
                      "yes, list of feature flags differs"),
                    true;
                {true, false, _} ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: registry refresh needed: "
                      "yes, feature flag states differ"),
                    true;
                {true, true, false} ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: registry refresh needed: "
                      "yes, \"written to disk\" state changed"),
                    true;
                {true, true, true} ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: registry refresh needed: no"),
                    false
            end
    end.
969
-spec do_initialize_registry(registry_vsn(),
                             feature_flags(),
                             feature_states(),
                             boolean()) ->
    ok | restart | {error, any()} | no_return().
%% @private
%% @doc
%% Logs the content of the new registry, then regenerates it.
%%
%% The return value is the one of `regen_registry_mod/4'.

do_initialize_registry(RegistryVsn,
                       AllFeatureFlags,
                       FeatureStates,
                       WrittenToDisk) ->
    %% Each feature flag is logged on its own line, sorted by name,
    %% with a marker reflecting its state: "x" if enabled, "~" if its
    %% state is changing and a space if it has no entry in the states
    %% map.
    StateMarker = fun(FeatureName) ->
                          case maps:find(FeatureName, FeatureStates) of
                              {ok, true}           -> "x";
                              {ok, state_changing} -> "~";
                              error                -> " "
                          end
                  end,
    SortedFeatureNames = lists:sort(maps:keys(AllFeatureFlags)),
    rabbit_log_feature_flags:info(
      "Feature flags: list of feature flags found:"),
    lists:foreach(
      fun(FeatureName) ->
              rabbit_log_feature_flags:info(
                "Feature flags:   [~s] ~s",
                [StateMarker(FeatureName), FeatureName])
      end, SortedFeatureNames),

    WrittenToDiskLabel = case WrittenToDisk of
                             true  -> "yes";
                             false -> "no"
                         end,
    rabbit_log_feature_flags:info(
      "Feature flags: feature flag states written to disk: ~s",
      [WrittenToDiskLabel]),

    %% Now that the new state is logged, the registry module can be
    %% regenerated and reloaded with it.
    regen_registry_mod(RegistryVsn,
                       AllFeatureFlags,
                       FeatureStates,
                       WrittenToDisk).
1012
1013-spec query_supported_feature_flags() -> feature_flags().
1014%% @private
1015
1016-ifdef(TEST).
1017-define(PT_TESTSUITE_ATTRS, {?MODULE, testsuite_feature_flags_attrs}).
1018
%% Records the feature flags attributes provided by a testsuite in a
%% persistent term, then reinitializes the registry so that
%% `query_supported_feature_flags/0' picks them up.
inject_test_feature_flags(AttributesFromTestsuite) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: injecting feature flags from testsuite: ~p",
      [AttributesFromTestsuite]),
    PersistentTermKey = ?PT_TESTSUITE_ATTRS,
    ok = persistent_term:put(PersistentTermKey, AttributesFromTestsuite),
    initialize_registry().
1025
%% Returns the feature flags attributes previously injected by a
%% testsuite, or an empty list if nothing was injected.
module_attributes_from_testsuite() ->
    NothingInjected = [],
    persistent_term:get(?PT_TESTSUITE_ATTRS, NothingInjected).
1028
%% Testsuite variant: returns the map of feature flags declared through
%% the `rabbit_feature_flag' module attribute, as reported by
%% `rabbit_misc:rabbitmq_related_module_attributes/1', plus those
%% injected by the testsuite.
query_supported_feature_flags() ->
    rabbit_log_feature_flags:debug(
      "Feature flags: query feature flags in loaded applications "
      "+ testsuite"),
    %% The duration is measured with the monotonic clock so that an
    %% adjustment of the system time can't skew the reported value.
    T0 = erlang:monotonic_time(microsecond),
    AttributesPerApp = rabbit_misc:rabbitmq_related_module_attributes(
                         rabbit_feature_flag),
    AttributesFromTestsuite = module_attributes_from_testsuite(),
    T1 = erlang:monotonic_time(microsecond),
    rabbit_log_feature_flags:debug(
      "Feature flags: time to find supported feature flags: ~p µs",
      [T1 - T0]),
    AllAttributes = AttributesPerApp ++ AttributesFromTestsuite,
    prepare_queried_feature_flags(AllAttributes, #{}).
1043-else.
%% Returns the map of feature flags declared through the
%% `rabbit_feature_flag' module attribute, as reported by
%% `rabbit_misc:rabbitmq_related_module_attributes/1'.
query_supported_feature_flags() ->
    rabbit_log_feature_flags:debug(
      "Feature flags: query feature flags in loaded applications"),
    %% The duration is measured with the monotonic clock so that an
    %% adjustment of the system time can't skew the reported value.
    T0 = erlang:monotonic_time(microsecond),
    AttributesPerApp = rabbit_misc:rabbitmq_related_module_attributes(
                         rabbit_feature_flag),
    T1 = erlang:monotonic_time(microsecond),
    rabbit_log_feature_flags:debug(
      "Feature flags: time to find supported feature flags: ~p µs",
      [T1 - T0]),
    prepare_queried_feature_flags(AttributesPerApp, #{}).
1055-endif.
1056
%% Folds the queried module attributes into a map of feature flags.
%%
%% Each element of the list is an `{App, Module, Attributes}' tuple
%% where `Attributes' is a list of `{FeatureName, FeatureProps}'
%% tuples. Elements are processed in order; every feature flag is added
%% to the accumulator with `merge_new_feature_flags/4'.
prepare_queried_feature_flags(AttributesPerApp, AllFeatureFlags) ->
    lists:foldl(
      fun({App, _Module, Attributes}, FeatureFlagsSoFar) ->
              rabbit_log_feature_flags:debug(
                "Feature flags: application `~s` has ~b feature flags",
                [App, length(Attributes)]),
              lists:foldl(
                fun({FeatureName, FeatureProps}, AllFF) ->
                        merge_new_feature_flags(AllFF,
                                                App,
                                                FeatureName,
                                                FeatureProps)
                end, FeatureFlagsSoFar, Attributes)
      end, AllFeatureFlags, AttributesPerApp).
1072
-spec merge_new_feature_flags(feature_flags(),
                              atom(),
                              feature_name(),
                              feature_props()) -> feature_flags().
%% @private
%% @doc
%% Adds a feature flag to the given map of feature flags.
%%
%% The feature flag properties are stored under the feature name, after
%% the `provided_by' key is set to the name of the application providing
%% the feature flag. An existing entry with the same name is replaced.

merge_new_feature_flags(AllFeatureFlags, App, FeatureName, FeatureProps)
  when is_atom(FeatureName), is_map(FeatureProps) ->
    %% `provided_by' is only informational for now, but it can be handy
    %% to understand that a feature flag comes from a plugin.
    AllFeatureFlags#{FeatureName => FeatureProps#{provided_by => App}}.
1088
-spec regen_registry_mod(registry_vsn(),
                         feature_flags(),
                         feature_states(),
                         boolean()) ->
    ok | restart | {error, any()} | no_return().
%% @private
%% @doc
%% Generates, compiles and loads a new `rabbit_ff_registry' module.
%%
%% The module is built from scratch as abstract forms holding the given
%% feature flags, feature states and "written to disk" flag as
%% constants. If the compilation succeeds, the result of
%% `load_registry_mod/3' is returned. Otherwise, the errors and
%% warnings are logged and
%% `{error, {compilation_failure, Errors, Warnings}}' is returned.

regen_registry_mod(RegistryVsn,
                   AllFeatureFlags,
                   FeatureStates,
                   WrittenToDisk) ->
    %% Here, we recreate the source code of the `rabbit_ff_registry`
    %% module from scratch.
    %%
    %% IMPORTANT: We want both modules to have the exact same public
    %% API in order to simplify the life of developers and their tools
    %% (Dialyzer, completion, and so on).

    %% Small constructors shared by all the generated forms below.
    Atom = fun erl_syntax:atom/1,
    MkClause = fun(Patterns, Body) ->
                       erl_syntax:clause(Patterns, [], [Body])
               end,
    MkFunForm = fun(Name, Clauses) ->
                        erl_syntax:revert(
                          erl_syntax:function(Atom(Name), Clauses))
                end,
    MkAttrForm = fun(Name, Args) ->
                         erl_syntax:revert(
                           erl_syntax:attribute(Atom(Name), Args))
                 end,
    %% Pattern of the catch-all clause (`_') closing the functions
    %% taking a feature name.
    AnyPattern = [erl_syntax:variable("_")],
    FeatureNames = maps:keys(AllFeatureFlags),
    %% State of a feature flag; `false' if it has no entry in the
    %% states map.
    StateOf = fun(FeatureName) ->
                      maps:get(FeatureName, FeatureStates, false)
              end,

    %% -module(rabbit_ff_registry).
    ModuleForm = MkAttrForm(module, [Atom(rabbit_ff_registry)]),

    %% -export([...]).
    Exports = [{get, 1},
               {list, 1},
               {states, 0},
               {is_supported, 1},
               {is_enabled, 1},
               {is_registry_initialized, 0},
               {is_registry_written_to_disk, 0}],
    ExportForm = MkAttrForm(
                   export,
                   [erl_syntax:list(
                      [erl_syntax:arity_qualifier(
                         Atom(F),
                         erl_syntax:integer(A))
                       || {F, A} <- Exports])]),

    %% get(_) -> ...
    %% One clause per feature flag returning its properties, then a
    %% catch-all clause returning `undefined'.
    GetClauses = [MkClause(
                    [Atom(FeatureName)],
                    erl_syntax:abstract(
                      maps:get(FeatureName, AllFeatureFlags)))
                  || FeatureName <- FeatureNames],
    GetFunForm = MkFunForm(
                   get,
                   GetClauses ++ [MkClause(AnyPattern, Atom(undefined))]),

    %% list(_) -> ...
    %% One clause per selector: `all', `enabled', `disabled' and
    %% `state_changing'. Each returns the relevant subset of feature
    %% flags.
    MkListClause = fun(Which, FeatureFlags) ->
                           MkClause([Atom(Which)],
                                    erl_syntax:abstract(FeatureFlags))
                   end,
    SelectByState = fun(State) ->
                            maps:filter(
                              fun(FeatureName, _) ->
                                      StateOf(FeatureName) =:= State
                              end, AllFeatureFlags)
                    end,
    %% A feature flag is listed as disabled when it has no entry at all
    %% in the states map.
    DisabledFeatureFlags = maps:filter(
                             fun(FeatureName, _) ->
                                     not maps:is_key(FeatureName,
                                                     FeatureStates)
                             end, AllFeatureFlags),
    ListFunForm = MkFunForm(
                    list,
                    [MkListClause(all, AllFeatureFlags),
                     MkListClause(enabled, SelectByState(true)),
                     MkListClause(disabled, DisabledFeatureFlags),
                     MkListClause(state_changing,
                                  SelectByState(state_changing))]),

    %% states() -> ...
    StatesFunForm = MkFunForm(
                      states,
                      [MkClause([], erl_syntax:abstract(FeatureStates))]),

    %% is_supported(_) -> ...
    %% `true' for every known feature flag, `false' otherwise.
    IsSupportedClauses = [MkClause([Atom(FeatureName)], Atom(true))
                          || FeatureName <- FeatureNames],
    IsSupportedFunForm = MkFunForm(
                           is_supported,
                           IsSupportedClauses ++
                           [MkClause(AnyPattern, Atom(false))]),

    %% is_enabled(_) -> ...
    %% The recorded state for every known feature flag (`false' if it
    %% has none), and `false' for unknown feature flags.
    IsEnabledClauses = [MkClause([Atom(FeatureName)],
                                 Atom(StateOf(FeatureName)))
                        || FeatureName <- FeatureNames],
    IsEnabledFunForm = MkFunForm(
                         is_enabled,
                         IsEnabledClauses ++
                         [MkClause(AnyPattern, Atom(false))]),

    %% is_registry_initialized() -> ...
    IsInitializedFunForm = MkFunForm(
                             is_registry_initialized,
                             [MkClause([], Atom(true))]),

    %% is_registry_written_to_disk() -> ...
    IsWrittenToDiskFunForm = MkFunForm(
                               is_registry_written_to_disk,
                               [MkClause([], Atom(WrittenToDisk))]),

    %% Compilation!
    Forms = [ModuleForm,
             ExportForm,
             GetFunForm,
             ListFunForm,
             StatesFunForm,
             IsSupportedFunForm,
             IsEnabledFunForm,
             IsInitializedFunForm,
             IsWrittenToDiskFunForm],
    maybe_log_registry_source_code(Forms),
    case compile:forms(Forms, [return_errors, return_warnings]) of
        {ok, Mod, Bin, _Warnings} ->
            load_registry_mod(RegistryVsn, Mod, Bin);
        {error, Errors, Warnings} ->
            rabbit_log_feature_flags:error(
              "Feature flags: registry compilation:~n"
              "Errors: ~p~n"
              "Warnings: ~p",
              [Errors, Warnings]),
            {error, {compilation_failure, Errors, Warnings}}
    end.
1283
%% Logs the pretty-printed source code of the registry described by
%% `Forms', at the debug level, if the `log_feature_flags_registry' key
%% is set to `true' in the context returned by
%% `rabbit_prelaunch:get_context/0'. Does nothing otherwise.
maybe_log_registry_source_code(Forms) ->
    LogSourceCode = case rabbit_prelaunch:get_context() of
                        #{log_feature_flags_registry := true} -> true;
                        _                                     -> false
                    end,
    case LogSourceCode of
        true ->
            SourceCode = erl_prettypr:format(erl_syntax:form_list(Forms)),
            rabbit_log_feature_flags:debug(
              "== FEATURE FLAGS REGISTRY ==~n"
              "~s~n"
              "== END ==~n",
              [SourceCode]);
        false ->
            ok
    end.
1295
1296-ifdef(TEST).
%% Returns the identifier of the lock taken while the registry module is
%% being reloaded. Only available when compiled for tests.
registry_loading_lock() ->
    ?FF_REGISTRY_LOADING_LOCK.
1298-endif.
1299
-spec load_registry_mod(registry_vsn(), atom(), binary()) ->
    ok | restart | no_return().
%% @private
%% @doc
%% Loads the freshly compiled registry module in place of the current
%% one.
%%
%% `RegistryVsn' is the version of the registry the new module was
%% computed from. If the registry loaded at the time of the swap has
%% another version, nothing is loaded and `restart' is returned so the
%% caller redoes the computation. If the code server fails to load the
%% module, `{feature_flag_registry_reload_failure, Reason}' is thrown.

load_registry_mod(RegistryVsn, Mod, Bin) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: registry module ready, loading it (~p)...",
      [self()]),
    FakeFilename = "Compiled and loaded by " ?MODULE_STRING,
    %% Time to load the new registry, replacing the old one. We use a
    %% lock here to synchronize concurrent reloads.
    global:set_lock(?FF_REGISTRY_LOADING_LOCK, [node()]),
    %% Everything done while holding the lock runs inside a
    %% `try ... after' block: the lock is released even if one of the
    %% steps raises an exception. Otherwise, a caller catching that
    %% exception would keep the lock and block all further reloads.
    Ret = try
              rabbit_log_feature_flags:debug(
                "Feature flags: acquired lock before reloading registry "
                "module (~p)",
                [self()]),
              %% We want to make sure that the old registry (not the
              %% one being currently in use) is purged by the code
              %% server. It means no process lingers on that old code.
              %%
              %% We use code:soft_purge() for that (meaning no process
              %% is killed) and we wait in an infinite loop for that to
              %% succeed.
              ok = purge_old_registry(Mod),
              %% Now we can replace the currently loaded registry by
              %% the new one. The code server takes care of marking the
              %% current registry as old and load the new module in an
              %% atomic operation.
              %%
              %% Therefore there is no chance of a window where there
              %% is no registry module available, causing the one on
              %% disk to be reloaded.
              case registry_vsn() of
                  RegistryVsn ->
                      code:load_binary(Mod, FakeFilename, Bin);
                  OtherVsn ->
                      {error, {restart, RegistryVsn, OtherVsn}}
              end
          after
              rabbit_log_feature_flags:debug(
                "Feature flags: releasing lock after reloading registry "
                "module (~p)",
                [self()]),
              global:del_lock(?FF_REGISTRY_LOADING_LOCK, [node()])
          end,
    case Ret of
        {module, _} ->
            rabbit_log_feature_flags:debug(
              "Feature flags: registry module loaded (vsn: ~p -> ~p)",
              [RegistryVsn, registry_vsn()]),
            ok;
        {error, {restart, Expected, Current}} ->
            rabbit_log_feature_flags:error(
              "Feature flags: another registry module was loaded in the "
              "meantime (expected old vsn: ~p, current vsn: ~p); "
              "restarting the regen",
              [Expected, Current]),
            restart;
        {error, Reason} ->
            rabbit_log_feature_flags:error(
              "Feature flags: failed to load registry module: ~p",
              [Reason]),
            throw({feature_flag_registry_reload_failure, Reason})
    end.
1356
-spec registry_vsn() -> registry_vsn().
%% @private
%% @doc
%% Returns the value of the `vsn' attribute of the currently loaded
%% `rabbit_ff_registry' module, or `undefined' if it has none.

registry_vsn() ->
    proplists:get_value(
      vsn,
      rabbit_ff_registry:module_info(attributes),
      undefined).
1363
%% Purges the old code of the given module if the module is loaded.
%% There is nothing to do if it is not loaded.
purge_old_registry(Mod) ->
    case code:is_loaded(Mod) of
        false ->
            ok;
        {file, _Loaded} ->
            do_purge_old_registry(Mod)
    end.
1369
%% Calls `code:soft_purge/1' on the given module, again and again,
%% until it returns `true'. There is no pause between two attempts.
do_purge_old_registry(Mod) ->
    StillInUse = not code:soft_purge(Mod),
    case StillInUse of
        true  -> do_purge_old_registry(Mod);
        false -> ok
    end.
1375
1376%% -------------------------------------------------------------------
1377%% Feature flags state storage.
1378%% -------------------------------------------------------------------
1379
-spec ensure_enabled_feature_flags_list_file_exists() -> ok | {error, any()}.
%% @private
%% @doc
%% Makes sure the file storing the list of enabled feature flags exists.
%%
%% If the path is not a regular file, an empty list of enabled feature
%% flags is written through `write_enabled_feature_flags_list/1'.

ensure_enabled_feature_flags_list_file_exists() ->
    FileExists = filelib:is_regular(enabled_feature_flags_list_file()),
    case FileExists of
        false -> write_enabled_feature_flags_list([]);
        true  -> ok
    end.
1389
-spec read_enabled_feature_flags_list() ->
    [feature_name()] | no_return().
%% @private
%% @doc
%% Reads the list of enabled feature flags from disk.
%%
%% Same as `try_to_read_enabled_feature_flags_list/0', except that a
%% read error is turned into a
%% `{feature_flags_file_read_error, File, Reason}' exception.

read_enabled_feature_flags_list() ->
    ReadRet = try_to_read_enabled_feature_flags_list(),
    case ReadRet of
        {error, Reason} ->
            throw({feature_flags_file_read_error,
                   enabled_feature_flags_list_file(),
                   Reason});
        _ ->
            ReadRet
    end.
1402
-spec try_to_read_enabled_feature_flags_list() ->
    [feature_name()] | {error, any()}.
%% @private
%% @doc
%% Reads the list of enabled feature flags from disk.
%%
%% The file is expected to contain a single Erlang term which is the
%% list of enabled feature flag names. A missing file is considered an
%% empty list.
%%
%% Returns the list, `{error, Reason}' if the file could not be read or
%% parsed, or `{error, {invalid_content, Terms}}' if the file was parsed
%% but does not contain exactly one list. Errors are logged.

try_to_read_enabled_feature_flags_list() ->
    File = enabled_feature_flags_list_file(),
    case file:consult(File) of
        {ok, [List]} when is_list(List) ->
            List;
        {ok, Terms} ->
            %% The file is readable but holds something else than a
            %% single list (it is empty, or it has several terms, or a
            %% term of another type). We report that as a regular error
            %% instead of crashing with a `case_clause' exception.
            rabbit_log_feature_flags:error(
              "Feature flags: unexpected content in the `feature_flags` "
              "file at `~s`: ~p",
              [File, Terms]),
            {error, {invalid_content, Terms}};
        {error, enoent} ->
            %% If the file is missing, we consider the list of enabled
            %% feature flags to be empty.
            [];
        {error, Reason} = Error ->
            rabbit_log_feature_flags:error(
              "Feature flags: failed to read the `feature_flags` "
              "file at `~s`: ~s",
              [File, file:format_error(Reason)]),
            Error
    end.
1423
-spec write_enabled_feature_flags_list([feature_name()]) ->
    ok | no_return().
%% @private
%% @doc
%% Writes the list of enabled feature flags to disk.
%%
%% Same as `try_to_write_enabled_feature_flags_list/1', except that a
%% write error is turned into a
%% `{feature_flags_file_write_error, File, Reason}' exception.

write_enabled_feature_flags_list(FeatureNames) ->
    WriteRet = try_to_write_enabled_feature_flags_list(FeatureNames),
    case WriteRet of
        {error, Reason} ->
            throw({feature_flags_file_write_error,
                   enabled_feature_flags_list_file(),
                   Reason});
        _ ->
            WriteRet
    end.
1436
-spec try_to_write_enabled_feature_flags_list([feature_name()]) ->
    ok | {error, any()}.
%% @private
%% @doc
%% Writes the list of enabled feature flags to disk.
%%
%% The names already recorded in the file which are not supported
%% locally are kept in the new file. The written list is sorted and
%% free of duplicates.
%%
%% Returns `ok', or `{error, Reason}' if the file could not be written
%% (the error is logged).

try_to_write_enabled_feature_flags_list(FeatureNames) ->
    %% Before writing the new file, we read the existing one. If there
    %% are unknown feature flags in that file, we want to keep their
    %% state, even though they are unsupported at this time. It could be
    %% that a plugin was disabled in the meantime.
    %%
    %% A failure to read the existing file is deliberately ignored: we
    %% then have no previous state to preserve.
    %%
    %% FIXME: Lock this code to fix concurrent read/modify/write.
    PreviouslyEnabled = case try_to_read_enabled_feature_flags_list() of
                            {error, _} -> [];
                            List       -> List
                        end,
    UnsupportedLocally = [Name
                          || Name <- PreviouslyEnabled,
                             not is_supported_locally(Name)],
    %% A preserved name may also be part of `FeatureNames'. We use
    %% lists:usort/1 so it is written only once: with a plain sort, the
    %% duplicates would be read back and added again on each write.
    FeatureNames1 = lists:usort(UnsupportedLocally ++ FeatureNames),

    File = enabled_feature_flags_list_file(),
    Content = io_lib:format("~p.~n", [FeatureNames1]),
    %% TODO: If we fail to write the file, we should spawn a process
    %% to retry the operation.
    case file:write_file(File, Content) of
        ok ->
            ok;
        {error, Reason} = Error ->
            rabbit_log_feature_flags:error(
              "Feature flags: failed to write the `feature_flags` "
              "file at `~s`: ~s",
              [File, file:format_error(Reason)]),
            Error
    end.
1475
-spec enabled_feature_flags_list_file() -> file:filename().
%% @doc
%% Returns the path to the file where the state of feature flags is stored.
%%
%% The path is taken from the `feature_flags_file' parameter of the
%% `rabbit' application. If that parameter is not set, the
%% `feature_flags_file_not_set' exception is thrown.
%%
%% @returns the path to the file.

enabled_feature_flags_list_file() ->
    Configured = application:get_env(rabbit, feature_flags_file),
    case Configured of
        undefined  -> throw(feature_flags_file_not_set);
        {ok, File} -> File
    end.
1487
1488%% -------------------------------------------------------------------
1489%% Feature flags management: enabling.
1490%% -------------------------------------------------------------------
1491
-spec do_enable(feature_name()) -> ok | {error, any()} | no_return().
%% @private
%% @doc
%% Enables a feature flag while holding the global state change lock.
%%
%% The feature flag is first marked as `state_changing'. Then its
%% dependencies are enabled and its migration function is executed. If
%% everything succeeds (a missing migration function counts as a
%% success), the feature flag is marked as enabled. If any step returns
%% an error, the feature flag is marked as disabled again and that
%% error is returned.
%%
%% The lock is released in all cases, including when a step raises an
%% exception.

do_enable(FeatureName) ->
    %% We mark this feature flag as "state changing" before doing the
    %% actual state change. We also take a global lock: this permits
    %% to block callers asking about a feature flag changing state.
    global:set_lock(?FF_STATE_CHANGE_LOCK),
    try
        Ret = case mark_as_enabled(FeatureName, state_changing) of
                  ok ->
                      case enable_dependencies(FeatureName, true) of
                          ok ->
                              case run_migration_fun(FeatureName, enable) of
                                  ok ->
                                      mark_as_enabled(FeatureName, true);
                                  {error, no_migration_fun} ->
                                      mark_as_enabled(FeatureName, true);
                                  Error ->
                                      Error
                              end;
                          Error ->
                              Error
                      end;
                  Error ->
                      Error
              end,
        %% On failure, the feature flag goes back to the disabled
        %% state. The result of that rollback is ignored: the caller
        %% gets the original error.
        case Ret of
            ok -> ok;
            _  -> mark_as_enabled(FeatureName, false)
        end,
        Ret
    after
        %% Release the lock even if an exception was raised above.
        %% Otherwise a caller catching that exception would keep the
        %% lock and block every other process waiting for it.
        global:del_lock(?FF_STATE_CHANGE_LOCK)
    end.
1524
-spec enable_locally(feature_name()) -> ok | {error, any()} | no_return().
%% @private
%% @doc
%% Enables a feature flag on the local node only, unless `is_enabled/1'
%% reports it as already enabled, in which case nothing is done.

enable_locally(FeatureName) when is_atom(FeatureName) ->
    case is_enabled(FeatureName) of
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flag `~s`: enable locally (as part of feature "
              "flag states synchronization)",
              [FeatureName]),
            do_enable_locally(FeatureName);
        true ->
            %% Nothing to do.
            ok
    end.
1539
-spec do_enable_locally(feature_name()) -> ok | {error, any()} | no_return().
%% @private
%% @doc
%% Enables the dependencies of a feature flag locally, runs its
%% migration function, then marks the feature flag as enabled on the
%% local node. A missing migration function is not treated as an error.
%% The first step returning something other than `ok' stops the
%% sequence and its value is returned.

do_enable_locally(FeatureName) ->
    case enable_dependencies(FeatureName, false) of
        ok ->
            case run_migration_fun(FeatureName, enable) of
                MigrationRet
                  when MigrationRet =:= ok orelse
                       MigrationRet =:= {error, no_migration_fun} ->
                    mark_as_enabled_locally(FeatureName, true);
                MigrationError ->
                    MigrationError
            end;
        DependenciesError ->
            DependenciesError
    end.
1557
-spec enable_dependencies(feature_name(), boolean()) ->
    ok | {error, any()} | no_return().
%% @private
%% @doc
%% Looks up the `depends_on' property of a feature flag in the registry
%% (defaulting to no dependencies) and enables each listed feature flag
%% through `enable_dependencies/3'.

enable_dependencies(FeatureName, Everywhere) ->
    Props = rabbit_ff_registry:get(FeatureName),
    Dependencies = maps:get(depends_on, Props, []),
    rabbit_log_feature_flags:debug(
      "Feature flag `~s`: enable dependencies: ~p",
      [FeatureName, Dependencies]),
    enable_dependencies(FeatureName, Dependencies, Everywhere).
1569
-spec enable_dependencies(feature_name(), [feature_name()], boolean()) ->
    ok | {error, any()} | no_return().
%% @private
%% @doc
%% Enables the given feature flags one after the other, using `enable/1'
%% when `Everywhere' is `true' and `enable_locally/1' when it is
%% `false'. Stops at, and returns, the first result which is not `ok'.

enable_dependencies(_TopLevelFeatureName, [], _Everywhere) ->
    ok;
enable_dependencies(TopLevelFeatureName, [Dependency | Others], Everywhere) ->
    EnableRet = case Everywhere of
                    false -> enable_locally(Dependency);
                    true  -> enable(Dependency)
                end,
    case EnableRet of
        ok -> enable_dependencies(TopLevelFeatureName, Others, Everywhere);
        _  -> EnableRet
    end.
1585
-spec run_migration_fun(feature_name(), any()) ->
    any() | {error, any()}.
%% @private
%% @doc
%% Fetches the properties of the feature flag from the registry and
%% hands over to `run_migration_fun/3'.

run_migration_fun(FeatureName, Arg) ->
    run_migration_fun(FeatureName, rabbit_ff_registry:get(FeatureName), Arg).
1593
%% Runs the migration function declared in the `migration_fun' property
%% of `FeatureProps', calling it with `FeatureName', `FeatureProps' and
%% `Arg'.
%%
%% Returns whatever the migration function returns, or:
%%   - `{error, no_migration_fun}' if the property is absent (or `none');
%%   - `{error, {invalid_migration_fun, Value}}' if the property is not
%%     a `{Module, Function}' pair of atoms;
%%   - `{error, {migration_fun_crash, Reason, Stacktrace}}' if the
%%     migration function raised an exception.

run_migration_fun(FeatureName, FeatureProps, Arg) ->
    Declared = maps:get(migration_fun, FeatureProps, none),
    case Declared of
        none ->
            {error, no_migration_fun};
        {MigrationMod, MigrationFun}
          when is_atom(MigrationMod), is_atom(MigrationFun) ->
            rabbit_log_feature_flags:debug(
              "Feature flag `~s`: run migration function ~p with arg: ~p",
              [FeatureName, MigrationFun, Arg]),
            %% Any exception class is converted to an error tuple so
            %% the caller gets a regular return value.
            try
                MigrationMod:MigrationFun(FeatureName, FeatureProps, Arg)
            catch
                _Class:Reason:Stacktrace ->
                    rabbit_log_feature_flags:error(
                      "Feature flag `~s`: migration function crashed: ~p~n~p",
                      [FeatureName, Reason, Stacktrace]),
                    {error, {migration_fun_crash, Reason, Stacktrace}}
            end;
        _ ->
            rabbit_log_feature_flags:error(
              "Feature flag `~s`: invalid migration function: ~p",
              [FeatureName, Declared]),
            {error, {invalid_migration_fun, Declared}}
    end.
1620
-spec mark_as_enabled(feature_name(), feature_state()) ->
    any() | {error, any()} | no_return().
%% @private
%% @doc
%% Records the new state of a feature flag on the local node first and,
%% only if that returned `ok', on the running remote nodes. A local
%% result other than `ok' is returned as is.

mark_as_enabled(FeatureName, IsEnabled) ->
    LocalRet = mark_as_enabled_locally(FeatureName, IsEnabled),
    case LocalRet of
        ok -> mark_as_enabled_remotely(FeatureName, IsEnabled);
        _  -> LocalRet
    end.
1632
-spec mark_as_enabled_locally(feature_name(), feature_state()) ->
    any() | {error, any()} | no_return().
%% @private
%% @doc
%% Records the new state of a feature flag on the local node.
%%
%% The list of enabled feature flags is recomputed from the new state.
%% If it differs from the current one, it is written to disk; otherwise
%% the "written to disk" status already known by the registry is
%% reused. The registry is then re-initialized with the new state of
%% the feature flag.

mark_as_enabled_locally(FeatureName, IsEnabled) ->
    rabbit_log_feature_flags:info(
      "Feature flag `~s`: mark as enabled=~p",
      [FeatureName, IsEnabled]),
    CurrentlyEnabled = maps:keys(list(enabled)),
    UpdatedEnabled =
        case IsEnabled of
            true           -> [FeatureName | CurrentlyEnabled];
            false          -> lists:delete(FeatureName, CurrentlyEnabled);
            %% A state change in progress does not alter the list.
            state_changing -> CurrentlyEnabled
        end,
    WrittenToDisk =
        if
            UpdatedEnabled =:= CurrentlyEnabled ->
                rabbit_ff_registry:is_registry_written_to_disk();
            true ->
                WriteRet = try_to_write_enabled_feature_flags_list(
                             UpdatedEnabled),
                WriteRet =:= ok
        end,
    NewStates = #{FeatureName => IsEnabled},
    initialize_registry(#{}, NewStates, WrittenToDisk).
1660
-spec mark_as_enabled_remotely(feature_name(), feature_state()) ->
    any() | {error, any()} | no_return().
%% @private
%% @doc
%% Records the new state of a feature flag on all nodes returned by
%% `running_remote_nodes/0', using the default `?TIMEOUT'.

mark_as_enabled_remotely(FeatureName, IsEnabled) ->
    mark_as_enabled_remotely(
      running_remote_nodes(), FeatureName, IsEnabled, ?TIMEOUT).
1668
-spec mark_as_enabled_remotely([node()],
                               feature_name(),
                               feature_state(),
                               timeout()) ->
    any() | {error, any()} | no_return().
%% @private
%% @doc
%% Calls `mark_as_enabled_locally/2' on each of the given nodes through
%% `rpc:call/5'.
%%
%% Nodes for which the call did not return `ok' are retried after a one
%% second sleep, with the timeout reduced by the time already spent
%% (measured with the monotonic clock, so the retry budget is not
%% affected by wall clock adjustments) and by the sleep itself. Once
%% the remaining timeout is no longer positive, the function throws
%% `{failed_to_mark_feature_flag_as_enabled_on_remote_nodes,
%% FeatureName, IsEnabled, FailedNodes}'.
%%
%% `Timeout' is used in arithmetic when a retry is needed, so it must
%% be an integer number of milliseconds in that case.
%%
%% @returns `ok' once all nodes returned `ok'.

mark_as_enabled_remotely([], _FeatureName, _IsEnabled, _Timeout) ->
    ok;
mark_as_enabled_remotely(Nodes, FeatureName, IsEnabled, Timeout) ->
    T0 = erlang:monotonic_time(millisecond),
    Rets = [{Node, rpc:call(Node,
                            ?MODULE,
                            mark_as_enabled_locally,
                            [FeatureName, IsEnabled],
                            Timeout)}
            || Node <- Nodes],
    FailedNodes = [Node || {Node, Ret} <- Rets, Ret =/= ok],
    case FailedNodes of
        [] ->
            rabbit_log_feature_flags:debug(
              "Feature flags: `~s` successfully marked as enabled=~p on all "
              "nodes", [FeatureName, IsEnabled]),
            ok;
        _ ->
            rabbit_log_feature_flags:error(
              "Feature flags: failed to mark feature flag `~s` as enabled=~p "
              "on the following nodes:", [FeatureName, IsEnabled]),
            lists:foreach(
              fun({Node, Ret}) ->
                      rabbit_log_feature_flags:error(
                        "Feature flags:   - ~s: ~p",
                        [Node, Ret])
              end,
              [NodeAndRet || {_, Ret} = NodeAndRet <- Rets, Ret =/= ok]),
            Sleep = 1000,
            %% Time spent in the RPC calls above, in milliseconds.
            Duration = erlang:monotonic_time(millisecond) - T0,
            %% The next attempt gets what is left of the timeout, minus
            %% the time we are about to sleep.
            NewTimeout = Timeout - Duration - Sleep,
            if
                NewTimeout > 0 ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags:   retrying with a timeout of ~b "
                      "ms after sleeping for ~b ms",
                      [NewTimeout, Sleep]),
                    timer:sleep(Sleep),
                    mark_as_enabled_remotely(FailedNodes,
                                             FeatureName,
                                             IsEnabled,
                                             NewTimeout);
                true ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags:   not retrying; RPC went over the "
                      "~b milliseconds timeout", [Timeout]),
                    %% FIXME: Is crashing the process the best solution here?
                    throw(
                      {failed_to_mark_feature_flag_as_enabled_on_remote_nodes,
                       FeatureName, IsEnabled, FailedNodes})
            end
    end.
1727
1728%% -------------------------------------------------------------------
1729%% Coordination with remote nodes.
1730%% -------------------------------------------------------------------
1731
-spec remote_nodes() -> [node()].
%% @private
%% @doc
%% Returns the Mnesia `db_nodes', without the local node.

remote_nodes() ->
    DbNodes = mnesia:system_info(db_nodes),
    DbNodes -- [node()].
1737
-spec running_remote_nodes() -> [node()].
%% @private
%% @doc
%% Returns the Mnesia `running_db_nodes', without the local node.

running_remote_nodes() ->
    RunningDbNodes = mnesia:system_info(running_db_nodes),
    RunningDbNodes -- [node()].
1743
%% Asks `Node' for its Mnesia `running_db_nodes' over RPC and returns
%% that list without the local node. A `{badrpc, _}' result is returned
%% unchanged.

query_running_remote_nodes(Node, Timeout) ->
    Reply = rpc:call(Node, mnesia, system_info, [running_db_nodes], Timeout),
    case Reply of
        {badrpc, _} -> Reply;
        _           -> Reply -- [node()]
    end.
1749
-spec does_node_support(node(), [feature_name()], timeout()) -> boolean().
%% @private
%% @doc
%% Tells whether `Node' supports the given feature flags.
%%
%% The question is answered by `is_supported_locally/1', called directly
%% if `Node' is the local node, or over RPC otherwise. Any error,
%% including a node without the feature flags subsystem, is reported as
%% `false'.

does_node_support(Node, FeatureNames, Timeout) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: querying `~p` support on node ~s...",
      [FeatureNames, Node]),
    Answer =
        if
            Node =:= node() ->
                is_supported_locally(FeatureNames);
            true ->
                run_feature_flags_mod_on_remote_node(
                  Node, is_supported_locally, [FeatureNames], Timeout)
        end,
    case Answer of
        true ->
            rabbit_log_feature_flags:debug(
              "Feature flags: node `~s` supports `~p`",
              [Node, FeatureNames]),
            true;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flags: node `~s` does not support `~p`; "
              "stopping query here",
              [Node, FeatureNames]),
            false;
        {error, pre_feature_flags_rabbitmq} ->
            %% See run_feature_flags_mod_on_remote_node/4 for
            %% an explanation why we consider this node a 3.7.x
            %% pre-feature-flags node.
            rabbit_log_feature_flags:debug(
              "Feature flags: no feature flags support on node `~s`, "
              "consider the feature flags unsupported: ~p",
              [Node, FeatureNames]),
            false;
        {error, Reason} ->
            rabbit_log_feature_flags:error(
              "Feature flags: error while querying `~p` support on "
              "node ~s: ~p",
              [FeatureNames, Node, Reason]),
            false
    end.
1792
-spec check_node_compatibility(node()) -> ok | {error, any()}.
%% @doc
%% Checks if a node is compatible with the local node.
%%
%% Two nodes are compatible when both of these conditions hold:
%% <ol>
%% <li>feature flags enabled on the local node are supported by the
%%   remote node</li>
%% <li>feature flags enabled on the remote node are supported by the
%%   local node</li>
%% </ol>
%%
%% The default timeout is used for the calls to the remote node.
%%
%% @param Node the name of the remote node to test.
%% @returns `ok' if they are compatible, `{error, Reason}' if they are not.

check_node_compatibility(Node) ->
    Timeout = ?TIMEOUT,
    check_node_compatibility(Node, Timeout).
1810
-spec check_node_compatibility(node(), timeout()) -> ok | {error, any()}.
%% @doc
%% Checks if a node is compatible with the local node.
%%
%% The conditions required to consider two nodes compatible are
%% described in {@link check_node_compatibility/1}.
%%
%% @param Node the name of the remote node to test.
%% @param Timeout Time in milliseconds after which the RPC gives up.
%% @returns `ok' if they are compatible, `{error, Reason}' if they are not.
%%
%% @see check_node_compatibility/1

check_node_compatibility(Node, Timeout) ->
    %% Before checking compatibility, we exchange feature flags from
    %% unknown Erlang applications. So we fetch remote feature flags
    %% from applications which are not loaded locally, and the opposite.
    %%
    %% The goal is that such feature flags are not blocking the
    %% communication between nodes because the code (which would
    %% break) is missing on those nodes. Therefore they should not be
    %% considered when determining compatibility.
    exchange_feature_flags_from_unknown_apps(Node, Timeout),

    %% FIXME:
    %% When we try to cluster two nodes, we get:
    %%   Feature flags: starting an unclustered node: all feature flags
    %%   will be enabled by default
    %% It should probably not be the case...

    %% We can now proceed with the actual compatibility check: first in
    %% the "local -> remote" direction, then the other way around.
    rabbit_log_feature_flags:debug(
      "Feature flags: node `~s` compatibility check, part 1/2",
      [Node]),
    LocalSupportedRemotely =
        local_enabled_feature_flags_is_supported_remotely(Node, Timeout),
    rabbit_log_feature_flags:debug(
      "Feature flags: node `~s` compatibility check, part 2/2",
      [Node]),
    RemoteSupportedLocally =
        remote_enabled_feature_flags_is_supported_locally(Node, Timeout),
    case {LocalSupportedRemotely, RemoteSupportedLocally} of
        {true, true} ->
            rabbit_log_feature_flags:debug(
              "Feature flags: node `~s` is compatible",
              [Node]),
            ok;
        {false, _} ->
            rabbit_log_feature_flags:error(
              "Feature flags: node `~s` is INCOMPATIBLE: "
              "feature flags enabled locally are not supported remotely",
              [Node]),
            {error, incompatible_feature_flags};
        {_, false} ->
            rabbit_log_feature_flags:error(
              "Feature flags: node `~s` is INCOMPATIBLE: "
              "feature flags enabled remotely are not supported locally",
              [Node]),
            {error, incompatible_feature_flags}
    end.
1869
-spec is_node_compatible(node()) -> boolean().
%% @doc
%% Returns if a node is compatible with the local node.
%%
%% This function relies on {@link check_node_compatibility/2} and
%% returns `true' if the latter returns `ok'. Therefore this is the same
%% code, except that this function returns a boolean, but not the reason
%% of the incompatibility if any.
%%
%% @param Node the name of the remote node to test.
%% @returns `true' if they are compatible, `false' otherwise.

is_node_compatible(Node) ->
    Timeout = ?TIMEOUT,
    is_node_compatible(Node, Timeout).
1884
-spec is_node_compatible(node(), timeout()) -> boolean().
%% @doc
%% Returns if a node is compatible with the local node.
%%
%% This function calls {@link check_node_compatibility/2} and returns
%% `true' if the latter returns `ok'. Therefore this is the same code,
%% except that this function returns a boolean, but not the reason
%% of the incompatibility if any. If the RPC times out, nodes are
%% considered incompatible.
%%
%% @param Node the name of the remote node to test.
%% @param Timeout Time in milliseconds after which the RPC gives up.
%% @returns `true' if they are compatible, `false' otherwise.

is_node_compatible(Node, Timeout) ->
    case check_node_compatibility(Node, Timeout) of
        ok -> true;
        _  -> false
    end.
1901
-spec local_enabled_feature_flags_is_supported_remotely(node(),
                                                        timeout()) ->
    boolean().
%% @private
%% @doc
%% Passes the names of the feature flags enabled locally to
%% `is_supported_remotely/3', with `Node' as the only node to query.

local_enabled_feature_flags_is_supported_remotely(Node, Timeout) ->
    EnabledLocally = list(enabled),
    is_supported_remotely([Node], maps:keys(EnabledLocally), Timeout).
1910
-spec remote_enabled_feature_flags_is_supported_locally(node(),
                                                        timeout()) ->
    boolean().
%% @private
%% @doc
%% Queries the feature flags enabled on `Node' and passes their names
%% to `is_supported_locally/1'. If the query fails, `false' is
%% returned.

remote_enabled_feature_flags_is_supported_locally(Node, Timeout) ->
    case query_remote_feature_flags(Node, enabled, Timeout) of
        EnabledRemotely when is_map(EnabledRemotely) ->
            is_supported_locally(maps:keys(EnabledRemotely));
        {error, _} ->
            false
    end.
1924
-spec run_feature_flags_mod_on_remote_node(node(),
                                           atom(),
                                           [term()],
                                           timeout()) ->
    term() | {error, term()}.
%% @private
%% @doc
%% Calls `Function' from this module with `Args' on `Node' over RPC.
%%
%% Returns the value returned by the remote function, or:
%% <ul>
%% <li>`{error, pre_feature_flags_rabbitmq}' if the function is
%%   undefined on the remote node</li>
%% <li>`{error, {badrpc, Reason}}' for any other RPC failure</li>
%% </ul>

run_feature_flags_mod_on_remote_node(Node, Function, Args, Timeout) ->
    Reply = rpc:call(Node, ?MODULE, Function, Args, Timeout),
    case Reply of
        {badrpc, {'EXIT',
                  {undef,
                   [{?MODULE, Function, Args, []}
                    | _]}}} ->
            %% If rabbit_feature_flags:Function() is undefined
            %% on the remote node, we consider it to be a 3.7.x
            %% pre-feature-flags node.
            %%
            %% Theoretically, it could be an older version (3.6.x and
            %% older). But the RabbitMQ version consistency check
            %% (rabbit_misc:version_minor_equivalent/2) called from
            %% rabbit_mnesia:check_rabbit_consistency/2 already blocked
            %% this situation from happening before we reach this point.
            rabbit_log_feature_flags:debug(
              "Feature flags: ~s:~s~p unavailable on node `~s`: "
              "assuming it is a RabbitMQ 3.7.x pre-feature-flags node",
              [?MODULE, Function, Args, Node]),
            {error, pre_feature_flags_rabbitmq};
        {badrpc, Reason} ->
            rabbit_log_feature_flags:error(
              "Feature flags: error while running ~s:~s~p "
              "on node `~s`: ~p",
              [?MODULE, Function, Args, Node, Reason]),
            {error, {badrpc, Reason}};
        _ ->
            Reply
    end.
1961
-spec query_remote_feature_flags(node(),
                                 Which :: all | enabled | disabled,
                                 timeout()) ->
    feature_flags() | {error, any()}.
%% @private
%% @doc
%% Runs `list(Which)' on `Node' and returns the resulting map of
%% feature flags.
%%
%% A node without the feature flags subsystem is reported as having no
%% feature flags at all (empty map). Other errors are logged and
%% returned unchanged.

query_remote_feature_flags(Node, Which, Timeout) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: querying ~s feature flags on node `~s`...",
      [Which, Node]),
    Reply = run_feature_flags_mod_on_remote_node(
              Node, list, [Which], Timeout),
    case Reply of
        {error, pre_feature_flags_rabbitmq} ->
            %% See run_feature_flags_mod_on_remote_node/4 for
            %% an explanation why we consider this node a 3.7.x
            %% pre-feature-flags node.
            rabbit_log_feature_flags:debug(
              "Feature flags: no feature flags support on node `~s`, "
              "consider the list of feature flags empty", [Node]),
            #{};
        {error, Reason} ->
            rabbit_log_feature_flags:error(
              "Feature flags: error while querying ~s feature flags "
              "on node `~s`: ~p",
              [Which, Node, Reason]),
            Reply;
        _ when is_map(Reply) ->
            rabbit_log_feature_flags:debug(
              "Feature flags: querying ~s feature flags on node `~s` "
              "done; ~s features: ~p",
              [Which, Node, Which, maps:keys(Reply)]),
            Reply
    end.
1995
-spec merge_feature_flags_from_unknown_apps(feature_flags()) ->
    ok | {error, any()}.
%% @private
%% @doc
%% Registers, among the given feature flags, those which are not
%% supported locally and whose provider (`provided_by' property) is not
%% an application loaded on this node. If there are none, nothing is
%% done and `ok' is returned.

merge_feature_flags_from_unknown_apps(FeatureFlags)
  when is_map(FeatureFlags) ->
    LoadedApps = [App || {App, _, _} <- application:loaded_applications()],
    %% The provider is only looked up for feature flags which are not
    %% supported locally.
    IsFromUnknownApp =
        fun(FeatureName, FeatureProps) ->
                not is_supported_locally(FeatureName)
                    andalso
                    not lists:member(maps:get(provided_by, FeatureProps),
                                     LoadedApps)
        end,
    FromUnknownApps = maps:filter(IsFromUnknownApp, FeatureFlags),
    case map_size(FromUnknownApps) of
        0 ->
            ok;
        _ ->
            rabbit_log_feature_flags:debug(
              "Feature flags: register feature flags provided by applications "
              "unknown locally: ~p",
              [maps:keys(FromUnknownApps)]),
            initialize_registry(FromUnknownApps)
    end.
2030
%% Exchanges, in both directions, the feature flags provided by
%% applications one side does not know about. The result of the first
%% step is ignored; the function returns the result of the second one.

exchange_feature_flags_from_unknown_apps(Node, Timeout) ->
    %% Step 1: fetch the feature flags coming from Erlang applications
    %% which are loaded remotely but not locally.
    _ = fetch_remote_feature_flags_from_apps_unknown_locally(Node, Timeout),

    %% Step 2: the other way around. Push our feature flags to remote
    %% nodes so they can register those coming from applications they
    %% don't know.
    push_local_feature_flags_from_apps_unknown_remotely(Node, Timeout).
2041
%% Queries all feature flags known by `Node' and registers locally
%% those provided by applications which are unknown on this node.
%%
%% `query_remote_feature_flags/3' may return `{error, Reason}' (for
%% instance when the RPC fails). That error is returned as is instead
%% of being passed to `merge_feature_flags_from_unknown_apps/1', which
%% only accepts a map and would otherwise crash with `function_clause'.
%% The query function already logs the error.

fetch_remote_feature_flags_from_apps_unknown_locally(Node, Timeout) ->
    case query_remote_feature_flags(Node, all, Timeout) of
        {error, _} = Error ->
            Error;
        RemoteFeatureFlags ->
            merge_feature_flags_from_unknown_apps(RemoteFeatureFlags)
    end.
2045
%% Pushes all feature flags known locally (`list(all)') through
%% `push_local_feature_flags_from_apps_unknown_remotely/3'.

push_local_feature_flags_from_apps_unknown_remotely(Node, Timeout) ->
    push_local_feature_flags_from_apps_unknown_remotely(
      Node, list(all), Timeout).
2050
%% Asks `Node' for its running cluster members, then calls
%% `merge_feature_flags_from_unknown_apps/1' with `FeatureFlags' on
%% each of them. The result of each individual call is ignored.
%%
%% Returns `{error, Reason}' if the list of nodes could not be
%% queried, `ok' otherwise. Nothing is done when `FeatureFlags' is
%% empty.

push_local_feature_flags_from_apps_unknown_remotely(
  Node, FeatureFlags, Timeout)
  when map_size(FeatureFlags) > 0 ->
    case query_running_remote_nodes(Node, Timeout) of
        {badrpc, Reason} ->
            {error, Reason};
        ClusterNodes ->
            _ = [run_feature_flags_mod_on_remote_node(
                   ClusterNode,
                   merge_feature_flags_from_unknown_apps,
                   [FeatureFlags],
                   Timeout)
                 || ClusterNode <- ClusterNodes],
            ok
    end;
push_local_feature_flags_from_apps_unknown_remotely(
  _Node, _FeatureFlags, _Timeout) ->
    ok.
2069
-spec sync_feature_flags_with_cluster([node()], boolean()) ->
    ok | {error, any()} | no_return().
%% @private
%% @doc
%% Same as `sync_feature_flags_with_cluster/3', with the default
%% `?TIMEOUT'.

sync_feature_flags_with_cluster(Nodes, NodeIsVirgin) ->
    Timeout = ?TIMEOUT,
    sync_feature_flags_with_cluster(Nodes, NodeIsVirgin, Timeout).
2076
-spec sync_feature_flags_with_cluster([node()], boolean(), timeout()) ->
    ok | {error, any()} | no_return().
%% @private
%% @doc
%% Synchronizes the states of feature flags with the given nodes.
%%
%% In all cases, the states of feature flags are first double-checked
%% with `verify_which_feature_flags_are_actually_enabled/0'.
%%
%% With an empty list of nodes:
%% <ul>
%% <li>if the node is not virgin, nothing else is done;</li>
%% <li>if the node is virgin and `remote_nodes/0' is empty, either all
%%   feature flags are enabled, or only those returned by
%%   `get_forced_feature_flag_names/0' when that list is defined;</li>
%% <li>if the node is virgin and `remote_nodes/0' is not empty, nothing
%%   else is done.</li>
%% </ul>
%%
%% With a non-empty list of nodes, the local node is removed from the
%% list and the synchronization continues in
%% `sync_feature_flags_with_cluster1/2'.

sync_feature_flags_with_cluster([], NodeIsVirgin, _Timeout) ->
    verify_which_feature_flags_are_actually_enabled(),
    case NodeIsVirgin of
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flags: starting an unclustered node which is "
              "already initialized: all feature flags left in their "
              "current state"),
            ok;
        true ->
            ForcedFeatureNames = get_forced_feature_flag_names(),
            case {remote_nodes(), ForcedFeatureNames} of
                {[], undefined} ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: starting an unclustered node "
                      "for the first time: all feature flags will be "
                      "enabled by default"),
                    enable_all();
                {[], []} ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: starting an unclustered "
                      "node for the first time: all feature "
                      "flags are forcibly left disabled from "
                      "the $RABBITMQ_FEATURE_FLAGS environment "
                      "variable"),
                    ok;
                {[], _} ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: starting an unclustered "
                      "node for the first time: only the "
                      "following feature flags specified in "
                      "the $RABBITMQ_FEATURE_FLAGS environment "
                      "variable will be enabled: ~p",
                      [ForcedFeatureNames]),
                    enable(ForcedFeatureNames);
                {_, _} ->
                    ok
            end
    end;
sync_feature_flags_with_cluster(Nodes, _NodeIsVirgin, Timeout) ->
    verify_which_feature_flags_are_actually_enabled(),
    OtherNodes = Nodes -- [node()],
    sync_feature_flags_with_cluster1(OtherNodes, Timeout).
2127
%% First half of the synchronization: picks one of the remote nodes at
%% random, queries the feature flags enabled there and enables them
%% locally. If that succeeds, the second half
%% (`sync_feature_flags_with_cluster2/2') runs against the same node.
%% Nothing is done if there is no remote node.

sync_feature_flags_with_cluster1([], _Timeout) ->
    ok;
sync_feature_flags_with_cluster1(RemoteNodes, Timeout) ->
    PickedNode = pick_one_node(RemoteNodes),
    rabbit_log_feature_flags:debug(
      "Feature flags: SYNCING FEATURE FLAGS with node `~s`...",
      [PickedNode]),
    QueryRet = query_remote_feature_flags(PickedNode, enabled, Timeout),
    case QueryRet of
        {error, _} ->
            QueryRet;
        _ ->
            EnabledRemotely = maps:keys(QueryRet),
            rabbit_log_feature_flags:debug(
              "Feature flags: enabling locally feature flags already "
              "enabled on node `~s`...",
              [PickedNode]),
            LocalSyncRet = do_sync_feature_flags_with_node(EnabledRemotely),
            case LocalSyncRet of
                ok -> sync_feature_flags_with_cluster2(PickedNode, Timeout);
                _  -> LocalSyncRet
            end
    end.
2152
%% Second half of the synchronization: asks the remote node to enable
%% the feature flags which are enabled locally. A remote node without
%% the feature flags subsystem is not considered an error.

sync_feature_flags_with_cluster2(RandomRemoteNode, Timeout) ->
    EnabledLocally = maps:keys(list(enabled)),
    rabbit_log_feature_flags:debug(
      "Feature flags: enabling on node `~s` feature flags already "
      "enabled locally...",
      [RandomRemoteNode]),
    case run_feature_flags_mod_on_remote_node(
           RandomRemoteNode,
           do_sync_feature_flags_with_node,
           [EnabledLocally],
           Timeout) of
        {error, pre_feature_flags_rabbitmq} -> ok;
        RemoteSyncRet                       -> RemoteSyncRet
    end.
2168
%% Returns one element of `Nodes', picked at random. The list must not
%% be empty.

pick_one_node(Nodes) ->
    lists:nth(rand:uniform(length(Nodes)), Nodes).
2172
%% Enables the given feature flags locally, one after the other, with
%% `enable_locally/1'. Stops at, and returns, the first result which is
%% not `ok'.

do_sync_feature_flags_with_node([]) ->
    ok;
do_sync_feature_flags_with_node([FeatureName | Others]) ->
    EnableRet = enable_locally(FeatureName),
    case EnableRet of
        ok -> do_sync_feature_flags_with_node(Others);
        _  -> EnableRet
    end.
2180
-spec get_forced_feature_flag_names() -> [feature_name()] | undefined.
%% @private
%% @doc
%% Returns the (possibly empty) list of feature flags the user wants
%% to enable out-of-the-box when starting a node for the first time.
%%
%% Without this, the default is to enable all the supported feature
%% flags.
%%
%% There are two ways to specify that list:
%% <ol>
%% <li>Using the `$RABBITMQ_FEATURE_FLAGS' environment variable; for
%%   instance `RABBITMQ_FEATURE_FLAGS=quorum_queue,mnevis'.</li>
%% <li>Using the `forced_feature_flags_on_init' configuration parameter;
%%   for instance
%%   `{rabbit, [{forced_feature_flags_on_init, [quorum_queue, mnevis]}]}'.</li>
%% </ol>
%%
%% The environment variable has precedence over the configuration
%% parameter.
%%
%% @returns the list of feature flag names, or `undefined' if neither
%% source specifies one.

get_forced_feature_flag_names() ->
    FromEnv = get_forced_feature_flag_names_from_env(),
    ForcedNames = case FromEnv of
                      undefined ->
                          get_forced_feature_flag_names_from_config();
                      _ ->
                          FromEnv
                  end,
    %% Tell the user when the automatic enablement is restricted.
    if
        ForcedNames =:= undefined ->
            ok;
        ForcedNames =:= [] ->
            rabbit_log_feature_flags:info(
              "Feature flags: automatic enablement of feature "
              "flags disabled (i.e. none will be enabled "
              "automatically)");
        true ->
            rabbit_log_feature_flags:info(
              "Feature flags: automatic enablement of feature "
              "flags limited to the following list: ~p", [ForcedNames])
    end,
    ForcedNames.
2218
-spec get_forced_feature_flag_names_from_env() -> [feature_name()] | undefined.
%% @private
%% @doc
%% Returns the `forced_feature_flags_on_init' value of the prelaunch
%% context if it is present and is a list, `undefined' otherwise.

get_forced_feature_flag_names_from_env() ->
    Context = rabbit_prelaunch:get_context(),
    case Context of
        #{forced_feature_flags_on_init := Forced} when is_list(Forced) ->
            Forced;
        _ ->
            undefined
    end.
2230
-spec get_forced_feature_flag_names_from_config() -> [feature_name()] | undefined.
%% @private
%% @doc
%% Returns the `forced_feature_flags_on_init' parameter of the `rabbit'
%% application environment if it is a list of atoms. Returns
%% `undefined' if the parameter is not set or has any other shape.

get_forced_feature_flag_names_from_config() ->
    Configured = application:get_env(rabbit,
                                     forced_feature_flags_on_init,
                                     undefined),
    IsListOfAtoms = is_list(Configured)
        andalso lists:all(fun erlang:is_atom/1, Configured),
    case IsListOfAtoms of
        true  -> Configured;
        false -> undefined
    end.
2249
-spec verify_which_feature_flags_are_actually_enabled() ->
    ok | {error, any()} | no_return().
%% @private
%% @doc
%% Cross-checks the list of enabled feature flags read from disk against
%% the answers given by the migration functions. If both lists differ,
%% the differences are logged, the repaired list is written back and the
%% registry is re-initialized.

verify_which_feature_flags_are_actually_enabled() ->
    AllFeatureFlags = list(all),
    EnabledFeatureNames = read_enabled_feature_flags_list(),
    rabbit_log_feature_flags:debug(
      "Feature flags: double-checking feature flag states..."),

    %% The previous instance of the node may have failed to write the
    %% feature flags list file, so the list read from disk is verified:
    %% the migration function of each feature flag is asked whether the
    %% feature flag is actually enabled.
    %%
    %% When the answer is neither `true' nor `false' (the feature flag
    %% has no migration function, or the function failed), the feature
    %% flag keeps the state it is currently marked with.
    CollectEnabled =
        fun(Name, Props, Names) ->
                IsEnabled =
                    case run_migration_fun(Name, Props, is_enabled) of
                        Answer when is_boolean(Answer) -> Answer;
                        _                              -> is_enabled(Name)
                    end,
                case IsEnabled of
                    true  -> [Name | Names];
                    false -> Names
                end
        end,
    RepairedEnabledFeatureNames =
        lists:sort(maps:fold(CollectEnabled, [], AllFeatureFlags)),

    %% Report the feature flags whose state changed because of the check
    %% above: first a header line, then one line per direction of change.
    WereEnabled = RepairedEnabledFeatureNames -- EnabledFeatureNames,
    WereDisabled = EnabledFeatureNames -- RepairedEnabledFeatureNames,
    if
        WereEnabled =:= [] andalso WereDisabled =:= [] ->
            ok;
        true ->
            rabbit_log_feature_flags:warning(
              "Feature flags: the previous instance of this node "
              "must have failed to write the `feature_flags` "
              "file at `~s`:",
              [enabled_feature_flags_list_file()])
    end,
    if
        WereEnabled =:= [] ->
            ok;
        true ->
            rabbit_log_feature_flags:warning(
              "Feature flags:   - list of previously enabled "
              "feature flags now marked as such: ~p", [WereEnabled])
    end,
    if
        WereDisabled =:= [] ->
            ok;
        true ->
            rabbit_log_feature_flags:warning(
              "Feature flags:   - list of previously disabled "
              "feature flags now marked as such: ~p", [WereDisabled])
    end,

    %% Only when the repaired list is not exactly the list read from disk
    %% do we write it back and re-initialize the registry. The registry
    %% is told whether the write succeeded.
    if
        RepairedEnabledFeatureNames =:= EnabledFeatureNames ->
            ok;
        true ->
            rabbit_log_feature_flags:debug(
              "Feature flags: write the repaired list of enabled feature "
              "flags"),
            WrittenToDisk =
                case try_to_write_enabled_feature_flags_list(
                       RepairedEnabledFeatureNames) of
                    ok -> true;
                    _  -> false
                end,
            FeatureStates =
                list_of_enabled_feature_flags_to_feature_states(
                  RepairedEnabledFeatureNames),
            initialize_registry(#{}, FeatureStates, WrittenToDisk)
    end.
2327
-spec refresh_feature_flags_after_app_load([atom()]) ->
    ok | {error, any()} | no_return().
%% @doc
%% Refreshes the feature flags after the applications listed in `Apps'
%% were loaded. Does nothing for an empty list. Otherwise the registry is
%% re-initialized, already-known feature flags provided by `Apps' are
%% enabled locally where they are marked as enabled, and feature flags
%% unseen so far are shared. The first result other than `ok' is returned
%% as is.

refresh_feature_flags_after_app_load([]) ->
    ok;
refresh_feature_flags_after_app_load(Apps) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: new apps loaded: ~p -> refreshing feature flags",
      [Apps]),

    KnownFeatureFlags = list(all),
    SupportedFeatureFlags = query_supported_feature_flags(),

    %% Feature flags this node already knew about only because remote
    %% nodes have them, and whose providing application is now loaded
    %% locally too. Their migration function may have to run in case the
    %% state of this node needs it.
    ProvidedByLoadedApp = fun(_, #{provided_by := App}) ->
                                  lists:member(App, Apps)
                          end,
    AlreadySupportedFeatureNames =
        maps:keys(maps:filter(ProvidedByLoadedApp, KnownFeatureFlags)),
    if
        AlreadySupportedFeatureNames =:= [] ->
            ok;
        true ->
            rabbit_log_feature_flags:debug(
              "Feature flags: new apps loaded: feature flags already "
              "supported: ~p",
              [lists:sort(AlreadySupportedFeatureNames)])
    end,

    %% Feature flags no node in the cluster knew about before: this is
    %% the first time they are seen in this instance of the cluster, so
    %% they must be registered on all nodes.
    UnseenSoFar = fun(FeatureName, _) ->
                          not maps:is_key(FeatureName, KnownFeatureFlags)
                  end,
    NewSupportedFeatureFlags =
        maps:filter(UnseenSoFar, SupportedFeatureFlags),
    NewSupportedFeatureNames = maps:keys(NewSupportedFeatureFlags),
    if
        NewSupportedFeatureNames =:= [] ->
            ok;
        true ->
            rabbit_log_feature_flags:debug(
              "Feature flags: new apps loaded: new feature flags (unseen so "
              "far): ~p ",
              [lists:sort(NewSupportedFeatureNames)])
    end,

    %% Each step only runs if the previous one returned `ok'; otherwise
    %% the failing step's result is the final result.
    InitRet = initialize_registry(),
    EnableRet = case InitRet of
                    ok -> maybe_enable_locally_after_app_load(
                            AlreadySupportedFeatureNames);
                    _  -> InitRet
                end,
    case EnableRet of
        ok -> share_new_feature_flags_after_app_load(
                NewSupportedFeatureFlags, ?TIMEOUT);
        _  -> EnableRet
    end.
2394
%% Walks the given feature flag names in order and calls
%% `do_enable_locally/1' for each one `is_enabled/1' reports as enabled.
%% Stops at the first result other than `ok' and returns it; returns `ok'
%% once the whole list was processed.
maybe_enable_locally_after_app_load([]) ->
    ok;
maybe_enable_locally_after_app_load([FeatureName | Rest]) ->
    %% Feature flags which are not enabled are simply skipped.
    Ret = case is_enabled(FeatureName) of
              true  -> do_enable_locally(FeatureName);
              false -> ok
          end,
    case Ret of
        ok -> maybe_enable_locally_after_app_load(Rest);
        _  -> Ret
    end.
2407
%% Hands the given feature flags over to
%% `push_local_feature_flags_from_apps_unknown_remotely/3' on behalf of
%% the local node, with the given timeout, and returns its result.
share_new_feature_flags_after_app_load(NewFeatureFlags, Timeout) ->
    ThisNode = node(),
    push_local_feature_flags_from_apps_unknown_remotely(
      ThisNode, NewFeatureFlags, Timeout).
2411
%% `on_load()' code server hook. Returns `ok' to permit loading this
%% module, or an explanatory string to refuse it.
on_load() ->
    %% This hook prevents the module from being loaded in an already
    %% running RabbitMQ node whose version does not have the feature
    %% flags subsystem.
    %%
    %% That happens when an upgrade overwrites RabbitMQ files while the
    %% node is still running, as many packages do: files are updated on
    %% disk first, and a post-install step restarts the service later.
    %%
    %% If many nodes in a cluster are updated at the same time, a node
    %% running the newer version might query feature flags on an old node
    %% where this module is already available (because the files were
    %% already overwritten). The query then reports an unexpected answer
    %% and the newer node refuses to start.
    case rabbit:is_running() of
        false ->
            %% RabbitMQ is not running: the module is being used outside
            %% of RabbitMQ (for debugging purpose or in the context of
            %% EUnit for instance). Loading is permitted because this
            %% Erlang node will never be queried for its feature flags.
            ok;
        true ->
            %% RabbitMQ is running. A pre-feature-flags node is told
            %% apart from one having the subsystem by looking at the
            %% `feature_flags_file' application environment variable:
            %% with a feature-flags-enabled node, it is defined by
            %% rabbitmq-server(8).
            case application:get_env(rabbit, feature_flags_file) of
                {ok, _} ->
                    %% Feature-flags-enabled version: loading the module
                    %% is permitted.
                    ok;
                _ ->
                    %% Pre-feature-flags version: the load is denied and
                    %% the reason is reported, mentioning the version of
                    %% RabbitMQ when it is known.
                    Vsn = case application:get_key(rabbit, vsn) of
                              undefined -> "unknown version";
                              {ok, V}   -> V
                          end,
                    Prefix =
                        "Refusing to load '" ?MODULE_STRING "' on this "
                        "node. It appears to be running a pre-feature-flags "
                        "version of RabbitMQ (",
                    Suffix =
                        "). This is fine: "
                        "a newer version of RabbitMQ was deployed on this "
                        "node, but it was not restarted yet. This warning "
                        "is probably caused by a remote node querying this "
                        "node for its feature flags.",
                    Prefix ++ Vsn ++ Suffix
            end
    end.
2471