1%% This Source Code Form is subject to the terms of the Mozilla Public 2%% License, v. 2.0. If a copy of the MPL was not distributed with this 3%% file, You can obtain one at https://mozilla.org/MPL/2.0/. 4%% 5%% Copyright (c) 2018-2021 VMware, Inc. or its affiliates. All rights reserved. 6%% 7 8%% @author The RabbitMQ team 9%% @copyright 2018-2021 VMware, Inc. or its affiliates. 10%% 11%% @doc 12%% This module offers a framework to declare capabilities a RabbitMQ node 13%% supports and therefore a way to determine if multiple RabbitMQ nodes in 14%% a cluster are compatible and can work together. 15%% 16%% == What a feature flag is == 17%% 18%% A <strong>feature flag</strong> is a name and several properties given 19%% to a change in RabbitMQ which impacts its communication with other 20%% RabbitMQ nodes. This kind of change can be: 21%% <ul> 22%% <li>an update to an Erlang record</li> 23%% <li>a modification to a replicated Mnesia table schema</li> 24%% <li>a modification to Erlang messages exchanged between Erlang processes 25%% which might run on remote nodes</li> 26%% </ul> 27%% 28%% A feature flag is qualified by: 29%% <ul> 30%% <li>a <strong>name</strong></li> 31%% <li>a <strong>description</strong> (optional)</li> 32%% <li>a list of other <strong>feature flags this feature flag depends on 33%% </strong> (optional). This can be useful when the change builds up on 34%% top of a previous change. For instance, it expands a record which was 35%% already modified by a previous feature flag.</li> 36%% <li>a <strong>migration function</strong> (optional). If provided, this 37%% function is called when the feature flag is enabled. It is responsible 38%% for doing all the data conversion, if any, and confirming the feature 39%% flag can be enabled.</li> 40%% <li>a level of stability (stable or experimental). For now, this is only 41%% informational. 
But it might be used for specific purposes in the 42%% future.</li> 43%% </ul> 44%% 45%% == How to declare a feature flag == 46%% 47%% To define a new feature flag, you need to use the 48%% `rabbit_feature_flag()' module attribute: 49%% 50%% ``` 51%% -rabbit_feature_flag(FeatureFlag). 52%% ''' 53%% 54%% `FeatureFlag' is a {@type feature_flag_modattr()}. 55%% 56%% == How to enable a feature flag == 57%% 58%% To enable a supported feature flag, you have the following solutions: 59%% 60%% <ul> 61%% <li>Using this module API: 62%% ``` 63%% rabbit_feature_flags:enable(FeatureFlagName). 64%% ''' 65%% </li> 66%% <li>Using the `rabbitmqctl' CLI: 67%% ``` 68%% rabbitmqctl enable_feature_flag "$feature_flag_name" 69%% ''' 70%% </li> 71%% </ul> 72%% 73%% == How to disable a feature flag == 74%% 75%% Once enabled, there is <strong>currently no way to disable</strong> a 76%% feature flag. 77 78-module(rabbit_feature_flags). 79 80-export([list/0, 81 list/1, 82 list/2, 83 enable/1, 84 enable_all/0, 85 disable/1, 86 disable_all/0, 87 is_supported/1, 88 is_supported/2, 89 is_supported_locally/1, 90 is_supported_remotely/1, 91 is_supported_remotely/2, 92 is_supported_remotely/3, 93 is_enabled/1, 94 is_enabled/2, 95 is_disabled/1, 96 is_disabled/2, 97 info/0, 98 info/1, 99 init/0, 100 get_state/1, 101 get_stability/1, 102 check_node_compatibility/1, 103 check_node_compatibility/2, 104 is_node_compatible/1, 105 is_node_compatible/2, 106 sync_feature_flags_with_cluster/2, 107 sync_feature_flags_with_cluster/3, 108 refresh_feature_flags_after_app_load/1, 109 enabled_feature_flags_list_file/0 110 ]). 111 112%% RabbitMQ internal use only. 113-export([initialize_registry/0, 114 initialize_registry/1, 115 mark_as_enabled_locally/2, 116 remote_nodes/0, 117 running_remote_nodes/0, 118 does_node_support/3, 119 merge_feature_flags_from_unknown_apps/1, 120 do_sync_feature_flags_with_node/1]). 121 122-ifdef(TEST). 
-export([inject_test_feature_flags/1,
         initialize_registry/3,
         query_supported_feature_flags/0,
         mark_as_enabled_remotely/2,
         mark_as_enabled_remotely/4,
         registry_loading_lock/0]).
-endif.

%% Default timeout for operations on remote nodes.
-define(TIMEOUT, 60000).

%% Lock identifiers; both embed the calling process (`self()').
%% `?FF_STATE_CHANGE_LOCK' is passed to `global:set_lock/1' and
%% `global:del_lock/1' in `is_enabled/2'. The registry loading lock is
%% presumably used the same way (its use is not in this part of the file).
-define(FF_REGISTRY_LOADING_LOCK, {feature_flags_registry_loading, self()}).
-define(FF_STATE_CHANGE_LOCK,     {feature_flags_state_change, self()}).

-type feature_flag_modattr() :: {feature_name(),
                                 feature_props()}.
%% The value of a `-rabbit_feature_flag()' module attribute used to
%% declare a new feature flag.

-type feature_name() :: atom().
%% The feature flag's name. It is used in many places to identify a
%% specific feature flag. In particular, this is how an end-user (or
%% the CLI) can enable a feature flag. This is also the only bit which
%% is persisted so a node remembers which feature flags are enabled.

-type feature_props() :: #{desc => string(),
                           doc_url => string(),
                           stability => stability(),
                           depends_on => [feature_name()],
                           migration_fun => migration_fun_name()}.
%% The feature flag properties.
%%
%% All properties are optional.
%%
%% The properties are:
%% <ul>
%% <li>`desc': a description of the feature flag</li>
%% <li>`doc_url': a URL pointing to more documentation about the feature
%%   flag</li>
%% <li>`stability': the level of stability</li>
%% <li>`depends_on': a list of feature flag names which must be enabled
%%   before this one</li>
%% <li>`migration_fun': a migration function specified by its module and
%%   function names</li>
%% </ul>
%%
%% Note that the `migration_fun' is a {@type migration_fun_name()},
%% not a {@type migration_fun()}. However, the function signature
%% must conform to the {@type migration_fun()} signature.
%% The reason is that we must be able to represent it as an Erlang term
%% when we regenerate the registry module source code (using {@link
%% erl_syntax:abstract/1}).

-type feature_flags() :: #{feature_name() => feature_props_extended()}.
%% The feature flags map as returned or accepted by several functions in
%% this module. In particular, this is what the {@link list/0} function
%% returns.

-type feature_props_extended() :: #{desc => string(),
                                    doc_url => string(),
                                    stability => stability(),
                                    migration_fun => migration_fun_name(),
                                    depends_on => [feature_name()],
                                    provided_by => atom()}.
%% The feature flag properties, once expanded by this module when feature
%% flags are discovered.
%%
%% The new properties compared to {@type feature_props()} are:
%% <ul>
%% <li>`provided_by': the name of the application providing the feature
%%   flag</li>
%% </ul>

-type feature_state() :: boolean() | state_changing.
%% The state of the feature flag: `true' if it is enabled, `false' if it
%% is disabled, or `state_changing' while its state is being changed.

-type feature_states() :: #{feature_name() => feature_state()}.
%% A map associating feature flag names with their state.

-type stability() :: stable | experimental.
%% The level of stability of a feature flag. Currently, only informational.

-type migration_fun_name() :: {Module :: atom(), Function :: atom()}.
%% The name of the module and function to call when changing the state of
%% the feature flag.

-type migration_fun() :: fun((feature_name(),
                              feature_props_extended(),
                              migration_fun_context())
                             -> ok | {error, any()} | % context = enable
                                boolean() | undefined).  % context = is_enabled
%% The migration function signature.
%%
%% It is called with context `enable' when a feature flag is being enabled.
%% The function is responsible for this feature-flag-specific verification
%% and data conversion.
%% It returns `ok' if RabbitMQ can mark the feature flag as enabled and
%% continue with the next one, if any. Otherwise, it returns
%% `{error, any()}' if there is an error and the feature flag should
%% remain disabled. The function must be idempotent: if the feature flag is
%% already enabled on another node and the local node is running this function
%% again because it is syncing its feature flags state, it should succeed.
%%
%% It is called with the context `is_enabled' to check if a feature flag
%% is actually enabled. It is useful on RabbitMQ startup, just in case
%% the previous instance failed to write the feature flags list file.

-type migration_fun_context() :: enable | is_enabled.

-type registry_vsn() :: term().

-export_type([feature_flag_modattr/0,
              feature_props/0,
              feature_name/0,
              feature_flags/0,
              feature_props_extended/0,
              feature_state/0,
              feature_states/0,
              stability/0,
              migration_fun_name/0,
              migration_fun/0,
              migration_fun_context/0]).

-on_load(on_load/0).

-spec list() -> feature_flags().
%% @doc
%% Lists every supported feature flag.
%%
%% @returns A map of all supported feature flags.

list() ->
    list(all).

-spec list(Which :: all | enabled | disabled) -> feature_flags().
%% @doc
%% Lists all, enabled or disabled feature flags, depending on the argument.
%%
%% @param Which The group of feature flags to return: `all', `enabled' or
%%   `disabled'.
%% @returns A map of selected feature flags.

list(Which) when Which =:= all; Which =:= enabled ->
    %% Both groups are served directly by the registry.
    rabbit_ff_registry:list(Which);
list(disabled) ->
    %% The disabled group is derived from the full map, keeping the
    %% entries `is_disabled/1' reports as disabled.
    KeepDisabled = fun(Name, _Props) -> is_disabled(Name) end,
    maps:filter(KeepDisabled, list(all)).

-spec list(all | enabled | disabled, stability()) -> feature_flags().
%% @doc
%% Lists all, enabled or disabled feature flags, depending on the first
%% argument, only keeping those having the specified stability.
%%
%% @param Which The group of feature flags to return: `all', `enabled' or
%%   `disabled'.
%% @param Stability The level of stability used to filter the map of feature
%%   flags.
%% @returns A map of selected feature flags.

list(Which, Stability)
  when Stability =:= stable; Stability =:= experimental ->
    HasRequestedStability =
        fun(_Name, Props) ->
                get_stability(Props) =:= Stability
        end,
    maps:filter(HasRequestedStability, list(Which)).

-spec enable(feature_name() | [feature_name()]) -> ok |
                                                   {error, Reason :: any()}.
%% @doc
%% Enables the specified feature flag or set of feature flags.
%%
%% @param FeatureName The name or the list of names of feature flags to
%%   enable.
%% @returns `ok' if the feature flags (and all the feature flags they
%%   depend on) were successfully enabled, or `{error, Reason}' if one
%%   feature flag could not be enabled (subsequent feature flags in the
%%   dependency tree are left unchanged).

enable(FeatureName) when is_atom(FeatureName) ->
    LogArgs = [FeatureName],
    rabbit_log_feature_flags:debug(
      "Feature flag `~s`: REQUEST TO ENABLE", LogArgs),
    case is_enabled(FeatureName) of
        true ->
            rabbit_log_feature_flags:debug(
              "Feature flag `~s`: already enabled", LogArgs),
            ok;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flag `~s`: not enabled, check if supported by cluster",
              LogArgs),
            %% Enabling requires support from the local node and from
            %% every other member of the cluster.
            case is_supported(FeatureName) of
                false ->
                    rabbit_log_feature_flags:error(
                      "Feature flag `~s`: not supported", LogArgs),
                    {error, unsupported};
                true ->
                    rabbit_log_feature_flags:info(
                      "Feature flag `~s`: supported, attempt to enable...",
                      LogArgs),
                    do_enable(FeatureName)
            end
    end;
enable(FeatureNames) when is_list(FeatureNames) ->
    with_feature_flags(FeatureNames, fun enable/1).

-spec enable_all() -> ok | {error, any()}.
%% @doc
%% Enables all supported feature flags.
%%
%% @returns `ok' if the feature flags were successfully enabled,
%%   or `{error, Reason}' if one feature flag could not be enabled
%%   (subsequent feature flags in the dependency tree are left
%%   unchanged).

enable_all() ->
    AllFeatureNames = maps:keys(list(all)),
    with_feature_flags(AllFeatureNames, fun enable/1).

-spec disable(feature_name() | [feature_name()]) -> ok | {error, any()}.
%% @doc
%% Disables the specified feature flag or set of feature flags.
%%
%% Disabling is not implemented: `{error, unsupported}' is returned for
%% any single feature flag name, so a non-empty list fails on its first
%% element and only an empty list yields `ok'.
%%
%% @param FeatureName The name or the list of names of feature flags to
%%   disable.
%% @returns `ok' if the feature flags (and all the feature flags they
%%   depend on) were successfully disabled, or `{error, Reason}' if one
%%   feature flag could not be disabled (subsequent feature flags in the
%%   dependency tree are left unchanged).

disable(FeatureNames) when is_list(FeatureNames) ->
    with_feature_flags(FeatureNames, fun disable/1);
disable(FeatureName) when is_atom(FeatureName) ->
    {error, unsupported}.

-spec disable_all() -> ok | {error, any()}.
%% @doc
%% Disables all supported feature flags.
%%
%% @returns `ok' if the feature flags were successfully disabled,
%%   or `{error, Reason}' if one feature flag could not be disabled
%%   (subsequent feature flags in the dependency tree are left
%%   unchanged).

disable_all() ->
    AllFeatureNames = maps:keys(list(all)),
    with_feature_flags(AllFeatureNames, fun disable/1).
-spec with_feature_flags([feature_name()],
                         fun((feature_name()) -> ok | {error, any()})) ->
    ok | {error, any()}.
%% @private
%% Applies `Fun' to each feature flag name in order and stops at the
%% first result which is not `ok', returning that result as is.

with_feature_flags([], _Fun) ->
    ok;
with_feature_flags([FeatureName | Remaining], Fun) ->
    case Fun(FeatureName) of
        ok       -> with_feature_flags(Remaining, Fun);
        NotOk    -> NotOk
    end.

-spec is_supported(feature_name() | [feature_name()]) -> boolean().
%% @doc
%% Returns if a single feature flag or a set of feature flags is
%% supported by the entire cluster.
%%
%% This is the same as calling both {@link is_supported_locally/1} and
%% {@link is_supported_remotely/1} with a logical AND.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if the set of feature flags is entirely supported, or
%%   `false' if one of them is not or the RPC timed out.

is_supported(FeatureNames) ->
    %% Remote nodes are only queried when the local node supports the
    %% feature flag(s).
    SupportedLocally = is_supported_locally(FeatureNames),
    SupportedLocally andalso is_supported_remotely(FeatureNames).

-spec is_supported(feature_name() | [feature_name()], timeout()) ->
    boolean().
%% @doc
%% Returns if a single feature flag or a set of feature flags is
%% supported by the entire cluster.
%%
%% This is the same as calling both {@link is_supported_locally/1} and
%% {@link is_supported_remotely/2} with a logical AND.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @param Timeout Time in milliseconds after which the RPC gives up.
%% @returns `true' if the set of feature flags is entirely supported, or
%%   `false' if one of them is not or the RPC timed out.

is_supported(FeatureNames, Timeout) ->
    SupportedLocally = is_supported_locally(FeatureNames),
    SupportedLocally andalso is_supported_remotely(FeatureNames, Timeout).

-spec is_supported_locally(feature_name() | [feature_name()]) -> boolean().
%% @doc
%% Returns if a single feature flag or a set of feature flags is
%% supported by the local node.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if the set of feature flags is entirely supported, or
%%   `false' if one of them is not.

is_supported_locally(FeatureNames) when is_list(FeatureNames) ->
    lists:all(fun rabbit_ff_registry:is_supported/1, FeatureNames);
is_supported_locally(FeatureName) when is_atom(FeatureName) ->
    rabbit_ff_registry:is_supported(FeatureName).

-spec is_supported_remotely(feature_name() | [feature_name()]) -> boolean().
%% @doc
%% Returns if a single feature flag or a set of feature flags is
%% supported by all remote nodes.
%%
%% The query uses the default timeout (`?TIMEOUT').
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if the set of feature flags is entirely supported, or
%%   `false' if one of them is not or the RPC timed out.

is_supported_remotely(FeatureNames) ->
    is_supported_remotely(FeatureNames, ?TIMEOUT).

-spec is_supported_remotely(feature_name() | [feature_name()], timeout()) -> boolean().
%% @doc
%% Returns if a single feature flag or a set of feature flags is
%% supported by all remote nodes.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @param Timeout Time in milliseconds after which the RPC gives up.
%% @returns `true' if the set of feature flags is entirely supported, or
%%   `false' if one of them is not or the RPC timed out.
is_supported_remotely(FeatureName, Timeout) when is_atom(FeatureName) ->
    %% A single name is handled as a one-element list.
    is_supported_remotely([FeatureName], Timeout);
is_supported_remotely([], _Timeout) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: skipping query for feature flags support "
      "as the given list is empty"),
    true;
is_supported_remotely(FeatureNames, Timeout) when is_list(FeatureNames) ->
    case running_remote_nodes() of
        [] ->
            %% No remote node to ask: the local answer is all there is.
            rabbit_log_feature_flags:debug(
              "Feature flags: isolated node; skipping remote node query => "
              "consider `~p` supported",
              [FeatureNames]),
            true;
        Nodes ->
            rabbit_log_feature_flags:debug(
              "Feature flags: about to query these remote nodes "
              "about support for `~p`: ~p",
              [FeatureNames, Nodes]),
            is_supported_remotely(Nodes, FeatureNames, Timeout)
    end.

-spec is_supported_remotely([node()],
                            feature_name() | [feature_name()],
                            timeout()) -> boolean().
%% @doc
%% Returns if a single feature flag or a set of feature flags is
%% supported by specified remote nodes.
%%
%% @param RemoteNodes The list of remote nodes to query.
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @param Timeout Time in milliseconds after which the RPC gives up.
%% @returns `true' if the set of feature flags is entirely supported by
%%   all nodes, or `false' if one of them is not or the RPC timed out.
is_supported_remotely(_RemoteNodes, [], _Timeout) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: skipping query for feature flags support "
      "as the given list is empty"),
    true;
is_supported_remotely([], FeatureNames, _Timeout) ->
    %% Every node in the list was queried and none reported a lack of
    %% support.
    rabbit_log_feature_flags:debug(
      "Feature flags: all running remote nodes support `~p`",
      [FeatureNames]),
    true;
is_supported_remotely([Node | OtherNodes], FeatureNames, Timeout) ->
    case does_node_support(Node, FeatureNames, Timeout) of
        false ->
            %% One negative answer is enough: remaining nodes are not
            %% queried.
            rabbit_log_feature_flags:debug(
              "Feature flags: stopping query for support for `~p` here",
              [FeatureNames]),
            false;
        true ->
            is_supported_remotely(OtherNodes, FeatureNames, Timeout)
    end.

-spec is_enabled(feature_name() | [feature_name()]) -> boolean().
%% @doc
%% Returns if a single feature flag or a set of feature flags is
%% enabled.
%%
%% This is the same as calling {@link is_enabled/2} as a `blocking'
%% call.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if the set of feature flags is enabled, or
%%   `false' if one of them is not.

is_enabled(FeatureNames) ->
    is_enabled(FeatureNames, blocking).

-spec is_enabled
(feature_name() | [feature_name()], blocking) ->
    boolean();
(feature_name() | [feature_name()], non_blocking) ->
    feature_state().
%% @doc
%% Returns if a single feature flag or a set of feature flags is
%% enabled.
%%
%% When `blocking' is passed, the function waits (blocks) until the
%% state of a feature flag being disabled or enabled stabilizes before
%% returning its final state.
%%
%% When `non_blocking' is passed, the function returns immediately with
%% the state of the feature flag (`true' if enabled, `false' otherwise)
%% or `state_changing' if the state is being changed at the time of the
%% call.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if the set of feature flags is enabled,
%%   `false' if one of them is not, or `state_changing' if one of them
%%   is being worked on. Note that `state_changing' has precedence over
%%   `false', so if one is `false' and another one is `state_changing',
%%   `state_changing' is returned.

is_enabled(FeatureNames, blocking) ->
    case is_enabled_nb(FeatureNames) of
        state_changing ->
            %% Acquire then immediately release the state change lock.
            %% Presumably the lock is held by whoever is changing the
            %% state, so this waits for that change to finish before
            %% asking again.
            global:set_lock(?FF_STATE_CHANGE_LOCK),
            global:del_lock(?FF_STATE_CHANGE_LOCK),
            is_enabled(FeatureNames, blocking);
        StableState ->
            StableState
    end;
is_enabled(FeatureNames, non_blocking) ->
    is_enabled_nb(FeatureNames).

%% Non-blocking query of the registry for one feature flag or for a
%% list of them. For a list, the result starts at `true' and is combined
%% with each flag's state: `state_changing' wins over everything and
%% `false' wins over `true'.
is_enabled_nb(FeatureNames) when is_list(FeatureNames) ->
    Combine =
        fun(_Name, state_changing) ->
                %% Already known to be changing: nothing can override it.
                state_changing;
           (Name, false) ->
                %% Already disabled: only `state_changing' can override.
                case rabbit_ff_registry:is_enabled(Name) of
                    state_changing -> state_changing;
                    _              -> false
                end;
           (Name, _EnabledSoFar) ->
                rabbit_ff_registry:is_enabled(Name)
        end,
    lists:foldl(Combine, true, FeatureNames);
is_enabled_nb(FeatureName) when is_atom(FeatureName) ->
    rabbit_ff_registry:is_enabled(FeatureName).

-spec is_disabled(feature_name() | [feature_name()]) -> boolean().
%% @doc
%% Returns if a single feature flag or one feature flag in a set of
%% feature flags is disabled.
%%
%% This is the same as negating the result of {@link is_enabled/1}.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if one of the feature flags is disabled, or
%%   `false' if they are all enabled.

is_disabled(FeatureNames) ->
    is_disabled(FeatureNames, blocking).

-spec is_disabled
(feature_name() | [feature_name()], blocking) ->
    boolean();
(feature_name() | [feature_name()], non_blocking) ->
    feature_state().
%% @doc
%% Returns if a single feature flag or one feature flag in a set of
%% feature flags is disabled.
%%
%% This is the same as negating the result of {@link is_enabled/2},
%% except that `state_changing' is returned as is.
%%
%% See {@link is_enabled/2} for a description of the `blocking' and
%% `non_blocking' modes.
%%
%% @param FeatureNames The name or a list of names of the feature flag(s)
%%   to be checked.
%% @returns `true' if one feature flag in the set of feature flags is
%%   disabled, `false' if they are all enabled, or `state_changing' if
%%   one of them is being worked on. Note that `state_changing' has
%%   precedence over `true', so if one is `true' (i.e. disabled) and
%%   another one is `state_changing', `state_changing' is returned.
%%
%% @see is_enabled/2

is_disabled(FeatureName, Blocking) ->
    EnabledState = is_enabled(FeatureName, Blocking),
    if
        EnabledState =:= state_changing -> state_changing;
        true                            -> not EnabledState
    end.

-spec info() -> ok.
%% @doc
%% Displays a table on stdout summing up the supported feature flags,
%% their state and various pieces of information about them.
%%
%% This is {@link info/1} called with an empty options map.

info() ->
    info(#{}).

-spec info(#{color => boolean(),
             lines => boolean(),
             verbose => non_neg_integer()}) -> ok.
%% @doc
%% Displays a table on stdout summing up the supported feature flags,
%% their state and various pieces of information about them.
%%
%% Supported options are:
%% <ul>
%% <li>`color': a boolean to indicate if colors should be used to
%%   highlight some elements.</li>
%% <li>`lines': a boolean to indicate if table borders should be drawn
%%   using ASCII lines instead of regular characters.</li>
%% <li>`verbose': a non-negative integer to specify the level of
%%   verbosity.</li>
%% </ul>
%%
%% @param Options A map of various options to tune the displayed table.

info(Options) when is_map(Options) ->
    %% The rendering itself is delegated to `rabbit_ff_extra'.
    rabbit_ff_extra:info(Options).
-spec get_state(feature_name()) -> enabled | disabled | unavailable.
%% @doc
%% Returns the state of a feature flag.
%%
%% The possible states are:
%% <ul>
%% <li>`enabled': the feature flag is enabled.</li>
%% <li>`disabled': the feature flag is supported by all nodes in the
%%   cluster but currently disabled.</li>
%% <li>`unavailable': the feature flag is unsupported by at least one
%%   node in the cluster and can not be enabled for now.</li>
%% </ul>
%%
%% @param FeatureName The name of the feature flag to check.
%% @returns `enabled', `disabled' or `unavailable'.

get_state(FeatureName) when is_atom(FeatureName) ->
    case is_enabled(FeatureName) of
        true ->
            enabled;
        false ->
            %% Cluster-wide support only matters for a flag which is not
            %% enabled, so the (possibly remote) query is only made here
            %% instead of unconditionally.
            case is_supported(FeatureName) of
                true  -> disabled;
                false -> unavailable
            end
    end.

-spec get_stability(feature_name() | feature_props_extended()) ->
    stability() | undefined.
%% @doc
%% Returns the stability of a feature flag.
%%
%% The possible stability levels are:
%% <ul>
%% <li>`stable': the feature flag is stable and will not change in future
%%   releases: it can be enabled in production.</li>
%% <li>`experimental': the feature flag is experimental and may change in
%%   the future (without a guaranteed upgrade path): enabling it in
%%   production is not recommended.</li>
%% </ul>
%%
%% When given a properties map which has no `stability' key, `stable' is
%% assumed. When given a feature flag name the registry does not know,
%% `undefined' is returned.
%%
%% @param FeatureName The name of the feature flag to check, or its
%%   properties map.
%% @returns `stable' or `experimental', or `undefined' if the named
%%   feature flag is unknown.

get_stability(FeatureName) when is_atom(FeatureName) ->
    case rabbit_ff_registry:get(FeatureName) of
        undefined    -> undefined;
        FeatureProps -> get_stability(FeatureProps)
    end;
get_stability(FeatureProps) when is_map(FeatureProps) ->
    maps:get(stability, FeatureProps, stable).
%% -------------------------------------------------------------------
%% Feature flags registry.
%% -------------------------------------------------------------------

-spec init() -> ok | no_return().
%% @private

init() ->
    %% Make sure the `feature_flags` file exists once RabbitMQ was
    %% started at least once. This module does not need it (it works
    %% fine if the file is missing), but external tools benefit from it.
    _ = ensure_enabled_feature_flags_list_file_exists(),

    %% Listing the supported feature flags triggers the first
    %% initialization of the registry; the returned list itself is of no
    %% interest here.
    _ = list(all),
    ok.

-spec initialize_registry() -> ok | {error, any()} | no_return().
%% @private
%% @doc
%% Initializes or reinitializes the registry.
%%
%% The registry is an Erlang module recompiled at runtime to hold the
%% state of all supported feature flags.
%%
%% That Erlang module is called {@link rabbit_ff_registry}. The initial
%% source code of this module simply calls this function so it is
%% replaced by a proper registry.
%%
%% Once replaced, the registry contains the map of all supported feature
%% flags and their state. This makes it very efficient to query a
%% feature flag state or property.
%%
%% The registry is local to all RabbitMQ nodes.

initialize_registry() ->
    %% No additional feature flags to declare.
    initialize_registry(#{}).

-spec initialize_registry(feature_flags()) ->
    ok | {error, any()} | no_return().
%% @private
%% @doc
%% Initializes or reinitializes the registry.
%%
%% See {@link initialize_registry/0} for a description of the registry.
%%
%% This function takes a map of new supported feature flags (so their
%% name and extended properties) to add to the existing known feature
%% flags.
initialize_registry(NewSupportedFeatureFlags) ->
    %% Two pieces of information are needed before the registry can be
    %% (re)generated:
    %%
    %%   1. The feature flag states. On first initialization, they are
    %%      derived from the list read from disk (the `feature_flags`
    %%      file). Otherwise the existing registry is queried before it
    %%      is replaced.
    %%
    %%   2. Whether the feature flags state was correctly written to
    %%      disk. Currently this information is not used, but in the
    %%      future, we might want to retry the write if it failed so far.
    %%
    %% TODO: Retry to write the feature flags state if the first try
    %% failed.
    {FeatureStates, WrittenToDisk} =
        case rabbit_ff_registry:is_registry_initialized() of
            true ->
                {rabbit_ff_registry:states(),
                 rabbit_ff_registry:is_registry_written_to_disk()};
            false ->
                EnabledOnDisk = read_enabled_feature_flags_list(),
                {list_of_enabled_feature_flags_to_feature_states(
                   EnabledOnDisk),
                 true}
        end,
    initialize_registry(NewSupportedFeatureFlags,
                        FeatureStates,
                        WrittenToDisk).

-spec list_of_enabled_feature_flags_to_feature_states([feature_name()]) ->
    feature_states().
%% Builds a states map in which every given feature flag is enabled
%% (`true').

list_of_enabled_feature_flags_to_feature_states(FeatureNames) ->
    EnabledPairs = [{Name, true} || Name <- FeatureNames],
    maps:from_list(EnabledPairs).

-spec initialize_registry(feature_flags(),
                          feature_states(),
                          boolean()) ->
    ok | {error, any()} | no_return().
%% @private
%% @doc
%% Initializes or reinitializes the registry.
%%
%% See {@link initialize_registry/0} for a description of the registry.
%%
%% This function takes a map of new supported feature flags (so their
%% name and extended properties) to add to the existing known feature
%% flags, a map of the new feature flag states (whether they are
%% enabled, disabled or `state_changing'), and a flag to indicate if the
%% feature flag states were recorded to disk.
%%
%% The latter is used to block callers asking if a feature flag is
%% enabled or disabled while its state is changing.

initialize_registry(NewSupportedFeatureFlags,
                    NewFeatureStates,
                    WrittenToDisk) ->
    Ret = maybe_initialize_registry(NewSupportedFeatureFlags,
                                    NewFeatureStates,
                                    WrittenToDisk),
    case Ret of
        ok      -> ok;
        %% The computation must be started over with the same arguments.
        restart -> initialize_registry(NewSupportedFeatureFlags,
                                       NewFeatureStates,
                                       WrittenToDisk);
        Error   -> Error
    end.

-spec maybe_initialize_registry(feature_flags(),
                                feature_states(),
                                boolean()) ->
    ok | restart | {error, any()} | no_return().
%% Computes the new content of the registry (known feature flags and
%% their states) and regenerates the registry only if that content
%% differs from the current one.

maybe_initialize_registry(NewSupportedFeatureFlags,
                          NewFeatureStates,
                          WrittenToDisk) ->
    %% We save the version of the current registry before computing
    %% the new one. This is used when we do the actual reload: if the
    %% current registry was reloaded in the meantime, we need to restart
    %% the computation to make sure we don't lose data.
    RegistryVsn = registry_vsn(),

    %% We take the feature flags already registered.
    RegistryInitialized = rabbit_ff_registry:is_registry_initialized(),
    KnownFeatureFlags1 = case RegistryInitialized of
                             true  -> rabbit_ff_registry:list(all);
                             false -> #{}
                         end,

    %% Query the list (it's a map to be exact) of known
    %% supported feature flags. That list comes from the
    %% `-rabbit_feature_flag().` module attributes exposed by all
    %% currently loaded Erlang modules.
    KnownFeatureFlags2 = query_supported_feature_flags(),

    %% We merge the feature flags we already knew about
    %% (KnownFeatureFlags1), those found in the loaded applications
    %% (KnownFeatureFlags2) and those specified in arguments
    %% (NewSupportedFeatureFlags). The latter come from remote nodes
    %% usually: for example, they can come from plugins loaded on remote
    %% node but the plugins are missing locally. In this case, we
    %% consider those feature flags supported because there is no code
    %% locally which would cause issues.
    %%
    %% It means that the list of feature flags only grows. We don't try
    %% to clean it at some point because we want to remember about the
    %% feature flags we saw (and their state). It should be fine because
    %% that list should remain small.
    KnownFeatureFlags = maps:merge(KnownFeatureFlags1,
                                   KnownFeatureFlags2),
    AllFeatureFlags = maps:merge(KnownFeatureFlags,
                                 NewSupportedFeatureFlags),

    %% Next we want to update the feature states, based on the new
    %% states passed as arguments.
    FeatureStates0 = case RegistryInitialized of
                         true ->
                             maps:merge(rabbit_ff_registry:states(),
                                        NewFeatureStates);
                         false ->
                             NewFeatureStates
                     end,
    %% Only enabled and `state_changing' entries are kept; explicitly
    %% disabled (`false') entries are dropped from the map.
    FeatureStates = maps:filter(
                      fun(_, true) -> true;
                         (_, state_changing) -> true;
                         (_, false) -> false
                      end, FeatureStates0),

    Proceed = does_registry_need_refresh(AllFeatureFlags,
                                         FeatureStates,
                                         WrittenToDisk),

    case Proceed of
        true ->
            rabbit_log_feature_flags:debug(
              "Feature flags: (re)initialize registry (~p)",
              [self()]),
            %% The duration is measured with the monotonic clock: unlike
            %% wall-clock time, it cannot jump if the system time is
            %% adjusted in the middle of the measurement.
            T0 = erlang:monotonic_time(microsecond),
            Ret = do_initialize_registry(RegistryVsn,
                                         AllFeatureFlags,
                                         FeatureStates,
                                         WrittenToDisk),
            T1 = erlang:monotonic_time(microsecond),
            rabbit_log_feature_flags:debug(
              "Feature flags: time to regen registry: ~p µs",
              [T1 - T0]),
            Ret;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flags: registry already up-to-date, skipping init"),
            ok
    end.

-spec does_registry_need_refresh(feature_flags(),
                                 feature_states(),
                                 boolean()) ->
    boolean().
%% Returns `true' if the registry is not initialized yet, or if the
%% given feature flags, feature states or "written to disk" flag differ
%% from what the current registry holds. Returns `false' otherwise.

does_registry_need_refresh(AllFeatureFlags,
                           FeatureStates,
                           WrittenToDisk) ->
    case rabbit_ff_registry:is_registry_initialized() of
        true ->
            %% Before proceeding with the actual
            %% (re)initialization, let's see if there are any
            %% changes.
            CurrentAllFeatureFlags = rabbit_ff_registry:list(all),
            CurrentFeatureStates = rabbit_ff_registry:states(),
            CurrentWrittenToDisk =
            rabbit_ff_registry:is_registry_written_to_disk(),

            if
                AllFeatureFlags =/= CurrentAllFeatureFlags ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: registry refresh needed: "
                      "yes, list of feature flags differs"),
                    true;
                FeatureStates =/= CurrentFeatureStates ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: registry refresh needed: "
                      "yes, feature flag states differ"),
                    true;
                WrittenToDisk =/= CurrentWrittenToDisk ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: registry refresh needed: "
                      "yes, \"written to disk\" state changed"),
                    true;
                true ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: registry refresh needed: no"),
                    false
            end;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flags: registry refresh needed: "
              "yes, first-time initialization"),
            true
    end.

-spec do_initialize_registry(registry_vsn(),
                             feature_flags(),
                             feature_states(),
                             boolean()) ->
    ok | restart | {error, any()} | no_return().
%% @private

do_initialize_registry(RegistryVsn,
                       AllFeatureFlags,
                       FeatureStates,
                       WrittenToDisk) ->
    %% We log the state of those feature flags.
982 rabbit_log_feature_flags:info( 983 "Feature flags: list of feature flags found:"), 984 lists:foreach( 985 fun(FeatureName) -> 986 rabbit_log_feature_flags:info( 987 "Feature flags: [~s] ~s", 988 [case maps:is_key(FeatureName, FeatureStates) of 989 true -> 990 case maps:get(FeatureName, FeatureStates) of 991 true -> "x"; 992 state_changing -> "~" 993 end; 994 false -> 995 " " 996 end, 997 FeatureName]) 998 end, lists:sort(maps:keys(AllFeatureFlags))), 999 rabbit_log_feature_flags:info( 1000 "Feature flags: feature flag states written to disk: ~s", 1001 [case WrittenToDisk of 1002 true -> "yes"; 1003 false -> "no" 1004 end]), 1005 1006 %% We request the registry to be regenerated and reloaded with the 1007 %% new state. 1008 regen_registry_mod(RegistryVsn, 1009 AllFeatureFlags, 1010 FeatureStates, 1011 WrittenToDisk). 1012 1013-spec query_supported_feature_flags() -> feature_flags(). 1014%% @private 1015 1016-ifdef(TEST). 1017-define(PT_TESTSUITE_ATTRS, {?MODULE, testsuite_feature_flags_attrs}). 1018 1019inject_test_feature_flags(AttributesFromTestsuite) -> 1020 rabbit_log_feature_flags:debug( 1021 "Feature flags: injecting feature flags from testsuite: ~p", 1022 [AttributesFromTestsuite]), 1023 ok = persistent_term:put(?PT_TESTSUITE_ATTRS, AttributesFromTestsuite), 1024 initialize_registry(). 1025 1026module_attributes_from_testsuite() -> 1027 persistent_term:get(?PT_TESTSUITE_ATTRS, []). 

%% TEST variant: returns the feature flags declared by loaded
%% applications plus those injected by the testsuite.
query_supported_feature_flags() ->
    rabbit_log_feature_flags:debug(
      "Feature flags: query feature flags in loaded applications "
      "+ testsuite"),
    %% The duration is measured with the monotonic clock: the wall
    %% clock may jump and report a bogus duration.
    T0 = erlang:monotonic_time(microsecond),
    AttributesPerApp = rabbit_misc:rabbitmq_related_module_attributes(
                         rabbit_feature_flag),
    AttributesFromTestsuite = module_attributes_from_testsuite(),
    T1 = erlang:monotonic_time(microsecond),
    rabbit_log_feature_flags:debug(
      "Feature flags: time to find supported feature flags: ~p µs",
      [T1 - T0]),
    AllAttributes = AttributesPerApp ++ AttributesFromTestsuite,
    prepare_queried_feature_flags(AllAttributes, #{}).
-else.
%% Returns the feature flags declared through the `rabbit_feature_flag'
%% module attribute in loaded applications.
query_supported_feature_flags() ->
    rabbit_log_feature_flags:debug(
      "Feature flags: query feature flags in loaded applications"),
    %% The duration is measured with the monotonic clock: the wall
    %% clock may jump and report a bogus duration.
    T0 = erlang:monotonic_time(microsecond),
    AttributesPerApp = rabbit_misc:rabbitmq_related_module_attributes(
                         rabbit_feature_flag),
    T1 = erlang:monotonic_time(microsecond),
    rabbit_log_feature_flags:debug(
      "Feature flags: time to find supported feature flags: ~p µs",
      [T1 - T0]),
    prepare_queried_feature_flags(AttributesPerApp, #{}).
-endif.

%% Folds a list of `{App, Module, Attributes}' tuples into a map of
%% feature flags. Each attribute is a `{FeatureName, FeatureProps}'
%% pair which is merged into the accumulator with
%% merge_new_feature_flags/4.
prepare_queried_feature_flags([{App, _Module, Attributes} | Rest],
                              AllFeatureFlags) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: application `~s` has ~b feature flags",
      [App, length(Attributes)]),
    AllFeatureFlags1 = lists:foldl(
                         fun({FeatureName, FeatureProps}, AllFF) ->
                                 merge_new_feature_flags(AllFF,
                                                         App,
                                                         FeatureName,
                                                         FeatureProps)
                         end, AllFeatureFlags, Attributes),
    prepare_queried_feature_flags(Rest, AllFeatureFlags1);
prepare_queried_feature_flags([], AllFeatureFlags) ->
    AllFeatureFlags.

-spec merge_new_feature_flags(feature_flags(),
                              atom(),
                              feature_name(),
                              feature_props()) -> feature_flags().
%% @private

merge_new_feature_flags(AllFeatureFlags, App, FeatureName, FeatureProps)
  when is_atom(FeatureName) andalso is_map(FeatureProps) ->
    %% We expand the feature flag properties map with:
    %%   - the name of the application providing it: only informational
    %%     for now, but can be handy to understand that a feature flag
    %%     comes from a plugin.
    FeatureProps1 = maps:put(provided_by, App, FeatureProps),
    maps:merge(AllFeatureFlags,
               #{FeatureName => FeatureProps1}).

-spec regen_registry_mod(registry_vsn(),
                         feature_flags(),
                         feature_states(),
                         boolean()) ->
    ok | restart | {error, any()} | no_return().
%% @private

regen_registry_mod(RegistryVsn,
                   AllFeatureFlags,
                   FeatureStates,
                   WrittenToDisk) ->
    %% Here, we recreate the source code of the `rabbit_ff_registry`
    %% module from scratch.
    %%
    %% IMPORTANT: We want both modules to have the exact same public
    %% API in order to simplify the life of developers and their tools
    %% (Dialyzer, completion, and so on).

    %% -module(rabbit_ff_registry).
    ModuleAttr = erl_syntax:attribute(
                   erl_syntax:atom(module),
                   [erl_syntax:atom(rabbit_ff_registry)]),
    ModuleForm = erl_syntax:revert(ModuleAttr),
    %% -export([...]).
    ExportAttr = erl_syntax:attribute(
                   erl_syntax:atom(export),
                   [erl_syntax:list(
                      [erl_syntax:arity_qualifier(
                         erl_syntax:atom(F),
                         erl_syntax:integer(A))
                       || {F, A} <- [{get, 1},
                                     {list, 1},
                                     {states, 0},
                                     {is_supported, 1},
                                     {is_enabled, 1},
                                     {is_registry_initialized, 0},
                                     {is_registry_written_to_disk, 0}]]
                     )
                   ]
                  ),
    ExportForm = erl_syntax:revert(ExportAttr),
    %% get(_) -> ...
    GetClauses = [erl_syntax:clause(
                    [erl_syntax:atom(FeatureName)],
                    [],
                    [erl_syntax:abstract(maps:get(FeatureName,
                                                  AllFeatureFlags))])
                  || FeatureName <- maps:keys(AllFeatureFlags)
                 ],
    GetUnknownClause = erl_syntax:clause(
                         [erl_syntax:variable("_")],
                         [],
                         [erl_syntax:atom(undefined)]),
    GetFun = erl_syntax:function(
               erl_syntax:atom(get),
               GetClauses ++ [GetUnknownClause]),
    GetFunForm = erl_syntax:revert(GetFun),
    %% list(_) -> ...
    ListAllBody = erl_syntax:abstract(AllFeatureFlags),
    ListAllClause = erl_syntax:clause([erl_syntax:atom(all)],
                                      [],
                                      [ListAllBody]),
    %% A feature flag missing from the states map is disabled, hence
    %% the `false' default used in the lookups below.
    EnabledFeatureFlags = maps:filter(
                            fun(FeatureName, _) ->
                                    maps:get(FeatureName,
                                             FeatureStates,
                                             false) =:= true
                            end, AllFeatureFlags),
    ListEnabledBody = erl_syntax:abstract(EnabledFeatureFlags),
    ListEnabledClause = erl_syntax:clause(
                          [erl_syntax:atom(enabled)],
                          [],
                          [ListEnabledBody]),
    DisabledFeatureFlags = maps:filter(
                             fun(FeatureName, _) ->
                                     not maps:is_key(FeatureName,
                                                     FeatureStates)
                             end, AllFeatureFlags),
    ListDisabledBody = erl_syntax:abstract(DisabledFeatureFlags),
    ListDisabledClause = erl_syntax:clause(
                           [erl_syntax:atom(disabled)],
                           [],
                           [ListDisabledBody]),
    StateChangingFeatureFlags = maps:filter(
                                  fun(FeatureName, _) ->
                                          maps:get(FeatureName,
                                                   FeatureStates,
                                                   false)
                                          =:=
                                          state_changing
                                  end, AllFeatureFlags),
    ListStateChangingBody = erl_syntax:abstract(StateChangingFeatureFlags),
    ListStateChangingClause = erl_syntax:clause(
                                [erl_syntax:atom(state_changing)],
                                [],
                                [ListStateChangingBody]),
    ListFun = erl_syntax:function(
                erl_syntax:atom(list),
                [ListAllClause,
                 ListEnabledClause,
                 ListDisabledClause,
                 ListStateChangingClause]),
    ListFunForm = erl_syntax:revert(ListFun),
    %% states() -> ...
    StatesBody = erl_syntax:abstract(FeatureStates),
    StatesClause = erl_syntax:clause([], [], [StatesBody]),
    StatesFun = erl_syntax:function(
                  erl_syntax:atom(states),
                  [StatesClause]),
    StatesFunForm = erl_syntax:revert(StatesFun),
    %% is_supported(_) -> ...
    IsSupportedClauses = [erl_syntax:clause(
                            [erl_syntax:atom(FeatureName)],
                            [],
                            [erl_syntax:atom(true)])
                          || FeatureName <- maps:keys(AllFeatureFlags)
                         ],
    NotSupportedClause = erl_syntax:clause(
                           [erl_syntax:variable("_")],
                           [],
                           [erl_syntax:atom(false)]),
    IsSupportedFun = erl_syntax:function(
                       erl_syntax:atom(is_supported),
                       IsSupportedClauses ++ [NotSupportedClause]),
    IsSupportedFunForm = erl_syntax:revert(IsSupportedFun),
    %% is_enabled(_) -> ...
    IsEnabledClauses = [erl_syntax:clause(
                          [erl_syntax:atom(FeatureName)],
                          [],
                          [erl_syntax:atom(
                             maps:get(FeatureName, FeatureStates, false))])
                        || FeatureName <- maps:keys(AllFeatureFlags)
                       ],
    NotEnabledClause = erl_syntax:clause(
                         [erl_syntax:variable("_")],
                         [],
                         [erl_syntax:atom(false)]),
    IsEnabledFun = erl_syntax:function(
                     erl_syntax:atom(is_enabled),
                     IsEnabledClauses ++ [NotEnabledClause]),
    IsEnabledFunForm = erl_syntax:revert(IsEnabledFun),
    %% is_registry_initialized() -> ...
    IsInitializedClauses = [erl_syntax:clause(
                              [],
                              [],
                              [erl_syntax:atom(true)])
                           ],
    IsInitializedFun = erl_syntax:function(
                         erl_syntax:atom(is_registry_initialized),
                         IsInitializedClauses),
    IsInitializedFunForm = erl_syntax:revert(IsInitializedFun),
    %% is_registry_written_to_disk() -> ...
    IsWrittenToDiskClauses = [erl_syntax:clause(
                                [],
                                [],
                                [erl_syntax:atom(WrittenToDisk)])
                             ],
    IsWrittenToDiskFun = erl_syntax:function(
                           erl_syntax:atom(is_registry_written_to_disk),
                           IsWrittenToDiskClauses),
    IsWrittenToDiskFunForm = erl_syntax:revert(IsWrittenToDiskFun),
    %% Compilation!
    Forms = [ModuleForm,
             ExportForm,
             GetFunForm,
             ListFunForm,
             StatesFunForm,
             IsSupportedFunForm,
             IsEnabledFunForm,
             IsInitializedFunForm,
             IsWrittenToDiskFunForm],
    maybe_log_registry_source_code(Forms),
    CompileOpts = [return_errors,
                   return_warnings],
    case compile:forms(Forms, CompileOpts) of
        {ok, Mod, Bin, _} ->
            load_registry_mod(RegistryVsn, Mod, Bin);
        {error, Errors, Warnings} ->
            rabbit_log_feature_flags:error(
              "Feature flags: registry compilation:~n"
              "Errors: ~p~n"
              "Warnings: ~p",
              [Errors, Warnings]),
            {error, {compilation_failure, Errors, Warnings}}
    end.

%% Logs the pretty-printed source of the generated registry, but only
%% if the prelaunch context has `log_feature_flags_registry' set to
%% `true'.
maybe_log_registry_source_code(Forms) ->
    case rabbit_prelaunch:get_context() of
        #{log_feature_flags_registry := true} ->
            rabbit_log_feature_flags:debug(
              "== FEATURE FLAGS REGISTRY ==~n"
              "~s~n"
              "== END ==~n",
              [erl_prettypr:format(erl_syntax:form_list(Forms))]);
        _ ->
            ok
    end.

-ifdef(TEST).
registry_loading_lock() -> ?FF_REGISTRY_LOADING_LOCK.
-endif.

-spec load_registry_mod(registry_vsn(), atom(), binary()) ->
    ok | restart | no_return().
%% @private

load_registry_mod(RegistryVsn, Mod, Bin) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: registry module ready, loading it (~p)...",
      [self()]),
    FakeFilename = "Compiled and loaded by " ?MODULE_STRING,
    %% Time to load the new registry, replacing the old one. We use a
    %% lock here to synchronize concurrent reloads.
    global:set_lock(?FF_REGISTRY_LOADING_LOCK, [node()]),
    %% The lock is released in the `after' section so that an exception
    %% raised while purging or loading the module cannot leave it held
    %% by a process which survives that exception.
    Ret = try
              rabbit_log_feature_flags:debug(
                "Feature flags: acquired lock before reloading registry "
                "module (~p)",
                [self()]),
              %% We want to make sure that the old registry (not the
              %% one being currently in use) is purged by the code
              %% server. It means no process lingers on that old code.
              %%
              %% We use code:soft_purge() for that (meaning no process
              %% is killed) and we wait in an infinite loop for that to
              %% succeed.
              ok = purge_old_registry(Mod),
              %% Now we can replace the currently loaded registry by
              %% the new one. The code server takes care of marking the
              %% current registry as old and load the new module in an
              %% atomic operation.
              %%
              %% Therefore there is no chance of a window where there
              %% is no registry module available, causing the one on
              %% disk to be reloaded.
              LoadRet = case registry_vsn() of
                            RegistryVsn ->
                                code:load_binary(Mod, FakeFilename, Bin);
                            OtherVsn ->
                                {error, {restart, RegistryVsn, OtherVsn}}
                        end,
              rabbit_log_feature_flags:debug(
                "Feature flags: releasing lock after reloading registry "
                "module (~p)",
                [self()]),
              LoadRet
          after
              global:del_lock(?FF_REGISTRY_LOADING_LOCK, [node()])
          end,
    case Ret of
        {module, _} ->
            rabbit_log_feature_flags:debug(
              "Feature flags: registry module loaded (vsn: ~p -> ~p)",
              [RegistryVsn, registry_vsn()]),
            ok;
        {error, {restart, Expected, Current}} ->
            rabbit_log_feature_flags:error(
              "Feature flags: another registry module was loaded in the "
              "meantime (expected old vsn: ~p, current vsn: ~p); "
              "restarting the regen",
              [Expected, Current]),
            restart;
        {error, Reason} ->
            rabbit_log_feature_flags:error(
              "Feature flags: failed to load registry module: ~p",
              [Reason]),
            throw({feature_flag_registry_reload_failure, Reason})
    end.

-spec registry_vsn() -> registry_vsn().
%% @private

%% Returns the `vsn' attribute of the currently loaded registry module,
%% or `undefined' if it has none.
registry_vsn() ->
    Attrs = rabbit_ff_registry:module_info(attributes),
    proplists:get_value(vsn, Attrs, undefined).

%% Purges the old code of `Mod', if `Mod' is loaded at all.
purge_old_registry(Mod) ->
    case code:is_loaded(Mod) of
        {file, _} -> do_purge_old_registry(Mod);
        false     -> ok
    end.

%% Retries code:soft_purge/1 until it succeeds. A soft purge fails as
%% long as a process still runs the old code, so this loops until
%% no process uses it anymore.
do_purge_old_registry(Mod) ->
    case code:soft_purge(Mod) of
        true  -> ok;
        false -> do_purge_old_registry(Mod)
    end.

%% -------------------------------------------------------------------
%% Feature flags state storage.
%% -------------------------------------------------------------------

-spec ensure_enabled_feature_flags_list_file_exists() -> ok | {error, any()}.
%% @private

ensure_enabled_feature_flags_list_file_exists() ->
    File = enabled_feature_flags_list_file(),
    case filelib:is_regular(File) of
        true  -> ok;
        false -> write_enabled_feature_flags_list([])
    end.

-spec read_enabled_feature_flags_list() ->
    [feature_name()] | no_return().
%% @private

read_enabled_feature_flags_list() ->
    case try_to_read_enabled_feature_flags_list() of
        {error, Reason} ->
            File = enabled_feature_flags_list_file(),
            throw({feature_flags_file_read_error, File, Reason});
        Ret ->
            Ret
    end.

-spec try_to_read_enabled_feature_flags_list() ->
    [feature_name()] | {error, any()}.
%% @private

try_to_read_enabled_feature_flags_list() ->
    File = enabled_feature_flags_list_file(),
    case file:consult(File) of
        {ok, [List]} when is_list(List) ->
            List;
        {ok, Terms} ->
            %% The file was parsed but it does not hold exactly one
            %% list (it is empty, holds several terms or holds
            %% something which is not a list). This is reported as an
            %% error instead of crashing with a `case_clause'.
            rabbit_log_feature_flags:error(
              "Feature flags: unexpected content in the `feature_flags` "
              "file at `~s`: expected a single list of feature flag "
              "names, got: ~p",
              [File, Terms]),
            {error, {unexpected_content, Terms}};
        {error, enoent} ->
            %% If the file is missing, we consider the list of enabled
            %% feature flags to be empty.
            [];
        {error, Reason} = Error ->
            rabbit_log_feature_flags:error(
              "Feature flags: failed to read the `feature_flags` "
              "file at `~s`: ~s",
              [File, file:format_error(Reason)]),
            Error
    end.

-spec write_enabled_feature_flags_list([feature_name()]) ->
    ok | no_return().
%% @private

write_enabled_feature_flags_list(FeatureNames) ->
    case try_to_write_enabled_feature_flags_list(FeatureNames) of
        {error, Reason} ->
            File = enabled_feature_flags_list_file(),
            throw({feature_flags_file_write_error, File, Reason});
        Ret ->
            Ret
    end.

-spec try_to_write_enabled_feature_flags_list([feature_name()]) ->
    ok | {error, any()}.
%% @private

try_to_write_enabled_feature_flags_list(FeatureNames) ->
    %% Before writing the new file, we read the existing one. If there
    %% are unknown feature flags in that file, we want to keep their
    %% state, even though they are unsupported at this time. It could be
    %% that a plugin was disabled in the meantime.
    %%
    %% FIXME: Lock this code to fix concurrent read/modify/write.
    PreviouslyEnabled = case try_to_read_enabled_feature_flags_list() of
                            {error, _} -> [];
                            List       -> List
                        end,
    UnsupportedButEnabled = [Name
                             || Name <- PreviouslyEnabled,
                                not is_supported_locally(Name)],
    %% lists:usort/1 sorts and removes duplicates, so a name present
    %% several times in the input is written only once.
    FeatureNames1 = lists:usort(UnsupportedButEnabled ++ FeatureNames),

    File = enabled_feature_flags_list_file(),
    Content = io_lib:format("~p.~n", [FeatureNames1]),
    %% TODO: If we fail to write the file, we should spawn a process
    %% to retry the operation.
    case file:write_file(File, Content) of
        ok ->
            ok;
        {error, Reason} = Error ->
            rabbit_log_feature_flags:error(
              "Feature flags: failed to write the `feature_flags` "
              "file at `~s`: ~s",
              [File, file:format_error(Reason)]),
            Error
    end.

-spec enabled_feature_flags_list_file() -> file:filename().
%% @doc
%% Returns the path to the file where the state of feature flags is stored.
%%
%% @returns the path to the file.

enabled_feature_flags_list_file() ->
    case application:get_env(rabbit, feature_flags_file) of
        {ok, Val} -> Val;
        undefined -> throw(feature_flags_file_not_set)
    end.

%% -------------------------------------------------------------------
%% Feature flags management: enabling.
%% -------------------------------------------------------------------

-spec do_enable(feature_name()) -> ok | {error, any()} | no_return().
%% @private

do_enable(FeatureName) ->
    %% We mark this feature flag as "state changing" before doing the
    %% actual state change. We also take a global lock: this permits
    %% to block callers asking about a feature flag changing state.
    global:set_lock(?FF_STATE_CHANGE_LOCK),
    %% The lock is released in the `after' section: if marking the
    %% feature flag throws, a process surviving that exception must
    %% not keep holding the lock.
    try
        Ret = case mark_as_enabled(FeatureName, state_changing) of
                  ok ->
                      case enable_dependencies(FeatureName, true) of
                          ok ->
                              case run_migration_fun(FeatureName, enable) of
                                  ok ->
                                      mark_as_enabled(FeatureName, true);
                                  {error, no_migration_fun} ->
                                      mark_as_enabled(FeatureName, true);
                                  Error ->
                                      Error
                              end;
                          Error ->
                              Error
                      end;
                  Error ->
                      Error
              end,
        %% On failure, the feature flag is marked as disabled again so
        %% it does not stay in the "state changing" state.
        case Ret of
            ok -> ok;
            _  -> mark_as_enabled(FeatureName, false)
        end,
        Ret
    after
        global:del_lock(?FF_STATE_CHANGE_LOCK)
    end.

-spec enable_locally(feature_name()) -> ok | {error, any()} | no_return().
%% @private

enable_locally(FeatureName) when is_atom(FeatureName) ->
    case is_enabled(FeatureName) of
        true ->
            ok;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flag `~s`: enable locally (as part of feature "
              "flag states synchronization)",
              [FeatureName]),
            do_enable_locally(FeatureName)
    end.

-spec do_enable_locally(feature_name()) -> ok | {error, any()} | no_return().
%% @private

%% Enables the dependencies of the feature flag locally, runs its
%% migration function, then marks it as enabled on this node only. A
%% missing migration function is not an error.
do_enable_locally(FeatureName) ->
    DepsRet = enable_dependencies(FeatureName, false),
    case DepsRet of
        ok ->
            MigrationRet = run_migration_fun(FeatureName, enable),
            case MigrationRet of
                Done when Done =:= ok;
                          Done =:= {error, no_migration_fun} ->
                    mark_as_enabled_locally(FeatureName, true);
                MigrationError ->
                    MigrationError
            end;
        DepsError ->
            DepsError
    end.

-spec enable_dependencies(feature_name(), boolean()) ->
    ok | {error, any()} | no_return().
%% @private

%% Looks up the `depends_on' property of the feature flag in the
%% registry (no dependency if it is unset) and enables each of them.
enable_dependencies(FeatureName, Everywhere) ->
    Props = rabbit_ff_registry:get(FeatureName),
    Dependencies = maps:get(depends_on, Props, []),
    rabbit_log_feature_flags:debug(
      "Feature flag `~s`: enable dependencies: ~p",
      [FeatureName, Dependencies]),
    enable_dependencies(FeatureName, Dependencies, Everywhere).

-spec enable_dependencies(feature_name(), [feature_name()], boolean()) ->
    ok | {error, any()} | no_return().
%% @private

%% Enables the listed feature flags one after the other, stopping at
%% the first failure. `Everywhere' selects between enable/1 (`true')
%% and enable_locally/1 (`false').
enable_dependencies(_TopLevelFeatureName, [], _Everywhere) ->
    ok;
enable_dependencies(TopLevelFeatureName, [Dependency | Others], Everywhere) ->
    EnableRet = case Everywhere of
                    true  -> enable(Dependency);
                    false -> enable_locally(Dependency)
                end,
    case EnableRet of
        ok ->
            enable_dependencies(TopLevelFeatureName, Others, Everywhere);
        Failure ->
            Failure
    end.

-spec run_migration_fun(feature_name(), any()) ->
    any() | {error, any()}.
%% @private

%% Fetches the properties of the feature flag from the registry and
%% hands them to run_migration_fun/3.
run_migration_fun(FeatureName, Arg) ->
    Props = rabbit_ff_registry:get(FeatureName),
    run_migration_fun(FeatureName, Props, Arg).

%% Runs the migration function found under the `migration_fun' key of
%% the feature flag properties, if any. It is called with the feature
%% name, the feature properties and `Arg'. A crash of that function is
%% logged and converted to an error tuple.
run_migration_fun(FeatureName, FeatureProps, Arg) ->
    MigrationSpec = maps:get(migration_fun, FeatureProps, none),
    case MigrationSpec of
        none ->
            {error, no_migration_fun};
        {Mod, Fun} when is_atom(Mod), is_atom(Fun) ->
            rabbit_log_feature_flags:debug(
              "Feature flag `~s`: run migration function ~p with arg: ~p",
              [FeatureName, Fun, Arg]),
            try
                Mod:Fun(FeatureName, FeatureProps, Arg)
            catch
                _:Reason:Stacktrace ->
                    rabbit_log_feature_flags:error(
                      "Feature flag `~s`: migration function crashed: ~p~n~p",
                      [FeatureName, Reason, Stacktrace]),
                    {error, {migration_fun_crash, Reason, Stacktrace}}
            end;
        _ ->
            rabbit_log_feature_flags:error(
              "Feature flag `~s`: invalid migration function: ~p",
              [FeatureName, MigrationSpec]),
            {error, {invalid_migration_fun, MigrationSpec}}
    end.

-spec mark_as_enabled(feature_name(), feature_state()) ->
    any() | {error, any()} | no_return().
%% @private

%% Records the new state on this node first, then on the running remote
%% nodes if the local step returned `ok'.
mark_as_enabled(FeatureName, IsEnabled) ->
    LocalRet = mark_as_enabled_locally(FeatureName, IsEnabled),
    case LocalRet of
        ok -> mark_as_enabled_remotely(FeatureName, IsEnabled);
        _  -> LocalRet
    end.

-spec mark_as_enabled_locally(feature_name(), feature_state()) ->
    any() | {error, any()} | no_return().
%% @private

%% Records the new state of a feature flag on this node: the list of
%% enabled feature flags is written to disk when it changes, then the
%% registry is reinitialized with the new state.
mark_as_enabled_locally(FeatureName, IsEnabled) ->
    rabbit_log_feature_flags:info(
      "Feature flag `~s`: mark as enabled=~p",
      [FeatureName, IsEnabled]),
    CurrentlyEnabled = maps:keys(list(enabled)),
    NextEnabled = case IsEnabled of
                      state_changing -> CurrentlyEnabled;
                      true           -> [FeatureName | CurrentlyEnabled];
                      false          -> CurrentlyEnabled -- [FeatureName]
                  end,
    %% The file is only rewritten when the list of enabled feature
    %% flags changed. Otherwise, the "written to disk" flag of the
    %% current registry is carried over.
    WrittenToDisk =
        case NextEnabled =:= CurrentlyEnabled of
            true ->
                rabbit_ff_registry:is_registry_written_to_disk();
            false ->
                try_to_write_enabled_feature_flags_list(NextEnabled) =:= ok
        end,
    NewStates = #{FeatureName => IsEnabled},
    initialize_registry(#{}, NewStates, WrittenToDisk).

-spec mark_as_enabled_remotely(feature_name(), feature_state()) ->
    any() | {error, any()} | no_return().
%% @private

%% Records the new state of a feature flag on all running remote nodes,
%% using the default timeout.
mark_as_enabled_remotely(FeatureName, IsEnabled) ->
    mark_as_enabled_remotely(running_remote_nodes(),
                             FeatureName,
                             IsEnabled,
                             ?TIMEOUT).

-spec mark_as_enabled_remotely([node()],
                               feature_name(),
                               feature_state(),
                               timeout()) ->
    any() | {error, any()} | no_return().
%% @private

%% Calls mark_as_enabled_locally/2 on each given node through RPC.
%% Nodes which did not return `ok' are retried after a one second pause
%% with whatever remains of `Timeout'. Once the timeout is exhausted,
%% an exception is thrown. An `infinity' timeout is never exhausted.
mark_as_enabled_remotely([], _FeatureName, _IsEnabled, _Timeout) ->
    ok;
mark_as_enabled_remotely(Nodes, FeatureName, IsEnabled, Timeout) ->
    %% Elapsed time is measured with the monotonic clock: the wall
    %% clock may jump and corrupt the remaining timeout.
    T0 = erlang:monotonic_time(microsecond),
    Rets = [{Node, rpc:call(Node,
                            ?MODULE,
                            mark_as_enabled_locally,
                            [FeatureName, IsEnabled],
                            Timeout)}
            || Node <- Nodes],
    Failures = [{Node, Ret} || {Node, Ret} <- Rets, Ret =/= ok],
    case Failures of
        [] ->
            rabbit_log_feature_flags:debug(
              "Feature flags: `~s` successfully marked as enabled=~p on all "
              "nodes", [FeatureName, IsEnabled]),
            ok;
        _ ->
            rabbit_log_feature_flags:error(
              "Feature flags: failed to mark feature flag `~s` as enabled=~p "
              "on the following nodes:", [FeatureName, IsEnabled]),
            lists:foreach(
              fun({Node, Ret}) ->
                      rabbit_log_feature_flags:error(
                        "Feature flags: - ~s: ~p",
                        [Node, Ret])
              end, Failures),
            FailedNodes = [Node || {Node, _} <- Failures],
            Sleep = 1000,
            NewTimeout =
                case Timeout of
                    infinity ->
                        %% No arithmetic on `infinity': it would raise
                        %% `badarith'.
                        infinity;
                    _ ->
                        Duration = erlang:monotonic_time(microsecond) - T0,
                        (Timeout * 1000 - Duration) div 1000 - Sleep
                end,
            if
                %% `infinity > 0' holds because atoms sort after
                %% numbers in the Erlang term order.
                NewTimeout > 0 ->
                    %% `~p' is used for the timeout because it may be
                    %% the atom `infinity'.
                    rabbit_log_feature_flags:debug(
                      "Feature flags: retrying with a timeout of ~p "
                      "ms after sleeping for ~b ms",
                      [NewTimeout, Sleep]),
                    timer:sleep(Sleep),
                    mark_as_enabled_remotely(FailedNodes,
                                             FeatureName,
                                             IsEnabled,
                                             NewTimeout);
                true ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: not retrying; RPC went over the "
                      "~b milliseconds timeout", [Timeout]),
                    %% FIXME: Is crashing the process the best solution here?
                    throw(
                      {failed_to_mark_feature_flag_as_enabled_on_remote_nodes,
                       FeatureName, IsEnabled, FailedNodes})
            end
    end.

%% -------------------------------------------------------------------
%% Coordination with remote nodes.
%% -------------------------------------------------------------------

-spec remote_nodes() -> [node()].
%% @private

%% Returns the Mnesia `db_nodes', minus the local node.
remote_nodes() ->
    DbNodes = mnesia:system_info(db_nodes),
    DbNodes -- [node()].

-spec running_remote_nodes() -> [node()].
%% @private

%% Returns the Mnesia `running_db_nodes', minus the local node.
running_remote_nodes() ->
    RunningDbNodes = mnesia:system_info(running_db_nodes),
    RunningDbNodes -- [node()].

%% Asks `Node' for its Mnesia `running_db_nodes' through RPC and
%% returns them minus the local node, or the `{badrpc, _}' tuple if the
%% RPC failed.
query_running_remote_nodes(Node, Timeout) ->
    RpcRet = rpc:call(Node, mnesia, system_info, [running_db_nodes], Timeout),
    case RpcRet of
        {badrpc, _} -> RpcRet;
        _           -> RpcRet -- [node()]
    end.

-spec does_node_support(node(), [feature_name()], timeout()) -> boolean().
%% @private

%% Tells if `Node' supports the given feature flags. The local node is
%% queried directly, other nodes through RPC. Any error is treated as
%% "not supported".
does_node_support(Node, FeatureNames, Timeout) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: querying `~p` support on node ~s...",
      [FeatureNames, Node]),
    Answer = case Node =:= node() of
                 true ->
                     is_supported_locally(FeatureNames);
                 false ->
                     run_feature_flags_mod_on_remote_node(
                       Node, is_supported_locally, [FeatureNames], Timeout)
             end,
    case Answer of
        true ->
            rabbit_log_feature_flags:debug(
              "Feature flags: node `~s` supports `~p`",
              [Node, FeatureNames]),
            true;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flags: node `~s` does not support `~p`; "
              "stopping query here",
              [Node, FeatureNames]),
            false;
        {error, pre_feature_flags_rabbitmq} ->
            %% See run_feature_flags_mod_on_remote_node/4 for
            %% an explanation why we consider this node a 3.7.x
            %% pre-feature-flags node.
            rabbit_log_feature_flags:debug(
              "Feature flags: no feature flags support on node `~s`, "
              "consider the feature flags unsupported: ~p",
              [Node, FeatureNames]),
            false;
        {error, Reason} ->
            rabbit_log_feature_flags:error(
              "Feature flags: error while querying `~p` support on "
              "node ~s: ~p",
              [FeatureNames, Node, Reason]),
            false
    end.

-spec check_node_compatibility(node()) -> ok | {error, any()}.
%% @doc
%% Checks if a node is compatible with the local node.
%%
%% To be compatible, the following two conditions must be met:
%% <ol>
%% <li>feature flags enabled on the local node must be supported by the
%%   remote node</li>
%% <li>feature flags enabled on the remote node must be supported by the
%%   local node</li>
%% </ol>
%%
%% @param Node the name of the remote node to test.
%% @returns `ok' if they are compatible, `{error, Reason}' if they are not.

check_node_compatibility(Node) ->
    check_node_compatibility(Node, ?TIMEOUT).

-spec check_node_compatibility(node(), timeout()) -> ok | {error, any()}.
%% @doc
%% Checks if a node is compatible with the local node.
%%
%% See {@link check_node_compatibility/1} for the conditions required to
%% consider two nodes compatible.
%%
%% @param Node the name of the remote node to test.
%% @param Timeout Time in milliseconds after which the RPC gives up.
%% @returns `ok' if they are compatible, `{error, Reason}' if they are not.
%%
%% @see check_node_compatibility/1

check_node_compatibility(Node, Timeout) ->
    %% Feature flags coming from Erlang applications unknown to one of
    %% the two nodes are exchanged first: we fetch the remote feature
    %% flags of applications which are not loaded locally, and the
    %% other way around.
    %%
    %% Such feature flags cannot block the communication between the
    %% nodes because the code which would break is missing there, so
    %% they must not be taken into account by the check below.
    exchange_feature_flags_from_unknown_apps(Node, Timeout),

    %% FIXME:
    %% When we try to cluster two nodes, we get:
    %%   Feature flags: starting an unclustered node: all feature flags
    %%   will be enabled by default
    %% It should probably not be the case...

    %% The actual check happens in two steps, one per direction.
    rabbit_log_feature_flags:debug(
      "Feature flags: node `~s` compatibility check, part 1/2",
      [Node]),
    LocalFlagsSupported =
        local_enabled_feature_flags_is_supported_remotely(Node, Timeout),
    rabbit_log_feature_flags:debug(
      "Feature flags: node `~s` compatibility check, part 2/2",
      [Node]),
    RemoteFlagsSupported =
        remote_enabled_feature_flags_is_supported_locally(Node, Timeout),
    case {LocalFlagsSupported, RemoteFlagsSupported} of
        {false, _} ->
            rabbit_log_feature_flags:error(
              "Feature flags: node `~s` is INCOMPATIBLE: "
              "feature flags enabled locally are not supported remotely",
              [Node]),
            {error, incompatible_feature_flags};
        {true, true} ->
            rabbit_log_feature_flags:debug(
              "Feature flags: node `~s` is compatible",
              [Node]),
            ok;
        {_, false} ->
            rabbit_log_feature_flags:error(
              "Feature flags: node `~s` is INCOMPATIBLE: "
              "feature flags enabled remotely are not supported locally",
              [Node]),
            {error, incompatible_feature_flags}
    end.

-spec is_node_compatible(node()) -> boolean().
%% @doc
%% Returns if a node is compatible with the local node.
%%
%% This function calls {@link check_node_compatibility/2} and returns
%% `true' if the latter returns `ok'. Therefore this is the same code,
%% except that this function returns a boolean, but not the reason of
%% the incompatibility if any.
%%
%% @param Node the name of the remote node to test.
%% @returns `true' if they are compatible, `false' otherwise.

is_node_compatible(Node) ->
    is_node_compatible(Node, ?TIMEOUT).

-spec is_node_compatible(node(), timeout()) -> boolean().
%% @doc
%% Returns if a node is compatible with the local node.
%%
%% This function calls {@link check_node_compatibility/2} and returns
%% `true' the latter returns `ok'.
%% Therefore this is the same code,
%% except that this function returns a boolean, but not the reason
%% of the incompatibility if any. If the RPC times out, nodes are
%% considered incompatible.
%%
%% @param Node the name of the remote node to test.
%% @param Timeout Time in milliseconds after which the RPC gives up.
%% @returns `true' if they are compatible, `false' otherwise.

is_node_compatible(Node, Timeout) ->
    check_node_compatibility(Node, Timeout) =:= ok.

-spec local_enabled_feature_flags_is_supported_remotely(node(),
                                                        timeout()) ->
    boolean().
%% @private
%% Tells whether every feature flag enabled locally is supported by
%% `Node'.

local_enabled_feature_flags_is_supported_remotely(Node, Timeout) ->
    LocalEnabledFeatureNames = maps:keys(list(enabled)),
    is_supported_remotely([Node], LocalEnabledFeatureNames, Timeout).

-spec remote_enabled_feature_flags_is_supported_locally(node(),
                                                        timeout()) ->
    boolean().
%% @private
%% Tells whether every feature flag enabled on `Node' is supported
%% locally. A failure to query `Node' is reported as `false'.

remote_enabled_feature_flags_is_supported_locally(Node, Timeout) ->
    case query_remote_feature_flags(Node, enabled, Timeout) of
        {error, _} ->
            false;
        RemoteEnabledFeatureFlags when is_map(RemoteEnabledFeatureFlags) ->
            RemoteEnabledFeatureNames = maps:keys(RemoteEnabledFeatureFlags),
            is_supported_locally(RemoteEnabledFeatureNames)
    end.

-spec run_feature_flags_mod_on_remote_node(node(),
                                           atom(),
                                           [term()],
                                           timeout()) ->
    term() | {error, term()}.
%% @private
%% Calls `?MODULE:Function(Args...)' on `Node' through `rpc:call/5'.
%%
%% Returns `{error, pre_feature_flags_rabbitmq}' if the function is
%% undefined remotely, `{error, {badrpc, Reason}}' for any other RPC
%% failure, and the remote function's own return value otherwise.

run_feature_flags_mod_on_remote_node(Node, Function, Args, Timeout) ->
    case rpc:call(Node, ?MODULE, Function, Args, Timeout) of
        {badrpc, {'EXIT',
                  {undef,
                   [{?MODULE, Function, Args, []}
                    | _]}}} ->
            %% If rabbit_feature_flags:Function() is undefined
            %% on the remote node, we consider it to be a 3.7.x
            %% pre-feature-flags node.
            %%
            %% Theoretically, it could be an older version (3.6.x and
            %% older). But the RabbitMQ version consistency check
            %% (rabbit_misc:version_minor_equivalent/2) called from
            %% rabbit_mnesia:check_rabbit_consistency/2 already blocked
            %% this situation from happening before we reach this point.
            rabbit_log_feature_flags:debug(
              "Feature flags: ~s:~s~p unavailable on node `~s`: "
              "assuming it is a RabbitMQ 3.7.x pre-feature-flags node",
              [?MODULE, Function, Args, Node]),
            {error, pre_feature_flags_rabbitmq};
        {badrpc, Reason} = Error ->
            rabbit_log_feature_flags:error(
              "Feature flags: error while running ~s:~s~p "
              "on node `~s`: ~p",
              [?MODULE, Function, Args, Node, Reason]),
            %% The whole `{badrpc, Reason}' tuple is kept as the reason.
            {error, Error};
        Ret ->
            Ret
    end.

-spec query_remote_feature_flags(node(),
                                 Which :: all | enabled | disabled,
                                 timeout()) ->
    feature_flags() | {error, any()}.
%% @private
%% Asks `Node' for its `Which' feature flags by running `list/1'
%% remotely.
%%
%% A node without the feature flags subsystem yields an empty map.
%% Any other failure is returned as `{error, Reason}'.

query_remote_feature_flags(Node, Which, Timeout) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: querying ~s feature flags on node `~s`...",
      [Which, Node]),
    case run_feature_flags_mod_on_remote_node(Node, list, [Which], Timeout) of
        {error, pre_feature_flags_rabbitmq} ->
            %% See run_feature_flags_mod_on_remote_node/4 for
            %% an explanation why we consider this node a 3.7.x
            %% pre-feature-flags node.
            rabbit_log_feature_flags:debug(
              "Feature flags: no feature flags support on node `~s`, "
              "consider the list of feature flags empty", [Node]),
            #{};
        {error, Reason} = Error ->
            rabbit_log_feature_flags:error(
              "Feature flags: error while querying ~s feature flags "
              "on node `~s`: ~p",
              [Which, Node, Reason]),
            Error;
        RemoteFeatureFlags when is_map(RemoteFeatureFlags) ->
            RemoteFeatureNames = maps:keys(RemoteFeatureFlags),
            rabbit_log_feature_flags:debug(
              "Feature flags: querying ~s feature flags on node `~s` "
              "done; ~s features: ~p",
              [Which, Node, Which, RemoteFeatureNames]),
            RemoteFeatureFlags
    end.

-spec merge_feature_flags_from_unknown_apps(feature_flags()) ->
    ok | {error, any()}.
%% @private
%% Registers, among `FeatureFlags', those which are not supported
%% locally and whose providing application (`provided_by') is not
%% loaded locally. Returns `ok' without touching the registry when
%% there is nothing to register.

merge_feature_flags_from_unknown_apps(FeatureFlags)
  when is_map(FeatureFlags) ->
    LoadedApps = [App || {App, _, _} <- application:loaded_applications()],
    FeatureFlagsFromUnknownApps =
        maps:filter(
          fun(FeatureName, FeatureProps) ->
                  %% `provided_by' is only looked up for feature flags
                  %% which are not supported locally.
                  not is_supported_locally(FeatureName)
                      andalso
                      not lists:member(
                            maps:get(provided_by, FeatureProps),
                            LoadedApps)
          end,
          FeatureFlags),
    case maps:keys(FeatureFlagsFromUnknownApps) of
        [] ->
            ok;
        UnknownFeatureNames ->
            rabbit_log_feature_flags:debug(
              "Feature flags: register feature flags provided by applications "
              "unknown locally: ~p",
              [UnknownFeatureNames]),
            initialize_registry(FeatureFlagsFromUnknownApps)
    end.

%% @private
%% Exchanges, in both directions, the feature flags provided by
%% applications which are loaded on one side only.

exchange_feature_flags_from_unknown_apps(Node, Timeout) ->
    %% The first step is to fetch feature flags from Erlang applications
    %% we don't know locally (they are loaded remotely, but not
    %% locally).
    fetch_remote_feature_flags_from_apps_unknown_locally(Node, Timeout),

    %% The next step is to do the opposite: push feature flags to remote
    %% nodes so they can register those from applications they don't
    %% know.
    push_local_feature_flags_from_apps_unknown_remotely(Node, Timeout).

-spec fetch_remote_feature_flags_from_apps_unknown_locally(node(),
                                                           timeout()) ->
    ok | {error, any()}.
%% @private
%% Queries all feature flags known to `Node' and registers locally
%% those provided by applications unknown locally.
%%
%% If the query fails, the error is returned untouched.

fetch_remote_feature_flags_from_apps_unknown_locally(Node, Timeout) ->
    case query_remote_feature_flags(Node, all, Timeout) of
        {error, _} = Error ->
            %% merge_feature_flags_from_unknown_apps/1 only accepts a
            %% map: handing it the error tuple would crash with
            %% `function_clause' instead of reporting the failure.
            Error;
        RemoteFeatureFlags when is_map(RemoteFeatureFlags) ->
            merge_feature_flags_from_unknown_apps(RemoteFeatureFlags)
    end.

%% @private
%% Pushes all feature flags known locally to the running nodes reported
%% by `Node'.

push_local_feature_flags_from_apps_unknown_remotely(Node, Timeout) ->
    LocalFeatureFlags = list(all),
    push_local_feature_flags_from_apps_unknown_remotely(
      Node, LocalFeatureFlags, Timeout).

%% @private
%% Asks each running node reported by `Node' to merge `FeatureFlags'.
%% Nothing is done when `FeatureFlags' is empty. The result of each
%% remote merge is ignored.

push_local_feature_flags_from_apps_unknown_remotely(
  Node, FeatureFlags, Timeout)
  when map_size(FeatureFlags) > 0 ->
    case query_running_remote_nodes(Node, Timeout) of
        {badrpc, Reason} ->
            {error, Reason};
        Nodes ->
            lists:foreach(
              fun(N) ->
                      run_feature_flags_mod_on_remote_node(
                        N,
                        merge_feature_flags_from_unknown_apps,
                        [FeatureFlags],
                        Timeout)
              end, Nodes)
    end;
push_local_feature_flags_from_apps_unknown_remotely(_, _, _) ->
    ok.

-spec sync_feature_flags_with_cluster([node()], boolean()) ->
    ok | {error, any()} | no_return().
%% @private

sync_feature_flags_with_cluster(Nodes, NodeIsVirgin) ->
    sync_feature_flags_with_cluster(Nodes, NodeIsVirgin, ?TIMEOUT).

-spec sync_feature_flags_with_cluster([node()], boolean(), timeout()) ->
    ok | {error, any()} | no_return().
%% @private
%% With an empty list of nodes, the local node is handled as an
%% unclustered one: feature flags are only enabled automatically when
%% the node is virgin and remote_nodes/0 reports no nodes. Otherwise,
%% feature flags are synchronized with one of the given nodes.

sync_feature_flags_with_cluster([], NodeIsVirgin, _) ->
    verify_which_feature_flags_are_actually_enabled(),
    case NodeIsVirgin of
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flags: starting an unclustered node which is "
              "already initialized: all feature flags left in their "
              "current state"),
            ok;
        true ->
            %% The forced list is read before looking at remote nodes.
            ForcedNames = get_forced_feature_flag_names(),
            case {remote_nodes(), ForcedNames} of
                {[], undefined} ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: starting an unclustered node "
                      "for the first time: all feature flags will be "
                      "enabled by default"),
                    enable_all();
                {[], []} ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: starting an unclustered "
                      "node for the first time: all feature "
                      "flags are forcibly left disabled from "
                      "the $RABBITMQ_FEATURE_FLAGS environment "
                      "variable"),
                    ok;
                {[], _} ->
                    rabbit_log_feature_flags:debug(
                      "Feature flags: starting an unclustered "
                      "node for the first time: only the "
                      "following feature flags specified in "
                      "the $RABBITMQ_FEATURE_FLAGS environment "
                      "variable will be enabled: ~p",
                      [ForcedNames]),
                    enable(ForcedNames);
                {_, _} ->
                    %% Some remote nodes are known: nothing is enabled
                    %% here.
                    ok
            end
    end;
sync_feature_flags_with_cluster(Nodes, _, Timeout) ->
    verify_which_feature_flags_are_actually_enabled(),
    %% The local node is never a synchronization peer.
    sync_feature_flags_with_cluster1(Nodes -- [node()], Timeout).
%% @private
%% First half of the synchronization: picks one remote node at random
%% and enables locally the feature flags it reports as enabled. On
%% success, continues with sync_feature_flags_with_cluster2/2.

sync_feature_flags_with_cluster1([], _) ->
    ok;
sync_feature_flags_with_cluster1(RemoteNodes, Timeout) ->
    Peer = pick_one_node(RemoteNodes),
    rabbit_log_feature_flags:debug(
      "Feature flags: SYNCING FEATURE FLAGS with node `~s`...",
      [Peer]),
    case query_remote_feature_flags(Peer, enabled, Timeout) of
        {error, _} = QueryError ->
            QueryError;
        PeerEnabledFlags ->
            PeerEnabledNames = maps:keys(PeerEnabledFlags),
            rabbit_log_feature_flags:debug(
              "Feature flags: enabling locally feature flags already "
              "enabled on node `~s`...",
              [Peer]),
            case do_sync_feature_flags_with_node(PeerEnabledNames) of
                ok        -> sync_feature_flags_with_cluster2(Peer, Timeout);
                SyncError -> SyncError
            end
    end.

%% @private
%% Second half of the synchronization: asks the chosen remote node to
%% enable the feature flags which are enabled locally. A remote node
%% without the feature flags subsystem is not an error.

sync_feature_flags_with_cluster2(RandomRemoteNode, Timeout) ->
    EnabledLocally = maps:keys(list(enabled)),
    rabbit_log_feature_flags:debug(
      "Feature flags: enabling on node `~s` feature flags already "
      "enabled locally...",
      [RandomRemoteNode]),
    case run_feature_flags_mod_on_remote_node(
           RandomRemoteNode,
           do_sync_feature_flags_with_node,
           [EnabledLocally],
           Timeout) of
        {error, pre_feature_flags_rabbitmq} -> ok;
        Other                               -> Other
    end.

%% @private
%% Returns one element of the non-empty list `Nodes', chosen with
%% `rand:uniform/1'.

pick_one_node(Nodes) ->
    lists:nth(rand:uniform(length(Nodes)), Nodes).

%% @private
%% Enables the given feature flags locally, one after the other, and
%% stops at the first one which fails to be enabled.

do_sync_feature_flags_with_node([]) ->
    ok;
do_sync_feature_flags_with_node([FeatureFlag | Rest]) ->
    case enable_locally(FeatureFlag) of
        ok    -> do_sync_feature_flags_with_node(Rest);
        Error -> Error
    end.

-spec get_forced_feature_flag_names() -> [feature_name()] | undefined.
%% @private
%% @doc
%% Returns the (possibly empty) list of feature flags the user wants
%% to enable out-of-the-box when starting a node for the first time.
%%
%% Without this, the default is to enable all the supported feature
%% flags.
%%
%% There are two ways to specify that list:
%% <ol>
%% <li>Using the `$RABBITMQ_FEATURE_FLAGS' environment variable; for
%%   instance `RABBITMQ_FEATURE_FLAGS=quorum_queue,mnevis'.</li>
%% <li>Using the `forced_feature_flags_on_init' configuration parameter;
%%   for instance
%%   `{rabbit, [{forced_feature_flags_on_init, [quorum_queue, mnevis]}]}'.</li>
%% </ol>
%%
%% The environment variable has precedence over the configuration
%% parameter.

get_forced_feature_flag_names() ->
    %% The configuration is only consulted when the environment gives
    %% no answer.
    Forced = case get_forced_feature_flag_names_from_env() of
                 undefined -> get_forced_feature_flag_names_from_config();
                 FromEnv   -> FromEnv
             end,
    case Forced of
        undefined ->
            ok;
        [] ->
            rabbit_log_feature_flags:info(
              "Feature flags: automatic enablement of feature "
              "flags disabled (i.e. none will be enabled "
              "automatically)");
        _ ->
            rabbit_log_feature_flags:info(
              "Feature flags: automatic enablement of feature "
              "flags limited to the following list: ~p", [Forced])
    end,
    Forced.

-spec get_forced_feature_flag_names_from_env() -> [feature_name()] | undefined.
%% @private
%% Reads the list from the `forced_feature_flags_on_init' key of the
%% prelaunch context. Returns `undefined' if the key is missing or is
%% not a list.

get_forced_feature_flag_names_from_env() ->
    Context = rabbit_prelaunch:get_context(),
    case Context of
        #{forced_feature_flags_on_init := Names} when is_list(Names) ->
            Names;
        _ ->
            undefined
    end.

-spec get_forced_feature_flag_names_from_config() -> [feature_name()] | undefined.
%% @private
%% Reads the list from the `forced_feature_flags_on_init' parameter of
%% the `rabbit' application. Anything but a list of atoms is treated as
%% `undefined'.

get_forced_feature_flag_names_from_config() ->
    case application:get_env(rabbit,
                             forced_feature_flags_on_init,
                             undefined) of
        Names when is_list(Names) ->
            case lists:all(fun is_atom/1, Names) of
                true  -> Names;
                false -> undefined
            end;
        _ ->
            undefined
    end.
-spec verify_which_feature_flags_are_actually_enabled() ->
    ok | {error, any()} | no_return().
%% @private
%% Recomputes the list of enabled feature flags from the migration
%% functions and, if it differs from the list read from disk, logs the
%% differences, writes the repaired list and re-initializes the
%% registry.

verify_which_feature_flags_are_actually_enabled() ->
    AllFeatureFlags = list(all),
    OnDiskNames = read_enabled_feature_flags_list(),
    rabbit_log_feature_flags:debug(
      "Feature flags: double-checking feature flag states..."),
    %% The previous instance of the node may have failed to write the
    %% feature flags list file. So each feature flag's migration
    %% function is asked whether the feature flag is actually enabled.
    %%
    %% When the migration function gives no boolean answer (no function
    %% provided, or the function failed), the feature flag keeps its
    %% current state.
    CollectEnabled =
        fun(Name, Props, Acc) ->
                Enabled = case run_migration_fun(Name, Props, is_enabled) of
                              true  -> true;
                              false -> false;
                              _     -> is_enabled(Name)
                          end,
                case Enabled of
                    true  -> [Name | Acc];
                    false -> Acc
                end
        end,
    RepairedNames = lists:sort(maps:fold(CollectEnabled, [], AllFeatureFlags)),
    %% Feature flags whose state changes after the check above are
    %% logged.
    NowEnabled = RepairedNames -- OnDiskNames,
    NowDisabled = OnDiskNames -- RepairedNames,
    case {NowEnabled, NowDisabled} of
        {[], []} ->
            ok;
        _ ->
            rabbit_log_feature_flags:warning(
              "Feature flags: the previous instance of this node "
              "must have failed to write the `feature_flags` "
              "file at `~s`:",
              [enabled_feature_flags_list_file()])
    end,
    WarnUnlessEmpty =
        fun(_Format, []) ->
                ok;
           (Format, Names) ->
                rabbit_log_feature_flags:warning(Format, [Names])
        end,
    WarnUnlessEmpty(
      "Feature flags: - list of previously enabled "
      "feature flags now marked as such: ~p", NowEnabled),
    WarnUnlessEmpty(
      "Feature flags: - list of previously disabled "
      "feature flags now marked as such: ~p", NowDisabled),
    %% Finally, the repaired list is written to disk and the registry is
    %% re-initialized, but only if the list differs from the one on
    %% disk.
    case RepairedNames =:= OnDiskNames of
        true ->
            ok;
        false ->
            rabbit_log_feature_flags:debug(
              "Feature flags: write the repaired list of enabled feature "
              "flags"),
            WriteRet = try_to_write_enabled_feature_flags_list(RepairedNames),
            WrittenToDisk = ok =:= WriteRet,
            initialize_registry(
              #{},
              list_of_enabled_feature_flags_to_feature_states(RepairedNames),
              WrittenToDisk)
    end.

-spec refresh_feature_flags_after_app_load([atom()]) ->
    ok | {error, any()} | no_return().
%% @doc
%% Refreshes the feature flags registry after the applications `Apps'
%% were loaded. Feature flags provided by these applications and already
%% marked as enabled are enabled locally. Feature flags unseen so far
%% are shared with the remote nodes.

refresh_feature_flags_after_app_load([]) ->
    ok;
refresh_feature_flags_after_app_load(Apps) ->
    rabbit_log_feature_flags:debug(
      "Feature flags: new apps loaded: ~p -> refreshing feature flags",
      [Apps]),

    KnownBefore = list(all),
    SupportedNow = query_supported_feature_flags(),

    %% Feature flags this node already knew about, only because remote
    %% nodes have them, and whose providing application is now loaded
    %% locally too. Their migration function may have to run in case
    %% the state of this node needs it.
    ProvidedByNewApps = maps:filter(
                          fun(_, #{provided_by := App}) ->
                                  lists:member(App, Apps)
                          end, KnownBefore),
    AlreadySupportedFeatureNames = maps:keys(ProvidedByNewApps),
    case AlreadySupportedFeatureNames of
        [] ->
            ok;
        _ ->
            rabbit_log_feature_flags:debug(
              "Feature flags: new apps loaded: feature flags already "
              "supported: ~p",
              [lists:sort(AlreadySupportedFeatureNames)])
    end,

    %% Feature flags no node in the cluster knew about before: this is
    %% the first time they are seen in this instance of the cluster.
    %% They need to be registered on all nodes.
    NewSupportedFeatureFlags = maps:filter(
                                 fun(FeatureName, _) ->
                                         not maps:is_key(FeatureName,
                                                         KnownBefore)
                                 end, SupportedNow),
    case maps:keys(NewSupportedFeatureFlags) of
        [] ->
            ok;
        UnseenNames ->
            rabbit_log_feature_flags:debug(
              "Feature flags: new apps loaded: new feature flags (unseen so "
              "far): ~p ",
              [lists:sort(UnseenNames)])
    end,

    %% Each step only runs if the previous one returned `ok'.
    case initialize_registry() of
        ok ->
            case maybe_enable_locally_after_app_load(
                   AlreadySupportedFeatureNames) of
                ok ->
                    share_new_feature_flags_after_app_load(
                      NewSupportedFeatureFlags, ?TIMEOUT);
                EnableError ->
                    EnableError
            end;
        InitError ->
            InitError
    end.

%% Enables locally each of the given feature flags which is already
%% marked as enabled. Stops at the first failure and returns it.

maybe_enable_locally_after_app_load([]) ->
    ok;
maybe_enable_locally_after_app_load([FeatureName | Rest]) ->
    Ret = case is_enabled(FeatureName) of
              true  -> do_enable_locally(FeatureName);
              false -> ok
          end,
    case Ret of
        ok    -> maybe_enable_locally_after_app_load(Rest);
        Error -> Error
    end.

%% Pushes the given feature flags to remote nodes, using the local node
%% as the one queried for the list of running nodes.

share_new_feature_flags_after_app_load(FeatureFlags, Timeout) ->
    push_local_feature_flags_from_apps_unknown_remotely(
      node(), FeatureFlags, Timeout).

on_load() ->
    %% This `on_load()` code server hook prevents this module from
    %% being loaded in an already running RabbitMQ node if the running
    %% version does not have the feature flags subsystem.
    %%
    %% This happens when an upgrade overwrites RabbitMQ files with the
    %% node still running. Many packages work this way: files are
    %% updated on disk, then a post-install step restarts the service.
    %%
    %% If many nodes in a cluster are updated at the same time, a node
    %% running the newer version might query feature flags on an old
    %% node where this module is already available (because files were
    %% already overwritten). The query then reports an unexpected
    %% answer and the newer node refuses to start.
    %%
    %% When the module is used outside of RabbitMQ (for debugging
    %% purpose or in the context of EUnit for instance), the load must
    %% be allowed. That is why we first check if RabbitMQ is actually
    %% running.
    case rabbit:is_running() of
        false ->
            %% RabbitMQ is not running. Loading the module is permitted
            %% because this Erlang node will never be queried for its
            %% feature flags.
            ok;
        true ->
            %% RabbitMQ is running: tell a pre-feature-flags node from
            %% one having the subsystem by looking at the
            %% `feature_flags_file` application environment variable.
            %% With a feature-flags-enabled node, this variable is
            %% defined by rabbitmq-server(8).
            case application:get_env(rabbit, feature_flags_file) of
                {ok, _} ->
                    %% Feature-flags-enabled version: the load is
                    %% permitted.
                    ok;
                _ ->
                    %% Pre-feature-flags version: the load is denied.
                    %% The returned string says why and includes the
                    %% version of RabbitMQ when it is known.
                    Vsn = case application:get_key(rabbit, vsn) of
                              {ok, V}   -> V;
                              undefined -> "unknown version"
                          end,
                    "Refusing to load '" ?MODULE_STRING "' on this "
                    "node. It appears to be running a pre-feature-flags "
                    "version of RabbitMQ (" ++ Vsn ++ "). This is fine: "
                    "a newer version of RabbitMQ was deployed on this "
                    "node, but it was not restarted yet. This warning "
                    "is probably caused by a remote node querying this "
                    "node for its feature flags."
            end
    end.