1#
2#  Copyright 2021 Northern.tech AS
3#
4#  This file is part of CFEngine 3 - written and maintained by Northern.tech AS.
5#
6#  This program is free software; you can redistribute it and/or modify it
7#  under the terms of the GNU General Public License as published by the
8#  Free Software Foundation; version 3.
9#
10#  This program is distributed in the hope that it will be useful,
11#  but WITHOUT ANY WARRANTY; without even the implied warranty of
12#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13#  GNU General Public License for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with this program; if not, write to the Free Software
17# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
18#
19# To the extent this program is licensed as part of the Enterprise
20# versions of CFEngine, the applicable Commercial Open Source License
21# (COSL) may apply to this file if you as a licensee so wish it. See
22# included file COSL.txt.
23
24########## CFEngine Bootstrap / Failsafe Policy ##############################
# This file (failsafe.cf) is re-generated inside the "inputs" directory every
# time you bootstrap. This means that any custom changes will be overwritten.
27#
28# The role of this standalone policy file is to fetch the main promises from
29# the policy hub for the first time when bootstrapping, and to recover the
30# system by fetching policies in case the standard agent run fails.
31##############################################################################
32
body agent control
{
      # Running failsafe several times in a row must not change what
      # it does, so no elapsed-time restriction is applied.
      ifelapsed => "0";

      # Without keys bootstrapping cannot continue: abort as soon as
      # the key check defines this class.
      abortclasses => { "no_ppkeys_ABORT_kept" };
}
41
42################################################################################
43
bundle agent main
{
  meta:

      "description"
        string => "Perform bootstrap or failsafe recovery operations.";

  methods:

      # Entry point of the failsafe policy. Each promise below hands
      # off to one stage bundle: key check, policy fetch, update
      # policy run, full policy run and the final report.

      "Check Keys"
        comment => "Without a valid keypair we aren't going to be able
                    to establish trust",
        usebundle => failsafe_cfe_internal_checkkeys;

      "Fetch Inputs"
        comment => "We need to fetch policy from upstream if we are
                    bootstrapping or if we are performing failsafe
                    recovery.",
        usebundle => failsafe_cfe_internal_update;

      "Actuate Update Policy"
        comment => "In order to speed up convergence and reporting we
                    trigger the update policy right after initial
                    bootstrap. This allows the first scheduled run to
                    happen with the most up to date and complete
                    information.",
        usebundle => failsafe_cfe_internal_call_update;

      "Trigger Policy"
        comment => "In order to speed up convergence and reporting we
                    trigger the whole policy right after initial
                    bootstrap. This allows the first report to provide
                    more complete data.",
        usebundle => failsafe_cfe_internal_trigger_policy;

      "Report"
        comment => "It's important to let the user know what happened
                    as the result of the bootstrap or failsafe
                    operation.",
        usebundle => failsafe_cfe_internal_report;
}
85
bundle agent failsafe_cfe_internal_checkkeys
{
  classes:

      # Set when the local public key file exists under ppkeys.
      "have_ppkeys"
        handle     => "failsafe_cfe_internal_bootstrap_checkkeys_classes_have_ppkeys",
        expression => fileexists("$(sys.workdir)/ppkeys/localhost.pub");

  reports:

    !have_ppkeys::

      # When this report is kept, failsafe_results defines
      # no_ppkeys_ABORT_kept, which is listed in abortclasses in
      # body agent control.
      "No public/private key pair is loaded, please create one by running \"cf-key\""
        classes => failsafe_results("namespace", "no_ppkeys_ABORT");
}
98
99################################################################################
100
bundle agent failsafe_cfe_internal_update
{
  vars:

      # Remote location to copy policy from.
      #
      # On a policy server the full $(sys.masterdir) path is used,
      # because the shortcut feature cannot be relied upon while
      # cf-serverd is potentially not yet up and running.
      #
      # Every other host asks for the unqualified "masterfiles" path
      # so that the policy server can use a shortcut to decide, on
      # behalf of the client, which policy to serve by default. This
      # helps when running binaries from mixed sources (for example
      # CFEngine produced binaries vs packages from the debian
      # repository).

      "masterfiles_dir_remote"
        string => ifelse( "policy_server", $(sys.masterdir), "masterfiles" );

  files:

      # First attempt: fetch inputs from the location chosen above.
      "$(sys.inputdir)"
        copy_from => failsafe_scp("$(masterfiles_dir_remote)"),
        depth_search => failsafe_recurse("inf"),
        file_select => failsafe_exclude_vcs_files,
        classes => failsafe_results("namespace", "inputdir_update"),
        handle => "failsafe_cfe_internal_bootstrap_update_files_sys_workdir_inputs_shortcut";

    !policy_server::

      # Hosts other than the policy server also fetch modules.
      "$(sys.workdir)/modules"
        copy_from => failsafe_scp("modules"),
        depth_search => failsafe_recurse("inf"),
        file_select => failsafe_exclude_vcs_files,
        classes => failsafe_results("namespace", "modulesdir_update"),
        handle => "failsafe_cfe_internal_bootstrap_update_files_sys_workdir_modules_shortcut";

    !windows.inputdir_update_error::

      # Fallback for non-windows hosts. With homogeneous packages on
      # a *nix platform $(sys.masterdir) is a good guess for the
      # remote path. It is never right for windows, and might be a
      # poor guess when mixing packages from different sources (for
      # example debian repositories and CFEngine produced packages).
      "$(sys.inputdir)"
        comment => "If we failed to fetch policy we try again using
                    the legacy default in case we are fetching policy
                    from a hub that is not serving mastefiles via a
                    shortcut.",
        copy_from => failsafe_scp("$(sys.masterdir)"),
        depth_search => failsafe_recurse("inf"),
        file_select => failsafe_exclude_vcs_files,
        classes => failsafe_results("namespace", "inputdir_update"),
        handle => "failsafe_cfe_internal_bootstrap_update_files_sys_workdir_inputs_not_windows";

    windows.inputdir_update_error::

      # Fallback for windows hosts. $(sys.masterdir) can't be used
      # here because no one runs a hub on windows and the copy_from
      # needs the remote path.
      "$(sys.inputdir)"
        comment => "If we failed to fetch policy we try again using
                    the legacy default in case we are fetching policy
                    from a hub that is not serving mastefiles via a
                    shortcut.",
        copy_from => failsafe_scp("/var/cfengine/masterfiles"),
        depth_search => failsafe_recurse("inf"),
        file_select => failsafe_exclude_vcs_files,
        classes => failsafe_results("namespace", "inputdir_update"),
        handle => "failsafe_cfe_internal_bootstrap_update_files_sys_workdir_inputs_windows";

    windows::

      # TODO: Remove the use of bin-twin ref: Redmine #7364
      "$(sys.workdir)\\bin-twin\\."
        comment => "Make sure we maintain a clone of the binaries and
                    libraries for updating",
        copy_from => failsafe_cp("$(sys.workdir)\\bin\\."),
        depth_search => failsafe_recurse("1"),
        file_select => failsafe_exclude_vcs_files,
        handle => "failsafe_cfe_internal_bootstrap_update_files_sys_workdir_bin_twin_windows";


  processes:

      # TODO: Decide if this class guard is appropriate. Should the
      # check for a running cf-execd really be limited to the case
      # where inputs were repaired?
    !windows.inputdir_update_repaired::

      # Find out whether cf-execd is running so it can be started
      # when necessary. Windows is excluded here; it is handled by
      # the services promise at the end of this bundle. On systemd
      # hosts the class set here drives a unit restart rather than a
      # direct launch (see commands below).

      "cf-execd"
        handle => "failsafe_cfe_internal_bootstrap_update_processes_start_cf_execd",
        restart_class => "cf_execd_not_running";

    any::

      # Find out whether cf-serverd is running so it can be started
      # when necessary.

      "cf-serverd"
        handle => "failsafe_cfe_internal_bootstrap_update_processes_start_cf_serverd",
        restart_class => "cf_serverd_not_running";

  commands:

    cf_execd_not_running.!windows.!systemd::

      # cf-execd is launched directly only where neither windows nor
      # systemd is in charge; those platforms are handled separately.

      "$(sys.cf_execd)"
        classes => failsafe_results("namespace", "cf_execd_running"),
        handle => "failsafe_cfe_internal_bootstrap_update_commands_check_sys_cf_execd_start";

    cf_serverd_not_running.!windows.!systemd::

      # The same applies to cf-serverd: no direct launch on windows
      # or systemd.
      #
      # NOTE(review): failsafe_ifwin_bg only sets background under
      # the windows class, which this context excludes, so the action
      # body looks like a no-op here - confirm before removing.

      "$(sys.cf_serverd)"
        comment => "cf-serverd is needed on policy hubs or remote
                    clients will not be able to get policy. Clients do
                    not have a strong dependency on cf-serverd and if
                    the component is necessay it is expected to be
                    started by a separate policy.",
        action => failsafe_ifwin_bg,
        classes => failsafe_results("namespace", "cf_serverd_running"),
        handle => "failsafe_cfe_internal_bootstrap_update_commands_check_sys_cf_serverd_start";

    cf_execd_not_running.systemd::

      # "restart" is used on purpose: cf-serverd may be running even
      # though cf-execd is not, for example, and here we want to be
      # sure everything is relaunched.

      "/bin/systemctl restart cfengine3" -> { "CFE-1459" }
        contain => bootstrap_command_silent,
        classes => failsafe_results("namespace", "systemctl_restart_cfengine3"),
        handle => "failsafe_cfe_internal_bootstrap_update_commands_systemd_cfe_start";

  services:

      # TODO: Is it appropriate to promise the service running only
      # when inputs were repaired? Perhaps it should always be
      # checked.
    windows.inputdir_update_repaired::

      "CfengineNovaExec"
        service_method => failsafe_bootstart,
        service_policy => "start",
        classes => failsafe_results("namespace", "cf_execd_running"),
        handle => "failsafe_cfe_internal_bootstrap_update_services_windows_executor";
}
254
255################################################################################
256
bundle agent failsafe_cfe_internal_report
{
  meta:

      "description"
        string => "Report the outcome of the embedded
                   bootstrap/failsafe operation.";

  classes:

      # TODO: Determine if this is necessary and/or useful.  Pre-eval
      # might resolve this before policy update occurs, and this is
      # probably most useful after policy update has been attempted.

      "have_promises_cf"
        scope => "bundle",
        expression => fileexists("$(sys.inputdir)/promises.cf"),
        handle => "failsafe_cfe_internal_bootstrap_update_classes_have_promises_cf",
        comment => "We expect to find promises.cf after policy has
                    been successfully copied from the policy
                    server. If promises.cf is missing, then the
                    bootstrap or failsafe recovery has likely
                    failed.";

  reports:

      # The reports below key on namespace classes defined through
      # failsafe_results by the other failsafe bundles
      # (<prefix>_repaired, <prefix>_failed, <prefix>_error).
      # Every report carries its own unique handle.

    !bootstrap_mode::

      "Built-in failsafe policy triggered"
        handle => "failsafe_cfe_internal_bootstrap_update_reports_failsafe_notification",
        comment => "Be sure to inform the user that the failsafe policy has
                    been triggered. This typically indicates that the agent has
                    received broken policy. It may also indicate legacy
                    configuration in body executor control.";

    bootstrap_mode::

      "Bootstrapping from host '$(sys.policy_hub)' via built-in policy '$(this.promise_filename)'"
        handle => "failsafe_cfe_internal_bootstrap_update_reports_bootstrap_notification",
        comment => "Be sure to inform the user that they have triggerd a bootstrap.";

    bootstrap_mode.policy_server::

      "This host assumes the role of policy server"
        handle => "failsafe_cfe_internal_bootstrap_update_reports_assume_policy_hub";

    bootstrap_mode.!policy_server::

      "This autonomous node assumes the role of voluntary client"
        handle => "failsafe_cfe_internal_bootstrap_update_reports_assume_voluntary_client";

    inputdir_update_repaired::

      "Updated local policy from policy server"
        handle => "failsafe_cfe_internal_bootstrap_update_reports_inputdir_update_repaired";


    inputdir_update_repaired.!have_promises_cf::

      # We used to display this report when we have fetched new
      # policy, but still can not find promises.cf in
      # sys.inputdir. However if the hub being bootstrapped to is down
      # we may never repair inputs and this may not be triggered
      #
      # TODO: Come up with better conditions. These seem weak.
      #  - Potentially use returnszero() with cf-promises?

      "Failed to copy policy from policy server at $(sys.policy_hub):$(sys.masterdir)
       Please check
       * cf-serverd is running on $(sys.policy_hub)
       * CFEngine version on the policy hub is 3.6.0 or latest - otherwise you need to tweak the protocol_version setting
       * network connectivity to $(sys.policy_hub) on port $(sys.policy_hub_port)
       * masterfiles 'body server control' - in particular allowconnects, trustkeysfrom and skipverify
       * masterfiles 'bundle server' -> access: -> masterfiles -> admit/deny
       It is often useful to restart cf-serverd in verbose mode (cf-serverd -v) on $(sys.policy_hub) to diagnose connection issues.
       When updating masterfiles, wait (usually 5 minutes) for files to propagate to inputs on $(sys.policy_hub) before retrying."
        handle => "failsafe_cfe_internal_bootstrap_update_reports_did_not_get_policy";

    trigger_policy_repaired::

      "Triggered an initial run of the policy"
        handle => "failsafe_cfe_internal_bootstrap_trigger_policy_passed";

    trigger_policy_failed::

      "Initial run of the policy failed"
        handle => "failsafe_cfe_internal_bootstrap_trigger_policy_failed";

    systemctl_restart_cfengine3_repaired::

      "Restarted systemd unit cfengine3"
        handle => "failsafe_cfe_internal_bootstrap_update_reports_systemd_unit_restarted";

    systemctl_restart_cfengine3_error::

      # Distinct handle: the success and the error report must not
      # share one identifier.
      "Error restarting systemd unit cfengine3"
        handle => "failsafe_cfe_internal_bootstrap_update_reports_systemd_unit_restart_error";

    cf_serverd_running_repaired::

      "Started the server"
        handle => "failsafe_cfe_internal_bootstrap_update_reports_started_serverd";

    cf_serverd_running_failed::

      "Failed to start the server"
        handle => "failsafe_cfe_internal_bootstrap_update_reports_failed_to_start_serverd";

    cf_execd_running_repaired::

      "Started the scheduler"
        handle => "failsafe_cfe_internal_bootstrap_update_reports_started_execd";

    cf_execd_running_failed::

      "Failed to start the scheduler"
        handle => "failsafe_cfe_internal_bootstrap_update_reports_failed_to_start_execd";
}
373
374################################################################################
375
bundle agent failsafe_cfe_internal_call_update
{
  vars:

      # Class handed to the update run via --define: bootstrap_mode
      # when bootstrapping, failsafe_mode otherwise.
      "mode"
        string => ifelse( "bootstrap_mode", "bootstrap_mode", "failsafe_mode" );

  commands:

    # Not run on Windows: there cf-execd has to call update.cf,
    # otherwise the daemons will not run under the SYSTEM account.
    # Also skipped when skip_policy_on_bootstrap is defined.
    !(windows|skip_policy_on_bootstrap)::

      # Only attempted when the update policy file actually exists.
      "$(sys.cf_agent) -f $(sys.update_policy_path) --define $(mode)"
        comment => "We run update.cf in order to prepare system information for
                    collection into CFEngine Enterprise more quickly.",
        if => fileexists( $(sys.update_policy_path) ),
        handle => "failsafe_cfe_internal_call_update_commands_call_update_cf";
}
393
394################################################################################
395
bundle agent failsafe_cfe_internal_trigger_policy
{
  commands:

    # Only while bootstrapping, and only when
    # skip_policy_on_bootstrap is not defined.
    bootstrap_mode.!skip_policy_on_bootstrap::

      # Runs the agent if the default policy file exists. The outcome
      # is recorded as trigger_policy_* classes for the report bundle.
      "$(sys.cf_agent) --define bootstrap_mode"
        comment => "We run promises.cf in order to prepare system information for
                    collection into CFEngine Enterprise more quickly.",
        if => fileexists( $(sys.default_policy_path) ),
        classes => failsafe_results("namespace", "trigger_policy"),
        handle => "failsafe_cfe_internal_trigger_policy_commands_call_promises_cf";
}
408
409############################################
body copy_from failsafe_scp(from)
# @brief Copy `from`, comparing files by digest; hosts other than the
# policy server copy from the policy hub
#
# @param from The source path to copy from
{
      compare     => "digest";
      source      => "$(from)";

    # trust_server is always set when bootstrapping. Bootstrap with
    # --trust-server=no to deactivate it.
    trust_server::
      trustkey    => "true";

    # Only hosts other than the policy server get a server and port
    # to copy from.
    !policy_server::
      portnumber  => "$(sys.policy_hub_port)";
      servers     => { "$(sys.policy_hub)" };
}
422############################################
body depth_search failsafe_recurse(d)
# @brief Descend `d` levels while skipping .svn and .git directories
#
# @param d The search depth, as used by callers: "inf" or "1"
{
      exclude_dirs => { "\.svn", "\.git" };
      depth        => "$(d)";
}
428############################################
body file_select failsafe_exclude_vcs_files
# @brief Select every file whose name matches neither `\.git.*` nor
# `\.mailmap`
{
      file_result => "!leaf_name";
      leaf_name   => { "\.git.*", "\.mailmap" };
}
434############################################
body service_method failsafe_bootstart
# @brief Service method with the autostart policy set to boot time
{
      service_autostart_policy => "boot_time";
}
439############################################
body action failsafe_ifwin_bg
# @brief Run the promise in the background, but only on windows
{
    # No attribute is set under any other class.
    windows::
      background => "true";
}
445############################################
body copy_from failsafe_cp(from)
# @brief Copy `from` with no server set, comparing by digest and
# keeping no backup copies
#
# @param from The source path to copy from
{
      copy_backup     => "false";
      compare         => "digest";
      source          => "$(from)";
}
452
453############################################
body classes failsafe_results(scope, class_prefix)
# @brief Define classes named `class_prefix` plus an outcome suffix.
#
# Every outcome defines `_reached`. In addition:
#   - kept     defines `_kept`
#   - repaired defines `_repaired`
#   - failed   defines `_error`, `_not_kept` and `_failed`
#   - denied   defines `_error`, `_not_kept` and `_denied`
#   - timeout  defines `_error`, `_not_kept` and `_timeout`
#
# @param scope The scope in which the class should be defined (`bundle` or `namespace`)
# @param class_prefix The prefix for the classes defined
{
  scope => "$(scope)";

  # Successful outcomes
  promise_kept     => { "$(class_prefix)_reached", "$(class_prefix)_kept" };
  promise_repaired => { "$(class_prefix)_reached", "$(class_prefix)_repaired" };

  # Unsuccessful outcomes: all share _reached, _error and _not_kept
  # and differ only in the last, outcome-specific class.
  repair_failed  => { "$(class_prefix)_reached", "$(class_prefix)_error",
                      "$(class_prefix)_not_kept", "$(class_prefix)_failed" };

  repair_denied  => { "$(class_prefix)_reached", "$(class_prefix)_error",
                      "$(class_prefix)_not_kept", "$(class_prefix)_denied" };

  repair_timeout => { "$(class_prefix)_reached", "$(class_prefix)_error",
                      "$(class_prefix)_not_kept", "$(class_prefix)_timeout" };
}
485
body contain bootstrap_command_silent
# @brief Discard the output of the contained command
{
      no_output => "true";
}
491