1!--------------------------------------------------------------------------------------------------!
2!   CP2K: A general program to perform molecular dynamics simulations                              !
3!   Copyright (C) 2000 - 2020  CP2K developers group                                               !
4!--------------------------------------------------------------------------------------------------!
5
6! **************************************************************************************************
7!> \brief Master's routines for the swarm-framework
8!> \author Ole Schuett
9! **************************************************************************************************
10MODULE swarm_master
11   USE cp_external_control,             ONLY: external_control
12   USE cp_log_handling,                 ONLY: cp_get_default_logger,&
13                                              cp_logger_type
14   USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
15                                              cp_print_key_unit_nr
16   USE cp_para_types,                   ONLY: cp_para_env_type
17   USE cp_parser_types,                 ONLY: cp_parser_type,&
18                                              parser_create,&
19                                              parser_release
20   USE glbopt_master,                   ONLY: glbopt_master_finalize,&
21                                              glbopt_master_init,&
22                                              glbopt_master_steer,&
23                                              glbopt_master_type
24   USE global_types,                    ONLY: global_environment_type
25   USE input_constants,                 ONLY: swarm_do_glbopt
26   USE input_section_types,             ONLY: section_vals_get_subs_vals,&
27                                              section_vals_type,&
28                                              section_vals_val_get
29   USE kinds,                           ONLY: default_path_length,&
30                                              default_string_length
31   USE swarm_message,                   ONLY: swarm_message_add,&
32                                              swarm_message_equal,&
33                                              swarm_message_file_read,&
34                                              swarm_message_file_write,&
35                                              swarm_message_free,&
36                                              swarm_message_get,&
37                                              swarm_message_type
38#include "../base/base_uses.f90"
39
40   IMPLICIT NONE
41   PRIVATE
42
43   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'swarm_master'
44
45   PUBLIC :: swarm_master_type
46   PUBLIC :: swarm_master_init, swarm_master_finalize
47   PUBLIC :: swarm_master_steer
48
49   TYPE swarm_message_p_type
50      TYPE(swarm_message_type), POINTER                   :: p => Null()
51   END TYPE swarm_message_p_type
52
53   TYPE swarm_master_type
54      PRIVATE
55      INTEGER                                             :: behavior = -1
56      TYPE(glbopt_master_type), POINTER                   :: glbopt => Null()
57      !possibly more behaviors ...
58      INTEGER                                             :: iw = 0
59      INTEGER                                             :: i_iteration = 0
60      INTEGER                                             :: max_iter = 0
61      LOGICAL                                             :: should_stop = .FALSE.
62      INTEGER                                             :: n_workers = -1
63      INTEGER                                             :: comlog_unit
64      TYPE(section_vals_type), POINTER                    :: swarm_section => Null()
65      TYPE(cp_para_env_type), POINTER                     :: para_env => Null()
66      TYPE(swarm_message_p_type), DIMENSION(:), POINTER   :: queued_commands => Null()
67      TYPE(global_environment_type), POINTER              :: globenv => Null()
68      LOGICAL                                             :: ignore_last_iteration = .FALSE.
69      INTEGER                                             :: n_waiting = 0
70   END TYPE swarm_master_type
71
72CONTAINS
73
74! **************************************************************************************************
75!> \brief Initializes the swarm master
76!> \param master ...
77!> \param para_env ...
78!> \param globenv ...
79!> \param root_section ...
80!> \param n_workers ...
81!> \author Ole Schuett
82! **************************************************************************************************
83   SUBROUTINE swarm_master_init(master, para_env, globenv, root_section, n_workers)
84      TYPE(swarm_master_type)                            :: master
85      TYPE(cp_para_env_type), POINTER                    :: para_env
86      TYPE(global_environment_type), POINTER             :: globenv
87      TYPE(section_vals_type), POINTER                   :: root_section
88      INTEGER, INTENT(IN)                                :: n_workers
89
90      CHARACTER(len=*), PARAMETER :: routineN = 'swarm_master_init', &
91         routineP = moduleN//':'//routineN
92
93      TYPE(cp_logger_type), POINTER                      :: logger
94
95      master%swarm_section => section_vals_get_subs_vals(root_section, "SWARM")
96
97      logger => cp_get_default_logger()
98      master%n_workers = n_workers
99      master%para_env => para_env
100      master%globenv => globenv
101      ALLOCATE (master%queued_commands(master%n_workers))
102      master%iw = cp_print_key_unit_nr(logger, master%swarm_section, &
103                                       "PRINT%MASTER_RUN_INFO", extension=".masterLog")
104
105      CALL section_vals_val_get(master%swarm_section, "BEHAVIOR", i_val=master%behavior)
106
107      ! uses logger%iter_info%project_name to construct filename
108      master%comlog_unit = cp_print_key_unit_nr(logger, master%swarm_section, "PRINT%COMMUNICATION_LOG", &
109                                                !middle_name="comlog", extension=".xyz", &
110                                                extension=".comlog", &
111                                                file_action="WRITE", file_position="REWIND")
112
113      CALL section_vals_val_get(master%swarm_section, "MAX_ITER", i_val=master%max_iter)
114
115      SELECT CASE (master%behavior)
116      CASE (swarm_do_glbopt)
117         ALLOCATE (master%glbopt)
118         CALL glbopt_master_init(master%glbopt, para_env, root_section, n_workers, master%iw)
119      CASE DEFAULT
120         CPABORT("got unknown behavior")
121      END SELECT
122
123      CALL replay_comlog(master)
124   END SUBROUTINE swarm_master_init
125
126! **************************************************************************************************
127!> \brief Helper routine for swarm_master_init, restarts a calculation
128!> \param master ...
129!> \author Ole Schuett
130! **************************************************************************************************
131   SUBROUTINE replay_comlog(master)
132      TYPE(swarm_master_type)                            :: master
133
134      CHARACTER(len=*), PARAMETER :: routineN = 'replay_comlog', routineP = moduleN//':'//routineN
135
136      CHARACTER(LEN=default_path_length)                 :: filename
137      CHARACTER(LEN=default_string_length)               :: command_log
138      INTEGER                                            :: handle, i, worker_id
139      LOGICAL                                            :: at_end, explicit
140      TYPE(cp_parser_type), POINTER                      :: parser
141      TYPE(swarm_message_type)                           :: cmd_log, report_log
142      TYPE(swarm_message_type), &
143         DIMENSION(master%n_workers)                     :: last_commands
144      TYPE(swarm_message_type), POINTER                  :: cmd_now
145
146      NULLIFY (parser)
147
148      ! Initialize parser for trajectory
149      CALL section_vals_val_get(master%swarm_section, "REPLAY_COMMUNICATION_LOG", &
150                                c_val=filename, explicit=explicit)
151
152      IF (.NOT. explicit) RETURN
153      IF (master%iw > 0) WRITE (master%iw, '(A,A)') &
154         " SWARM| Starting replay of communication-log: ", TRIM(filename)
155
156      CALL timeset("swarm_master_replay_comlog", handle)
157      CALL parser_create(parser, filename, para_env=master%para_env)
158
159      at_end = .FALSE.
160      DO
161         CALL swarm_message_file_read(report_log, parser, at_end)
162         IF (at_end) EXIT
163
164         CALL swarm_message_file_read(cmd_log, parser, at_end)
165         IF (at_end) EXIT
166
167         ALLOCATE (cmd_now)
168         CALL swarm_master_steer(master, report_log, cmd_now)
169
170         !TODO: maybe we should just exit the loop instead of stopping?
171         CALL swarm_message_get(cmd_log, "command", command_log)
172         IF (TRIM(command_log) /= "shutdown") THEN
173            IF (.NOT. commands_equal(cmd_now, cmd_log, master%iw)) CPABORT("wrong behaviour")
174         END IF
175
176         CALL swarm_message_free(cmd_log)
177         CALL swarm_message_free(report_log)
178         CALL swarm_message_get(cmd_now, "worker_id", worker_id)
179         CALL swarm_message_free(last_commands(worker_id))
180         last_commands(worker_id) = cmd_now
181         DEALLOCATE (cmd_now)
182      END DO
183
184      CALL swarm_message_free(report_log) !don't worry about double-frees
185      CALL swarm_message_free(cmd_log)
186
187      IF (master%iw > 0) WRITE (master%iw, '(A,A)') &
188         " SWARM| Reached end of communication log. Queueing last commands."
189
190      DO i = 1, master%n_workers
191         ALLOCATE (master%queued_commands(i)%p)
192         master%queued_commands(i)%p = last_commands(i)
193      END DO
194
195      CALL parser_release(parser)
196      CALL timestop(handle)
197   END SUBROUTINE replay_comlog
198
199! **************************************************************************************************
200!> \brief Helper routine for replay_comlog, compares two commands
201!> \param cmd1 ...
202!> \param cmd2 ...
203!> \param iw ...
204!> \return ...
205!> \author Ole Schuett
206! **************************************************************************************************
207   FUNCTION commands_equal(cmd1, cmd2, iw) RESULT(res)
208      TYPE(swarm_message_type)                           :: cmd1, cmd2
209      INTEGER                                            :: iw
210      LOGICAL                                            :: res
211
212      res = swarm_message_equal(cmd1, cmd2)
213      IF (.NOT. res .AND. iw > 0) THEN
214         WRITE (iw, *) "Command 1:"
215         CALL swarm_message_file_write(cmd1, iw)
216         WRITE (iw, *) "Command 2:"
217         CALL swarm_message_file_write(cmd2, iw)
218      END IF
219   END FUNCTION commands_equal
220
221! **************************************************************************************************
222!> \brief Central steering routine of the swarm master
223!> \param master ...
224!> \param report ...
225!> \param cmd ...
226!> \author Ole Schuett
227! **************************************************************************************************
228   SUBROUTINE swarm_master_steer(master, report, cmd)
229      TYPE(swarm_master_type), INTENT(INOUT)             :: master
230      TYPE(swarm_message_type), INTENT(IN)               :: report
231      TYPE(swarm_message_type), INTENT(OUT)              :: cmd
232
233      CHARACTER(len=*), PARAMETER :: routineN = 'swarm_master_steer', &
234         routineP = moduleN//':'//routineN
235
236      CHARACTER(len=default_string_length)               :: command, status
237      INTEGER                                            :: handle, worker_id
238      LOGICAL                                            :: should_stop
239
240      should_stop = .FALSE.
241
242      CALL timeset("swarm_master_steer", handle)
243
244      ! First check if there are queued commands for this worker
245      CALL swarm_message_get(report, "worker_id", worker_id)
246
247      IF (ASSOCIATED(master%queued_commands(worker_id)%p)) THEN
248         cmd = master%queued_commands(worker_id)%p
249         DEALLOCATE (master%queued_commands(worker_id)%p)
250         IF (master%iw > 0) WRITE (master%iw, '(A,A,A,I9,1X,A)') ' SWARM| ', &
251            REPEAT("*", 9), " Sending out queued command to worker: ", &
252            worker_id, REPEAT("*", 9)
253         CALL timestop(handle)
254         RETURN
255      END IF
256
257      IF (.NOT. master%ignore_last_iteration) THEN
258         ! There are no queued commands. Do the normal processing.
259         master%i_iteration = master%i_iteration + 1
260
261         IF (master%iw > 0) WRITE (master%iw, '(A,A,1X,I8,A,A)') ' SWARM| ', REPEAT("*", 15), &
262            master%i_iteration, ' Master / Worker Communication  ', REPEAT("*", 15)
263      ENDIF
264
265      IF (master%i_iteration >= master%max_iter .AND. .NOT. master%should_stop) THEN
266         IF (master%iw > 0) WRITE (master%iw, '(A)') " SWARM| Reached MAX_ITER. Quitting."
267         master%should_stop = .TRUE.
268      ENDIF
269
270      IF (.NOT. master%should_stop) THEN
271         CALL external_control(master%should_stop, "SWARM", master%globenv)
272         IF (master%should_stop .AND. master%iw > 0) &
273            WRITE (master%iw, *) " SWARM| Received stop from external_control. Quitting."
274      END IF
275
276      !IF(unit > 0) &
277
278      IF (master%should_stop) THEN
279         CALL swarm_message_add(cmd, "command", "shutdown")
280         IF (master%iw > 0) WRITE (master%iw, '(1X,A,T71,I10)') &
281            "SWARM| Sending shutdown command to worker", worker_id
282      ELSE
283         SELECT CASE (master%behavior)
284         CASE (swarm_do_glbopt)
285            CALL glbopt_master_steer(master%glbopt, report, cmd, should_stop)
286         CASE DEFAULT
287            CPABORT("got unknown behavior")
288         END SELECT
289
290         IF (should_stop) THEN
291            CALL swarm_message_free(cmd)
292            CALL swarm_message_add(cmd, "command", "shutdown") !overwrite command
293            IF (master%iw > 0) WRITE (master%iw, '(1X,A,T71,I10)') &
294               "SWARM| Sending shutdown command to worker", worker_id
295            master%should_stop = .TRUE.
296         ENDIF
297      ENDIF
298
299      CALL swarm_message_add(cmd, "worker_id", worker_id)
300
301      ! Don't pollute comlog with "continue waiting"-commands.
302      CALL swarm_message_get(report, "status", status)
303      CALL swarm_message_get(cmd, "command", command)
304      IF (TRIM(status) == "wait_done") master%n_waiting = master%n_waiting - 1
305      IF (TRIM(command) == "wait") master%n_waiting = master%n_waiting + 1
306      IF (master%n_waiting < 0) CPABORT("master%n_waiting < 0")
307      IF (TRIM(status) /= "wait_done" .OR. TRIM(command) /= "wait") THEN
308         CALL swarm_message_file_write(report, master%comlog_unit)
309         CALL swarm_message_file_write(cmd, master%comlog_unit)
310         IF (master%n_waiting > 0 .AND. master%iw > 0) WRITE (master%iw, '(1X,A,T71,I10)') &
311            "SWARM| Number of waiting workers:", master%n_waiting
312         master%ignore_last_iteration = .FALSE.
313      ELSE
314         master%ignore_last_iteration = .TRUE.
315      END IF
316      CALL timestop(handle)
317   END SUBROUTINE swarm_master_steer
318
319! **************************************************************************************************
320!> \brief Finalizes the swarm master
321!> \param master ...
322!> \author Ole Schuett
323! **************************************************************************************************
324   SUBROUTINE swarm_master_finalize(master)
325      TYPE(swarm_master_type)                            :: master
326
327      CHARACTER(len=*), PARAMETER :: routineN = 'swarm_master_finalize', &
328         routineP = moduleN//':'//routineN
329
330      TYPE(cp_logger_type), POINTER                      :: logger
331
332      IF (master%iw > 0) THEN
333         WRITE (master%iw, "(1X,A,T71,I10)") "SWARM| Total number of iterations ", master%i_iteration
334         WRITE (master%iw, "(A)") " SWARM| Shutting down the master."
335      ENDIF
336
337      SELECT CASE (master%behavior)
338      CASE (swarm_do_glbopt)
339         CALL glbopt_master_finalize(master%glbopt)
340         DEALLOCATE (master%glbopt)
341      CASE DEFAULT
342         CPABORT("got unknown behavior")
343      END SELECT
344
345      DEALLOCATE (master%queued_commands)
346
347      logger => cp_get_default_logger()
348      CALL cp_print_key_finished_output(master%iw, logger, &
349                                        master%swarm_section, "PRINT%MASTER_RUN_INFO")
350      CALL cp_print_key_finished_output(master%comlog_unit, logger, &
351                                        master%swarm_section, "PRINT%COMMUNICATION_LOG")
352
353      !CALL rm_timer_env() !pops the top-most timer
354   END SUBROUTINE swarm_master_finalize
355
356END MODULE swarm_master
357
358