1!--------------------------------------------------------------------------------------------------! 2! CP2K: A general program to perform molecular dynamics simulations ! 3! Copyright (C) 2000 - 2020 CP2K developers group ! 4!--------------------------------------------------------------------------------------------------! 5 6! ************************************************************************************************** 7!> \brief Master's routines for the swarm-framework 8!> \author Ole Schuett 9! ************************************************************************************************** 10MODULE swarm_master 11 USE cp_external_control, ONLY: external_control 12 USE cp_log_handling, ONLY: cp_get_default_logger,& 13 cp_logger_type 14 USE cp_output_handling, ONLY: cp_print_key_finished_output,& 15 cp_print_key_unit_nr 16 USE cp_para_types, ONLY: cp_para_env_type 17 USE cp_parser_types, ONLY: cp_parser_type,& 18 parser_create,& 19 parser_release 20 USE glbopt_master, ONLY: glbopt_master_finalize,& 21 glbopt_master_init,& 22 glbopt_master_steer,& 23 glbopt_master_type 24 USE global_types, ONLY: global_environment_type 25 USE input_constants, ONLY: swarm_do_glbopt 26 USE input_section_types, ONLY: section_vals_get_subs_vals,& 27 section_vals_type,& 28 section_vals_val_get 29 USE kinds, ONLY: default_path_length,& 30 default_string_length 31 USE swarm_message, ONLY: swarm_message_add,& 32 swarm_message_equal,& 33 swarm_message_file_read,& 34 swarm_message_file_write,& 35 swarm_message_free,& 36 swarm_message_get,& 37 swarm_message_type 38#include "../base/base_uses.f90" 39 40 IMPLICIT NONE 41 PRIVATE 42 43 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'swarm_master' 44 45 PUBLIC :: swarm_master_type 46 PUBLIC :: swarm_master_init, swarm_master_finalize 47 PUBLIC :: swarm_master_steer 48 49 TYPE swarm_message_p_type 50 TYPE(swarm_message_type), POINTER :: p => Null() 51 END TYPE swarm_message_p_type 52 53 TYPE swarm_master_type 54 PRIVATE 55 INTEGER :: behavior = -1 56 TYPE(glbopt_master_type), POINTER :: glbopt => Null() 57 !possibly more behaviors ... 58 INTEGER :: iw = 0 59 INTEGER :: i_iteration = 0 60 INTEGER :: max_iter = 0 61 LOGICAL :: should_stop = .FALSE. 62 INTEGER :: n_workers = -1 63 INTEGER :: comlog_unit 64 TYPE(section_vals_type), POINTER :: swarm_section => Null() 65 TYPE(cp_para_env_type), POINTER :: para_env => Null() 66 TYPE(swarm_message_p_type), DIMENSION(:), POINTER :: queued_commands => Null() 67 TYPE(global_environment_type), POINTER :: globenv => Null() 68 LOGICAL :: ignore_last_iteration = .FALSE. 69 INTEGER :: n_waiting = 0 70 END TYPE swarm_master_type 71 72CONTAINS 73 74! ************************************************************************************************** 75!> \brief Initializes the swarm master 76!> \param master ... 77!> \param para_env ... 78!> \param globenv ... 79!> \param root_section ... 80!> \param n_workers ... 81!> \author Ole Schuett 82! ************************************************************************************************** 83 SUBROUTINE swarm_master_init(master, para_env, globenv, root_section, n_workers) 84 TYPE(swarm_master_type) :: master 85 TYPE(cp_para_env_type), POINTER :: para_env 86 TYPE(global_environment_type), POINTER :: globenv 87 TYPE(section_vals_type), POINTER :: root_section 88 INTEGER, INTENT(IN) :: n_workers 89 90 CHARACTER(len=*), PARAMETER :: routineN = 'swarm_master_init', & 91 routineP = moduleN//':'//routineN 92 93 TYPE(cp_logger_type), POINTER :: logger 94 95 master%swarm_section => section_vals_get_subs_vals(root_section, "SWARM") 96 97 logger => cp_get_default_logger() 98 master%n_workers = n_workers 99 master%para_env => para_env 100 master%globenv => globenv 101 ALLOCATE (master%queued_commands(master%n_workers)) 102 master%iw = cp_print_key_unit_nr(logger, master%swarm_section, & 103 "PRINT%MASTER_RUN_INFO", extension=".masterLog") 104 105 CALL section_vals_val_get(master%swarm_section, "BEHAVIOR", i_val=master%behavior) 106 107 ! uses logger%iter_info%project_name to construct filename 108 master%comlog_unit = cp_print_key_unit_nr(logger, master%swarm_section, "PRINT%COMMUNICATION_LOG", & 109 !middle_name="comlog", extension=".xyz", & 110 extension=".comlog", & 111 file_action="WRITE", file_position="REWIND") 112 113 CALL section_vals_val_get(master%swarm_section, "MAX_ITER", i_val=master%max_iter) 114 115 SELECT CASE (master%behavior) 116 CASE (swarm_do_glbopt) 117 ALLOCATE (master%glbopt) 118 CALL glbopt_master_init(master%glbopt, para_env, root_section, n_workers, master%iw) 119 CASE DEFAULT 120 CPABORT("got unknown behavior") 121 END SELECT 122 123 CALL replay_comlog(master) 124 END SUBROUTINE swarm_master_init 125 126! ************************************************************************************************** 127!> \brief Helper routine for swarm_master_init, restarts a calculation 128!> \param master ... 129!> \author Ole Schuett 130! ************************************************************************************************** 131 SUBROUTINE replay_comlog(master) 132 TYPE(swarm_master_type) :: master 133 134 CHARACTER(len=*), PARAMETER :: routineN = 'replay_comlog', routineP = moduleN//':'//routineN 135 136 CHARACTER(LEN=default_path_length) :: filename 137 CHARACTER(LEN=default_string_length) :: command_log 138 INTEGER :: handle, i, worker_id 139 LOGICAL :: at_end, explicit 140 TYPE(cp_parser_type), POINTER :: parser 141 TYPE(swarm_message_type) :: cmd_log, report_log 142 TYPE(swarm_message_type), & 143 DIMENSION(master%n_workers) :: last_commands 144 TYPE(swarm_message_type), POINTER :: cmd_now 145 146 NULLIFY (parser) 147 148 ! Initialize parser for trajectory 149 CALL section_vals_val_get(master%swarm_section, "REPLAY_COMMUNICATION_LOG", & 150 c_val=filename, explicit=explicit) 151 152 IF (.NOT. explicit) RETURN 153 IF (master%iw > 0) WRITE (master%iw, '(A,A)') & 154 " SWARM| Starting replay of communication-log: ", TRIM(filename) 155 156 CALL timeset("swarm_master_replay_comlog", handle) 157 CALL parser_create(parser, filename, para_env=master%para_env) 158 159 at_end = .FALSE. 160 DO 161 CALL swarm_message_file_read(report_log, parser, at_end) 162 IF (at_end) EXIT 163 164 CALL swarm_message_file_read(cmd_log, parser, at_end) 165 IF (at_end) EXIT 166 167 ALLOCATE (cmd_now) 168 CALL swarm_master_steer(master, report_log, cmd_now) 169 170 !TODO: maybe we should just exit the loop instead of stopping? 171 CALL swarm_message_get(cmd_log, "command", command_log) 172 IF (TRIM(command_log) /= "shutdown") THEN 173 IF (.NOT. commands_equal(cmd_now, cmd_log, master%iw)) CPABORT("wrong behaviour") 174 END IF 175 176 CALL swarm_message_free(cmd_log) 177 CALL swarm_message_free(report_log) 178 CALL swarm_message_get(cmd_now, "worker_id", worker_id) 179 CALL swarm_message_free(last_commands(worker_id)) 180 last_commands(worker_id) = cmd_now 181 DEALLOCATE (cmd_now) 182 END DO 183 184 CALL swarm_message_free(report_log) !don't worry about double-frees 185 CALL swarm_message_free(cmd_log) 186 187 IF (master%iw > 0) WRITE (master%iw, '(A,A)') & 188 " SWARM| Reached end of communication log. Queueing last commands." 189 190 DO i = 1, master%n_workers 191 ALLOCATE (master%queued_commands(i)%p) 192 master%queued_commands(i)%p = last_commands(i) 193 END DO 194 195 CALL parser_release(parser) 196 CALL timestop(handle) 197 END SUBROUTINE replay_comlog 198 199! ************************************************************************************************** 200!> \brief Helper routine for replay_comlog, compares two commands 201!> \param cmd1 ... 202!> \param cmd2 ... 203!> \param iw ... 204!> \return ... 205!> \author Ole Schuett 206! ************************************************************************************************** 207 FUNCTION commands_equal(cmd1, cmd2, iw) RESULT(res) 208 TYPE(swarm_message_type) :: cmd1, cmd2 209 INTEGER :: iw 210 LOGICAL :: res 211 212 res = swarm_message_equal(cmd1, cmd2) 213 IF (.NOT. res .AND. iw > 0) THEN 214 WRITE (iw, *) "Command 1:" 215 CALL swarm_message_file_write(cmd1, iw) 216 WRITE (iw, *) "Command 2:" 217 CALL swarm_message_file_write(cmd2, iw) 218 END IF 219 END FUNCTION commands_equal 220 221! ************************************************************************************************** 222!> \brief Central steering routine of the swarm master 223!> \param master ... 224!> \param report ... 225!> \param cmd ... 226!> \author Ole Schuett 227! ************************************************************************************************** 228 SUBROUTINE swarm_master_steer(master, report, cmd) 229 TYPE(swarm_master_type), INTENT(INOUT) :: master 230 TYPE(swarm_message_type), INTENT(IN) :: report 231 TYPE(swarm_message_type), INTENT(OUT) :: cmd 232 233 CHARACTER(len=*), PARAMETER :: routineN = 'swarm_master_steer', & 234 routineP = moduleN//':'//routineN 235 236 CHARACTER(len=default_string_length) :: command, status 237 INTEGER :: handle, worker_id 238 LOGICAL :: should_stop 239 240 should_stop = .FALSE. 241 242 CALL timeset("swarm_master_steer", handle) 243 244 ! First check if there are queued commands for this worker 245 CALL swarm_message_get(report, "worker_id", worker_id) 246 247 IF (ASSOCIATED(master%queued_commands(worker_id)%p)) THEN 248 cmd = master%queued_commands(worker_id)%p 249 DEALLOCATE (master%queued_commands(worker_id)%p) 250 IF (master%iw > 0) WRITE (master%iw, '(A,A,A,I9,1X,A)') ' SWARM| ', & 251 REPEAT("*", 9), " Sending out queued command to worker: ", & 252 worker_id, REPEAT("*", 9) 253 CALL timestop(handle) 254 RETURN 255 END IF 256 257 IF (.NOT. master%ignore_last_iteration) THEN 258 ! There are no queued commands. Do the normal processing. 259 master%i_iteration = master%i_iteration + 1 260 261 IF (master%iw > 0) WRITE (master%iw, '(A,A,1X,I8,A,A)') ' SWARM| ', REPEAT("*", 15), & 262 master%i_iteration, ' Master / Worker Communication ', REPEAT("*", 15) 263 ENDIF 264 265 IF (master%i_iteration >= master%max_iter .AND. .NOT. master%should_stop) THEN 266 IF (master%iw > 0) WRITE (master%iw, '(A)') " SWARM| Reached MAX_ITER. Quitting." 267 master%should_stop = .TRUE. 268 ENDIF 269 270 IF (.NOT. master%should_stop) THEN 271 CALL external_control(master%should_stop, "SWARM", master%globenv) 272 IF (master%should_stop .AND. master%iw > 0) & 273 WRITE (master%iw, *) " SWARM| Received stop from external_control. Quitting." 274 END IF 275 276 !IF(unit > 0) & 277 278 IF (master%should_stop) THEN 279 CALL swarm_message_add(cmd, "command", "shutdown") 280 IF (master%iw > 0) WRITE (master%iw, '(1X,A,T71,I10)') & 281 "SWARM| Sending shutdown command to worker", worker_id 282 ELSE 283 SELECT CASE (master%behavior) 284 CASE (swarm_do_glbopt) 285 CALL glbopt_master_steer(master%glbopt, report, cmd, should_stop) 286 CASE DEFAULT 287 CPABORT("got unknown behavior") 288 END SELECT 289 290 IF (should_stop) THEN 291 CALL swarm_message_free(cmd) 292 CALL swarm_message_add(cmd, "command", "shutdown") !overwrite command 293 IF (master%iw > 0) WRITE (master%iw, '(1X,A,T71,I10)') & 294 "SWARM| Sending shutdown command to worker", worker_id 295 master%should_stop = .TRUE. 296 ENDIF 297 ENDIF 298 299 CALL swarm_message_add(cmd, "worker_id", worker_id) 300 301 ! Don't pollute comlog with "continue waiting"-commands. 302 CALL swarm_message_get(report, "status", status) 303 CALL swarm_message_get(cmd, "command", command) 304 IF (TRIM(status) == "wait_done") master%n_waiting = master%n_waiting - 1 305 IF (TRIM(command) == "wait") master%n_waiting = master%n_waiting + 1 306 IF (master%n_waiting < 0) CPABORT("master%n_waiting < 0") 307 IF (TRIM(status) /= "wait_done" .OR. TRIM(command) /= "wait") THEN 308 CALL swarm_message_file_write(report, master%comlog_unit) 309 CALL swarm_message_file_write(cmd, master%comlog_unit) 310 IF (master%n_waiting > 0 .AND. master%iw > 0) WRITE (master%iw, '(1X,A,T71,I10)') & 311 "SWARM| Number of waiting workers:", master%n_waiting 312 master%ignore_last_iteration = .FALSE. 313 ELSE 314 master%ignore_last_iteration = .TRUE. 315 END IF 316 CALL timestop(handle) 317 END SUBROUTINE swarm_master_steer 318 319! ************************************************************************************************** 320!> \brief Finalizes the swarm master 321!> \param master ... 322!> \author Ole Schuett 323! ************************************************************************************************** 324 SUBROUTINE swarm_master_finalize(master) 325 TYPE(swarm_master_type) :: master 326 327 CHARACTER(len=*), PARAMETER :: routineN = 'swarm_master_finalize', & 328 routineP = moduleN//':'//routineN 329 330 TYPE(cp_logger_type), POINTER :: logger 331 332 IF (master%iw > 0) THEN 333 WRITE (master%iw, "(1X,A,T71,I10)") "SWARM| Total number of iterations ", master%i_iteration 334 WRITE (master%iw, "(A)") " SWARM| Shutting down the master." 335 ENDIF 336 337 SELECT CASE (master%behavior) 338 CASE (swarm_do_glbopt) 339 CALL glbopt_master_finalize(master%glbopt) 340 DEALLOCATE (master%glbopt) 341 CASE DEFAULT 342 CPABORT("got unknown behavior") 343 END SELECT 344 345 DEALLOCATE (master%queued_commands) 346 347 logger => cp_get_default_logger() 348 CALL cp_print_key_finished_output(master%iw, logger, & 349 master%swarm_section, "PRINT%MASTER_RUN_INFO") 350 CALL cp_print_key_finished_output(master%comlog_unit, logger, & 351 master%swarm_section, "PRINT%COMMUNICATION_LOG") 352 353 !CALL rm_timer_env() !pops the top-most timer 354 END SUBROUTINE swarm_master_finalize 355 356END MODULE swarm_master 357 358