!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                              !
!   Copyright (C) 2000 - 2019  CP2K developers group                                               !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief Helpers for farming runs: parsing of the FARMING input section into a farming
!>        environment and selection of the next job to be executed.
! **************************************************************************************************
MODULE farming_methods
   USE cp_files,                        ONLY: get_unit_number
   USE cp_log_handling,                 ONLY: cp_get_default_logger,&
                                              cp_logger_type
   USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
                                              cp_print_key_generate_filename,&
                                              cp_print_key_unit_nr
   USE cp_para_types,                   ONLY: cp_para_env_type
   USE farming_types,                   ONLY: farming_env_type,&
                                              init_job_type,&
                                              job_finished,&
                                              job_pending,&
                                              job_running
   USE input_section_types,             ONLY: section_vals_get,&
                                              section_vals_get_subs_vals,&
                                              section_vals_type,&
                                              section_vals_val_get
   USE message_passing,                 ONLY: mp_bcast
#include "./base/base_uses.f90"

   IMPLICIT NONE
   PRIVATE
   PUBLIC :: farming_parse_input, get_next_job

   ! must be negative in order to avoid confusion with job numbers
   INTEGER, PARAMETER, PUBLIC           :: do_nothing = -1, &
                                           do_wait = -2, &
                                           do_deadlock = -3

   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'farming_methods'

CONTAINS

! **************************************************************************************************
!> \brief Selects the next job to execute.
!>        In cycle mode the counter current is advanced and mapped onto the job list
!>        (wrapping around after njobs). Otherwise the first pending job in start..END
!>        whose dependencies are all finished is returned.
!> \param farming_env farming environment providing the cycle flag, njobs and the job list
!> \param start first index considered
!> \param END last index considered
!> \param current cycle mode only: running counter, set to start if smaller than start,
!>        incremented otherwise; not modified when cycle mode is off
!> \param todo index of the job to run, or one of do_nothing (nothing left to hand out),
!>        do_wait (pending jobs exist but their dependencies are unresolved) or
!>        do_deadlock (as do_wait, but no job in start..END is running)
! **************************************************************************************************
   SUBROUTINE get_next_job(farming_env, start, END, current, todo)
      TYPE(farming_env_type), POINTER                    :: farming_env
      INTEGER, INTENT(IN)                                :: start, END
      INTEGER, INTENT(INOUT)                             :: current
      INTEGER, INTENT(OUT)                               :: todo

      INTEGER                                            :: icheck, idep, itry, ndep
      LOGICAL                                            :: dep_ok

      IF (farming_env%cycle) THEN
         IF (current < start) THEN
            current = start
         ELSE
            current = current + 1
         ENDIF
         IF (current > END) THEN
            todo = do_nothing
         ELSE
            ! map the running counter onto 1..njobs
            todo = MODULO(current - 1, farming_env%njobs) + 1
         ENDIF
      ELSE
         ! find a pending job
         todo = do_nothing
         DO itry = start, END
            IF (farming_env%job(itry)%status == job_pending) THEN

               ! see if all dependencies are OK:
               ! a dependency is unresolved if a job carrying that id is not yet finished
               ndep = SIZE(farming_env%job(itry)%dependencies)
               dep_ok = .TRUE.
               dep: DO idep = 1, ndep
                  DO icheck = start, END
                     IF (farming_env%job(icheck)%status .NE. job_finished) THEN
                        IF (farming_env%job(icheck)%id == farming_env%job(itry)%dependencies(idep)) THEN
                           dep_ok = .FALSE.
                           EXIT dep
                        ENDIF
                     ENDIF
                  ENDDO
               ENDDO dep

               ! if there are pending jobs, the slave can not be told to stop
               ! at least wait if there are unresolved dependencies
               IF (dep_ok) THEN
                  todo = itry
                  EXIT
               ELSE
                  todo = do_wait
               ENDIF
            ENDIF
         ENDDO
         ! If we have to wait, but there are no running jobs we are deadlocked
         ! which we signal
         IF (todo == do_wait) THEN
            IF (.NOT. ANY(farming_env%job(start:END)%status == job_running)) todo = do_deadlock
         ENDIF
      ENDIF
   END SUBROUTINE get_next_job

! **************************************************************************************************
!> \brief Reads the FARMING section of the input into farming_env and reports the setup.
!>        Group layout is taken from GROUP_PARTITION, else NGROUP, else GROUP_SIZE.
!>        All JOB subsections are read (directory, input/output file names, id and
!>        dependencies). If DO_RESTART is set, the restart index is read from the restart
!>        file and broadcast from para_env%source.
!> \param farming_env farming environment to be filled
!> \param root_section input section containing the FARMING subsection
!> \param para_env parallel environment used for the rank count and the broadcast
!> \note  Jobs with dependencies require MASTER_SLAVE and are incompatible with CYCLE
!>        (both are asserted).
! **************************************************************************************************
   SUBROUTINE farming_parse_input(farming_env, root_section, para_env)
      TYPE(farming_env_type), POINTER                    :: farming_env
      TYPE(section_vals_type), POINTER                   :: root_section
      TYPE(cp_para_env_type), POINTER                    :: para_env

      CHARACTER(len=*), PARAMETER :: routineN = 'farming_parse_input', &
         routineP = moduleN//':'//routineN

      CHARACTER(LEN=3)                                   :: text
      INTEGER                                            :: i, iunit, n_rep_val, num_slaves, &
                                                            output_unit, restart_n_read, stat
      INTEGER, DIMENSION(:), POINTER                     :: dependencies, i_vals
      LOGICAL                                            :: explicit, has_dep
      TYPE(cp_logger_type), POINTER                      :: logger
      TYPE(section_vals_type), POINTER                   :: farming_section, jobs_section, print_key

      NULLIFY (farming_section, jobs_section, print_key, logger, dependencies, i_vals)
      logger => cp_get_default_logger()
      farming_env%group_size_wish_set = .FALSE.
      farming_env%ngroup_wish_set = .FALSE.
      farming_section => section_vals_get_subs_vals(root_section, "FARMING")

      ! discard a partition left over from an earlier call
      IF (ASSOCIATED(farming_env%group_partition)) THEN
         DEALLOCATE (farming_env%group_partition)
      END IF

      ! The following input order is used
      ! 1) GROUP_PARTITION
      ! 2) NGROUP
      ! 3) GROUP_SIZE (default 8)
      CALL section_vals_val_get(farming_section, "GROUP_PARTITION", &
                                n_rep_val=n_rep_val)
      IF (n_rep_val > 0) THEN
         CALL section_vals_val_get(farming_section, "GROUP_PARTITION", &
                                   i_vals=i_vals)
         ! the partition is stored zero-based
         ALLOCATE (farming_env%group_partition(0:SIZE(i_vals) - 1))
         farming_env%group_partition(:) = i_vals
         farming_env%ngroup_wish_set = .TRUE.
         farming_env%ngroup_wish = SIZE(i_vals)
      ELSE
         CALL section_vals_val_get(farming_section, "NGROUP", &
                                   n_rep_val=n_rep_val)
         IF (n_rep_val > 0) THEN
            CALL section_vals_val_get(farming_section, "NGROUP", &
                                      i_val=farming_env%ngroup_wish)
            farming_env%ngroup_wish_set = .TRUE.
         ELSE
            CALL section_vals_val_get(farming_section, "GROUP_SIZE", &
                                      i_val=farming_env%group_size_wish)
            farming_env%group_size_wish_set = .TRUE.
         END IF
      END IF
      CALL section_vals_val_get(farming_section, "STRIDE", &
                                i_val=farming_env%stride)

      ! an explicitly given restart file name wins over the one generated from the print key
      CALL section_vals_val_get(farming_section, "RESTART_FILE_NAME", &
                                explicit=explicit)
      IF (explicit) THEN
         CALL section_vals_val_get(farming_section, "RESTART_FILE_NAME", &
                                   c_val=farming_env%restart_file_name)
      ELSE
         print_key => section_vals_get_subs_vals(farming_section, "RESTART")
         farming_env%restart_file_name = cp_print_key_generate_filename(logger, print_key, extension=".restart", &
                                                                        my_local=.FALSE.)
      END IF

      CALL section_vals_val_get(farming_section, "DO_RESTART", &
                                l_val=farming_env%restart)
      CALL section_vals_val_get(farming_section, "MAX_JOBS_PER_GROUP", &
                                i_val=farming_env%max_steps)
      CALL section_vals_val_get(farming_section, "CYCLE", &
                                l_val=farming_env%cycle)
      CALL section_vals_val_get(farming_section, "WAIT_TIME", &
                                r_val=farming_env%wait_time)

      CALL section_vals_val_get(farming_section, "MASTER_SLAVE", &
                                l_val=farming_env%master_slave)

      jobs_section => section_vals_get_subs_vals(farming_section, "JOB")
      CALL section_vals_get(jobs_section, n_repetition=farming_env%njobs)

      ALLOCATE (farming_env%Job(farming_env%njobs))
      CALL init_job_type(farming_env%job)

      has_dep = .FALSE.
      DO i = 1, farming_env%njobs
         CALL section_vals_val_get(jobs_section, i_rep_section=i, &
                                   keyword_name="DIRECTORY", c_val=farming_env%Job(i)%cwd)
         CALL section_vals_val_get(jobs_section, i_rep_section=i, &
                                   keyword_name="INPUT_FILE_NAME", c_val=farming_env%Job(i)%input)
         CALL section_vals_val_get(jobs_section, i_rep_section=i, &
                                   keyword_name="OUTPUT_FILE_NAME", c_val=farming_env%Job(i)%output)

         ! if job id is not specified the job id is the index
         CALL section_vals_val_get(jobs_section, i_rep_section=i, &
                                   keyword_name="JOB_ID", n_rep_val=n_rep_val)
         IF (n_rep_val == 0) THEN
            farming_env%Job(i)%id = i
         ELSE
            CALL section_vals_val_get(jobs_section, i_rep_section=i, &
                                      keyword_name="JOB_ID", i_val=farming_env%Job(i)%id)
         ENDIF

         ! get dependencies
         CALL section_vals_val_get(jobs_section, i_rep_section=i, &
                                   keyword_name="DEPENDENCIES", n_rep_val=n_rep_val)
         IF (n_rep_val == 0) THEN
            ALLOCATE (farming_env%Job(i)%dependencies(0))
         ELSE
            CALL section_vals_val_get(jobs_section, i_rep_section=i, &
                                      keyword_name="DEPENDENCIES", i_vals=dependencies)
            ALLOCATE (farming_env%Job(i)%dependencies(SIZE(dependencies, 1)))
            farming_env%Job(i)%dependencies = dependencies
            IF (SIZE(dependencies, 1) .NE. 0) has_dep = .TRUE.
         ENDIF
      END DO

      IF (has_dep) THEN
         CPASSERT(farming_env%master_slave)
         CPASSERT(.NOT. farming_env%cycle)
      ENDIF

      output_unit = cp_print_key_unit_nr(logger, farming_section, "PROGRAM_RUN_INFO", &
                                         extension=".log")

      ! master slave not supported
      IF (para_env%num_pe == 1) THEN
         farming_env%master_slave = .FALSE.
         ! only write if this rank actually has an output unit
         IF (output_unit > 0) THEN
            WRITE (output_unit, FMT="(T2,A)") "FARMING| Master-slave setup not support for serial runs"
         ENDIF
      ENDIF
      IF (farming_env%master_slave) THEN
         num_slaves = para_env%num_pe - 1
      ELSE
         num_slaves = para_env%num_pe
      ENDIF

      ! default restart index, defined on every rank so that the broadcast below
      ! never sends an unset value
      farming_env%restart_n = 1

      IF (output_unit > 0) THEN
         WRITE (output_unit, FMT="(T2,A,T71,I10)") "FARMING| Number of jobs found", farming_env%njobs
         IF (farming_env%ngroup_wish_set) THEN
            WRITE (output_unit, FMT="(T2,A,T71,I10)") "FARMING| Ngroup wish:", farming_env%ngroup_wish
            IF (ASSOCIATED(farming_env%group_partition)) THEN
               WRITE (output_unit, FMT="(T2,A)", ADVANCE="NO") "FARMING| User partition:"
               ! four partition entries per output line
               DO i = 0, SIZE(farming_env%group_partition) - 1
                  IF (MODULO(i, 4) == 0) WRITE (output_unit, *)
                  WRITE (output_unit, FMT='(I4)', ADVANCE="NO") farming_env%group_partition(i)
               END DO
               WRITE (output_unit, *)
               IF (SUM(farming_env%group_partition) .NE. num_slaves) THEN
                  ! message and numbers go on separate lines: the message is longer than
                  ! column 61, so tabbing back to T61 would overwrite it
                  WRITE (output_unit, FMT="(T2,A)") &
                     "FARMING| WARNING : group partition CPUs not equal to the available number (ignoring master) "
                  WRITE (output_unit, FMT="(T2,A,T61,I10,T71,I10)") &
                     "FARMING| available / requested CPUs:", &
                     num_slaves, SUM(farming_env%group_partition)
                  WRITE (output_unit, FMT="(T2,A)") "FARMING| partition data ignored" ! any better idea ??
               ENDIF
            ENDIF
         ENDIF
         IF (farming_env%group_size_wish_set) THEN
            WRITE (output_unit, FMT="(T2,A,T71,I10)") "FARMING| Group size wish:", &
               farming_env%group_size_wish
         ENDIF
         WRITE (output_unit, FMT="(T2,A,T71,I10)") "FARMING| Max steps      :", farming_env%max_steps
         IF (farming_env%cycle) THEN
            text = "YES"
         ELSE
            text = " NO"
         ENDIF
         WRITE (output_unit, FMT="(T2,A,T78,A3)") "FARMING| Cyclic jobs execution:", text
         IF (farming_env%restart) THEN
            text = "YES"
         ELSE
            text = " NO"
         ENDIF
         WRITE (output_unit, FMT="(T2,A,T78,A3)") "FARMING| Restarting farm:", text
         ! NOTE(review): the restart file is only read where output_unit > 0; with
         ! PROGRAM_RUN_INFO disabled no rank reads it and the farm starts at 1 - confirm
         ! that this coupling is intended.
         IF (farming_env%restart) THEN
            iunit = get_unit_number()
            ! STATUS="OLD": a missing restart file must be reported, not created empty
            OPEN (UNIT=iunit, FILE=farming_env%restart_file_name, STATUS="OLD", &
                  ACTION="READ", IOSTAT=stat)
            IF (stat == 0) THEN
               ! read into a temporary so that a failed read keeps the default of 1
               READ (UNIT=iunit, FMT=*, IOSTAT=stat) restart_n_read
               IF (stat /= 0) THEN
                  WRITE (output_unit, "(T2,A)") &
                     "FARMING| ---- WARNING ---- failed to read from ("// &
                     TRIM(farming_env%restart_file_name)//") starting at 1"
               ELSE
                  farming_env%restart_n = restart_n_read
                  WRITE (output_unit, "(T2,A)") &
                     "FARMING| restarting from ("//TRIM(farming_env%restart_file_name)//")"
                  WRITE (output_unit, "(T2,A,T71,I10)") &
                     "FARMING| restarting at ", farming_env%restart_n
               ENDIF
               CLOSE (iunit, IOSTAT=stat)
            ELSE
               WRITE (output_unit, "(T2,A)") &
                  "FARMING| ---- WARNING ---- failed to open ("// &
                  TRIM(farming_env%restart_file_name)//"), starting at 1"
            ENDIF
         ENDIF

         CALL cp_print_key_finished_output(output_unit, logger, farming_section, &
                                           "PROGRAM_RUN_INFO")
      ENDIF

      ! drop a user partition that does not match the available CPUs; done on every
      ! rank (not only the one writing output) so that all ranks agree on its state
      IF (ASSOCIATED(farming_env%group_partition)) THEN
         IF (SUM(farming_env%group_partition) .NE. num_slaves) THEN
            DEALLOCATE (farming_env%group_partition)
         ENDIF
      ENDIF

      CALL mp_bcast(farming_env%restart_n, para_env%source, para_env%group)

   END SUBROUTINE farming_parse_input

END MODULE farming_methods