1!--------------------------------------------------------------------------------------------------! 2! Copyright (C) by the DBCSR developers group - All rights reserved ! 3! This file is part of the DBCSR library. ! 4! ! 5! For information on the license, see the LICENSE file. ! 6! For further information please visit https://dbcsr.cp2k.org ! 7! SPDX-License-Identifier: GPL-2.0+ ! 8!--------------------------------------------------------------------------------------------------! 9 10MODULE dbcsr_work_operations 11 !! DBCSR work matrix utilities 12 13 USE dbcsr_array_types, ONLY: array_data, & 14 array_hold, & 15 array_i1d_obj, & 16 array_new, & 17 array_nullify, & 18 array_release, & 19 array_size 20 USE dbcsr_btree, ONLY: btree_data_cp2d, & 21 btree_data_dp2d, & 22 btree_data_sp2d, & 23 btree_data_zp2d 24 USE dbcsr_block_operations, ONLY: block_copy_c, & 25 block_copy_d, & 26 block_copy_s, & 27 block_copy_z, & 28 dbcsr_data_copy, & 29 dbcsr_data_set 30 USE dbcsr_config, ONLY: default_resize_factor 31 USE dbcsr_data_methods, ONLY: & 32 dbcsr_data_clear_pointer, dbcsr_data_ensure_size, dbcsr_data_get_memory_type, & 33 dbcsr_data_get_size, dbcsr_data_get_size_referenced, dbcsr_data_get_type, dbcsr_data_hold, & 34 dbcsr_data_init, dbcsr_data_new, dbcsr_data_release, dbcsr_data_set_size_referenced, & 35 dbcsr_data_valid, dbcsr_get_data_p_c, dbcsr_get_data_p_d, dbcsr_get_data_p_s, & 36 dbcsr_get_data_p_z 37 USE dbcsr_data_operations, ONLY: dbcsr_data_copyall, & 38 dbcsr_sort_data, & 39 dbcsr_switch_data_area 40 USE dbcsr_dist_methods, ONLY: dbcsr_distribution_has_threads, & 41 dbcsr_distribution_hold, & 42 dbcsr_distribution_make_threads, & 43 dbcsr_distribution_ncols, & 44 dbcsr_distribution_nrows, & 45 dbcsr_distribution_release 46 USE dbcsr_index_operations, ONLY: dbcsr_addto_index_array, & 47 dbcsr_build_row_index, & 48 dbcsr_clearfrom_index_array, & 49 dbcsr_index_prune_deleted, & 50 dbcsr_make_dbcsr_index, & 51 dbcsr_make_index_exist, & 52 dbcsr_repoint_index, & 53 dbcsr_sort_indices 54 USE dbcsr_iterator_operations, ONLY: dbcsr_iterator_blocks_left, & 55 dbcsr_iterator_next_block, & 56 dbcsr_iterator_start, & 57 dbcsr_iterator_stop 58 USE dbcsr_mem_methods, ONLY: dbcsr_memtype_equal 59 USE dbcsr_methods, ONLY: & 60 dbcsr_col_block_sizes, dbcsr_distribution, dbcsr_get_data_memory_type, & 61 dbcsr_get_data_size_used, dbcsr_get_data_type, dbcsr_get_index_memory_type, & 62 dbcsr_get_matrix_type, dbcsr_get_replication_type, dbcsr_matrix_counter, & 63 dbcsr_max_col_size, dbcsr_max_row_size, dbcsr_mutable_destroy, dbcsr_mutable_init, & 64 dbcsr_mutable_instantiated, dbcsr_mutable_new, dbcsr_mutable_release, dbcsr_name, & 65 dbcsr_row_block_sizes, dbcsr_use_mutable, dbcsr_valid_index, dbcsr_wm_use_mutable 66 USE dbcsr_ptr_util, ONLY: ensure_array_size 67 USE dbcsr_toollib, ONLY: dbcsr_unpack_i8_2i4, & 68 sort 69 USE dbcsr_string_utilities, ONLY: uppercase 70 USE dbcsr_types, ONLY: & 71 dbcsr_data_obj, dbcsr_distribution_obj, dbcsr_iterator, dbcsr_memtype_default, & 72 dbcsr_memtype_type, dbcsr_meta_size, dbcsr_num_slots, dbcsr_repl_col, dbcsr_repl_full, & 73 dbcsr_repl_none, dbcsr_repl_row, dbcsr_slot_blk_p, dbcsr_slot_col_i, dbcsr_slot_home_coli, & 74 dbcsr_slot_home_pcol, dbcsr_slot_home_prow, dbcsr_slot_home_rowi, dbcsr_slot_home_vpcol, & 75 dbcsr_slot_home_vprow, dbcsr_slot_nblkcols_local, dbcsr_slot_nblkcols_total, & 76 dbcsr_slot_nblkrows_local, dbcsr_slot_nblkrows_total, dbcsr_slot_nblks, & 77 dbcsr_slot_nfullcols_local, dbcsr_slot_nfullcols_total, dbcsr_slot_nfullrows_local, & 78 dbcsr_slot_nfullrows_total, dbcsr_slot_nze, dbcsr_slot_row_p, dbcsr_slot_size, dbcsr_type, & 79 dbcsr_type_antihermitian, dbcsr_type_antisymmetric, dbcsr_type_complex_4, & 80 dbcsr_type_complex_8, dbcsr_type_hermitian, dbcsr_type_no_symmetry, dbcsr_type_real_4, & 81 dbcsr_type_real_8, dbcsr_type_real_default, dbcsr_type_symmetric, dbcsr_work_type 82 USE dbcsr_dist_util, ONLY: convert_sizes_to_offsets, & 83 dbcsr_calc_block_sizes, & 84 dbcsr_verify_matrix, & 85 meta_from_dist 86 USE dbcsr_kinds, ONLY: default_string_length, & 87 int_8, & 88 real_4, & 89 real_8 90#include "base/dbcsr_base_uses.f90" 91 92!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads, omp_in_parallel 93 94 IMPLICIT NONE 95 PRIVATE 96 97 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dbcsr_work_operations' 98 99 LOGICAL, PARAMETER :: careful_mod = .FALSE. 100 101 PUBLIC :: dbcsr_create, dbcsr_work_create, dbcsr_finalize, dbcsr_special_finalize 102 PUBLIC :: dbcsr_add_wm_from_matrix, & 103 add_work_coordinate 104 PUBLIC :: dbcsr_work_destroy 105 106 INTERFACE dbcsr_create 107 MODULE PROCEDURE dbcsr_create_new, dbcsr_create_template 108 END INTERFACE 109 110 TYPE i_array_p 111 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: p 112 END TYPE i_array_p 113 114CONTAINS 115 116 SUBROUTINE dbcsr_create_new(matrix, name, dist, matrix_type, & 117 row_blk_size, col_blk_size, row_blk_size_obj, col_blk_size_obj, & 118 nze, data_type, data_buffer, & 119 data_memory_type, index_memory_type, & 120 max_rbs, max_cbs, & 121 row_blk_offset, col_blk_offset, & 122 thread_dist, & 123 reuse, reuse_arrays, mutable_work, make_index, replication_type) 124 !! Creates a matrix, allocating the essentials. 125 !! 126 !! The matrix itself is allocated, as well as the essential parts of 127 !! the index. When passed the nze argument, the data is also allocated 128 !! to that size. 129 !! see dbcsr_types.F 130 131 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 132 !! new matrix 133 CHARACTER(len=*), INTENT(IN) :: name 134 TYPE(dbcsr_distribution_obj), INTENT(IN) :: dist 135 !! distribution_2d distribution 136 CHARACTER, INTENT(IN) :: matrix_type 137 !! 'N' for normal, 'T' for transposed, 'S' for symmetric, and 'A' for antisymmetric 138 INTEGER, DIMENSION(:), INTENT(IN), POINTER, & 139 CONTIGUOUS, OPTIONAL :: row_blk_size, col_blk_size 140 TYPE(array_i1d_obj), INTENT(IN), OPTIONAL :: row_blk_size_obj, col_blk_size_obj 141 INTEGER, INTENT(IN), OPTIONAL :: nze, data_type 142 !! number of elements 143 !! type of data from 'rRcC' for single/double precision real/complex, default is 'R' 144 TYPE(dbcsr_data_obj), INTENT(IN), OPTIONAL :: data_buffer 145 TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL :: data_memory_type, index_memory_type 146 !! allocate indices and data using special memory 147 !! allocate indices using special memory 148 INTEGER, INTENT(IN), OPTIONAL :: max_rbs, max_cbs 149 TYPE(array_i1d_obj), INTENT(IN), OPTIONAL :: row_blk_offset, col_blk_offset 150 TYPE(dbcsr_distribution_obj), INTENT(IN), OPTIONAL :: thread_dist 151 LOGICAL, INTENT(IN), OPTIONAL :: reuse, reuse_arrays, mutable_work, & 152 make_index 153 !! reuses an existing matrix, default is to create a fresh one 154 !! uses the mutable data for working and not the append-only data; default is append-only 155 CHARACTER, INTENT(IN), OPTIONAL :: replication_type 156 !! replication to be used for this matrix; default is dbcsr_repl_none 157 158 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_create_new', & 159 routineP = moduleN//':'//routineN 160 161 CHARACTER :: matrix_type_l 162 INTEGER :: handle, my_nze 163 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: vec_col_blk_offset, vec_row_blk_offset 164 INTEGER, DIMENSION(dbcsr_meta_size) :: new_meta 165 LOGICAL :: hijack, my_make_index 166 167! --------------------------------------------------------------------------- 168 169 MARK_USED(thread_dist) ! only used with OMP 170 171 CALL timeset(routineN, handle) 172 173 ! Reuse matrix only if has actually been allocated. 174 hijack = ASSOCIATED(matrix%index) 175 IF (PRESENT(reuse)) hijack = reuse 176 177 my_make_index = .TRUE. 178 IF (PRESENT(make_index)) my_make_index = make_index 179 180 IF (.NOT. hijack) THEN 181 matrix = dbcsr_type() 182 matrix%refcount = 1 183 ENDIF 184!$OMP CRITICAL (crit_counter) 185 matrix%serial_number = dbcsr_matrix_counter 186 dbcsr_matrix_counter = dbcsr_matrix_counter + 1 187!$OMP END CRITICAL (crit_counter) 188 ! Mark matrix index as having an invalid index. 189 matrix%valid = .FALSE. 190 matrix%name = name 191 ! Sets the type of matrix building/modifying work structures. 192 IF (PRESENT(mutable_work)) THEN 193 matrix%work_mutable = mutable_work 194 ELSE 195 matrix%work_mutable = .FALSE. 196 ENDIF 197 ! Sets the correct data type. 198 IF (PRESENT(data_type)) THEN 199 SELECT CASE (data_type) 200 CASE (dbcsr_type_real_4) 201 matrix%data_type = dbcsr_type_real_4 202 CASE (dbcsr_type_real_8) 203 matrix%data_type = dbcsr_type_real_8 204 CASE (dbcsr_type_complex_4) 205 matrix%data_type = dbcsr_type_complex_4 206 CASE (dbcsr_type_complex_8) 207 matrix%data_type = dbcsr_type_complex_8 208 CASE DEFAULT 209 DBCSR_ABORT("Invalid matrix type") 210 END SELECT 211 ELSE 212 matrix%data_type = dbcsr_type_real_default 213 ENDIF 214 215 matrix%data_memory_type = dbcsr_memtype_default 216 IF (PRESENT(data_memory_type)) & 217 matrix%data_memory_type = data_memory_type 218 219 matrix%index_memory_type = dbcsr_memtype_default 220 IF (PRESENT(index_memory_type)) & 221 matrix%index_memory_type = index_memory_type 222 223 IF (hijack) THEN 224 ! Release/deallocate elements that are replaced or not needed 225 ! by the new matrix. This is similar to what dbcsr_destroy 226 ! does, except that it keeps the index and data. 227 CALL array_release(matrix%row_blk_size) 228 CALL array_release(matrix%col_blk_size) 229 CALL array_release(matrix%row_blk_offset) 230 CALL array_release(matrix%col_blk_offset) 231 IF (matrix%has_local_rows) & 232 CALL array_release(matrix%local_rows) 233 IF (matrix%has_global_rows) & 234 CALL array_release(matrix%global_rows) 235 IF (matrix%has_local_cols) & 236 CALL array_release(matrix%local_cols) 237 IF (matrix%has_global_cols) & 238 CALL array_release(matrix%global_cols) 239 CALL dbcsr_distribution_release(matrix%dist) 240 IF (ASSOCIATED(matrix%wms)) THEN 241 CALL dbcsr_work_destroy_all(matrix) 242 ENDIF 243 CALL array_nullify(matrix%local_rows) 244 CALL array_nullify(matrix%global_rows) 245 CALL array_nullify(matrix%local_cols) 246 CALL array_nullify(matrix%global_cols) 247 ! 248 IF (matrix%data_type /= matrix%data_area%d%data_type) & 249 DBCSR_ABORT("Inconsistent data type for the existing buffer.") 250 CALL dbcsr_data_set_size_referenced(matrix%data_area, 0) 251 ELSE 252 ! Invalidate index 253 NULLIFY (matrix%index) 254 ! Invalidate data 255 IF (PRESENT(data_buffer)) THEN 256 IF (.NOT. dbcsr_data_valid(data_buffer)) & 257 DBCSR_ABORT("Input data buffer not valid.") 258 IF (matrix%data_type /= data_buffer%d%data_type) & 259 DBCSR_ABORT("Input buffer data type different by matrix data type.") 260 matrix%data_memory_type = data_buffer%d%memory_type 261 matrix%data_area = data_buffer 262 CALL dbcsr_data_hold(matrix%data_area) 263 ELSE 264 CALL dbcsr_data_init(matrix%data_area) 265 ENDIF 266 ENDIF 267 ! These are always invalidated. 268 NULLIFY (matrix%row_p, matrix%col_i, matrix%blk_p, matrix%thr_c, & 269 matrix%coo_l) 270 IF (PRESENT(row_blk_size_obj)) THEN 271 matrix%row_blk_size = row_blk_size_obj 272 CALL array_hold(matrix%row_blk_size) 273 ELSEIF (PRESENT(row_blk_size)) THEN 274 CALL array_new(matrix%row_blk_size, row_blk_size, gift=reuse_arrays) 275 ELSE 276 DBCSR_ABORT("Missing row_blk_size") 277 ENDIF 278 IF (PRESENT(max_rbs)) THEN 279 matrix%max_rbs = max_rbs 280 ELSE IF (array_size(matrix%row_blk_size) .GT. 0) THEN 281 matrix%max_rbs = MAXVAL(array_data(matrix%row_blk_size)) 282 ELSE 283 matrix%max_rbs = 0 284 ENDIF 285 IF (PRESENT(col_blk_size_obj)) THEN 286 matrix%col_blk_size = col_blk_size_obj 287 CALL array_hold(matrix%col_blk_size) 288 ELSEIF (PRESENT(col_blk_size)) THEN 289 CALL array_new(matrix%col_blk_size, col_blk_size, gift=reuse_arrays) 290 ELSE 291 DBCSR_ABORT("Missing col_blk_size") 292 ENDIF 293 IF (PRESENT(max_cbs)) THEN 294 matrix%max_cbs = max_cbs 295 ELSE IF (array_size(matrix%col_blk_size) .GT. 0) THEN 296 matrix%max_cbs = MAXVAL(array_data(matrix%col_blk_size)) 297 ELSE 298 matrix%max_cbs = 0 299 ENDIF 300 ! 301 IF (array_size(matrix%row_blk_size) /= dbcsr_distribution_nrows(dist)) & 302 DBCSR_ABORT("Number of blocked rows does match blocked row distribution.") 303 IF (array_size(matrix%col_blk_size) /= dbcsr_distribution_ncols(dist)) & 304 DBCSR_ABORT("Number of blocked columns does match blocked column distribution.") 305 ! initialize row/col offsets 306 IF (PRESENT(row_blk_offset)) THEN 307 IF (dbcsr_distribution_nrows(dist) + 1 /= array_size(row_blk_offset)) & 308 CALL dbcsr_abort(__LOCATION__, & 309 "Number of blocked offset rows does match blocked row distribution.") 310 matrix%row_blk_offset = row_blk_offset 311 CALL array_hold(matrix%row_blk_offset) 312 ELSE 313 ALLOCATE (vec_row_blk_offset(array_size(matrix%row_blk_size) + 1)) 314 CALL convert_sizes_to_offsets(array_data(matrix%row_blk_size), vec_row_blk_offset) 315 CALL array_new(matrix%row_blk_offset, vec_row_blk_offset, gift=.TRUE.) 316 ENDIF 317 318 IF (PRESENT(col_blk_offset)) THEN 319 IF (dbcsr_distribution_ncols(dist) + 1 /= array_size(col_blk_offset)) & 320 CALL dbcsr_abort(__LOCATION__, & 321 "Number of blocked offset columns does match blocked column distribution.") 322 matrix%col_blk_offset = col_blk_offset 323 CALL array_hold(matrix%col_blk_offset) 324 ELSE 325 ALLOCATE (vec_col_blk_offset(array_size(matrix%col_blk_size) + 1)) 326 CALL convert_sizes_to_offsets(array_data(matrix%col_blk_size), vec_col_blk_offset) 327 CALL array_new(matrix%col_blk_offset, vec_col_blk_offset, gift=.TRUE.) 328 ENDIF 329 330 matrix%dist = dist 331 CALL dbcsr_distribution_hold(matrix%dist) 332!$ IF (.NOT. dbcsr_distribution_has_threads(matrix%dist) .AND. PRESENT(thread_dist)) THEN 333!$ IF (dbcsr_distribution_has_threads(thread_dist)) THEN 334!$ matrix%dist%d%num_threads = thread_dist%d%num_threads 335!$ matrix%dist%d%has_thread_dist = .TRUE. 336!$ matrix%dist%d%thread_dist = thread_dist%d%thread_dist 337!$ CALL array_hold(matrix%dist%d%thread_dist) 338!$ ENDIF 339!$ ENDIF 340!$ IF (.NOT. dbcsr_distribution_has_threads(matrix%dist)) THEN 341!$ CALL dbcsr_distribution_make_threads(matrix%dist, & 342!$ array_data(matrix%row_blk_size)) 343!$ ENDIF 344 ! Set up some data. 345 IF (my_make_index) THEN 346 CALL meta_from_dist(new_meta, dist, array_data(matrix%row_blk_size), & 347 array_data(matrix%col_blk_size)) 348 matrix%nblkrows_total = new_meta(dbcsr_slot_nblkrows_total) 349 matrix%nblkcols_total = new_meta(dbcsr_slot_nblkcols_total) 350 matrix%nfullrows_total = new_meta(dbcsr_slot_nfullrows_total) 351 matrix%nfullcols_total = new_meta(dbcsr_slot_nfullcols_total) 352 matrix%nblkrows_local = new_meta(dbcsr_slot_nblkrows_local) 353 matrix%nblkcols_local = new_meta(dbcsr_slot_nblkcols_local) 354 matrix%nfullrows_local = new_meta(dbcsr_slot_nfullrows_local) 355 matrix%nfullcols_local = new_meta(dbcsr_slot_nfullcols_local) 356 ENDIF 357 my_nze = 0; IF (PRESENT(nze)) my_nze = nze 358 matrix%nblks = 0 359 matrix%nze = 0 360 361 IF (PRESENT(replication_type)) THEN 362 IF (replication_type .NE. dbcsr_repl_none & 363 .AND. replication_type .NE. dbcsr_repl_full & 364 .AND. replication_type .NE. dbcsr_repl_row & 365 .AND. replication_type .NE. dbcsr_repl_col) & 366 DBCSR_ABORT("Invalid replication type '"//replication_type//"'") 367 IF (replication_type .EQ. dbcsr_repl_row .OR. replication_type .EQ. dbcsr_repl_col) & 368 DBCSR_WARN("Row and column replication not fully supported") 369 matrix%replication_type = replication_type 370 ELSE 371 matrix%replication_type = dbcsr_repl_none 372 ENDIF 373 ! 374 ! Setup a matrix from scratch 375 IF (.NOT. hijack) THEN 376 IF (.NOT. PRESENT(data_buffer)) THEN 377 CALL dbcsr_data_new(matrix%data_area, matrix%data_type, my_nze, & 378 memory_type=matrix%data_memory_type) 379 CALL dbcsr_data_set_size_referenced(matrix%data_area, 0) 380 ENDIF 381 ! 382 IF (my_make_index) THEN 383 NULLIFY (matrix%index) 384 CALL ensure_array_size(matrix%index, lb=1, ub=dbcsr_num_slots, & 385 zero_pad=.TRUE., memory_type=matrix%index_memory_type) 386 ENDIF 387 ENDIF 388 IF (my_make_index) THEN 389 IF (LBOUND(matrix%index, 1) .GT. 1 & 390 .OR. UBOUND(matrix%index, 1) .LT. dbcsr_num_slots) & 391 DBCSR_ABORT("Index is not large enough") 392 matrix%index(1:dbcsr_num_slots) = 0 393 matrix%index(1:dbcsr_meta_size) = new_meta(1:dbcsr_meta_size) 394 matrix%index(dbcsr_slot_size) = dbcsr_num_slots 395 ENDIF 396 ! 397 matrix%symmetry = .FALSE. 398 matrix%negate_real = .FALSE. 399 matrix%negate_imaginary = .FALSE. 400 !matrix%transpose = .FALSE. 401 matrix_type_l = matrix_type 402 CALL uppercase(matrix_type_l) 403 SELECT CASE (matrix_type_l) 404 CASE (dbcsr_type_no_symmetry) 405 CASE (dbcsr_type_symmetric) 406 matrix%symmetry = .TRUE. 407 CASE (dbcsr_type_antisymmetric) 408 matrix%symmetry = .TRUE. 409 matrix%negate_real = .TRUE. 410 matrix%negate_imaginary = .TRUE. 411 CASE (dbcsr_type_hermitian) 412 matrix%symmetry = .TRUE. 413 matrix%negate_imaginary = .TRUE. 414 CASE (dbcsr_type_antihermitian) 415 matrix%symmetry = .TRUE. 416 matrix%negate_real = .TRUE. 417 CASE DEFAULT 418 DBCSR_ABORT("Invalid matrix type.") 419 END SELECT 420 matrix%bcsc = .FALSE. 421 matrix%local_indexing = .FALSE. 422 matrix%list_indexing = .FALSE. 423 CALL array_nullify(matrix%local_rows) 424 CALL array_nullify(matrix%global_rows) 425 CALL array_nullify(matrix%local_cols) 426 CALL array_nullify(matrix%global_cols) 427 matrix%has_local_rows = .FALSE. 428 matrix%has_global_rows = .FALSE. 429 matrix%has_local_cols = .FALSE. 430 matrix%has_global_cols = .FALSE. 431 IF (my_make_index) THEN 432 CALL dbcsr_make_index_exist(matrix) 433 ENDIF 434 matrix%valid = .TRUE. 435 CALL timestop(handle) 436 END SUBROUTINE dbcsr_create_new 437 438 SUBROUTINE dbcsr_create_template(matrix, template, name, dist, matrix_type, & 439 row_blk_size, col_blk_size, row_blk_size_obj, col_blk_size_obj, & 440 nze, data_type, & 441 data_buffer, data_memory_type, index_memory_type, & 442 max_rbs, max_cbs, & 443 row_blk_offset, col_blk_offset, & 444 reuse_arrays, mutable_work, make_index, replication_type) 445 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 446 TYPE(dbcsr_type), INTENT(IN) :: template 447 CHARACTER(len=*), INTENT(IN), OPTIONAL :: name 448 TYPE(dbcsr_distribution_obj), INTENT(IN), OPTIONAL :: dist 449 CHARACTER, INTENT(IN), OPTIONAL :: matrix_type 450 INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL, & 451 POINTER, CONTIGUOUS :: row_blk_size, col_blk_size 452 TYPE(array_i1d_obj), INTENT(IN), OPTIONAL :: row_blk_size_obj, col_blk_size_obj 453 INTEGER, INTENT(IN), OPTIONAL :: nze, data_type 454 TYPE(dbcsr_data_obj), INTENT(IN), OPTIONAL :: data_buffer 455 TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL :: data_memory_type, index_memory_type 456 INTEGER, INTENT(IN), OPTIONAL :: max_rbs, max_cbs 457 TYPE(array_i1d_obj), INTENT(IN), OPTIONAL :: row_blk_offset, col_blk_offset 458 LOGICAL, INTENT(IN), OPTIONAL :: reuse_arrays, mutable_work, make_index 459 CHARACTER, INTENT(IN), OPTIONAL :: replication_type 460 461 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_create_template', & 462 routineP = moduleN//':'//routineN 463 464 CHARACTER :: new_matrix_type, new_replication_type 465 CHARACTER(len=default_string_length) :: new_name 466 INTEGER :: new_data_type, new_max_cbs, new_max_rbs 467 LOGICAL :: my_make_index, new_mutable_work 468 TYPE(array_i1d_obj) :: new_col_blk_offset, new_row_blk_offset 469 TYPE(dbcsr_distribution_obj) :: new_dist 470 TYPE(dbcsr_memtype_type) :: new_data_memory_type, & 471 new_index_memory_type 472 473 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: blk_size 474 475! --------------------------------------------------------------------------- 476 477 IF (PRESENT(name)) THEN 478 new_name = TRIM(name) 479 ELSE 480 new_name = TRIM(dbcsr_name(template)) 481 ENDIF 482 IF (PRESENT(dist)) THEN 483 new_dist = dist 484 ELSE 485 new_dist = dbcsr_distribution(template) 486 ENDIF 487 IF (PRESENT(matrix_type)) THEN 488 new_matrix_type = matrix_type 489 ELSE 490 new_matrix_type = dbcsr_get_matrix_type(template) 491 ENDIF 492 ! 493 IF ((PRESENT(row_blk_size) .NEQV. PRESENT(col_blk_size)) .OR. & 494 (PRESENT(row_blk_size_obj) .NEQV. PRESENT(col_blk_size_obj))) THEN 495 DBCSR_ABORT("Both row_blk_size and col_blk_size must be provided") 496 ENDIF 497 ! 498 IF (PRESENT(max_rbs)) new_max_rbs = max_rbs 499 IF (PRESENT(row_blk_offset)) new_row_blk_offset = row_blk_offset 500 NULLIFY (blk_size) 501 IF (PRESENT(row_blk_size_obj)) THEN 502 blk_size => array_data(row_blk_size_obj) 503 ELSEIF (PRESENT(row_blk_size)) THEN 504 blk_size => row_blk_size 505 ENDIF 506 IF (ASSOCIATED(blk_size)) THEN 507 IF (.NOT. PRESENT(max_rbs)) & 508 new_max_rbs = MAXVAL(blk_size) 509 ELSE 510 IF (.NOT. PRESENT(max_rbs)) & 511 new_max_rbs = dbcsr_max_row_size(template) 512 IF (.NOT. PRESENT(row_blk_offset)) & 513 new_row_blk_offset = template%row_blk_offset 514 ENDIF 515 ! 516 IF (PRESENT(max_cbs)) new_max_cbs = max_cbs 517 IF (PRESENT(col_blk_offset)) new_col_blk_offset = col_blk_offset 518 NULLIFY (blk_size) 519 IF (PRESENT(col_blk_size_obj)) THEN 520 blk_size => array_data(col_blk_size_obj) 521 ELSEIF (PRESENT(col_blk_size)) THEN 522 blk_size => col_blk_size 523 ENDIF 524 IF (ASSOCIATED(blk_size)) THEN 525 IF (.NOT. PRESENT(max_cbs)) & 526 new_max_cbs = MAXVAL(blk_size) 527 ELSE 528 IF (.NOT. PRESENT(max_cbs)) & 529 new_max_cbs = dbcsr_max_col_size(template) 530 IF (.NOT. PRESENT(col_blk_offset)) & 531 new_col_blk_offset = template%col_blk_offset 532 ENDIF 533 IF (PRESENT(data_type)) THEN 534 new_data_type = data_type 535 ELSE 536 new_data_type = dbcsr_get_data_type(template) 537 ENDIF 538 IF (PRESENT(data_memory_type)) THEN 539 new_data_memory_type = data_memory_type 540 ELSE 541 new_data_memory_type = dbcsr_get_data_memory_type(template) 542 ENDIF 543 IF (PRESENT(index_memory_type)) THEN 544 new_index_memory_type = index_memory_type 545 ELSE 546 new_index_memory_type = dbcsr_get_index_memory_type(template) 547 ENDIF 548 IF (PRESENT(replication_type)) THEN 549 new_replication_type = replication_type 550 ELSE 551 new_replication_type = dbcsr_get_replication_type(template) 552 ENDIF 553 IF (PRESENT(mutable_work)) THEN 554 new_mutable_work = mutable_work 555 ELSE 556 new_mutable_work = dbcsr_use_mutable(template) 557 ENDIF 558 IF (PRESENT(row_blk_size_obj)) THEN 559 CALL dbcsr_create(matrix, name=new_name, dist=new_dist, & 560 matrix_type=new_matrix_type, & 561 row_blk_size_obj=row_blk_size_obj, & 562 col_blk_size_obj=col_blk_size_obj, & 563 nze=nze, & 564 data_type=new_data_type, & 565 data_buffer=data_buffer, & 566 data_memory_type=new_data_memory_type, & 567 index_memory_type=new_index_memory_type, & 568 max_rbs=new_max_rbs, max_cbs=new_max_cbs, & 569 row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset, & 570 reuse_arrays=reuse_arrays, & 571 mutable_work=new_mutable_work, & 572 make_index=make_index, & 573 replication_type=new_replication_type) 574 ELSEIF (PRESENT(row_blk_size)) THEN 575 CALL dbcsr_create(matrix, name=new_name, dist=new_dist, & 576 matrix_type=new_matrix_type, & 577 row_blk_size=row_blk_size, & 578 col_blk_size=col_blk_size, & 579 nze=nze, & 580 data_type=new_data_type, & 581 data_buffer=data_buffer, & 582 data_memory_type=new_data_memory_type, & 583 index_memory_type=new_index_memory_type, & 584 max_rbs=new_max_rbs, max_cbs=new_max_cbs, & 585 row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset, & 586 reuse_arrays=reuse_arrays, & 587 mutable_work=new_mutable_work, & 588 make_index=make_index, & 589 replication_type=new_replication_type) 590 ELSE 591 CALL dbcsr_create(matrix, name=new_name, dist=new_dist, & 592 matrix_type=new_matrix_type, & 593 row_blk_size_obj=template%row_blk_size, & 594 col_blk_size_obj=template%col_blk_size, & 595 nze=nze, & 596 data_type=new_data_type, & 597 data_buffer=data_buffer, & 598 data_memory_type=new_data_memory_type, & 599 index_memory_type=new_index_memory_type, & 600 max_rbs=new_max_rbs, max_cbs=new_max_cbs, & 601 row_blk_offset=new_row_blk_offset, col_blk_offset=new_col_blk_offset, & 602 thread_dist=dbcsr_distribution(template), & 603 reuse_arrays=reuse_arrays, & 604 mutable_work=new_mutable_work, & 605 make_index=make_index, & 606 replication_type=new_replication_type) 607 ENDIF 608 ! Copy stuff from the meta-array. These are not normally needed, 609 ! but have to be here for creating matrices from "image" matrices. 610 my_make_index = .TRUE. 611 IF (PRESENT(make_index)) my_make_index = make_index 612 IF (my_make_index) THEN 613 matrix%index(dbcsr_slot_home_prow) = template%index(dbcsr_slot_home_prow) 614 matrix%index(dbcsr_slot_home_rowi) = template%index(dbcsr_slot_home_rowi) 615 matrix%index(dbcsr_slot_home_pcol) = template%index(dbcsr_slot_home_pcol) 616 matrix%index(dbcsr_slot_home_coli) = template%index(dbcsr_slot_home_coli) 617 matrix%index(dbcsr_slot_home_vprow) = template%index(dbcsr_slot_home_vprow) 618 matrix%index(dbcsr_slot_home_vpcol) = template%index(dbcsr_slot_home_vpcol) 619 ENDIF 620 IF (PRESENT(row_blk_size) .AND. .NOT. PRESENT(row_blk_offset)) THEN 621 CALL array_release(new_row_blk_offset) 622 ENDIF 623 IF (PRESENT(col_blk_size) .AND. .NOT. PRESENT(col_blk_offset)) THEN 624 CALL array_release(new_col_blk_offset) 625 ENDIF 626 627 END SUBROUTINE dbcsr_create_template 628 629 SUBROUTINE dbcsr_init_wm(wm, data_type, nblks_guess, sizedata_guess, memory_type) 630 !! Initializes one work matrix 631 632 TYPE(dbcsr_work_type), INTENT(OUT) :: wm 633 !! initialized work matrix 634 INTEGER, INTENT(IN) :: data_type 635 INTEGER, INTENT(IN), OPTIONAL :: nblks_guess, sizedata_guess 636 !! estimated number of blocks 637 !! estimated size of data 638 TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL :: memory_type 639 640 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_init_wm', routineP = moduleN//':'//routineN 641 642 INTEGER :: handle, nblks, stat 643 644! --------------------------------------------------------------------------- 645 646 IF (careful_mod) CALL timeset(routineN, handle) 647 wm%lastblk = 0 648 wm%datasize = 0 649 ! Index 650 IF (PRESENT(nblks_guess)) THEN 651 nblks = nblks_guess 652 IF (nblks_guess < 0) & 653 DBCSR_ABORT("Can not have negative block count.") 654 ALLOCATE (wm%row_i(nblks), stat=stat) 655 IF (stat /= 0) DBCSR_ABORT("wm%row_i") 656 ALLOCATE (wm%col_i(nblks), stat=stat) 657 IF (stat /= 0) DBCSR_ABORT("wm%col_i") 658 ALLOCATE (wm%blk_p(nblks), stat=stat) 659 IF (stat /= 0) DBCSR_ABORT("wm%blk_p") 660 ELSE 661 NULLIFY (wm%row_i, wm%col_i, wm%blk_p) 662 !nblks = CEILING (REAL (matrix%nblkrows_local * matrix%nblkcols_local)& 663 ! / REAL (dbcsr_mp_numnodes (dbcsr_distribution_mp (matrix%dist)))) 664 ENDIF 665 ! Data 666 CALL dbcsr_data_init(wm%data_area) 667 IF (PRESENT(sizedata_guess)) THEN 668 IF (sizedata_guess < 0) & 669 DBCSR_ABORT("Can not have negative data size.") 670 CALL dbcsr_data_new(wm%data_area, data_type, & 671 data_size=sizedata_guess, memory_type=memory_type) 672 ELSE 673 CALL dbcsr_data_new(wm%data_area, data_type, memory_type=memory_type) 674 ENDIF 675 CALL dbcsr_mutable_init(wm%mutable) 676 IF (careful_mod) CALL timestop(handle) 677 END SUBROUTINE dbcsr_init_wm 678 679 SUBROUTINE dbcsr_work_create(matrix, nblks_guess, sizedata_guess, n, & 680 work_mutable, memory_type) 681 !! Creates a the working matrix(es) for a DBCSR matrix. 682 683 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 684 !! new matrix 685 INTEGER, INTENT(IN), OPTIONAL :: nblks_guess, sizedata_guess, n 686 !! estimated number of blocks 687 !! estimated size of data 688 !! number work matrices to create, default is 1 689 LOGICAL, INTENT(in), OPTIONAL :: work_mutable 690 !! use mutable work type, default is what was specified in create 691 TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL :: memory_type 692 693 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_work_create', & 694 routineP = moduleN//':'//routineN 695 696 INTEGER :: handle, iw, nw, ow 697 LOGICAL :: wms_new, wms_realloc 698 TYPE(dbcsr_work_type), DIMENSION(:), POINTER :: wms 699 700! --------------------------------------------------------------------------- 701 702 CALL timeset(routineN, handle) 703 IF (PRESENT(n)) THEN 704 nw = n 705 ELSE 706 nw = 1 707!$ IF (omp_in_parallel()) THEN 708!$ nw = omp_get_num_threads() 709!$ ELSE 710!$ nw = omp_get_max_threads() 711!$ ENDIF 712 ENDIF 713!$OMP MASTER 714 wms_new = .NOT. ASSOCIATED(matrix%wms) 715 wms_realloc = .FALSE. 716 IF (ASSOCIATED(matrix%wms)) THEN 717 ow = SIZE(matrix%wms) 718 IF (ow .LT. nw) & 719 DBCSR_WARN("Number of work matrices less than threads.") 720 IF (ow .LT. nw) wms_realloc = .TRUE. 721 ENDIF 722 IF (PRESENT(work_mutable)) THEN 723 matrix%work_mutable = work_mutable 724 ENDIF 725 IF (wms_realloc) THEN 726 ALLOCATE (wms(nw)) 727 wms(1:ow) = matrix%wms(1:ow) 728 DEALLOCATE (matrix%wms) 729 matrix%wms => wms 730 DO iw = ow + 1, nw 731 CALL dbcsr_init_wm(matrix%wms(iw), matrix%data_type, & 732 nblks_guess=nblks_guess, sizedata_guess=sizedata_guess, & 733 memory_type=memory_type) 734 IF (matrix%work_mutable) & 735 CALL dbcsr_mutable_new(matrix%wms(iw)%mutable, & 736 dbcsr_get_data_type(matrix)) 737 END DO 738 ENDIF 739 IF (wms_new) THEN 740 ALLOCATE (matrix%wms(nw)) 741 DO iw = 1, nw 742 CALL dbcsr_init_wm(matrix%wms(iw), matrix%data_type, & 743 nblks_guess=nblks_guess, sizedata_guess=sizedata_guess, & 744 memory_type=memory_type) 745 IF (matrix%work_mutable) & 746 CALL dbcsr_mutable_new(matrix%wms(iw)%mutable, & 747 dbcsr_get_data_type(matrix)) 748 END DO 749 ENDIF 750 matrix%valid = .FALSE. 751!$OMP END MASTER 752 CALL timestop(handle) 753 END SUBROUTINE dbcsr_work_create 754 755 SUBROUTINE dbcsr_finalize(matrix, reshuffle) 756 !! Creates the final dbcsr_type matrix from the working matrix. 757 !! Work matrices (array or tree-based) are merged into the base DBCSR matrix. 758 !! If a matrix is marked as having a valid index, then nothing is done. 759 !! Deleted blocks are pruned from the index. 760 761 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 762 !! final matrix 763 LOGICAL, INTENT(IN), OPTIONAL :: reshuffle 764 !! whether the data should be reshuffled, default is false 765 766 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_finalize', routineP = moduleN//':'//routineN 767 LOGICAL, PARAMETER :: dbg = .FALSE. 768 769 INTEGER :: handle, i, nblks, nwms, start_offset 770 INTEGER, ALLOCATABLE, DIMENSION(:) :: empty_row_p 771 INTEGER, DIMENSION(:), POINTER, SAVE :: old_blk_p, old_col_i, old_row_p 772 LOGICAL :: can_quick, fake_row_p, sort_data, spawn 773 774! --------------------------------------------------------------------------- 775 776 CALL timeset(routineN, handle) 777 778 NULLIFY (old_blk_p, old_col_i, old_row_p) 779 780!$OMP BARRIER 781 ! If the matrix is not marked as dirty then skip the work. 782 IF (dbcsr_valid_index(matrix)) THEN 783 !"No need to finalize a valid matrix, skipping." 784 ! 785 ! A matrix with a valid index should not have associated work 786 ! arrays. This may happen when this routine is called on a 787 ! matrix that was not changed. 788!$OMP BARRIER 789!$OMP MASTER 790 IF (ASSOCIATED(matrix%wms)) & 791 CALL dbcsr_work_destroy_all(matrix) 792 matrix%valid = .TRUE. 793!$OMP END MASTER 794!$OMP BARRIER 795 CALL timestop(handle) 796 RETURN 797 ENDIF 798 ! 799 ! If possible, data copying is avoided. 800 IF (PRESENT(reshuffle)) THEN 801 sort_data = reshuffle 802 ELSE 803 sort_data = .FALSE. 804 ENDIF 805 ! 806 ! Now make sure that a valid row_p exists. Also clear the row_p if 807 ! the matrix is declared to have 0 blocks. 808!$OMP MASTER 809 fake_row_p = .NOT. ASSOCIATED(matrix%row_p) 810 IF (ASSOCIATED(matrix%row_p)) THEN 811 fake_row_p = SIZE(matrix%row_p) .LE. 1 812 ENDIF 813 fake_row_p = fake_row_p .OR. matrix%nblks .EQ. 0 814!$OMP END MASTER 815 ! 816 ! See where data will be appended in the main data 817 ! area. Alternatively, set to the start if the matrix is declared 818 ! to have no data. (This value is ignored if reshuffle is true 819 ! because the main data area is always new.) 820 start_offset = matrix%nze 821 i = dbcsr_get_data_size_used(matrix) 822!$OMP MASTER 823 matrix%nze = 0 824!$OMP END MASTER 825!$OMP BARRIER 826!$OMP ATOMIC 827 matrix%nze = matrix%nze + i 828!$OMP BARRIER 829 IF (dbg) THEN 830 WRITE (*, *) routineN//" sizes", matrix%nze, i, & 831 dbcsr_data_get_size_referenced(matrix%data_area), & 832 dbcsr_data_get_size(matrix%data_area) 833 ENDIF 834 IF (.FALSE. .AND. dbcsr_data_get_size_referenced(matrix%data_area) .NE. & 835 matrix%nze) THEN 836 IF (matrix%nze .NE. dbcsr_data_get_size_referenced(matrix%data_area)) & 837 DBCSR_WARN("Should reshuffle.") 838 IF (ASSOCIATED(matrix%wms)) THEN 839 sort_data = .NOT. dbcsr_wm_use_mutable(matrix%wms(1)) 840 ENDIF 841 ENDIF 842 IF (sort_data .AND. matrix%nze .GT. 0) THEN 843 CALL dbcsr_add_wm_from_matrix(matrix) 844 matrix%nze = 0 845!$OMP MASTER 846 fake_row_p = .TRUE. 847!$OMP END MASTER 848 ENDIF 849 start_offset = dbcsr_data_get_size_referenced(matrix%data_area) + 1 850 IF (matrix%nze .EQ. 0) start_offset = 1 851!$OMP MASTER 852 matrix%index(dbcsr_slot_nze) = matrix%nze 853 IF (fake_row_p) THEN 854 ALLOCATE (empty_row_p(matrix%nblkrows_total + 1)) 855 empty_row_p(:) = 0 856 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, & 857 DATA=empty_row_p, extra=0) 858 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, & 859 reservation=0) 860 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & 861 reservation=0) 862 CALL dbcsr_repoint_index(matrix) 863 ENDIF 864!$OMP END MASTER 865 ! 866!$OMP BARRIER 867 can_quick = can_quickly_finalize(matrix) 868!$OMP BARRIER 869 ! If the matrix, work matrices, and environment fit several 870 ! criteria, then a quick O(1) finalization is performed. 871 IF (can_quick .AND. .NOT. sort_data) THEN 872 CALL quick_finalize(matrix) 873 ELSE 874 ! 875!$OMP MASTER 876 ! 877 ! Create work matrices if not yet existing 878 IF (.NOT. ASSOCIATED(matrix%wms)) THEN 879 nwms = 1 880!$ nwms = omp_get_num_threads() 881 CALL dbcsr_work_create(matrix, n=nwms) 882 ENDIF 883!$OMP END MASTER 884!$OMP BARRIER 885 ! 886 ! Ensure index arrays at least exist. 887!$OMP DO SCHEDULE (STATIC, 1) 888 DO i = 1, SIZE(matrix%wms) 889 IF (.NOT. ASSOCIATED(matrix%wms(i)%row_i)) THEN 890 CALL ensure_array_size(matrix%wms(i)%row_i, ub=0) 891 ENDIF 892 IF (.NOT. ASSOCIATED(matrix%wms(i)%col_i)) THEN 893 CALL ensure_array_size(matrix%wms(i)%col_i, ub=0) 894 ENDIF 895 IF (.NOT. ASSOCIATED(matrix%wms(i)%blk_p)) THEN 896 CALL ensure_array_size(matrix%wms(i)%blk_p, ub=0) 897 ENDIF 898 ENDDO 899!$OMP ENDDO 900 ! 901 ! Check for deleted blocks 902!$OMP MASTER 903 nblks = matrix%row_p(matrix%nblkrows_total + 1) 904 IF (ANY(matrix%blk_p(1:nblks) .EQ. 0)) THEN 905 CALL dbcsr_index_prune_deleted(matrix) 906 ENDIF 907 old_row_p => matrix%row_p 908 old_col_i => matrix%col_i 909 old_blk_p => matrix%blk_p 910!$OMP END MASTER 911 ! 912!$OMP BARRIER 913 ! Check to see if we will need to create a parallel environment 914 ! (needed when there are multiple work matrices but we are not 915 ! in an OpenMP parallel section.) 916 ! 917 ! A parallel section is created is used when the matrix has 918 ! more work matrices. It's a shortcut when the finalize is 919 ! called from a non-parallel environment whereas the matrix was 920 ! built/modified in a parallel environment 921 nwms = SIZE(matrix%wms) 922 spawn = .FALSE. 923!$ IF (.NOT. OMP_IN_PARALLEL()) THEN 924!$ IF (nwms .GT. 1) spawn = .TRUE. 925!$ ENDIF 926 IF (spawn) THEN 927!$OMP PARALLEL IF (spawn) & 928!$OMP DEFAULT (NONE) & 929!$OMP SHARED (matrix, old_row_p, old_col_i, old_blk_p,& 930!$OMP start_offset, sort_data) 931 CALL dbcsr_merge_all(matrix, & 932 old_row_p, old_col_i, old_blk_p, & 933 sort_data=sort_data) 934!$OMP END PARALLEL 935 ELSE 936 CALL dbcsr_merge_all(matrix, & 937 old_row_p, old_col_i, old_blk_p, & 938 sort_data=sort_data) 939 ENDIF 940 ENDIF 941!$OMP BARRIER 942!$OMP MASTER 943 ! Clean up. 944 IF (ASSOCIATED(matrix%wms)) THEN 945 CALL dbcsr_work_destroy_all(matrix) 946 ENDIF 947 matrix%valid = .TRUE. 948!$OMP END MASTER 949!$OMP BARRIER 950 IF (dbg) THEN 951!$OMP SINGLE 952 CALL dbcsr_verify_matrix(matrix) 953!$OMP END SINGLE 954 ENDIF 955!$OMP MASTER 956 IF (fake_row_p) THEN 957 DEALLOCATE (empty_row_p) 958 ENDIF 959!$OMP END MASTER 960!$OMP BARRIER 961 CALL timestop(handle) 962 END SUBROUTINE dbcsr_finalize 963 964 SUBROUTINE dbcsr_special_finalize(matrix, reshuffle) 965 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 966 LOGICAL, INTENT(IN), OPTIONAL :: reshuffle 967 968 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_special_finalize', & 969 routineP = moduleN//':'//routineN 970 LOGICAL, PARAMETER :: dbg = .FALSE. 971 972 INTEGER :: handle 973 LOGICAL :: can_quick, sort_data 974 975! --------------------------------------------------------------------------- 976 977 CALL timeset(routineN, handle) 978 979 IF (matrix%nblks .NE. 0) & 980 DBCSR_ABORT("Optimized finalize requires empty matrix.") 981 IF (dbcsr_valid_index(matrix)) & 982 DBCSR_ABORT("Optimized finalize requires invalid matrix.") 983 IF (.NOT. ASSOCIATED(matrix%wms)) & 984 DBCSR_ABORT("Optimized finalize requires work matrices exist.") 985 IF (SIZE(matrix%wms) .NE. 1) & 986 DBCSR_ABORT("Optimized finalize requires single work matrix") 987 988 ! 989 ! If possible, data copying is avoided. 990 IF (PRESENT(reshuffle)) THEN 991 sort_data = reshuffle 992 ELSE 993 sort_data = .FALSE. 994 ENDIF 995!$OMP BARRIER 996 can_quick = can_quickly_finalize(matrix) 997!$OMP BARRIER 998 999 ! If the matrix, work matrices, and environment fit several 1000 ! criteria, then a quick O(1) finalization is performed. 1001 IF (can_quick .AND. .NOT. sort_data) THEN 1002 CALL quick_finalize(matrix) 1003 ELSE 1004 IF (.NOT. sort_data) & 1005 DBCSR_ABORT("merge_single_wm only supports data sorting") 1006 ! 1007 ! Ensure needed index arrays at least exist. 1008!$OMP MASTER 1009 ! 1010 IF (.NOT. ASSOCIATED(matrix%wms(1)%row_i)) THEN 1011 CALL ensure_array_size(matrix%wms(1)%row_i, ub=0) 1012 ENDIF 1013 IF (.NOT. ASSOCIATED(matrix%wms(1)%col_i)) THEN 1014 CALL ensure_array_size(matrix%wms(1)%col_i, ub=0) 1015 ENDIF 1016 IF (.NOT. ASSOCIATED(matrix%wms(1)%blk_p)) THEN 1017 CALL ensure_array_size(matrix%wms(1)%blk_p, ub=0) 1018 ENDIF 1019 ! 1020!$OMP END MASTER 1021!$OMP BARRIER 1022 ! 1023!$OMP PARALLEL DEFAULT (NONE), SHARED(matrix) 1024 CALL dbcsr_merge_single_wm(matrix) 1025!$OMP END PARALLEL 1026 1027 ENDIF 1028 1029!$OMP MASTER 1030 ! Clean up. 1031 IF (ASSOCIATED(matrix%wms)) THEN 1032 CALL dbcsr_work_destroy_all(matrix) 1033 ENDIF 1034 matrix%valid = .TRUE. 1035!$OMP END MASTER 1036!$OMP BARRIER 1037 IF (dbg) THEN 1038!$OMP SINGLE 1039 CALL dbcsr_verify_matrix(matrix) 1040!$OMP END SINGLE 1041 ENDIF 1042!$OMP BARRIER 1043 CALL timestop(handle) 1044 END SUBROUTINE dbcsr_special_finalize 1045 1046 FUNCTION can_quickly_finalize(matrix) RESULT(quick) 1047 !! Checks whether the matrix can be finalized with minimal copying. 1048 1049 TYPE(dbcsr_type), INTENT(IN) :: matrix 1050 !! matrix to check 1051 LOGICAL :: quick 1052 !! whether matrix can be quickly finalized 1053 1054! --------------------------------------------------------------------------- 1055 1056 IF (ASSOCIATED(matrix%wms)) THEN 1057 quick = matrix%nblks .EQ. 0 1058 quick = quick .AND. SIZE(matrix%wms) .EQ. 1 .AND. & 1059 .NOT. dbcsr_wm_use_mutable(matrix%wms(1)) 1060 IF (quick) THEN 1061 quick = quick .AND. & 1062 dbcsr_memtype_equal( & 1063 dbcsr_data_get_memory_type(matrix%wms(1)%data_area), & 1064 dbcsr_data_get_memory_type(matrix%data_area)) 1065 quick = quick .AND. & 1066 ASSOCIATED(matrix%wms(1)%row_i) 1067 quick = quick .AND. & 1068 (matrix%wms(1)%datasize_after_filtering .LT. 0 .OR. & 1069 matrix%wms(1)%datasize .EQ. matrix%wms(1)%datasize_after_filtering) 1070 ENDIF 1071 ELSE 1072 quick = .FALSE. 1073 ENDIF 1074 END FUNCTION can_quickly_finalize 1075 1076 SUBROUTINE quick_finalize(matrix) 1077 !! Performs quick finalization of matrix 1078 !! The data area from the work matrix is accepted as the new matrix's data 1079 !! area and the index is built from the work matrix. 1080 1081 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 1082 !! matrix to finalize 1083 1084 CHARACTER(len=*), PARAMETER :: routineN = 'quick_finalize', routineP = moduleN//':'//routineN 1085 1086 INTEGER :: error_handle, nblks, nrows 1087 1088! --------------------------------------------------------------------------- 1089 1090 CALL timeset(routineN, error_handle) 1091!$OMP SECTIONS 1092!$OMP SECTION 1093 nblks = matrix%wms(1)%lastblk 1094 nrows = matrix%nblkrows_total 1095 CALL dbcsr_sort_indices(nblks, & 1096 matrix%wms(1)%row_i, & 1097 matrix%wms(1)%col_i, & 1098 matrix%wms(1)%blk_p) 1099 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p) 1100 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i) 1101 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p) 1102 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, & 1103 reservation=nrows + 1, extra=2*nblks) 1104 CALL dbcsr_make_dbcsr_index(matrix%row_p, matrix%wms(1)%row_i, & 1105 nrows, nblks) 1106 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, & 1107 DATA=matrix%wms(1)%col_i(1:nblks)) 1108 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & 1109 DATA=matrix%wms(1)%blk_p(1:nblks)) 1110 matrix%nblks = nblks 1111 matrix%nze = matrix%wms(1)%datasize 1112 matrix%index(dbcsr_slot_nblks) = nblks 1113 matrix%index(dbcsr_slot_nze) = matrix%wms(1)%datasize 1114 CALL dbcsr_repoint_index(matrix) 1115!$OMP SECTION 1116 CALL dbcsr_switch_data_area(matrix, matrix%wms(1)%data_area) 1117!$OMP END SECTIONS 1118 CALL timestop(error_handle) 1119 END SUBROUTINE quick_finalize 1120 1121 SUBROUTINE dbcsr_add_wm_from_matrix(matrix, limits) 1122 !! Creates a work matrix from the data present in a finalized matrix. 1123 1124 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 1125 !! DBCSR matrix 1126 INTEGER, DIMENSION(4), INTENT(IN), OPTIONAL :: limits 1127 !! the limits to use for copying 1128 1129 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_add_wm_from_matrix', & 1130 routineP = moduleN//':'//routineN 1131 1132 INTEGER :: handle, ithread, nthreads, nwms, & 1133 old_nwms, size_used 1134 LOGICAL :: preexists 1135 1136! --------------------------------------------------------------------------- 1137 1138 CALL timeset(routineN, handle) 1139!$OMP BARRIER 1140 preexists = ASSOCIATED(matrix%wms) 1141 IF (preexists) THEN 1142 old_nwms = SIZE(matrix%wms) 1143 IF (old_nwms .EQ. 0) & 1144 DBCSR_WARN("Nonexisting work matrices?!") 1145 ELSE 1146 old_nwms = 0 1147 ENDIF 1148 nthreads = 1; ithread = 0 1149!$ nthreads = OMP_GET_NUM_THREADS(); ithread = OMP_GET_THREAD_NUM() 1150 IF (nthreads .GT. 1) THEN 1151 IF (old_nwms /= nthreads .AND. old_nwms /= 0) & 1152 DBCSR_ABORT("Number of work matrices and threads do not match") 1153 ENDIF 1154 nwms = MAX(1, old_nwms) 1155!$ nwms = MAX(nwms, nthreads) 1156!$OMP BARRIER 1157!$OMP MASTER 1158 IF (.NOT. ASSOCIATED(matrix%wms)) THEN 1159 CALL dbcsr_work_create(matrix, & 1160 INT(matrix%nblks*default_resize_factor/nwms), & 1161 INT(matrix%nze*default_resize_factor/nwms), & 1162 n=nwms, work_mutable=.FALSE.) 1163 ENDIF 1164!$OMP END MASTER 1165!$OMP BARRIER 1166 size_used = matrix%nze 1167 CALL dbcsr_fill_wm_from_matrix(matrix%wms, matrix, size_used, & 1168 limits=limits) 1169!$OMP BARRIER 1170 CALL timestop(handle) 1171 END SUBROUTINE dbcsr_add_wm_from_matrix 1172 1173 SUBROUTINE dbcsr_fill_wm_from_matrix(wm, matrix, size_used, limits) 1174 !! Fills index and data of the work matrix from the 1175 !! previously-finalized one. 1176 !! 1177 !! limits 1178 !! The limits is a 4-tuple 1179 !! (lower_row, higher_row, lower_column, higher_column). 1180 1181 TYPE(dbcsr_work_type), DIMENSION(:), INTENT(INOUT) :: wm 1182 !! the work matrix to fill 1183 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 1184 !! DBCSR matrix 1185 INTEGER, INTENT(IN) :: size_used 1186 INTEGER, DIMENSION(4), INTENT(IN), OPTIONAL :: limits 1187 !! only fills blocks within this range 1188 1189 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_fill_wm_from_matrix', & 1190 routineP = moduleN//':'//routineN 1191 1192 INTEGER :: blk, blk_p, col, handle, ithread, & 1193 nthreads, nwms, nze, row, wblk_p, & 1194 which_wm, wm_first, wm_last 1195 LOGICAL :: careful, limit, mt, tr 1196 LOGICAL, SAVE :: mutable 1197 TYPE(dbcsr_data_obj) :: data_block 1198 TYPE(dbcsr_iterator) :: iter 1199 1200! --------------------------------------------------------------------------- 1201 1202 CALL timeset(routineN, handle) 1203 nwms = SIZE(matrix%wms) 1204 mt = .FALSE. 1205!$ IF (nwms .GT. 1) mt = omp_get_num_threads() .GT. 1 1206 ithread = 0; nthreads = 1 1207!$ ithread = omp_get_thread_num() 1208!$ nthreads = omp_get_num_threads() 1209 limit = PRESENT(limits) 1210 careful = size_used + size_used/8 & 1211 .LT. dbcsr_data_get_size_referenced(matrix%data_area) 1212 CALL dbcsr_data_init(data_block) 1213 CALL dbcsr_data_new(data_block, dbcsr_data_get_type(matrix%data_area)) 1214 IF (mt) THEN 1215 wm_first = ithread + 1 1216 wm_last = ithread + 1 1217 ELSE 1218 wm_first = 1 1219 wm_last = nwms 1220 ENDIF 1221 ! Prepares the work matrices to accept the main data. 1222!$OMP MASTER 1223 mutable = .FALSE. 1224 DO which_wm = 1, nwms 1225 mutable = mutable .OR. dbcsr_wm_use_mutable(wm(which_wm)) 1226 ENDDO 1227!$OMP END MASTER 1228!$OMP BARRIER 1229 DO which_wm = wm_first, wm_last 1230 IF (dbcsr_wm_use_mutable(wm(which_wm))) & 1231 DBCSR_ABORT("Adding main matrix into mutable not supported.") 1232 IF (mutable) THEN 1233 IF (.NOT. dbcsr_mutable_instantiated(wm(which_wm)%mutable)) THEN 1234 CALL dbcsr_mutable_new(wm(which_wm)%mutable, matrix%data_type) 1235 ENDIF 1236 ELSE 1237 ! We don't know how much data we'll get so we have to be generous. 1238 CALL dbcsr_data_ensure_size(wm(which_wm)%data_area, & 1239 size_used/nwms) 1240 CALL dbcsr_data_set_size_referenced(wm(which_wm)%data_area, 0) 1241 ENDIF 1242 ENDDO 1243 ! Now copy the data 1244 CALL dbcsr_iterator_start(iter, matrix, shared=mt, & 1245 contiguous_pointers=.FALSE., read_only=.TRUE.) 1246 DO WHILE (dbcsr_iterator_blocks_left(iter)) 1247 CALL dbcsr_iterator_next_block(iter, row, col, data_block, & 1248 transposed=tr, block_number=blk) 1249 IF (limit) THEN 1250 IF (.NOT. within_limits(row, col, limits)) CYCLE 1251 ENDIF 1252 blk_p = matrix%blk_p(blk) 1253 which_wm = ithread + 1 1254 wblk_p = SIGN(wm(which_wm)%datasize + 1, blk_p) 1255 nze = dbcsr_data_get_size(data_block) 1256 IF (mt .OR. limit .OR. careful .OR. mutable) THEN 1257 ! The data gets copied block by block so the block pointers 1258 ! are ordered accordingly. 1259 IF (.NOT. mutable) THEN 1260 CALL add_work_coordinate(wm(which_wm), row, col, wblk_p) 1261 CALL dbcsr_data_ensure_size(wm(which_wm)%data_area, & 1262 ABS(wblk_p) + nze - 1, factor=default_resize_factor) 1263 CALL dbcsr_data_set_size_referenced(wm(which_wm)%data_area, & 1264 ABS(wblk_p) + nze - 1) 1265 CALL dbcsr_data_set(wm(which_wm)%data_area, & 1266 lb=ABS(wblk_p), & 1267 data_size=nze, & 1268 src=data_block, source_lb=1) 1269 ENDIF 1270 ELSE 1271 ! The data gets copied all at once so the block pointers 1272 ! should remain the same as they were. 1273 CALL add_work_coordinate(wm(which_wm), row, col, blk_p) 1274 ENDIF 1275 IF (.NOT. mutable) & 1276 wm(which_wm)%datasize = wm(which_wm)%datasize + nze 1277 ENDDO 1278 CALL dbcsr_iterator_stop(iter) 1279 CALL dbcsr_data_clear_pointer(data_block) 1280 CALL dbcsr_data_release(data_block) 1281 ! Copy all blocks at once 1282 IF (.NOT. mt .AND. .NOT. limit .AND. .NOT. careful .AND. .NOT. mutable) THEN 1283 DO which_wm = 1, nwms 1284 CALL dbcsr_data_ensure_size(wm(which_wm)%data_area, & 1285 dbcsr_data_get_size_referenced(matrix%data_area)) 1286 CALL dbcsr_data_copyall(wm(which_wm)%data_area, matrix%data_area) 1287 wm(which_wm)%datasize = size_used 1288 ENDDO 1289 ENDIF 1290 CALL timestop(handle) 1291 END SUBROUTINE dbcsr_fill_wm_from_matrix 1292 1293 PURE FUNCTION within_limits(row, column, limits) 1294 !! Checks whether a point is within bounds 1295 !! \return whether the point is within the bounds 1296 1297 INTEGER, INTENT(IN) :: row, column 1298 !! point to check 1299 !! point to check 1300 INTEGER, DIMENSION(4), INTENT(IN) :: limits 1301 !! limits (low_row, high_row, low_col, high_col) 1302 LOGICAL :: within_limits 1303 1304 within_limits = row .GE. limits(1) .AND. row .LE. limits(2) .AND. & 1305 column .GE. limits(3) .AND. column .LE. limits(4) 1306 END FUNCTION within_limits 1307 1308 SUBROUTINE dbcsr_work_destroy(wm) 1309 !! Deallocates and destroys a work matrix. 1310 1311 TYPE(dbcsr_work_type), INTENT(INOUT) :: wm 1312 !! work matrix 1313 1314 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_work_destroy', & 1315 routineP = moduleN//':'//routineN 1316 1317 INTEGER :: handle 1318 1319! --------------------------------------------------------------------------- 1320 1321 IF (careful_mod) CALL timeset(routineN, handle) 1322 1323 IF (ASSOCIATED(wm%row_i)) THEN 1324 DEALLOCATE (wm%row_i) 1325 ENDIF 1326 IF (ASSOCIATED(wm%col_i)) THEN 1327 DEALLOCATE (wm%col_i) 1328 ENDIF 1329 IF (ASSOCIATED(wm%blk_p)) THEN 1330 DEALLOCATE (wm%blk_p) 1331 ENDIF 1332 CALL dbcsr_data_release(wm%data_area) 1333 CALL dbcsr_mutable_destroy(wm%mutable) 1334 1335 IF (careful_mod) CALL timestop(handle) 1336 1337 END SUBROUTINE dbcsr_work_destroy 1338 1339 SUBROUTINE dbcsr_work_destroy_all(m) 1340 !! Deallocates and destroys a work matrix. 1341 TYPE(dbcsr_type), INTENT(INOUT) :: m 1342 1343 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_work_destroy_all', & 1344 routineP = moduleN//':'//routineN 1345 1346 INTEGER :: handle, i 1347 1348! --------------------------------------------------------------------------- 1349 1350 CALL timeset(routineN, handle) 1351 1352 IF (.NOT. ASSOCIATED(m%wms)) & 1353 DBCSR_WARN("Want to destroy nonexisting work matrices.") 1354 IF (ASSOCIATED(m%wms)) THEN 1355 DO i = 1, SIZE(m%wms) 1356 CALL dbcsr_work_destroy(m%wms(i)) 1357 ENDDO 1358 DEALLOCATE (m%wms) 1359 ENDIF 1360 1361 CALL timestop(handle) 1362 1363 END SUBROUTINE dbcsr_work_destroy_all 1364 1365 SUBROUTINE add_work_coordinate(matrix, row, col, blk, index) 1366 !! Adds a coordinate (or other data) into a work matrix's row_i and 1367 !! col_i arrays and returns its position. 1368 !! @note Uses the matrix%lastblk to keep track of the current position. 1369 1370 TYPE(dbcsr_work_type), INTENT(INOUT) :: matrix 1371 !! work matrix 1372 INTEGER, INTENT(IN) :: row, col 1373 !! row data to add 1374 !! col data to add 1375 INTEGER, INTENT(IN), OPTIONAL :: blk 1376 !! block pointer to add 1377 INTEGER, INTENT(OUT), OPTIONAL :: index 1378 !! saved position 1379 1380 CHARACTER(len=*), PARAMETER :: routineN = 'add_work_coordinate', & 1381 routineP = moduleN//':'//routineN 1382 1383 INTEGER :: handle 1384 1385! --------------------------------------------------------------------------- 1386 1387 IF (careful_mod) CALL timeset(routineN, handle) 1388 matrix%lastblk = matrix%lastblk + 1 1389 CALL ensure_array_size(matrix%row_i, ub=matrix%lastblk, & 1390 factor=default_resize_factor) 1391 CALL ensure_array_size(matrix%col_i, ub=matrix%lastblk, & 1392 factor=default_resize_factor) 1393 matrix%row_i(matrix%lastblk) = row 1394 matrix%col_i(matrix%lastblk) = col 1395 IF (PRESENT(blk)) THEN 1396 CALL ensure_array_size(matrix%blk_p, ub=matrix%lastblk, & 1397 factor=default_resize_factor) 1398 matrix%blk_p(matrix%lastblk) = blk 1399 ENDIF 1400 IF (PRESENT(index)) index = matrix%lastblk 1401 IF (careful_mod) CALL timestop(handle) 1402 END SUBROUTINE add_work_coordinate 1403 1404 SUBROUTINE dbcsr_merge_all(matrix, old_row_p, old_col_i, old_blk_p, & 1405 sort_data) 1406 !! Merge data from matrix and work matrices into the final matrix. 1407 1408 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 1409 !! matrix to work on 1410 INTEGER, DIMENSION(*), INTENT(IN) :: old_row_p, old_col_i, old_blk_p 1411 LOGICAL, INTENT(IN) :: sort_data 1412 !! whether data will be fully sorted 1413 1414 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_merge_all', & 1415 routineP = moduleN//':'//routineN 1416 LOGICAL, PARAMETER :: dbg = .FALSE. 1417 1418 INTEGER :: handle, my_row_count, nblks, & 1419 nrows, nwms, row, t, & 1420 which_wm, wm_datasize 1421 INTEGER, ALLOCATABLE, DIMENSION(:), SAVE :: all_data_offsets, all_data_sizes, & 1422 new_blk_p_sorted, new_blk_sizes, & 1423 new_row_p 1424 INTEGER, ALLOCATABLE, DIMENSION(:), SAVE, TARGET :: blk_d, new_blk_p, new_col_i 1425 INTEGER, DIMENSION(:), CONTIGUOUS, POINTER :: my_row_p 1426 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS, SAVE :: cbs, rbs 1427 INTEGER, SAVE :: max_row_count, new_nblks = 0, new_nze = 0 1428 TYPE(dbcsr_data_obj), ALLOCATABLE, DIMENSION(:), & 1429 SAVE :: all_data_areas 1430 TYPE(dbcsr_work_type), POINTER :: wm 1431 TYPE(i_array_p), DIMENSION(:), POINTER, SAVE :: all_blk_p, all_col_i, all_row_p 1432 1433! --------------------------------------------------------------------------- 1434 1435 CALL timeset(routineN, handle) 1436 1437 ! Outline: 1438 ! Each thread has a work matrix. These must be merged and made 1439 ! into a new index. If sort_data is False, then the data areas 1440 ! are simply appended. This is probably quicker but the data 1441 ! blocks are not in order and the data size may expand beyond 1442 ! what is needed. If sort_data is True, then data blocks are 1443 ! sorted in order. 1444 1445 IF (dbg) WRITE (*, *) routineN//" starting, sort?", sort_data 1446!$OMP BARRIER 1447 nrows = matrix%nblkrows_total 1448 nwms = SIZE(matrix%wms) 1449!$ IF (nwms /= OMP_GET_NUM_THREADS()) & 1450!$ DBCSR_ABORT("Number of threads does not match number of work matrices.") 1451 which_wm = 1 1452!$ which_wm = OMP_GET_THREAD_NUM() + 1 1453 wm => matrix%wms(which_wm) 1454 ! 1455 ! Currently B-tree-based work matrices are converted to array form. 1456 IF (dbcsr_wm_use_mutable(wm)) THEN 1457 SELECT CASE (wm%mutable%m%data_type) 1458 CASE (dbcsr_type_real_4) 1459 CALL tree_to_linear_s(wm) 1460 CASE (dbcsr_type_real_8) 1461 CALL tree_to_linear_d(wm) 1462 CASE (dbcsr_type_complex_4) 1463 CALL tree_to_linear_c(wm) 1464 CASE (dbcsr_type_complex_8) 1465 CALL tree_to_linear_z(wm) 1466 CASE default 1467 DBCSR_ABORT("Invalid data type") 1468 END SELECT 1469 ENDIF 1470 ! 1471 ! Initializations and some counts from the threads are summed. 1472 ! 1473!$OMP MASTER 1474 new_nblks = old_row_p(nrows + 1) 1475 new_nze = matrix%nze 1476 ALLOCATE (all_row_p(nwms)) 1477 ALLOCATE (all_col_i(nwms)) 1478 ALLOCATE (all_blk_p(nwms)) 1479 ALLOCATE (all_data_sizes(0:nwms)) 1480 ALLOCATE (all_data_offsets(nwms)) 1481 IF (sort_data) ALLOCATE (all_data_areas(0:nwms)) 1482 IF (sort_data) THEN 1483 CALL dbcsr_data_init(all_data_areas(0)) 1484 all_data_areas(0) = matrix%data_area 1485!$OMP CRITICAL (crit_data) 1486 CALL dbcsr_data_hold(all_data_areas(0)) 1487!$OMP END CRITICAL (crit_data) 1488 ENDIF 1489 ! The following is valid because the data actually referenced 1490 ! by blocks is explicitly calculated in dbcsr_finalize. 1491 all_data_sizes(0) = matrix%nze 1492 rbs => array_data(matrix%row_blk_size) 1493 cbs => array_data(matrix%col_blk_size) 1494!$OMP END MASTER 1495 ! 1496!$OMP BARRIER 1497 IF (sort_data .AND. wm%datasize_after_filtering .GE. 0) THEN 1498 wm_datasize = wm%datasize_after_filtering 1499 ELSE 1500 wm_datasize = wm%datasize 1501 ENDIF 1502 nblks = wm%lastblk 1503!$OMP ATOMIC 1504 new_nblks = new_nblks + nblks 1505!$OMP ATOMIC 1506 new_nze = new_nze + wm_datasize 1507!$OMP BARRIER 1508 ! 1509!$OMP MASTER 1510 ALLOCATE (new_row_p(nrows + 1)) 1511 ALLOCATE (new_col_i(new_nblks)) 1512 ALLOCATE (new_blk_p(new_nblks)) 1513 IF (sort_data) THEN 1514 ALLOCATE (blk_d(new_nblks)) 1515 ELSE 1516 ALLOCATE (blk_d(1)) 1517 ENDIF 1518!$OMP END MASTER 1519 ! 1520 ! Each thread creates a row_p index for its new blocks. This gives the 1521 ! number of new blocks per thread per row. 1522 CALL dbcsr_sort_indices(nblks, wm%row_i, wm%col_i, wm%blk_p) 1523 ALLOCATE (my_row_p(nrows + 1)) 1524 CALL dbcsr_make_dbcsr_index(my_row_p, wm%row_i, nrows, nblks) 1525 ! 1526 ! The threads must share their row_p, col_i, blk_p, and data areas 1527 ! among themselves. 1528 all_row_p(which_wm)%p => my_row_p 1529 all_data_sizes(which_wm) = wm_datasize 1530 IF (.NOT. sort_data) THEN 1531 IF (wm_datasize > dbcsr_data_get_size_referenced(wm%data_area)) & 1532 DBCSR_ABORT("Data size mismatch.") 1533 ENDIF 1534 all_col_i(which_wm)%p => wm%col_i 1535 all_blk_p(which_wm)%p => wm%blk_p 1536!$OMP BARRIER 1537 IF (sort_data) THEN 1538!$OMP MASTER 1539 all_data_offsets(:) = 0 1540!$OMP END MASTER 1541 CALL dbcsr_data_init(all_data_areas(which_wm)) 1542 all_data_areas(which_wm) = wm%data_area 1543!$OMP CRITICAL (crit_data) 1544 CALL dbcsr_data_hold(all_data_areas(which_wm)) 1545!$OMP END CRITICAL (crit_data) 1546 ELSE 1547!$OMP MASTER 1548 all_data_offsets(1) = all_data_sizes(0) 1549 DO t = 2, nwms 1550 all_data_offsets(t) = all_data_offsets(t - 1) + all_data_sizes(t - 1) 1551 ENDDO 1552!$OMP END MASTER 1553 ENDIF 1554 ! 1555 ! The new row counts are created, then the new row_p index is created. 1556 ! 1557!$OMP DO 1558 DO row = 1, nrows 1559 my_row_count = old_row_p(row + 1) - old_row_p(row) 1560 DO t = 1, nwms 1561 my_row_count = my_row_count & 1562 & + all_row_p(t)%p(row + 1) - all_row_p(t)%p(row) 1563 ENDDO 1564 new_row_p(row) = my_row_count 1565 ENDDO 1566!$OMP END DO 1567!$OMP MASTER 1568 max_row_count = MAXVAL(new_row_p(1:nrows)) 1569 CALL dbcsr_build_row_index(new_row_p, nrows) 1570!$OMP END MASTER 1571!$OMP BARRIER 1572 ! 1573 ! The new index is built 1574 CALL merge_index(new_row_p, new_col_i, new_blk_p, & 1575 blk_d, & 1576 old_row_p, old_col_i, old_blk_p, & 1577 all_row_p, all_col_i, all_blk_p, & 1578 all_data_offsets, nwms, nrows, max_row_count, sort_data) 1579 ! 1580 ! The data is then merged in one of two ways. 1581 ! 1582!$OMP MASTER 1583 IF (sort_data) THEN 1584 ! The matrix gets a fresh data area. Blocks from the work 1585 ! matrices will be copied into it in order. 1586 ! 1587!$OMP CRITICAL (crit_data) 1588 CALL dbcsr_data_release(matrix%data_area) 1589 CALL dbcsr_data_init(matrix%data_area) 1590!$OMP END CRITICAL (crit_data) 1591 CALL dbcsr_data_new(matrix%data_area, & 1592 data_size=new_nze, & 1593 data_type=dbcsr_data_get_type(all_data_areas(0)), & 1594 memory_type=dbcsr_data_get_memory_type(all_data_areas(0))) 1595 ALLOCATE (new_blk_p_sorted(new_nblks)) 1596 ALLOCATE (new_blk_sizes(new_nblks)) 1597 ELSE 1598 ! Data stored in the work matrices will be just appended to the 1599 ! current data area. 1600 CALL dbcsr_data_ensure_size(matrix%data_area, new_nze) 1601 ENDIF 1602!$OMP END MASTER 1603!$OMP BARRIER 1604 IF (sort_data) THEN 1605 CALL dbcsr_calc_block_sizes(new_blk_sizes, & 1606 new_row_p, new_col_i, rbs, cbs) 1607 CALL dbcsr_sort_data(new_blk_p_sorted, new_blk_p, & 1608 new_blk_sizes, dsts=matrix%data_area, & 1609 src=all_data_areas(0), & 1610 srcs=all_data_areas, old_blk_d=blk_d) 1611 ELSE 1612 IF (all_data_sizes(which_wm) .GT. 0) THEN 1613 CALL dbcsr_data_copy(dst=matrix%data_area, & 1614 dst_lb=(/all_data_offsets(which_wm) + 1/), & 1615 dst_sizes=(/all_data_sizes(which_wm)/), & 1616 src=wm%data_area, & 1617 src_lb=(/1/), & 1618 src_sizes=(/all_data_sizes(which_wm)/)) 1619 ENDIF 1620 ENDIF 1621 ! 1622 ! Creates a new index array. 1623 ! 1624!$OMP BARRIER 1625!$OMP MASTER 1626 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p) 1627 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i) 1628 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p) 1629 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, & 1630 DATA=new_row_p(1:nrows + 1), extra=new_nblks*2) 1631 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, & 1632 DATA=new_col_i(1:new_nblks)) 1633 IF (sort_data) THEN 1634 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & 1635 DATA=new_blk_p_sorted) 1636 ELSE 1637 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & 1638 DATA=new_blk_p) 1639 ENDIF 1640 matrix%nblks = new_nblks 1641 matrix%nze = new_nze 1642 matrix%index(dbcsr_slot_nblks) = matrix%nblks 1643 matrix%index(dbcsr_slot_nze) = matrix%nze 1644 CALL dbcsr_repoint_index(matrix) 1645!$OMP END MASTER 1646 ! 1647!$OMP BARRIER 1648 ! 1649 DEALLOCATE (my_row_p) 1650 IF (sort_data) THEN 1651!$OMP MASTER 1652!$OMP CRITICAL (crit_data) 1653 CALL dbcsr_data_release(all_data_areas(0)) 1654!$OMP END CRITICAL (crit_data) 1655 DO which_wm = 1, nwms 1656!$OMP CRITICAL (crit_data) 1657 CALL dbcsr_data_release(all_data_areas(which_wm)) 1658!$OMP END CRITICAL (crit_data) 1659 ENDDO 1660!$OMP END MASTER 1661 ENDIF 1662!$OMP BARRIER 1663!$OMP MASTER 1664 DEALLOCATE (all_row_p) 1665 DEALLOCATE (all_col_i) 1666 DEALLOCATE (all_blk_p) 1667 DEALLOCATE (new_row_p) 1668 DEALLOCATE (new_col_i) 1669 DEALLOCATE (new_blk_p) 1670 DEALLOCATE (all_data_sizes) 1671 DEALLOCATE (all_data_offsets) 1672 IF (sort_data) THEN 1673 DEALLOCATE (all_data_areas) 1674 DEALLOCATE (new_blk_sizes) 1675 DEALLOCATE (new_blk_p_sorted) 1676 ENDIF 1677 DEALLOCATE (blk_d) 1678!$OMP END MASTER 1679!$OMP BARRIER 1680 IF (dbg) WRITE (*, *) routineN//" stopped" 1681 CALL timestop(handle) 1682 END SUBROUTINE dbcsr_merge_all 1683 1684 SUBROUTINE dbcsr_merge_single_wm(matrix) 1685 !! Sort data from the WM into the final matrix, based closely on dbcsr_merge_all 1686 1687 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 1688 !! matrix to work on 1689 1690 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_merge_single_wm', & 1691 routineP = moduleN//':'//routineN 1692 LOGICAL, PARAMETER :: dbg = .FALSE. 1693 1694 INTEGER :: handle, nblks, nrows 1695 INTEGER, ALLOCATABLE, DIMENSION(:), SAVE :: new_blk_p_sorted, new_blk_sizes, & 1696 new_row_p 1697 INTEGER, ALLOCATABLE, DIMENSION(:), SAVE, TARGET :: blk_d 1698 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS, SAVE :: cbs, rbs 1699 TYPE(dbcsr_work_type), POINTER :: wm 1700 1701! --------------------------------------------------------------------------- 1702 1703 CALL timeset(routineN, handle) 1704 1705 ! Outline: 1706 ! There is a single work matrix. Data blocks are sorted and copied 1707 ! into the matrix data area (which is empty). The index is made consistent 1708 1709 nrows = matrix%nblkrows_total 1710 wm => matrix%wms(1) 1711 nblks = wm%lastblk 1712 IF (dbcsr_wm_use_mutable(wm)) & 1713 DBCSR_ABORT("Number of threads does not match number of work matrices.") 1714 ! 1715 ! The following is valid because the data actually referenced 1716 ! by blocks is explicitly calculated in dbcsr_finalize. 1717!$OMP MASTER 1718 rbs => array_data(matrix%row_blk_size) 1719 cbs => array_data(matrix%col_blk_size) 1720 ! 1721 ! Initializations 1722 ! 1723 ALLOCATE (new_row_p(nrows + 1)) 1724 ALLOCATE (blk_d(nblks)) 1725 ALLOCATE (new_blk_p_sorted(nblks)) 1726 ALLOCATE (new_blk_sizes(nblks)) 1727 ! 1728 ! Master thread creates a row_p index for the (sorted) blocks. 1729 CALL dbcsr_sort_indices(nblks, wm%row_i, wm%col_i, wm%blk_p) 1730 CALL dbcsr_make_dbcsr_index(new_row_p, wm%row_i, nrows, nblks) 1731 ! 1732 ! 1733 ! The matrix data area is resized. Blocks from the work 1734 ! matrices will be copied into it in order. 1735 ! 1736 CALL dbcsr_data_ensure_size(matrix%data_area, wm%datasize, nocopy=.TRUE.) 1737!$OMP END MASTER 1738!$OMP BARRIER 1739 CALL dbcsr_calc_block_sizes(new_blk_sizes, & 1740 new_row_p, wm%col_i, rbs, cbs) 1741 CALL dbcsr_sort_data(new_blk_p_sorted, wm%blk_p, & 1742 new_blk_sizes, dsts=matrix%data_area, & 1743 src=wm%data_area) 1744 ! 1745 ! Creates a new index array. 1746 ! 1747!$OMP BARRIER 1748!$OMP MASTER 1749 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p) 1750 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i) 1751 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p) 1752 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, & 1753 DATA=new_row_p(1:nrows + 1), extra=nblks*2) 1754 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, & 1755 DATA=wm%col_i(1:nblks)) 1756 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & 1757 DATA=new_blk_p_sorted) 1758 matrix%nblks = nblks 1759 matrix%nze = wm%datasize 1760 matrix%index(dbcsr_slot_nblks) = matrix%nblks 1761 matrix%index(dbcsr_slot_nze) = matrix%nze 1762 CALL dbcsr_repoint_index(matrix) 1763 DEALLOCATE (new_row_p) 1764 DEALLOCATE (new_blk_sizes) 1765 DEALLOCATE (new_blk_p_sorted) 1766 DEALLOCATE (blk_d) 1767!$OMP END MASTER 1768 IF (dbg) WRITE (*, *) routineN//" stopped" 1769 CALL timestop(handle) 1770 END SUBROUTINE dbcsr_merge_single_wm 1771 1772 SUBROUTINE merge_index(new_row_p, new_col_i, new_blk_p, & 1773 !! Builds a new index from several work matrices. 1774 blk_d, old_row_p, old_col_i, old_blk_p, & 1775 all_row_p, all_col_i, all_blk_p, & 1776 all_data_offsets, nwms, nrows, max_row_count, sort_data) 1777 INTEGER, DIMENSION(*), INTENT(IN) :: new_row_p 1778 INTEGER, DIMENSION(*), INTENT(OUT), TARGET :: new_col_i, new_blk_p 1779 INTEGER, DIMENSION(*), INTENT(IN), TARGET :: blk_d 1780 INTEGER, DIMENSION(*), INTENT(IN) :: old_row_p, old_col_i, old_blk_p 1781 TYPE(i_array_p), DIMENSION(*), INTENT(IN) :: all_row_p, all_col_i, all_blk_p 1782 INTEGER, DIMENSION(*), INTENT(IN) :: all_data_offsets 1783 INTEGER, INTENT(IN) :: nwms, nrows, max_row_count 1784 LOGICAL, INTENT(IN) :: sort_data 1785 1786 CHARACTER(len=*), PARAMETER :: routineN = 'merge_index', routineP = moduleN//':'//routineN 1787 1788 INTEGER :: blk1, blk2, error_handle, first_row_blk, & 1789 last_row_blk, row, src_blk_1, & 1790 src_blk_2, t 1791 INTEGER, ALLOCATABLE, DIMENSION(:) :: blk_p_buff, tmp_arr 1792 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: blk_span, col_span, d_span 1793 1794! --------------------------------------------------------------------------- 1795 1796 CALL timeset(routineN, error_handle) 1797 ! 1798 ALLOCATE (tmp_arr(max_row_count)) 1799 ALLOCATE (blk_p_buff(max_row_count)) 1800 ! 1801!$OMP DO 1802 DO row = 1, nrows 1803 first_row_blk = new_row_p(row) + 1 1804 last_row_blk = new_row_p(row + 1) 1805 col_span => new_col_i(first_row_blk:last_row_blk) 1806 blk_span => new_blk_p(first_row_blk:last_row_blk) 1807 IF (sort_data) d_span => blk_d(first_row_blk:last_row_blk) 1808 src_blk_1 = old_row_p(row) + 1 1809 src_blk_2 = old_row_p(row + 1) 1810 blk1 = 1 1811 blk2 = blk1 + (src_blk_2 - src_blk_1 + 1) - 1 1812 col_span(blk1:blk2) = old_col_i(src_blk_1:src_blk_2) 1813 blk_span(blk1:blk2) = old_blk_p(src_blk_1:src_blk_2) 1814 IF (sort_data) THEN 1815 d_span(blk1:blk2) = 1 1816 DO t = 1, nwms 1817 src_blk_1 = all_row_p(t)%p(row) + 1 1818 src_blk_2 = all_row_p(t)%p(row + 1) 1819 blk1 = blk2 + 1 1820 blk2 = blk1 + (src_blk_2 - src_blk_1 + 1) - 1 1821 col_span(blk1:blk2) = all_col_i(t)%p(src_blk_1:src_blk_2) 1822 blk_span(blk1:blk2) = all_blk_p(t)%p(src_blk_1:src_blk_2) 1823 d_span(blk1:blk2) = t + 1 1824 ENDDO 1825 ELSE 1826 DO t = 1, nwms 1827 src_blk_1 = all_row_p(t)%p(row) + 1 1828 src_blk_2 = all_row_p(t)%p(row + 1) 1829 blk1 = blk2 + 1 1830 blk2 = blk1 + (src_blk_2 - src_blk_1 + 1) - 1 1831 col_span(blk1:blk2) = all_col_i(t)%p(src_blk_1:src_blk_2) 1832 blk_span(blk1:blk2) = all_blk_p(t)%p(src_blk_1:src_blk_2) & 1833 & + all_data_offsets(t) 1834 ENDDO 1835 ENDIF 1836 CALL sort(col_span, SIZE(col_span), tmp_arr) 1837 blk_p_buff(1:SIZE(blk_span)) = blk_span(:) 1838 blk_span(1:SIZE(blk_span)) = blk_p_buff(tmp_arr(1:SIZE(blk_span))) 1839 ENDDO 1840!$OMP END DO 1841 ! 1842 DEALLOCATE (tmp_arr) 1843 DEALLOCATE (blk_p_buff) 1844 ! 1845 CALL timestop(error_handle) 1846 END SUBROUTINE merge_index 1847 1848#:include '../data/dbcsr.fypp' 1849#:for n, nametype1, base1, prec1, kind1, type1, dkind1 in inst_params_float 1850 SUBROUTINE tree_to_linear_${nametype1}$ (wm) 1851 !! Converts mutable data to linear (array) type. 1852 1853 USE dbcsr_btree, & 1854 ONLY: btree_2d_data_${nametype1}$ => btree_data_${nametype1}$p2d, & 1855 btree_destroy_${nametype1}$ => btree_delete, & 1856 btree_size_${nametype1}$ => btree_get_entries 1857 TYPE(dbcsr_work_type), INTENT(INOUT) :: wm 1858 !! work matrix to convert 1859 1860 CHARACTER(len=*), PARAMETER :: routineN = 'tree_to_linear_${nametype1}$', & 1861 routineP = moduleN//':'//routineN 1862 1863 INTEGER :: blk, blk_p, treesize, & 1864 error_handler, needed_size 1865 INTEGER(KIND=int_8), ALLOCATABLE, & 1866 DIMENSION(:) :: keys 1867 ${type1}$, DIMENSION(:), POINTER, CONTIGUOUS :: target_data 1868 ${type1}$, DIMENSION(:, :), POINTER :: block_2d 1869 TYPE(btree_2d_data_${nametype1}$), ALLOCATABLE, & 1870 DIMENSION(:) :: values 1871 1872! --------------------------------------------------------------------------- 1873 1874 CALL timeset(routineN, error_handler) 1875 ! srt = .TRUE. ! Not needed because of the copy 1876 treesize = btree_size_${nametype1}$ (wm%mutable%m%btree_${nametype1}$) 1877 IF (wm%lastblk .NE. treesize) & 1878 DBCSR_ABORT("Mismatch in number of blocks") 1879 ALLOCATE (keys(treesize), values(treesize)) 1880 CALL btree_destroy_${nametype1}$ (wm%mutable%m%btree_${nametype1}$, keys, values) 1881 CALL ensure_array_size(wm%row_i, ub=treesize) 1882 CALL ensure_array_size(wm%col_i, ub=treesize) 1883 CALL dbcsr_unpack_i8_2i4(keys, wm%row_i, & 1884 wm%col_i) 1885 ! For now we also fill the data, sloooowly, but this should 1886 ! be avoided and the data should be copied directly from the 1887 ! source in the subroutine's main loop. 1888 CALL ensure_array_size(wm%blk_p, ub=treesize) 1889 needed_size = 0 1890 DO blk = 1, treesize 1891 block_2d => values(blk)%p 1892 needed_size = needed_size + SIZE(block_2d) 1893 ENDDO 1894 wm%datasize = needed_size 1895 CALL dbcsr_data_ensure_size(wm%data_area, & 1896 wm%datasize) 1897 target_data => dbcsr_get_data_p_${nametype1}$ (wm%data_area) 1898 blk_p = 1 1899 DO blk = 1, treesize 1900 block_2d => values(blk)%p 1901 IF (.NOT. values(blk)%tr) THEN 1902 wm%blk_p(blk) = blk_p 1903 ELSE 1904 wm%blk_p(blk) = -blk_p 1905 ENDIF 1906 CALL block_copy_${nametype1}$ (target_data, block_2d, & 1907 SIZE(block_2d), blk_p, 1) 1908 blk_p = blk_p + SIZE(block_2d) 1909 DEALLOCATE (block_2d) 1910 ENDDO 1911 DEALLOCATE (keys, values) 1912 CALL dbcsr_mutable_release(wm%mutable) 1913 CALL timestop(error_handler) 1914 END SUBROUTINE tree_to_linear_${nametype1}$ 1915 1916#:endfor 1917 1918END MODULE dbcsr_work_operations 1919