1!--------------------------------------------------------------------------------------------------! 2! Copyright (C) by the DBCSR developers group - All rights reserved ! 3! This file is part of the DBCSR library. ! 4! ! 5! For information on the license, see the LICENSE file. ! 6! For further information please visit https://dbcsr.cp2k.org ! 7! SPDX-License-Identifier: GPL-2.0+ ! 8!--------------------------------------------------------------------------------------------------! 9 10MODULE dbcsr_work_operations 11 !! DBCSR work matrix utilities 12 13 USE dbcsr_array_types, ONLY: array_data, & 14 array_hold, & 15 array_i1d_obj, & 16 array_new, & 17 array_nullify, & 18 array_release, & 19 array_size 20 USE dbcsr_btree, ONLY: btree_data_cp2d, & 21 btree_data_dp2d, & 22 btree_data_sp2d, & 23 btree_data_zp2d 24 USE dbcsr_block_operations, ONLY: block_copy_c, & 25 block_copy_d, & 26 block_copy_s, & 27 block_copy_z, & 28 dbcsr_data_copy, & 29 dbcsr_data_set 30 USE dbcsr_config, ONLY: default_resize_factor 31 USE dbcsr_data_methods, ONLY: & 32 dbcsr_data_clear_pointer, dbcsr_data_ensure_size, dbcsr_data_get_memory_type, & 33 dbcsr_data_get_size, dbcsr_data_get_size_referenced, dbcsr_data_get_type, dbcsr_data_hold, & 34 dbcsr_data_init, dbcsr_data_new, dbcsr_data_release, dbcsr_data_set_size_referenced, & 35 dbcsr_data_valid, dbcsr_get_data_p_c, dbcsr_get_data_p_d, dbcsr_get_data_p_s, & 36 dbcsr_get_data_p_z 37 USE dbcsr_data_operations, ONLY: dbcsr_data_copyall, & 38 dbcsr_sort_data, & 39 dbcsr_switch_data_area 40 USE dbcsr_dist_methods, ONLY: dbcsr_distribution_has_threads, & 41 dbcsr_distribution_hold, & 42 dbcsr_distribution_make_threads, & 43 dbcsr_distribution_ncols, & 44 dbcsr_distribution_nrows, & 45 dbcsr_distribution_release 46 USE dbcsr_index_operations, ONLY: dbcsr_addto_index_array, & 47 dbcsr_build_row_index, & 48 dbcsr_clearfrom_index_array, & 49 dbcsr_index_prune_deleted, & 50 dbcsr_make_dbcsr_index, & 51 dbcsr_make_index_exist, & 52 dbcsr_repoint_index, & 53 dbcsr_sort_indices 54 USE dbcsr_iterator_operations, ONLY: dbcsr_iterator_blocks_left, & 55 dbcsr_iterator_next_block, & 56 dbcsr_iterator_start, & 57 dbcsr_iterator_stop 58 USE dbcsr_mem_methods, ONLY: dbcsr_memtype_equal 59 USE dbcsr_methods, ONLY: & 60 dbcsr_col_block_sizes, dbcsr_distribution, dbcsr_get_data_memory_type, & 61 dbcsr_get_data_size_used, dbcsr_get_data_type, dbcsr_get_index_memory_type, & 62 dbcsr_get_matrix_type, dbcsr_get_replication_type, dbcsr_matrix_counter, & 63 dbcsr_max_col_size, dbcsr_max_row_size, dbcsr_mutable_destroy, dbcsr_mutable_init, & 64 dbcsr_mutable_instantiated, dbcsr_mutable_new, dbcsr_mutable_release, dbcsr_name, & 65 dbcsr_row_block_sizes, dbcsr_use_mutable, dbcsr_valid_index, dbcsr_wm_use_mutable 66 USE dbcsr_ptr_util, ONLY: ensure_array_size 67 USE dbcsr_toollib, ONLY: dbcsr_unpack_i8_2i4, & 68 sort 69 USE dbcsr_string_utilities, ONLY: uppercase 70 USE dbcsr_types, ONLY: & 71 dbcsr_data_obj, dbcsr_distribution_obj, dbcsr_iterator, dbcsr_memtype_default, & 72 dbcsr_memtype_type, dbcsr_meta_size, dbcsr_num_slots, dbcsr_repl_col, dbcsr_repl_full, & 73 dbcsr_repl_none, dbcsr_repl_row, dbcsr_slot_blk_p, dbcsr_slot_col_i, dbcsr_slot_home_coli, & 74 dbcsr_slot_home_pcol, dbcsr_slot_home_prow, dbcsr_slot_home_rowi, dbcsr_slot_home_vpcol, & 75 dbcsr_slot_home_vprow, dbcsr_slot_nblkcols_local, dbcsr_slot_nblkcols_total, & 76 dbcsr_slot_nblkrows_local, dbcsr_slot_nblkrows_total, dbcsr_slot_nblks, & 77 dbcsr_slot_nfullcols_local, dbcsr_slot_nfullcols_total, dbcsr_slot_nfullrows_local, & 78 dbcsr_slot_nfullrows_total, dbcsr_slot_nze, dbcsr_slot_row_p, dbcsr_slot_size, dbcsr_type, & 79 dbcsr_type_antihermitian, dbcsr_type_antisymmetric, dbcsr_type_complex_4, & 80 dbcsr_type_complex_8, dbcsr_type_hermitian, dbcsr_type_no_symmetry, dbcsr_type_real_4, & 81 dbcsr_type_real_8, dbcsr_type_real_default, dbcsr_type_symmetric, dbcsr_work_type 82 USE dbcsr_dist_util, ONLY: convert_sizes_to_offsets, & 83 dbcsr_calc_block_sizes, & 84 dbcsr_verify_matrix, & 85 meta_from_dist 86 USE dbcsr_kinds, ONLY: default_string_length, & 87 int_8, & 88 real_4, & 89 real_8 90#include "base/dbcsr_base_uses.f90" 91 92!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads, omp_in_parallel 93 94 IMPLICIT NONE 95 PRIVATE 96 97 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dbcsr_work_operations' 98 99 LOGICAL, PARAMETER :: careful_mod = .FALSE. 100 101 PUBLIC :: dbcsr_create, dbcsr_work_create, dbcsr_finalize, dbcsr_special_finalize 102 PUBLIC :: dbcsr_add_wm_from_matrix, & 103 add_work_coordinate 104 PUBLIC :: dbcsr_work_destroy 105 106 INTERFACE dbcsr_create 107 MODULE PROCEDURE dbcsr_create_new, dbcsr_create_template 108 END INTERFACE 109 110 TYPE i_array_p 111 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: p 112 END TYPE i_array_p 113 114CONTAINS 115 116 SUBROUTINE dbcsr_create_new(matrix, name, dist, matrix_type, & 117 row_blk_size, col_blk_size, row_blk_size_obj, col_blk_size_obj, & 118 nze, data_type, data_buffer, & 119 data_memory_type, index_memory_type, & 120 max_rbs, max_cbs, & 121 row_blk_offset, col_blk_offset, & 122 thread_dist, & 123 reuse, reuse_arrays, mutable_work, make_index, replication_type) 124 !! Creates a matrix, allocating the essentials. 125 !! 126 !! The matrix itself is allocated, as well as the essential parts of 127 !! the index. When passed the nze argument, the data is also allocated 128 !! to that size. 129 !! see dbcsr_types.F 130 131 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 132 !! new matrix 133 CHARACTER(len=*), INTENT(IN) :: name 134 TYPE(dbcsr_distribution_obj), INTENT(IN) :: dist 135 !! distribution_2d distribution 136 CHARACTER, INTENT(IN) :: matrix_type 137 !! 'N' for normal, 'T' for transposed, 'S' for symmetric, and 'A' for antisymmetric 138 INTEGER, DIMENSION(:), INTENT(IN), POINTER, & 139 CONTIGUOUS, OPTIONAL :: row_blk_size, col_blk_size 140 TYPE(array_i1d_obj), INTENT(IN), OPTIONAL :: row_blk_size_obj, col_blk_size_obj 141 INTEGER, INTENT(IN), OPTIONAL :: nze, data_type 142 !! number of elements 143 !! type of data from 'rRcC' for single/double precision real/complex, default is 'R' 144 TYPE(dbcsr_data_obj), INTENT(IN), OPTIONAL :: data_buffer 145 TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL :: data_memory_type, index_memory_type 146 !! allocate indices and data using special memory 147 !! allocate indices using special memory 148 INTEGER, INTENT(IN), OPTIONAL :: max_rbs, max_cbs 149 TYPE(array_i1d_obj), INTENT(IN), OPTIONAL :: row_blk_offset, col_blk_offset 150 TYPE(dbcsr_distribution_obj), INTENT(IN), OPTIONAL :: thread_dist 151 LOGICAL, INTENT(IN), OPTIONAL :: reuse, reuse_arrays, mutable_work, & 152 make_index 153 !! reuses an existing matrix, default is to create a fresh one 154 !! uses the mutable data for working and not the append-only data; default is append-only 155 CHARACTER, INTENT(IN), OPTIONAL :: replication_type 156 !! replication to be used for this matrix; default is dbcsr_repl_none 157 158 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_create_new' 159 160 CHARACTER :: matrix_type_l 161 INTEGER :: handle, my_nze 162 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: vec_col_blk_offset, vec_row_blk_offset 163 INTEGER, DIMENSION(dbcsr_meta_size) :: new_meta 164 LOGICAL :: hijack, my_make_index 165 166! --------------------------------------------------------------------------- 167 168 MARK_USED(thread_dist) ! only used with OMP 169 170 CALL timeset(routineN, handle) 171 172 ! Reuse matrix only if has actually been allocated. 173 hijack = ASSOCIATED(matrix%index) 174 IF (PRESENT(reuse)) hijack = reuse 175 176 my_make_index = .TRUE. 177 IF (PRESENT(make_index)) my_make_index = make_index 178 179 IF (.NOT. hijack) THEN 180 matrix = dbcsr_type() 181 matrix%refcount = 1 182 ENDIF 183!$OMP CRITICAL (crit_counter) 184 matrix%serial_number = dbcsr_matrix_counter 185 dbcsr_matrix_counter = dbcsr_matrix_counter + 1 186!$OMP END CRITICAL (crit_counter) 187 ! Mark matrix index as having an invalid index. 188 matrix%valid = .FALSE. 189 matrix%name = name 190 ! Sets the type of matrix building/modifying work structures. 191 IF (PRESENT(mutable_work)) THEN 192 matrix%work_mutable = mutable_work 193 ELSE 194 matrix%work_mutable = .FALSE. 195 ENDIF 196 ! Sets the correct data type. 197 IF (PRESENT(data_type)) THEN 198 SELECT CASE (data_type) 199 CASE (dbcsr_type_real_4) 200 matrix%data_type = dbcsr_type_real_4 201 CASE (dbcsr_type_real_8) 202 matrix%data_type = dbcsr_type_real_8 203 CASE (dbcsr_type_complex_4) 204 matrix%data_type = dbcsr_type_complex_4 205 CASE (dbcsr_type_complex_8) 206 matrix%data_type = dbcsr_type_complex_8 207 CASE DEFAULT 208 DBCSR_ABORT("Invalid matrix type") 209 END SELECT 210 ELSE 211 matrix%data_type = dbcsr_type_real_default 212 ENDIF 213 214 matrix%data_memory_type = dbcsr_memtype_default 215 IF (PRESENT(data_memory_type)) & 216 matrix%data_memory_type = data_memory_type 217 218 matrix%index_memory_type = dbcsr_memtype_default 219 IF (PRESENT(index_memory_type)) & 220 matrix%index_memory_type = index_memory_type 221 222 IF (hijack) THEN 223 ! Release/deallocate elements that are replaced or not needed 224 ! by the new matrix. This is similar to what dbcsr_destroy 225 ! does, except that it keeps the index and data. 226 CALL array_release(matrix%row_blk_size) 227 CALL array_release(matrix%col_blk_size) 228 CALL array_release(matrix%row_blk_offset) 229 CALL array_release(matrix%col_blk_offset) 230 IF (matrix%has_local_rows) & 231 CALL array_release(matrix%local_rows) 232 IF (matrix%has_global_rows) & 233 CALL array_release(matrix%global_rows) 234 IF (matrix%has_local_cols) & 235 CALL array_release(matrix%local_cols) 236 IF (matrix%has_global_cols) & 237 CALL array_release(matrix%global_cols) 238 CALL dbcsr_distribution_release(matrix%dist) 239 IF (ASSOCIATED(matrix%wms)) THEN 240 CALL dbcsr_work_destroy_all(matrix) 241 ENDIF 242 CALL array_nullify(matrix%local_rows) 243 CALL array_nullify(matrix%global_rows) 244 CALL array_nullify(matrix%local_cols) 245 CALL array_nullify(matrix%global_cols) 246 ! 247 IF (matrix%data_type /= matrix%data_area%d%data_type) & 248 DBCSR_ABORT("Inconsistent data type for the existing buffer.") 249 CALL dbcsr_data_set_size_referenced(matrix%data_area, 0) 250 ELSE 251 ! Invalidate index 252 NULLIFY (matrix%index) 253 ! Invalidate data 254 IF (PRESENT(data_buffer)) THEN 255 IF (.NOT. dbcsr_data_valid(data_buffer)) & 256 DBCSR_ABORT("Input data buffer not valid.") 257 IF (matrix%data_type /= data_buffer%d%data_type) & 258 DBCSR_ABORT("Input buffer data type different by matrix data type.") 259 matrix%data_memory_type = data_buffer%d%memory_type 260 matrix%data_area = data_buffer 261 CALL dbcsr_data_hold(matrix%data_area) 262 ELSE 263 CALL dbcsr_data_init(matrix%data_area) 264 ENDIF 265 ENDIF 266 ! These are always invalidated. 267 NULLIFY (matrix%row_p, matrix%col_i, matrix%blk_p, matrix%thr_c, & 268 matrix%coo_l) 269 IF (PRESENT(row_blk_size_obj)) THEN 270 matrix%row_blk_size = row_blk_size_obj 271 CALL array_hold(matrix%row_blk_size) 272 ELSEIF (PRESENT(row_blk_size)) THEN 273 CALL array_new(matrix%row_blk_size, row_blk_size, gift=reuse_arrays) 274 ELSE 275 DBCSR_ABORT("Missing row_blk_size") 276 ENDIF 277 IF (PRESENT(max_rbs)) THEN 278 matrix%max_rbs = max_rbs 279 ELSE IF (array_size(matrix%row_blk_size) .GT. 0) THEN 280 matrix%max_rbs = MAXVAL(array_data(matrix%row_blk_size)) 281 ELSE 282 matrix%max_rbs = 0 283 ENDIF 284 IF (PRESENT(col_blk_size_obj)) THEN 285 matrix%col_blk_size = col_blk_size_obj 286 CALL array_hold(matrix%col_blk_size) 287 ELSEIF (PRESENT(col_blk_size)) THEN 288 CALL array_new(matrix%col_blk_size, col_blk_size, gift=reuse_arrays) 289 ELSE 290 DBCSR_ABORT("Missing col_blk_size") 291 ENDIF 292 IF (PRESENT(max_cbs)) THEN 293 matrix%max_cbs = max_cbs 294 ELSE IF (array_size(matrix%col_blk_size) .GT. 0) THEN 295 matrix%max_cbs = MAXVAL(array_data(matrix%col_blk_size)) 296 ELSE 297 matrix%max_cbs = 0 298 ENDIF 299 ! 300 IF (array_size(matrix%row_blk_size) /= dbcsr_distribution_nrows(dist)) & 301 DBCSR_ABORT("Number of blocked rows does match blocked row distribution.") 302 IF (array_size(matrix%col_blk_size) /= dbcsr_distribution_ncols(dist)) & 303 DBCSR_ABORT("Number of blocked columns does match blocked column distribution.") 304 ! initialize row/col offsets 305 IF (PRESENT(row_blk_offset)) THEN 306 IF (dbcsr_distribution_nrows(dist) + 1 /= array_size(row_blk_offset)) & 307 CALL dbcsr_abort(__LOCATION__, & 308 "Number of blocked offset rows does match blocked row distribution.") 309 matrix%row_blk_offset = row_blk_offset 310 CALL array_hold(matrix%row_blk_offset) 311 ELSE 312 ALLOCATE (vec_row_blk_offset(array_size(matrix%row_blk_size) + 1)) 313 CALL convert_sizes_to_offsets(array_data(matrix%row_blk_size), vec_row_blk_offset) 314 CALL array_new(matrix%row_blk_offset, vec_row_blk_offset, gift=.TRUE.) 315 ENDIF 316 317 IF (PRESENT(col_blk_offset)) THEN 318 IF (dbcsr_distribution_ncols(dist) + 1 /= array_size(col_blk_offset)) & 319 CALL dbcsr_abort(__LOCATION__, & 320 "Number of blocked offset columns does match blocked column distribution.") 321 matrix%col_blk_offset = col_blk_offset 322 CALL array_hold(matrix%col_blk_offset) 323 ELSE 324 ALLOCATE (vec_col_blk_offset(array_size(matrix%col_blk_size) + 1)) 325 CALL convert_sizes_to_offsets(array_data(matrix%col_blk_size), vec_col_blk_offset) 326 CALL array_new(matrix%col_blk_offset, vec_col_blk_offset, gift=.TRUE.) 327 ENDIF 328 329 matrix%dist = dist 330 CALL dbcsr_distribution_hold(matrix%dist) 331!$ IF (.NOT. dbcsr_distribution_has_threads(matrix%dist) .AND. PRESENT(thread_dist)) THEN 332!$ IF (dbcsr_distribution_has_threads(thread_dist)) THEN 333!$ matrix%dist%d%num_threads = thread_dist%d%num_threads 334!$ matrix%dist%d%has_thread_dist = .TRUE. 335!$ matrix%dist%d%thread_dist = thread_dist%d%thread_dist 336!$ CALL array_hold(matrix%dist%d%thread_dist) 337!$ ENDIF 338!$ ENDIF 339!$ IF (.NOT. dbcsr_distribution_has_threads(matrix%dist)) THEN 340!$ CALL dbcsr_distribution_make_threads(matrix%dist, & 341!$ array_data(matrix%row_blk_size)) 342!$ ENDIF 343 ! Set up some data. 344 IF (my_make_index) THEN 345 CALL meta_from_dist(new_meta, dist, array_data(matrix%row_blk_size), & 346 array_data(matrix%col_blk_size)) 347 matrix%nblkrows_total = new_meta(dbcsr_slot_nblkrows_total) 348 matrix%nblkcols_total = new_meta(dbcsr_slot_nblkcols_total) 349 matrix%nfullrows_total = new_meta(dbcsr_slot_nfullrows_total) 350 matrix%nfullcols_total = new_meta(dbcsr_slot_nfullcols_total) 351 matrix%nblkrows_local = new_meta(dbcsr_slot_nblkrows_local) 352 matrix%nblkcols_local = new_meta(dbcsr_slot_nblkcols_local) 353 matrix%nfullrows_local = new_meta(dbcsr_slot_nfullrows_local) 354 matrix%nfullcols_local = new_meta(dbcsr_slot_nfullcols_local) 355 ENDIF 356 my_nze = 0; IF (PRESENT(nze)) my_nze = nze 357 matrix%nblks = 0 358 matrix%nze = 0 359 360 IF (PRESENT(replication_type)) THEN 361 IF (replication_type .NE. dbcsr_repl_none & 362 .AND. replication_type .NE. dbcsr_repl_full & 363 .AND. replication_type .NE. dbcsr_repl_row & 364 .AND. replication_type .NE. dbcsr_repl_col) & 365 DBCSR_ABORT("Invalid replication type '"//replication_type//"'") 366 IF (replication_type .EQ. dbcsr_repl_row .OR. replication_type .EQ. dbcsr_repl_col) & 367 DBCSR_WARN("Row and column replication not fully supported") 368 matrix%replication_type = replication_type 369 ELSE 370 matrix%replication_type = dbcsr_repl_none 371 ENDIF 372 ! 373 ! Setup a matrix from scratch 374 IF (.NOT. hijack) THEN 375 IF (.NOT. PRESENT(data_buffer)) THEN 376 CALL dbcsr_data_new(matrix%data_area, matrix%data_type, my_nze, & 377 memory_type=matrix%data_memory_type) 378 CALL dbcsr_data_set_size_referenced(matrix%data_area, 0) 379 ENDIF 380 ! 381 IF (my_make_index) THEN 382 NULLIFY (matrix%index) 383 CALL ensure_array_size(matrix%index, lb=1, ub=dbcsr_num_slots, & 384 zero_pad=.TRUE., memory_type=matrix%index_memory_type) 385 ENDIF 386 ENDIF 387 IF (my_make_index) THEN 388 IF (LBOUND(matrix%index, 1) .GT. 1 & 389 .OR. UBOUND(matrix%index, 1) .LT. dbcsr_num_slots) & 390 DBCSR_ABORT("Index is not large enough") 391 matrix%index(1:dbcsr_num_slots) = 0 392 matrix%index(1:dbcsr_meta_size) = new_meta(1:dbcsr_meta_size) 393 matrix%index(dbcsr_slot_size) = dbcsr_num_slots 394 ENDIF 395 ! 396 matrix%symmetry = .FALSE. 397 matrix%negate_real = .FALSE. 398 matrix%negate_imaginary = .FALSE. 399 !matrix%transpose = .FALSE. 400 matrix_type_l = matrix_type 401 CALL uppercase(matrix_type_l) 402 SELECT CASE (matrix_type_l) 403 CASE (dbcsr_type_no_symmetry) 404 CASE (dbcsr_type_symmetric) 405 matrix%symmetry = .TRUE. 406 CASE (dbcsr_type_antisymmetric) 407 matrix%symmetry = .TRUE. 408 matrix%negate_real = .TRUE. 409 matrix%negate_imaginary = .TRUE. 410 CASE (dbcsr_type_hermitian) 411 matrix%symmetry = .TRUE. 412 matrix%negate_imaginary = .TRUE. 413 CASE (dbcsr_type_antihermitian) 414 matrix%symmetry = .TRUE. 415 matrix%negate_real = .TRUE. 416 CASE DEFAULT 417 DBCSR_ABORT("Invalid matrix type.") 418 END SELECT 419 matrix%bcsc = .FALSE. 420 matrix%local_indexing = .FALSE. 421 matrix%list_indexing = .FALSE. 422 CALL array_nullify(matrix%local_rows) 423 CALL array_nullify(matrix%global_rows) 424 CALL array_nullify(matrix%local_cols) 425 CALL array_nullify(matrix%global_cols) 426 matrix%has_local_rows = .FALSE. 427 matrix%has_global_rows = .FALSE. 428 matrix%has_local_cols = .FALSE. 429 matrix%has_global_cols = .FALSE. 430 IF (my_make_index) THEN 431 CALL dbcsr_make_index_exist(matrix) 432 ENDIF 433 matrix%valid = .TRUE. 434 CALL timestop(handle) 435 END SUBROUTINE dbcsr_create_new 436 437 SUBROUTINE dbcsr_create_template(matrix, template, name, dist, matrix_type, & 438 row_blk_size, col_blk_size, row_blk_size_obj, col_blk_size_obj, & 439 nze, data_type, & 440 data_buffer, data_memory_type, index_memory_type, & 441 max_rbs, max_cbs, & 442 row_blk_offset, col_blk_offset, & 443 reuse_arrays, mutable_work, make_index, replication_type) 444 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 445 TYPE(dbcsr_type), INTENT(IN) :: template 446 CHARACTER(len=*), INTENT(IN), OPTIONAL :: name 447 TYPE(dbcsr_distribution_obj), INTENT(IN), OPTIONAL :: dist 448 CHARACTER, INTENT(IN), OPTIONAL :: matrix_type 449 INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL, & 450 POINTER, CONTIGUOUS :: row_blk_size, col_blk_size 451 TYPE(array_i1d_obj), INTENT(IN), OPTIONAL :: row_blk_size_obj, col_blk_size_obj 452 INTEGER, INTENT(IN), OPTIONAL :: nze, data_type 453 TYPE(dbcsr_data_obj), INTENT(IN), OPTIONAL :: data_buffer 454 TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL :: data_memory_type, index_memory_type 455 INTEGER, INTENT(IN), OPTIONAL :: max_rbs, max_cbs 456 TYPE(array_i1d_obj), INTENT(IN), OPTIONAL :: row_blk_offset, col_blk_offset 457 LOGICAL, INTENT(IN), OPTIONAL :: reuse_arrays, mutable_work, make_index 458 CHARACTER, INTENT(IN), OPTIONAL :: replication_type 459 460 CHARACTER :: new_matrix_type, new_replication_type 461 CHARACTER(len=default_string_length) :: new_name 462 INTEGER :: new_data_type, new_max_cbs, new_max_rbs 463 LOGICAL :: my_make_index, new_mutable_work 464 TYPE(array_i1d_obj) :: new_col_blk_offset, new_row_blk_offset 465 TYPE(dbcsr_distribution_obj) :: new_dist 466 TYPE(dbcsr_memtype_type) :: new_data_memory_type, & 467 new_index_memory_type 468 469 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: blk_size 470 471! --------------------------------------------------------------------------- 472 473 IF (PRESENT(name)) THEN 474 new_name = TRIM(name) 475 ELSE 476 new_name = TRIM(dbcsr_name(template)) 477 ENDIF 478 IF (PRESENT(dist)) THEN 479 new_dist = dist 480 ELSE 481 new_dist = dbcsr_distribution(template) 482 ENDIF 483 IF (PRESENT(matrix_type)) THEN 484 new_matrix_type = matrix_type 485 ELSE 486 new_matrix_type = dbcsr_get_matrix_type(template) 487 ENDIF 488 ! 489 IF ((PRESENT(row_blk_size) .NEQV. PRESENT(col_blk_size)) .OR. & 490 (PRESENT(row_blk_size_obj) .NEQV. PRESENT(col_blk_size_obj))) THEN 491 DBCSR_ABORT("Both row_blk_size and col_blk_size must be provided") 492 ENDIF 493 ! 494 IF (PRESENT(max_rbs)) new_max_rbs = max_rbs 495 IF (PRESENT(row_blk_offset)) new_row_blk_offset = row_blk_offset 496 NULLIFY (blk_size) 497 IF (PRESENT(row_blk_size_obj)) THEN 498 blk_size => array_data(row_blk_size_obj) 499 ELSEIF (PRESENT(row_blk_size)) THEN 500 blk_size => row_blk_size 501 ENDIF 502 IF (ASSOCIATED(blk_size)) THEN 503 IF (.NOT. PRESENT(max_rbs)) & 504 new_max_rbs = MAXVAL(blk_size) 505 ELSE 506 IF (.NOT. PRESENT(max_rbs)) & 507 new_max_rbs = dbcsr_max_row_size(template) 508 IF (.NOT. PRESENT(row_blk_offset)) & 509 new_row_blk_offset = template%row_blk_offset 510 ENDIF 511 ! 512 IF (PRESENT(max_cbs)) new_max_cbs = max_cbs 513 IF (PRESENT(col_blk_offset)) new_col_blk_offset = col_blk_offset 514 NULLIFY (blk_size) 515 IF (PRESENT(col_blk_size_obj)) THEN 516 blk_size => array_data(col_blk_size_obj) 517 ELSEIF (PRESENT(col_blk_size)) THEN 518 blk_size => col_blk_size 519 ENDIF 520 IF (ASSOCIATED(blk_size)) THEN 521 IF (.NOT. PRESENT(max_cbs)) & 522 new_max_cbs = MAXVAL(blk_size) 523 ELSE 524 IF (.NOT. PRESENT(max_cbs)) & 525 new_max_cbs = dbcsr_max_col_size(template) 526 IF (.NOT. PRESENT(col_blk_offset)) & 527 new_col_blk_offset = template%col_blk_offset 528 ENDIF 529 IF (PRESENT(data_type)) THEN 530 new_data_type = data_type 531 ELSE 532 new_data_type = dbcsr_get_data_type(template) 533 ENDIF 534 IF (PRESENT(data_memory_type)) THEN 535 new_data_memory_type = data_memory_type 536 ELSE 537 new_data_memory_type = dbcsr_get_data_memory_type(template) 538 ENDIF 539 IF (PRESENT(index_memory_type)) THEN 540 new_index_memory_type = index_memory_type 541 ELSE 542 new_index_memory_type = dbcsr_get_index_memory_type(template) 543 ENDIF 544 IF (PRESENT(replication_type)) THEN 545 new_replication_type = replication_type 546 ELSE 547 new_replication_type = dbcsr_get_replication_type(template) 548 ENDIF 549 IF (PRESENT(mutable_work)) THEN 550 new_mutable_work = mutable_work 551 ELSE 552 new_mutable_work = dbcsr_use_mutable(template) 553 ENDIF 554 IF (PRESENT(row_blk_size_obj)) THEN 555 CALL dbcsr_create(matrix, name=new_name, dist=new_dist, & 556 matrix_type=new_matrix_type, & 557 row_blk_size_obj=row_blk_size_obj, & 558 col_blk_size_obj=col_blk_size_obj, & 559 nze=nze, & 560 data_type=new_data_type, & 561 data_buffer=data_buffer, & 562 data_memory_type=new_data_memory_type, & 563 index_memory_type=new_index_memory_type, & 564 max_rbs=new_max_rbs, max_cbs=new_max_cbs, & 565 row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset, & 566 reuse_arrays=reuse_arrays, & 567 mutable_work=new_mutable_work, & 568 make_index=make_index, & 569 replication_type=new_replication_type) 570 ELSEIF (PRESENT(row_blk_size)) THEN 571 CALL dbcsr_create(matrix, name=new_name, dist=new_dist, & 572 matrix_type=new_matrix_type, & 573 row_blk_size=row_blk_size, & 574 col_blk_size=col_blk_size, & 575 nze=nze, & 576 data_type=new_data_type, & 577 data_buffer=data_buffer, & 578 data_memory_type=new_data_memory_type, & 579 index_memory_type=new_index_memory_type, & 580 max_rbs=new_max_rbs, max_cbs=new_max_cbs, & 581 row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset, & 582 reuse_arrays=reuse_arrays, & 583 mutable_work=new_mutable_work, & 584 make_index=make_index, & 585 replication_type=new_replication_type) 586 ELSE 587 CALL dbcsr_create(matrix, name=new_name, dist=new_dist, & 588 matrix_type=new_matrix_type, & 589 row_blk_size_obj=template%row_blk_size, & 590 col_blk_size_obj=template%col_blk_size, & 591 nze=nze, & 592 data_type=new_data_type, & 593 data_buffer=data_buffer, & 594 data_memory_type=new_data_memory_type, & 595 index_memory_type=new_index_memory_type, & 596 max_rbs=new_max_rbs, max_cbs=new_max_cbs, & 597 row_blk_offset=new_row_blk_offset, col_blk_offset=new_col_blk_offset, & 598 thread_dist=dbcsr_distribution(template), & 599 reuse_arrays=reuse_arrays, & 600 mutable_work=new_mutable_work, & 601 make_index=make_index, & 602 replication_type=new_replication_type) 603 ENDIF 604 ! Copy stuff from the meta-array. These are not normally needed, 605 ! but have to be here for creating matrices from "image" matrices. 606 my_make_index = .TRUE. 607 IF (PRESENT(make_index)) my_make_index = make_index 608 IF (my_make_index) THEN 609 matrix%index(dbcsr_slot_home_prow) = template%index(dbcsr_slot_home_prow) 610 matrix%index(dbcsr_slot_home_rowi) = template%index(dbcsr_slot_home_rowi) 611 matrix%index(dbcsr_slot_home_pcol) = template%index(dbcsr_slot_home_pcol) 612 matrix%index(dbcsr_slot_home_coli) = template%index(dbcsr_slot_home_coli) 613 matrix%index(dbcsr_slot_home_vprow) = template%index(dbcsr_slot_home_vprow) 614 matrix%index(dbcsr_slot_home_vpcol) = template%index(dbcsr_slot_home_vpcol) 615 ENDIF 616 IF (PRESENT(row_blk_size) .AND. .NOT. PRESENT(row_blk_offset)) THEN 617 CALL array_release(new_row_blk_offset) 618 ENDIF 619 IF (PRESENT(col_blk_size) .AND. .NOT. PRESENT(col_blk_offset)) THEN 620 CALL array_release(new_col_blk_offset) 621 ENDIF 622 623 END SUBROUTINE dbcsr_create_template 624 625 SUBROUTINE dbcsr_init_wm(wm, data_type, nblks_guess, sizedata_guess, memory_type) 626 !! Initializes one work matrix 627 628 TYPE(dbcsr_work_type), INTENT(OUT) :: wm 629 !! initialized work matrix 630 INTEGER, INTENT(IN) :: data_type 631 INTEGER, INTENT(IN), OPTIONAL :: nblks_guess, sizedata_guess 632 !! estimated number of blocks 633 !! estimated size of data 634 TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL :: memory_type 635 636 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_init_wm' 637 INTEGER :: handle, nblks, stat 638 639! --------------------------------------------------------------------------- 640 641 IF (careful_mod) CALL timeset(routineN, handle) 642 wm%lastblk = 0 643 wm%datasize = 0 644 ! Index 645 IF (PRESENT(nblks_guess)) THEN 646 nblks = nblks_guess 647 IF (nblks_guess < 0) & 648 DBCSR_ABORT("Can not have negative block count.") 649 ALLOCATE (wm%row_i(nblks), stat=stat) 650 IF (stat /= 0) DBCSR_ABORT("wm%row_i") 651 ALLOCATE (wm%col_i(nblks), stat=stat) 652 IF (stat /= 0) DBCSR_ABORT("wm%col_i") 653 ALLOCATE (wm%blk_p(nblks), stat=stat) 654 IF (stat /= 0) DBCSR_ABORT("wm%blk_p") 655 ELSE 656 NULLIFY (wm%row_i, wm%col_i, wm%blk_p) 657 !nblks = CEILING (REAL (matrix%nblkrows_local * matrix%nblkcols_local)& 658 ! / REAL (dbcsr_mp_numnodes (dbcsr_distribution_mp (matrix%dist)))) 659 ENDIF 660 ! Data 661 CALL dbcsr_data_init(wm%data_area) 662 IF (PRESENT(sizedata_guess)) THEN 663 IF (sizedata_guess < 0) & 664 DBCSR_ABORT("Can not have negative data size.") 665 CALL dbcsr_data_new(wm%data_area, data_type, & 666 data_size=sizedata_guess, memory_type=memory_type) 667 ELSE 668 CALL dbcsr_data_new(wm%data_area, data_type, memory_type=memory_type) 669 ENDIF 670 CALL dbcsr_mutable_init(wm%mutable) 671 IF (careful_mod) CALL timestop(handle) 672 END SUBROUTINE dbcsr_init_wm 673 674 SUBROUTINE dbcsr_work_create(matrix, nblks_guess, sizedata_guess, n, & 675 work_mutable, memory_type) 676 !! Creates a the working matrix(es) for a DBCSR matrix. 677 678 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 679 !! new matrix 680 INTEGER, INTENT(IN), OPTIONAL :: nblks_guess, sizedata_guess, n 681 !! estimated number of blocks 682 !! estimated size of data 683 !! number work matrices to create, default is 1 684 LOGICAL, INTENT(in), OPTIONAL :: work_mutable 685 !! use mutable work type, default is what was specified in create 686 TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL :: memory_type 687 688 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_work_create' 689 690 INTEGER :: handle, iw, nw, ow 691 LOGICAL :: wms_new, wms_realloc 692 TYPE(dbcsr_work_type), DIMENSION(:), POINTER :: wms 693 694! --------------------------------------------------------------------------- 695 696 CALL timeset(routineN, handle) 697 IF (PRESENT(n)) THEN 698 nw = n 699 ELSE 700 nw = 1 701!$ IF (omp_in_parallel()) THEN 702!$ nw = omp_get_num_threads() 703!$ ELSE 704!$ nw = omp_get_max_threads() 705!$ ENDIF 706 ENDIF 707!$OMP MASTER 708 wms_new = .NOT. ASSOCIATED(matrix%wms) 709 wms_realloc = .FALSE. 710 IF (ASSOCIATED(matrix%wms)) THEN 711 ow = SIZE(matrix%wms) 712 IF (ow .LT. nw) & 713 DBCSR_WARN("Number of work matrices less than threads.") 714 IF (ow .LT. nw) wms_realloc = .TRUE. 715 ENDIF 716 IF (PRESENT(work_mutable)) THEN 717 matrix%work_mutable = work_mutable 718 ENDIF 719 IF (wms_realloc) THEN 720 ALLOCATE (wms(nw)) 721 wms(1:ow) = matrix%wms(1:ow) 722 DEALLOCATE (matrix%wms) 723 matrix%wms => wms 724 DO iw = ow + 1, nw 725 CALL dbcsr_init_wm(matrix%wms(iw), matrix%data_type, & 726 nblks_guess=nblks_guess, sizedata_guess=sizedata_guess, & 727 memory_type=memory_type) 728 IF (matrix%work_mutable) & 729 CALL dbcsr_mutable_new(matrix%wms(iw)%mutable, & 730 dbcsr_get_data_type(matrix)) 731 END DO 732 ENDIF 733 IF (wms_new) THEN 734 ALLOCATE (matrix%wms(nw)) 735 DO iw = 1, nw 736 CALL dbcsr_init_wm(matrix%wms(iw), matrix%data_type, & 737 nblks_guess=nblks_guess, sizedata_guess=sizedata_guess, & 738 memory_type=memory_type) 739 IF (matrix%work_mutable) & 740 CALL dbcsr_mutable_new(matrix%wms(iw)%mutable, & 741 dbcsr_get_data_type(matrix)) 742 END DO 743 ENDIF 744 matrix%valid = .FALSE. 745!$OMP END MASTER 746 CALL timestop(handle) 747 END SUBROUTINE dbcsr_work_create 748 749 SUBROUTINE dbcsr_finalize(matrix, reshuffle) 750 !! Creates the final dbcsr_type matrix from the working matrix. 751 !! Work matrices (array or tree-based) are merged into the base DBCSR matrix. 752 !! If a matrix is marked as having a valid index, then nothing is done. 753 !! Deleted blocks are pruned from the index. 754 755 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 756 !! final matrix 757 LOGICAL, INTENT(IN), OPTIONAL :: reshuffle 758 !! whether the data should be reshuffled, default is false 759 760 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_finalize' 761 LOGICAL, PARAMETER :: dbg = .FALSE. 762 763 INTEGER :: handle, i, nblks, nwms, start_offset 764 INTEGER, ALLOCATABLE, DIMENSION(:) :: empty_row_p 765 INTEGER, DIMENSION(:), POINTER, SAVE :: old_blk_p, old_col_i, old_row_p 766 LOGICAL :: can_quick, fake_row_p, sort_data, spawn 767 768! --------------------------------------------------------------------------- 769 770 CALL timeset(routineN, handle) 771 772 NULLIFY (old_blk_p, old_col_i, old_row_p) 773 774!$OMP BARRIER 775 ! If the matrix is not marked as dirty then skip the work. 776 IF (dbcsr_valid_index(matrix)) THEN 777 !"No need to finalize a valid matrix, skipping." 778 ! 779 ! A matrix with a valid index should not have associated work 780 ! arrays. This may happen when this routine is called on a 781 ! matrix that was not changed. 782!$OMP BARRIER 783!$OMP MASTER 784 IF (ASSOCIATED(matrix%wms)) & 785 CALL dbcsr_work_destroy_all(matrix) 786 matrix%valid = .TRUE. 787!$OMP END MASTER 788!$OMP BARRIER 789 CALL timestop(handle) 790 RETURN 791 ENDIF 792 ! 793 ! If possible, data copying is avoided. 794 IF (PRESENT(reshuffle)) THEN 795 sort_data = reshuffle 796 ELSE 797 sort_data = .FALSE. 798 ENDIF 799 ! 800 ! Now make sure that a valid row_p exists. Also clear the row_p if 801 ! the matrix is declared to have 0 blocks. 802!$OMP MASTER 803 fake_row_p = .NOT. ASSOCIATED(matrix%row_p) 804 IF (ASSOCIATED(matrix%row_p)) THEN 805 fake_row_p = SIZE(matrix%row_p) .LE. 1 806 ENDIF 807 fake_row_p = fake_row_p .OR. matrix%nblks .EQ. 0 808!$OMP END MASTER 809 ! 810 ! See where data will be appended in the main data 811 ! area. Alternatively, set to the start if the matrix is declared 812 ! to have no data. (This value is ignored if reshuffle is true 813 ! because the main data area is always new.) 814 start_offset = matrix%nze 815 i = dbcsr_get_data_size_used(matrix) 816!$OMP MASTER 817 matrix%nze = 0 818!$OMP END MASTER 819!$OMP BARRIER 820!$OMP ATOMIC 821 matrix%nze = matrix%nze + i 822!$OMP BARRIER 823 IF (dbg) THEN 824 WRITE (*, *) routineN//" sizes", matrix%nze, i, & 825 dbcsr_data_get_size_referenced(matrix%data_area), & 826 dbcsr_data_get_size(matrix%data_area) 827 ENDIF 828 IF (.FALSE. .AND. dbcsr_data_get_size_referenced(matrix%data_area) .NE. & 829 matrix%nze) THEN 830 IF (matrix%nze .NE. dbcsr_data_get_size_referenced(matrix%data_area)) & 831 DBCSR_WARN("Should reshuffle.") 832 IF (ASSOCIATED(matrix%wms)) THEN 833 sort_data = .NOT. dbcsr_wm_use_mutable(matrix%wms(1)) 834 ENDIF 835 ENDIF 836 IF (sort_data .AND. matrix%nze .GT. 0) THEN 837 CALL dbcsr_add_wm_from_matrix(matrix) 838 matrix%nze = 0 839!$OMP MASTER 840 fake_row_p = .TRUE. 841!$OMP END MASTER 842 ENDIF 843 start_offset = dbcsr_data_get_size_referenced(matrix%data_area) + 1 844 IF (matrix%nze .EQ. 0) start_offset = 1 845!$OMP MASTER 846 matrix%index(dbcsr_slot_nze) = matrix%nze 847 IF (fake_row_p) THEN 848 ALLOCATE (empty_row_p(matrix%nblkrows_total + 1)) 849 empty_row_p(:) = 0 850 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, & 851 DATA=empty_row_p, extra=0) 852 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, & 853 reservation=0) 854 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & 855 reservation=0) 856 CALL dbcsr_repoint_index(matrix) 857 ENDIF 858!$OMP END MASTER 859 ! 860!$OMP BARRIER 861 can_quick = can_quickly_finalize(matrix) 862!$OMP BARRIER 863 ! If the matrix, work matrices, and environment fit several 864 ! criteria, then a quick O(1) finalization is performed. 865 IF (can_quick .AND. .NOT. sort_data) THEN 866 CALL quick_finalize(matrix) 867 ELSE 868 ! 869!$OMP MASTER 870 ! 871 ! Create work matrices if not yet existing 872 IF (.NOT. ASSOCIATED(matrix%wms)) THEN 873 nwms = 1 874!$ nwms = omp_get_num_threads() 875 CALL dbcsr_work_create(matrix, n=nwms) 876 ENDIF 877!$OMP END MASTER 878!$OMP BARRIER 879 ! 880 ! Ensure index arrays at least exist. 881!$OMP DO SCHEDULE (STATIC, 1) 882 DO i = 1, SIZE(matrix%wms) 883 IF (.NOT. ASSOCIATED(matrix%wms(i)%row_i)) THEN 884 CALL ensure_array_size(matrix%wms(i)%row_i, ub=0) 885 ENDIF 886 IF (.NOT. ASSOCIATED(matrix%wms(i)%col_i)) THEN 887 CALL ensure_array_size(matrix%wms(i)%col_i, ub=0) 888 ENDIF 889 IF (.NOT. ASSOCIATED(matrix%wms(i)%blk_p)) THEN 890 CALL ensure_array_size(matrix%wms(i)%blk_p, ub=0) 891 ENDIF 892 ENDDO 893!$OMP ENDDO 894 ! 895 ! Check for deleted blocks 896!$OMP MASTER 897 nblks = matrix%row_p(matrix%nblkrows_total + 1) 898 IF (ANY(matrix%blk_p(1:nblks) .EQ. 0)) THEN 899 CALL dbcsr_index_prune_deleted(matrix) 900 ENDIF 901 old_row_p => matrix%row_p 902 old_col_i => matrix%col_i 903 old_blk_p => matrix%blk_p 904!$OMP END MASTER 905 ! 906!$OMP BARRIER 907 ! Check to see if we will need to create a parallel environment 908 ! (needed when there are multiple work matrices but we are not 909 ! in an OpenMP parallel section.) 910 ! 911 ! A parallel section is created is used when the matrix has 912 ! more work matrices. It's a shortcut when the finalize is 913 ! called from a non-parallel environment whereas the matrix was 914 ! built/modified in a parallel environment 915 nwms = SIZE(matrix%wms) 916 spawn = .FALSE. 917!$ IF (.NOT. OMP_IN_PARALLEL()) THEN 918!$ IF (nwms .GT. 1) spawn = .TRUE. 919!$ ENDIF 920 IF (spawn) THEN 921!$OMP PARALLEL IF (spawn) & 922!$OMP DEFAULT (NONE) & 923!$OMP SHARED (matrix, old_row_p, old_col_i, old_blk_p,& 924!$OMP start_offset, sort_data) 925 CALL dbcsr_merge_all(matrix, & 926 old_row_p, old_col_i, old_blk_p, & 927 sort_data=sort_data) 928!$OMP END PARALLEL 929 ELSE 930 CALL dbcsr_merge_all(matrix, & 931 old_row_p, old_col_i, old_blk_p, & 932 sort_data=sort_data) 933 ENDIF 934 ENDIF 935!$OMP BARRIER 936!$OMP MASTER 937 ! Clean up. 938 IF (ASSOCIATED(matrix%wms)) THEN 939 CALL dbcsr_work_destroy_all(matrix) 940 ENDIF 941 matrix%valid = .TRUE. 942!$OMP END MASTER 943!$OMP BARRIER 944 IF (dbg) THEN 945!$OMP SINGLE 946 CALL dbcsr_verify_matrix(matrix) 947!$OMP END SINGLE 948 ENDIF 949!$OMP MASTER 950 IF (fake_row_p) THEN 951 DEALLOCATE (empty_row_p) 952 ENDIF 953!$OMP END MASTER 954!$OMP BARRIER 955 CALL timestop(handle) 956 END SUBROUTINE dbcsr_finalize 957 958 SUBROUTINE dbcsr_special_finalize(matrix, reshuffle) 959 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 960 LOGICAL, INTENT(IN), OPTIONAL :: reshuffle 961 962 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_special_finalize' 963 LOGICAL, PARAMETER :: dbg = .FALSE. 964 965 INTEGER :: handle 966 LOGICAL :: can_quick, sort_data 967 968! --------------------------------------------------------------------------- 969 970 CALL timeset(routineN, handle) 971 972 IF (matrix%nblks .NE. 0) & 973 DBCSR_ABORT("Optimized finalize requires empty matrix.") 974 IF (dbcsr_valid_index(matrix)) & 975 DBCSR_ABORT("Optimized finalize requires invalid matrix.") 976 IF (.NOT. ASSOCIATED(matrix%wms)) & 977 DBCSR_ABORT("Optimized finalize requires work matrices exist.") 978 IF (SIZE(matrix%wms) .NE. 1) & 979 DBCSR_ABORT("Optimized finalize requires single work matrix") 980 981 ! 982 ! If possible, data copying is avoided. 983 IF (PRESENT(reshuffle)) THEN 984 sort_data = reshuffle 985 ELSE 986 sort_data = .FALSE. 987 ENDIF 988!$OMP BARRIER 989 can_quick = can_quickly_finalize(matrix) 990!$OMP BARRIER 991 992 ! If the matrix, work matrices, and environment fit several 993 ! criteria, then a quick O(1) finalization is performed. 994 IF (can_quick .AND. .NOT. sort_data) THEN 995 CALL quick_finalize(matrix) 996 ELSE 997 IF (.NOT. sort_data) & 998 DBCSR_ABORT("merge_single_wm only supports data sorting") 999 ! 1000 ! Ensure needed index arrays at least exist. 1001!$OMP MASTER 1002 ! 1003 IF (.NOT. ASSOCIATED(matrix%wms(1)%row_i)) THEN 1004 CALL ensure_array_size(matrix%wms(1)%row_i, ub=0) 1005 ENDIF 1006 IF (.NOT. ASSOCIATED(matrix%wms(1)%col_i)) THEN 1007 CALL ensure_array_size(matrix%wms(1)%col_i, ub=0) 1008 ENDIF 1009 IF (.NOT. ASSOCIATED(matrix%wms(1)%blk_p)) THEN 1010 CALL ensure_array_size(matrix%wms(1)%blk_p, ub=0) 1011 ENDIF 1012 ! 1013!$OMP END MASTER 1014!$OMP BARRIER 1015 ! 1016!$OMP PARALLEL DEFAULT (NONE), SHARED(matrix) 1017 CALL dbcsr_merge_single_wm(matrix) 1018!$OMP END PARALLEL 1019 1020 ENDIF 1021 1022!$OMP MASTER 1023 ! Clean up. 1024 IF (ASSOCIATED(matrix%wms)) THEN 1025 CALL dbcsr_work_destroy_all(matrix) 1026 ENDIF 1027 matrix%valid = .TRUE. 1028!$OMP END MASTER 1029!$OMP BARRIER 1030 IF (dbg) THEN 1031!$OMP SINGLE 1032 CALL dbcsr_verify_matrix(matrix) 1033!$OMP END SINGLE 1034 ENDIF 1035!$OMP BARRIER 1036 CALL timestop(handle) 1037 END SUBROUTINE dbcsr_special_finalize 1038 1039 FUNCTION can_quickly_finalize(matrix) RESULT(quick) 1040 !! Checks whether the matrix can be finalized with minimal copying. 1041 1042 TYPE(dbcsr_type), INTENT(IN) :: matrix 1043 !! matrix to check 1044 LOGICAL :: quick 1045 !! whether matrix can be quickly finalized 1046 1047! --------------------------------------------------------------------------- 1048 1049 IF (ASSOCIATED(matrix%wms)) THEN 1050 quick = matrix%nblks .EQ. 0 1051 quick = quick .AND. SIZE(matrix%wms) .EQ. 1 .AND. & 1052 .NOT. dbcsr_wm_use_mutable(matrix%wms(1)) 1053 IF (quick) THEN 1054 quick = quick .AND. & 1055 dbcsr_memtype_equal( & 1056 dbcsr_data_get_memory_type(matrix%wms(1)%data_area), & 1057 dbcsr_data_get_memory_type(matrix%data_area)) 1058 quick = quick .AND. & 1059 ASSOCIATED(matrix%wms(1)%row_i) 1060 quick = quick .AND. & 1061 (matrix%wms(1)%datasize_after_filtering .LT. 0 .OR. & 1062 matrix%wms(1)%datasize .EQ. matrix%wms(1)%datasize_after_filtering) 1063 ENDIF 1064 ELSE 1065 quick = .FALSE. 1066 ENDIF 1067 END FUNCTION can_quickly_finalize 1068 1069 SUBROUTINE quick_finalize(matrix) 1070 !! Performs quick finalization of matrix 1071 !! The data area from the work matrix is accepted as the new matrix's data 1072 !! area and the index is built from the work matrix. 1073 1074 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 1075 !! matrix to finalize 1076 1077 CHARACTER(len=*), PARAMETER :: routineN = 'quick_finalize' 1078 INTEGER :: error_handle, nblks, nrows 1079 1080! --------------------------------------------------------------------------- 1081 1082 CALL timeset(routineN, error_handle) 1083!$OMP SECTIONS 1084!$OMP SECTION 1085 nblks = matrix%wms(1)%lastblk 1086 nrows = matrix%nblkrows_total 1087 CALL dbcsr_sort_indices(nblks, & 1088 matrix%wms(1)%row_i, & 1089 matrix%wms(1)%col_i, & 1090 matrix%wms(1)%blk_p) 1091 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p) 1092 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i) 1093 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p) 1094 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, & 1095 reservation=nrows + 1, extra=2*nblks) 1096 CALL dbcsr_make_dbcsr_index(matrix%row_p, matrix%wms(1)%row_i, & 1097 nrows, nblks) 1098 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, & 1099 DATA=matrix%wms(1)%col_i(1:nblks)) 1100 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & 1101 DATA=matrix%wms(1)%blk_p(1:nblks)) 1102 matrix%nblks = nblks 1103 matrix%nze = matrix%wms(1)%datasize 1104 matrix%index(dbcsr_slot_nblks) = nblks 1105 matrix%index(dbcsr_slot_nze) = matrix%wms(1)%datasize 1106 CALL dbcsr_repoint_index(matrix) 1107!$OMP SECTION 1108 CALL dbcsr_switch_data_area(matrix, matrix%wms(1)%data_area) 1109!$OMP END SECTIONS 1110 CALL timestop(error_handle) 1111 END SUBROUTINE quick_finalize 1112 1113 SUBROUTINE dbcsr_add_wm_from_matrix(matrix, limits) 1114 !! Creates a work matrix from the data present in a finalized matrix. 1115 1116 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 1117 !! DBCSR matrix 1118 INTEGER, DIMENSION(4), INTENT(IN), OPTIONAL :: limits 1119 !! the limits to use for copying 1120 1121 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_add_wm_from_matrix' 1122 1123 INTEGER :: handle, ithread, nthreads, nwms, & 1124 old_nwms, size_used 1125 LOGICAL :: preexists 1126 1127! --------------------------------------------------------------------------- 1128 1129 CALL timeset(routineN, handle) 1130!$OMP BARRIER 1131 preexists = ASSOCIATED(matrix%wms) 1132 IF (preexists) THEN 1133 old_nwms = SIZE(matrix%wms) 1134 IF (old_nwms .EQ. 0) & 1135 DBCSR_WARN("Nonexisting work matrices?!") 1136 ELSE 1137 old_nwms = 0 1138 ENDIF 1139 nthreads = 1; ithread = 0 1140!$ nthreads = OMP_GET_NUM_THREADS(); ithread = OMP_GET_THREAD_NUM() 1141 IF (nthreads .GT. 1) THEN 1142 IF (old_nwms /= nthreads .AND. old_nwms /= 0) & 1143 DBCSR_ABORT("Number of work matrices and threads do not match") 1144 ENDIF 1145 nwms = MAX(1, old_nwms) 1146!$ nwms = MAX(nwms, nthreads) 1147!$OMP BARRIER 1148!$OMP MASTER 1149 IF (.NOT. ASSOCIATED(matrix%wms)) THEN 1150 CALL dbcsr_work_create(matrix, & 1151 INT(matrix%nblks*default_resize_factor/nwms), & 1152 INT(matrix%nze*default_resize_factor/nwms), & 1153 n=nwms, work_mutable=.FALSE.) 1154 ENDIF 1155!$OMP END MASTER 1156!$OMP BARRIER 1157 size_used = matrix%nze 1158 CALL dbcsr_fill_wm_from_matrix(matrix%wms, matrix, size_used, & 1159 limits=limits) 1160!$OMP BARRIER 1161 CALL timestop(handle) 1162 END SUBROUTINE dbcsr_add_wm_from_matrix 1163 1164 SUBROUTINE dbcsr_fill_wm_from_matrix(wm, matrix, size_used, limits) 1165 !! Fills index and data of the work matrix from the 1166 !! previously-finalized one. 1167 !! 1168 !! limits 1169 !! The limits is a 4-tuple 1170 !! (lower_row, higher_row, lower_column, higher_column). 1171 1172 TYPE(dbcsr_work_type), DIMENSION(:), INTENT(INOUT) :: wm 1173 !! the work matrix to fill 1174 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 1175 !! DBCSR matrix 1176 INTEGER, INTENT(IN) :: size_used 1177 INTEGER, DIMENSION(4), INTENT(IN), OPTIONAL :: limits 1178 !! only fills blocks within this range 1179 1180 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_fill_wm_from_matrix' 1181 1182 INTEGER :: blk, blk_p, col, handle, ithread, & 1183 nthreads, nwms, nze, row, wblk_p, & 1184 which_wm, wm_first, wm_last 1185 LOGICAL :: careful, limit, mt, tr 1186 LOGICAL, SAVE :: mutable 1187 TYPE(dbcsr_data_obj) :: data_block 1188 TYPE(dbcsr_iterator) :: iter 1189 1190! --------------------------------------------------------------------------- 1191 1192 CALL timeset(routineN, handle) 1193 nwms = SIZE(matrix%wms) 1194 mt = .FALSE. 1195!$ IF (nwms .GT. 1) mt = omp_get_num_threads() .GT. 1 1196 ithread = 0; nthreads = 1 1197!$ ithread = omp_get_thread_num() 1198!$ nthreads = omp_get_num_threads() 1199 limit = PRESENT(limits) 1200 careful = size_used + size_used/8 & 1201 .LT. dbcsr_data_get_size_referenced(matrix%data_area) 1202 CALL dbcsr_data_init(data_block) 1203 CALL dbcsr_data_new(data_block, dbcsr_data_get_type(matrix%data_area)) 1204 IF (mt) THEN 1205 wm_first = ithread + 1 1206 wm_last = ithread + 1 1207 ELSE 1208 wm_first = 1 1209 wm_last = nwms 1210 ENDIF 1211 ! Prepares the work matrices to accept the main data. 1212!$OMP MASTER 1213 mutable = .FALSE. 1214 DO which_wm = 1, nwms 1215 mutable = mutable .OR. dbcsr_wm_use_mutable(wm(which_wm)) 1216 ENDDO 1217!$OMP END MASTER 1218!$OMP BARRIER 1219 DO which_wm = wm_first, wm_last 1220 IF (dbcsr_wm_use_mutable(wm(which_wm))) & 1221 DBCSR_ABORT("Adding main matrix into mutable not supported.") 1222 IF (mutable) THEN 1223 IF (.NOT. dbcsr_mutable_instantiated(wm(which_wm)%mutable)) THEN 1224 CALL dbcsr_mutable_new(wm(which_wm)%mutable, matrix%data_type) 1225 ENDIF 1226 ELSE 1227 ! We don't know how much data we'll get so we have to be generous. 1228 CALL dbcsr_data_ensure_size(wm(which_wm)%data_area, & 1229 size_used/nwms) 1230 CALL dbcsr_data_set_size_referenced(wm(which_wm)%data_area, 0) 1231 ENDIF 1232 ENDDO 1233 ! Now copy the data 1234 CALL dbcsr_iterator_start(iter, matrix, shared=mt, & 1235 contiguous_pointers=.FALSE., read_only=.TRUE.) 1236 DO WHILE (dbcsr_iterator_blocks_left(iter)) 1237 CALL dbcsr_iterator_next_block(iter, row, col, data_block, & 1238 transposed=tr, block_number=blk) 1239 IF (limit) THEN 1240 IF (.NOT. within_limits(row, col, limits)) CYCLE 1241 ENDIF 1242 blk_p = matrix%blk_p(blk) 1243 which_wm = ithread + 1 1244 wblk_p = SIGN(wm(which_wm)%datasize + 1, blk_p) 1245 nze = dbcsr_data_get_size(data_block) 1246 IF (mt .OR. limit .OR. careful .OR. mutable) THEN 1247 ! The data gets copied block by block so the block pointers 1248 ! are ordered accordingly. 1249 IF (.NOT. mutable) THEN 1250 CALL add_work_coordinate(wm(which_wm), row, col, wblk_p) 1251 CALL dbcsr_data_ensure_size(wm(which_wm)%data_area, & 1252 ABS(wblk_p) + nze - 1, factor=default_resize_factor) 1253 CALL dbcsr_data_set_size_referenced(wm(which_wm)%data_area, & 1254 ABS(wblk_p) + nze - 1) 1255 CALL dbcsr_data_set(wm(which_wm)%data_area, & 1256 lb=ABS(wblk_p), & 1257 data_size=nze, & 1258 src=data_block, source_lb=1) 1259 ENDIF 1260 ELSE 1261 ! The data gets copied all at once so the block pointers 1262 ! should remain the same as they were. 1263 CALL add_work_coordinate(wm(which_wm), row, col, blk_p) 1264 ENDIF 1265 IF (.NOT. mutable) & 1266 wm(which_wm)%datasize = wm(which_wm)%datasize + nze 1267 ENDDO 1268 CALL dbcsr_iterator_stop(iter) 1269 CALL dbcsr_data_clear_pointer(data_block) 1270 CALL dbcsr_data_release(data_block) 1271 ! Copy all blocks at once 1272 IF (.NOT. mt .AND. .NOT. limit .AND. .NOT. careful .AND. .NOT. mutable) THEN 1273 DO which_wm = 1, nwms 1274 CALL dbcsr_data_ensure_size(wm(which_wm)%data_area, & 1275 dbcsr_data_get_size_referenced(matrix%data_area)) 1276 CALL dbcsr_data_copyall(wm(which_wm)%data_area, matrix%data_area) 1277 wm(which_wm)%datasize = size_used 1278 ENDDO 1279 ENDIF 1280 CALL timestop(handle) 1281 END SUBROUTINE dbcsr_fill_wm_from_matrix 1282 1283 PURE FUNCTION within_limits(row, column, limits) 1284 !! Checks whether a point is within bounds 1285 !! \return whether the point is within the bounds 1286 1287 INTEGER, INTENT(IN) :: row, column 1288 !! point to check 1289 !! point to check 1290 INTEGER, DIMENSION(4), INTENT(IN) :: limits 1291 !! limits (low_row, high_row, low_col, high_col) 1292 LOGICAL :: within_limits 1293 1294 within_limits = row .GE. limits(1) .AND. row .LE. limits(2) .AND. & 1295 column .GE. limits(3) .AND. column .LE. limits(4) 1296 END FUNCTION within_limits 1297 1298 SUBROUTINE dbcsr_work_destroy(wm) 1299 !! Deallocates and destroys a work matrix. 1300 1301 TYPE(dbcsr_work_type), INTENT(INOUT) :: wm 1302 !! work matrix 1303 1304 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_work_destroy' 1305 1306 INTEGER :: handle 1307 1308! --------------------------------------------------------------------------- 1309 1310 IF (careful_mod) CALL timeset(routineN, handle) 1311 1312 IF (ASSOCIATED(wm%row_i)) THEN 1313 DEALLOCATE (wm%row_i) 1314 ENDIF 1315 IF (ASSOCIATED(wm%col_i)) THEN 1316 DEALLOCATE (wm%col_i) 1317 ENDIF 1318 IF (ASSOCIATED(wm%blk_p)) THEN 1319 DEALLOCATE (wm%blk_p) 1320 ENDIF 1321 CALL dbcsr_data_release(wm%data_area) 1322 CALL dbcsr_mutable_destroy(wm%mutable) 1323 1324 IF (careful_mod) CALL timestop(handle) 1325 1326 END SUBROUTINE dbcsr_work_destroy 1327 1328 SUBROUTINE dbcsr_work_destroy_all(m) 1329 !! Deallocates and destroys a work matrix. 1330 TYPE(dbcsr_type), INTENT(INOUT) :: m 1331 1332 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_work_destroy_all' 1333 1334 INTEGER :: handle, i 1335 1336! --------------------------------------------------------------------------- 1337 1338 CALL timeset(routineN, handle) 1339 1340 IF (.NOT. ASSOCIATED(m%wms)) & 1341 DBCSR_WARN("Want to destroy nonexisting work matrices.") 1342 IF (ASSOCIATED(m%wms)) THEN 1343 DO i = 1, SIZE(m%wms) 1344 CALL dbcsr_work_destroy(m%wms(i)) 1345 ENDDO 1346 DEALLOCATE (m%wms) 1347 ENDIF 1348 1349 CALL timestop(handle) 1350 1351 END SUBROUTINE dbcsr_work_destroy_all 1352 1353 SUBROUTINE add_work_coordinate(matrix, row, col, blk, index) 1354 !! Adds a coordinate (or other data) into a work matrix's row_i and 1355 !! col_i arrays and returns its position. 1356 !! @note Uses the matrix%lastblk to keep track of the current position. 1357 1358 TYPE(dbcsr_work_type), INTENT(INOUT) :: matrix 1359 !! work matrix 1360 INTEGER, INTENT(IN) :: row, col 1361 !! row data to add 1362 !! col data to add 1363 INTEGER, INTENT(IN), OPTIONAL :: blk 1364 !! block pointer to add 1365 INTEGER, INTENT(OUT), OPTIONAL :: index 1366 !! saved position 1367 1368 CHARACTER(len=*), PARAMETER :: routineN = 'add_work_coordinate' 1369 1370 INTEGER :: handle 1371 1372! --------------------------------------------------------------------------- 1373 1374 IF (careful_mod) CALL timeset(routineN, handle) 1375 matrix%lastblk = matrix%lastblk + 1 1376 CALL ensure_array_size(matrix%row_i, ub=matrix%lastblk, & 1377 factor=default_resize_factor) 1378 CALL ensure_array_size(matrix%col_i, ub=matrix%lastblk, & 1379 factor=default_resize_factor) 1380 matrix%row_i(matrix%lastblk) = row 1381 matrix%col_i(matrix%lastblk) = col 1382 IF (PRESENT(blk)) THEN 1383 CALL ensure_array_size(matrix%blk_p, ub=matrix%lastblk, & 1384 factor=default_resize_factor) 1385 matrix%blk_p(matrix%lastblk) = blk 1386 ENDIF 1387 IF (PRESENT(index)) index = matrix%lastblk 1388 IF (careful_mod) CALL timestop(handle) 1389 END SUBROUTINE add_work_coordinate 1390 1391 SUBROUTINE dbcsr_merge_all(matrix, old_row_p, old_col_i, old_blk_p, & 1392 sort_data) 1393 !! Merge data from matrix and work matrices into the final matrix. 1394 1395 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 1396 !! matrix to work on 1397 INTEGER, DIMENSION(*), INTENT(IN) :: old_row_p, old_col_i, old_blk_p 1398 LOGICAL, INTENT(IN) :: sort_data 1399 !! whether data will be fully sorted 1400 1401 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_merge_all' 1402 LOGICAL, PARAMETER :: dbg = .FALSE. 1403 1404 INTEGER :: handle, my_row_count, nblks, & 1405 nrows, nwms, row, t, & 1406 which_wm, wm_datasize 1407 INTEGER, ALLOCATABLE, DIMENSION(:), SAVE :: all_data_offsets, all_data_sizes, & 1408 new_blk_p_sorted, new_blk_sizes, & 1409 new_row_p 1410 INTEGER, ALLOCATABLE, DIMENSION(:), SAVE, TARGET :: blk_d, new_blk_p, new_col_i 1411 INTEGER, DIMENSION(:), CONTIGUOUS, POINTER :: my_row_p 1412 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS, SAVE :: cbs, rbs 1413 INTEGER, SAVE :: max_row_count, new_nblks = 0, new_nze = 0 1414 TYPE(dbcsr_data_obj), ALLOCATABLE, DIMENSION(:), & 1415 SAVE :: all_data_areas 1416 TYPE(dbcsr_work_type), POINTER :: wm 1417 TYPE(i_array_p), DIMENSION(:), POINTER, SAVE :: all_blk_p, all_col_i, all_row_p 1418 1419! --------------------------------------------------------------------------- 1420 1421 CALL timeset(routineN, handle) 1422 1423 ! Outline: 1424 ! Each thread has a work matrix. These must be merged and made 1425 ! into a new index. If sort_data is False, then the data areas 1426 ! are simply appended. This is probably quicker but the data 1427 ! blocks are not in order and the data size may expand beyond 1428 ! what is needed. If sort_data is True, then data blocks are 1429 ! sorted in order. 1430 1431 IF (dbg) WRITE (*, *) routineN//" starting, sort?", sort_data 1432!$OMP BARRIER 1433 nrows = matrix%nblkrows_total 1434 nwms = SIZE(matrix%wms) 1435!$ IF (nwms /= OMP_GET_NUM_THREADS()) & 1436!$ DBCSR_ABORT("Number of threads does not match number of work matrices.") 1437 which_wm = 1 1438!$ which_wm = OMP_GET_THREAD_NUM() + 1 1439 wm => matrix%wms(which_wm) 1440 ! 1441 ! Currently B-tree-based work matrices are converted to array form. 1442 IF (dbcsr_wm_use_mutable(wm)) THEN 1443 SELECT CASE (wm%mutable%m%data_type) 1444 CASE (dbcsr_type_real_4) 1445 CALL tree_to_linear_s(wm) 1446 CASE (dbcsr_type_real_8) 1447 CALL tree_to_linear_d(wm) 1448 CASE (dbcsr_type_complex_4) 1449 CALL tree_to_linear_c(wm) 1450 CASE (dbcsr_type_complex_8) 1451 CALL tree_to_linear_z(wm) 1452 CASE default 1453 DBCSR_ABORT("Invalid data type") 1454 END SELECT 1455 ENDIF 1456 ! 1457 ! Initializations and some counts from the threads are summed. 1458 ! 1459!$OMP MASTER 1460 new_nblks = old_row_p(nrows + 1) 1461 new_nze = matrix%nze 1462 ALLOCATE (all_row_p(nwms)) 1463 ALLOCATE (all_col_i(nwms)) 1464 ALLOCATE (all_blk_p(nwms)) 1465 ALLOCATE (all_data_sizes(0:nwms)) 1466 ALLOCATE (all_data_offsets(nwms)) 1467 IF (sort_data) ALLOCATE (all_data_areas(0:nwms)) 1468 IF (sort_data) THEN 1469 CALL dbcsr_data_init(all_data_areas(0)) 1470 all_data_areas(0) = matrix%data_area 1471!$OMP CRITICAL (crit_data) 1472 CALL dbcsr_data_hold(all_data_areas(0)) 1473!$OMP END CRITICAL (crit_data) 1474 ENDIF 1475 ! The following is valid because the data actually referenced 1476 ! by blocks is explicitly calculated in dbcsr_finalize. 1477 all_data_sizes(0) = matrix%nze 1478 rbs => array_data(matrix%row_blk_size) 1479 cbs => array_data(matrix%col_blk_size) 1480!$OMP END MASTER 1481 ! 1482!$OMP BARRIER 1483 IF (sort_data .AND. wm%datasize_after_filtering .GE. 0) THEN 1484 wm_datasize = wm%datasize_after_filtering 1485 ELSE 1486 wm_datasize = wm%datasize 1487 ENDIF 1488 nblks = wm%lastblk 1489!$OMP ATOMIC 1490 new_nblks = new_nblks + nblks 1491!$OMP ATOMIC 1492 new_nze = new_nze + wm_datasize 1493!$OMP BARRIER 1494 ! 1495!$OMP MASTER 1496 ALLOCATE (new_row_p(nrows + 1)) 1497 ALLOCATE (new_col_i(new_nblks)) 1498 ALLOCATE (new_blk_p(new_nblks)) 1499 IF (sort_data) THEN 1500 ALLOCATE (blk_d(new_nblks)) 1501 ELSE 1502 ALLOCATE (blk_d(1)) 1503 ENDIF 1504!$OMP END MASTER 1505 ! 1506 ! Each thread creates a row_p index for its new blocks. This gives the 1507 ! number of new blocks per thread per row. 1508 CALL dbcsr_sort_indices(nblks, wm%row_i, wm%col_i, wm%blk_p) 1509 ALLOCATE (my_row_p(nrows + 1)) 1510 CALL dbcsr_make_dbcsr_index(my_row_p, wm%row_i, nrows, nblks) 1511 ! 1512 ! The threads must share their row_p, col_i, blk_p, and data areas 1513 ! among themselves. 1514 all_row_p(which_wm)%p => my_row_p 1515 all_data_sizes(which_wm) = wm_datasize 1516 IF (.NOT. sort_data) THEN 1517 IF (wm_datasize > dbcsr_data_get_size_referenced(wm%data_area)) & 1518 DBCSR_ABORT("Data size mismatch.") 1519 ENDIF 1520 all_col_i(which_wm)%p => wm%col_i 1521 all_blk_p(which_wm)%p => wm%blk_p 1522!$OMP BARRIER 1523 IF (sort_data) THEN 1524!$OMP MASTER 1525 all_data_offsets(:) = 0 1526!$OMP END MASTER 1527 CALL dbcsr_data_init(all_data_areas(which_wm)) 1528 all_data_areas(which_wm) = wm%data_area 1529!$OMP CRITICAL (crit_data) 1530 CALL dbcsr_data_hold(all_data_areas(which_wm)) 1531!$OMP END CRITICAL (crit_data) 1532 ELSE 1533!$OMP MASTER 1534 all_data_offsets(1) = all_data_sizes(0) 1535 DO t = 2, nwms 1536 all_data_offsets(t) = all_data_offsets(t - 1) + all_data_sizes(t - 1) 1537 ENDDO 1538!$OMP END MASTER 1539 ENDIF 1540 ! 1541 ! The new row counts are created, then the new row_p index is created. 1542 ! 1543!$OMP DO 1544 DO row = 1, nrows 1545 my_row_count = old_row_p(row + 1) - old_row_p(row) 1546 DO t = 1, nwms 1547 my_row_count = my_row_count & 1548 & + all_row_p(t)%p(row + 1) - all_row_p(t)%p(row) 1549 ENDDO 1550 new_row_p(row) = my_row_count 1551 ENDDO 1552!$OMP END DO 1553!$OMP MASTER 1554 max_row_count = MAXVAL(new_row_p(1:nrows)) 1555 CALL dbcsr_build_row_index(new_row_p, nrows) 1556!$OMP END MASTER 1557!$OMP BARRIER 1558 ! 1559 ! The new index is built 1560 CALL merge_index(new_row_p, new_col_i, new_blk_p, & 1561 blk_d, & 1562 old_row_p, old_col_i, old_blk_p, & 1563 all_row_p, all_col_i, all_blk_p, & 1564 all_data_offsets, nwms, nrows, max_row_count, sort_data) 1565 ! 1566 ! The data is then merged in one of two ways. 1567 ! 1568!$OMP MASTER 1569 IF (sort_data) THEN 1570 ! The matrix gets a fresh data area. Blocks from the work 1571 ! matrices will be copied into it in order. 1572 ! 1573!$OMP CRITICAL (crit_data) 1574 CALL dbcsr_data_release(matrix%data_area) 1575 CALL dbcsr_data_init(matrix%data_area) 1576!$OMP END CRITICAL (crit_data) 1577 CALL dbcsr_data_new(matrix%data_area, & 1578 data_size=new_nze, & 1579 data_type=dbcsr_data_get_type(all_data_areas(0)), & 1580 memory_type=dbcsr_data_get_memory_type(all_data_areas(0))) 1581 ALLOCATE (new_blk_p_sorted(new_nblks)) 1582 ALLOCATE (new_blk_sizes(new_nblks)) 1583 ELSE 1584 ! Data stored in the work matrices will be just appended to the 1585 ! current data area. 1586 CALL dbcsr_data_ensure_size(matrix%data_area, new_nze) 1587 ENDIF 1588!$OMP END MASTER 1589!$OMP BARRIER 1590 IF (sort_data) THEN 1591 CALL dbcsr_calc_block_sizes(new_blk_sizes, & 1592 new_row_p, new_col_i, rbs, cbs) 1593 CALL dbcsr_sort_data(new_blk_p_sorted, new_blk_p, & 1594 new_blk_sizes, dsts=matrix%data_area, & 1595 src=all_data_areas(0), & 1596 srcs=all_data_areas, old_blk_d=blk_d) 1597 ELSE 1598 IF (all_data_sizes(which_wm) .GT. 0) THEN 1599 CALL dbcsr_data_copy(dst=matrix%data_area, & 1600 dst_lb=(/all_data_offsets(which_wm) + 1/), & 1601 dst_sizes=(/all_data_sizes(which_wm)/), & 1602 src=wm%data_area, & 1603 src_lb=(/1/), & 1604 src_sizes=(/all_data_sizes(which_wm)/)) 1605 ENDIF 1606 ENDIF 1607 ! 1608 ! Creates a new index array. 1609 ! 1610!$OMP BARRIER 1611!$OMP MASTER 1612 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p) 1613 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i) 1614 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p) 1615 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, & 1616 DATA=new_row_p(1:nrows + 1), extra=new_nblks*2) 1617 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, & 1618 DATA=new_col_i(1:new_nblks)) 1619 IF (sort_data) THEN 1620 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & 1621 DATA=new_blk_p_sorted) 1622 ELSE 1623 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & 1624 DATA=new_blk_p) 1625 ENDIF 1626 matrix%nblks = new_nblks 1627 matrix%nze = new_nze 1628 matrix%index(dbcsr_slot_nblks) = matrix%nblks 1629 matrix%index(dbcsr_slot_nze) = matrix%nze 1630 CALL dbcsr_repoint_index(matrix) 1631!$OMP END MASTER 1632 ! 1633!$OMP BARRIER 1634 ! 1635 DEALLOCATE (my_row_p) 1636 IF (sort_data) THEN 1637!$OMP MASTER 1638!$OMP CRITICAL (crit_data) 1639 CALL dbcsr_data_release(all_data_areas(0)) 1640!$OMP END CRITICAL (crit_data) 1641 DO which_wm = 1, nwms 1642!$OMP CRITICAL (crit_data) 1643 CALL dbcsr_data_release(all_data_areas(which_wm)) 1644!$OMP END CRITICAL (crit_data) 1645 ENDDO 1646!$OMP END MASTER 1647 ENDIF 1648!$OMP BARRIER 1649!$OMP MASTER 1650 DEALLOCATE (all_row_p) 1651 DEALLOCATE (all_col_i) 1652 DEALLOCATE (all_blk_p) 1653 DEALLOCATE (new_row_p) 1654 DEALLOCATE (new_col_i) 1655 DEALLOCATE (new_blk_p) 1656 DEALLOCATE (all_data_sizes) 1657 DEALLOCATE (all_data_offsets) 1658 IF (sort_data) THEN 1659 DEALLOCATE (all_data_areas) 1660 DEALLOCATE (new_blk_sizes) 1661 DEALLOCATE (new_blk_p_sorted) 1662 ENDIF 1663 DEALLOCATE (blk_d) 1664!$OMP END MASTER 1665!$OMP BARRIER 1666 IF (dbg) WRITE (*, *) routineN//" stopped" 1667 CALL timestop(handle) 1668 END SUBROUTINE dbcsr_merge_all 1669 1670 SUBROUTINE dbcsr_merge_single_wm(matrix) 1671 !! Sort data from the WM into the final matrix, based closely on dbcsr_merge_all 1672 1673 TYPE(dbcsr_type), INTENT(INOUT) :: matrix 1674 !! matrix to work on 1675 1676 CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_merge_single_wm' 1677 LOGICAL, PARAMETER :: dbg = .FALSE. 1678 1679 INTEGER :: handle, nblks, nrows 1680 INTEGER, ALLOCATABLE, DIMENSION(:), SAVE :: new_blk_p_sorted, new_blk_sizes, & 1681 new_row_p 1682 INTEGER, ALLOCATABLE, DIMENSION(:), SAVE, TARGET :: blk_d 1683 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS, SAVE :: cbs, rbs 1684 TYPE(dbcsr_work_type), POINTER :: wm 1685 1686! --------------------------------------------------------------------------- 1687 1688 CALL timeset(routineN, handle) 1689 1690 ! Outline: 1691 ! There is a single work matrix. Data blocks are sorted and copied 1692 ! into the matrix data area (which is empty). The index is made consistent 1693 1694 nrows = matrix%nblkrows_total 1695 wm => matrix%wms(1) 1696 nblks = wm%lastblk 1697 IF (dbcsr_wm_use_mutable(wm)) & 1698 DBCSR_ABORT("Number of threads does not match number of work matrices.") 1699 ! 1700 ! The following is valid because the data actually referenced 1701 ! by blocks is explicitly calculated in dbcsr_finalize. 1702!$OMP MASTER 1703 rbs => array_data(matrix%row_blk_size) 1704 cbs => array_data(matrix%col_blk_size) 1705 ! 1706 ! Initializations 1707 ! 1708 ALLOCATE (new_row_p(nrows + 1)) 1709 ALLOCATE (blk_d(nblks)) 1710 ALLOCATE (new_blk_p_sorted(nblks)) 1711 ALLOCATE (new_blk_sizes(nblks)) 1712 ! 1713 ! Master thread creates a row_p index for the (sorted) blocks. 1714 CALL dbcsr_sort_indices(nblks, wm%row_i, wm%col_i, wm%blk_p) 1715 CALL dbcsr_make_dbcsr_index(new_row_p, wm%row_i, nrows, nblks) 1716 ! 1717 ! 1718 ! The matrix data area is resized. Blocks from the work 1719 ! matrices will be copied into it in order. 1720 ! 1721 CALL dbcsr_data_ensure_size(matrix%data_area, wm%datasize, nocopy=.TRUE.) 1722!$OMP END MASTER 1723!$OMP BARRIER 1724 CALL dbcsr_calc_block_sizes(new_blk_sizes, & 1725 new_row_p, wm%col_i, rbs, cbs) 1726 CALL dbcsr_sort_data(new_blk_p_sorted, wm%blk_p, & 1727 new_blk_sizes, dsts=matrix%data_area, & 1728 src=wm%data_area) 1729 ! 1730 ! Creates a new index array. 1731 ! 1732!$OMP BARRIER 1733!$OMP MASTER 1734 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p) 1735 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i) 1736 CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p) 1737 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, & 1738 DATA=new_row_p(1:nrows + 1), extra=nblks*2) 1739 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, & 1740 DATA=wm%col_i(1:nblks)) 1741 CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, & 1742 DATA=new_blk_p_sorted) 1743 matrix%nblks = nblks 1744 matrix%nze = wm%datasize 1745 matrix%index(dbcsr_slot_nblks) = matrix%nblks 1746 matrix%index(dbcsr_slot_nze) = matrix%nze 1747 CALL dbcsr_repoint_index(matrix) 1748 DEALLOCATE (new_row_p) 1749 DEALLOCATE (new_blk_sizes) 1750 DEALLOCATE (new_blk_p_sorted) 1751 DEALLOCATE (blk_d) 1752!$OMP END MASTER 1753 IF (dbg) WRITE (*, *) routineN//" stopped" 1754 CALL timestop(handle) 1755 END SUBROUTINE dbcsr_merge_single_wm 1756 1757 SUBROUTINE merge_index(new_row_p, new_col_i, new_blk_p, & 1758 !! Builds a new index from several work matrices. 1759 blk_d, old_row_p, old_col_i, old_blk_p, & 1760 all_row_p, all_col_i, all_blk_p, & 1761 all_data_offsets, nwms, nrows, max_row_count, sort_data) 1762 INTEGER, DIMENSION(*), INTENT(IN) :: new_row_p 1763 INTEGER, DIMENSION(*), INTENT(OUT), TARGET :: new_col_i, new_blk_p 1764 INTEGER, DIMENSION(*), INTENT(IN), TARGET :: blk_d 1765 INTEGER, DIMENSION(*), INTENT(IN) :: old_row_p, old_col_i, old_blk_p 1766 TYPE(i_array_p), DIMENSION(*), INTENT(IN) :: all_row_p, all_col_i, all_blk_p 1767 INTEGER, DIMENSION(*), INTENT(IN) :: all_data_offsets 1768 INTEGER, INTENT(IN) :: nwms, nrows, max_row_count 1769 LOGICAL, INTENT(IN) :: sort_data 1770 1771 CHARACTER(len=*), PARAMETER :: routineN = 'merge_index' 1772 INTEGER :: blk1, blk2, error_handle, first_row_blk, & 1773 last_row_blk, row, src_blk_1, & 1774 src_blk_2, t 1775 INTEGER, ALLOCATABLE, DIMENSION(:) :: blk_p_buff, tmp_arr 1776 INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: blk_span, col_span, d_span 1777 1778! --------------------------------------------------------------------------- 1779 1780 CALL timeset(routineN, error_handle) 1781 ! 1782 ALLOCATE (tmp_arr(max_row_count)) 1783 ALLOCATE (blk_p_buff(max_row_count)) 1784 ! 1785!$OMP DO 1786 DO row = 1, nrows 1787 first_row_blk = new_row_p(row) + 1 1788 last_row_blk = new_row_p(row + 1) 1789 col_span => new_col_i(first_row_blk:last_row_blk) 1790 blk_span => new_blk_p(first_row_blk:last_row_blk) 1791 IF (sort_data) d_span => blk_d(first_row_blk:last_row_blk) 1792 src_blk_1 = old_row_p(row) + 1 1793 src_blk_2 = old_row_p(row + 1) 1794 blk1 = 1 1795 blk2 = blk1 + (src_blk_2 - src_blk_1 + 1) - 1 1796 col_span(blk1:blk2) = old_col_i(src_blk_1:src_blk_2) 1797 blk_span(blk1:blk2) = old_blk_p(src_blk_1:src_blk_2) 1798 IF (sort_data) THEN 1799 d_span(blk1:blk2) = 1 1800 DO t = 1, nwms 1801 src_blk_1 = all_row_p(t)%p(row) + 1 1802 src_blk_2 = all_row_p(t)%p(row + 1) 1803 blk1 = blk2 + 1 1804 blk2 = blk1 + (src_blk_2 - src_blk_1 + 1) - 1 1805 col_span(blk1:blk2) = all_col_i(t)%p(src_blk_1:src_blk_2) 1806 blk_span(blk1:blk2) = all_blk_p(t)%p(src_blk_1:src_blk_2) 1807 d_span(blk1:blk2) = t + 1 1808 ENDDO 1809 ELSE 1810 DO t = 1, nwms 1811 src_blk_1 = all_row_p(t)%p(row) + 1 1812 src_blk_2 = all_row_p(t)%p(row + 1) 1813 blk1 = blk2 + 1 1814 blk2 = blk1 + (src_blk_2 - src_blk_1 + 1) - 1 1815 col_span(blk1:blk2) = all_col_i(t)%p(src_blk_1:src_blk_2) 1816 blk_span(blk1:blk2) = all_blk_p(t)%p(src_blk_1:src_blk_2) & 1817 & + all_data_offsets(t) 1818 ENDDO 1819 ENDIF 1820 CALL sort(col_span, SIZE(col_span), tmp_arr) 1821 blk_p_buff(1:SIZE(blk_span)) = blk_span(:) 1822 blk_span(1:SIZE(blk_span)) = blk_p_buff(tmp_arr(1:SIZE(blk_span))) 1823 ENDDO 1824!$OMP END DO 1825 ! 1826 DEALLOCATE (tmp_arr) 1827 DEALLOCATE (blk_p_buff) 1828 ! 1829 CALL timestop(error_handle) 1830 END SUBROUTINE merge_index 1831 1832#:include '../data/dbcsr.fypp' 1833#:for n, nametype1, base1, prec1, kind1, type1, dkind1 in inst_params_float 1834 SUBROUTINE tree_to_linear_${nametype1}$ (wm) 1835 !! Converts mutable data to linear (array) type. 1836 1837 USE dbcsr_btree, & 1838 ONLY: btree_2d_data_${nametype1}$ => btree_data_${nametype1}$p2d, & 1839 btree_destroy_${nametype1}$ => btree_delete, & 1840 btree_size_${nametype1}$ => btree_get_entries 1841 TYPE(dbcsr_work_type), INTENT(INOUT) :: wm 1842 !! work matrix to convert 1843 1844 CHARACTER(len=*), PARAMETER :: routineN = 'tree_to_linear_${nametype1}$' 1845 1846 INTEGER :: blk, blk_p, treesize, & 1847 error_handler, needed_size 1848 INTEGER(KIND=int_8), ALLOCATABLE, & 1849 DIMENSION(:) :: keys 1850 ${type1}$, DIMENSION(:), POINTER, CONTIGUOUS :: target_data 1851 ${type1}$, DIMENSION(:, :), POINTER :: block_2d 1852 TYPE(btree_2d_data_${nametype1}$), ALLOCATABLE, & 1853 DIMENSION(:) :: values 1854 1855! --------------------------------------------------------------------------- 1856 1857 CALL timeset(routineN, error_handler) 1858 ! srt = .TRUE. ! Not needed because of the copy 1859 treesize = btree_size_${nametype1}$ (wm%mutable%m%btree_${nametype1}$) 1860 IF (wm%lastblk .NE. treesize) & 1861 DBCSR_ABORT("Mismatch in number of blocks") 1862 ALLOCATE (keys(treesize), values(treesize)) 1863 CALL btree_destroy_${nametype1}$ (wm%mutable%m%btree_${nametype1}$, keys, values) 1864 CALL ensure_array_size(wm%row_i, ub=treesize) 1865 CALL ensure_array_size(wm%col_i, ub=treesize) 1866 CALL dbcsr_unpack_i8_2i4(keys, wm%row_i, & 1867 wm%col_i) 1868 ! For now we also fill the data, sloooowly, but this should 1869 ! be avoided and the data should be copied directly from the 1870 ! source in the subroutine's main loop. 1871 CALL ensure_array_size(wm%blk_p, ub=treesize) 1872 needed_size = 0 1873 DO blk = 1, treesize 1874 block_2d => values(blk)%p 1875 needed_size = needed_size + SIZE(block_2d) 1876 ENDDO 1877 wm%datasize = needed_size 1878 CALL dbcsr_data_ensure_size(wm%data_area, & 1879 wm%datasize) 1880 target_data => dbcsr_get_data_p_${nametype1}$ (wm%data_area) 1881 blk_p = 1 1882 DO blk = 1, treesize 1883 block_2d => values(blk)%p 1884 IF (.NOT. values(blk)%tr) THEN 1885 wm%blk_p(blk) = blk_p 1886 ELSE 1887 wm%blk_p(blk) = -blk_p 1888 ENDIF 1889 CALL block_copy_${nametype1}$ (target_data, block_2d, & 1890 SIZE(block_2d), blk_p, 1) 1891 blk_p = blk_p + SIZE(block_2d) 1892 DEALLOCATE (block_2d) 1893 ENDDO 1894 DEALLOCATE (keys, values) 1895 CALL dbcsr_mutable_release(wm%mutable) 1896 CALL timestop(error_handler) 1897 END SUBROUTINE tree_to_linear_${nametype1}$ 1898 1899#:endfor 1900 1901END MODULE dbcsr_work_operations 1902