1!--------------------------------------------------------------------------------------------------!
2! Copyright (C) by the DBCSR developers group - All rights reserved                                !
3! This file is part of the DBCSR library.                                                          !
4!                                                                                                  !
5! For information on the license, see the LICENSE file.                                            !
6! For further information please visit https://dbcsr.cp2k.org                                      !
7! SPDX-License-Identifier: GPL-2.0+                                                                !
8!--------------------------------------------------------------------------------------------------!
9
10MODULE dbcsr_work_operations
11   !! DBCSR work matrix utilities
12
13   USE dbcsr_array_types, ONLY: array_data, &
14                                array_hold, &
15                                array_i1d_obj, &
16                                array_new, &
17                                array_nullify, &
18                                array_release, &
19                                array_size
20   USE dbcsr_btree, ONLY: btree_data_cp2d, &
21                          btree_data_dp2d, &
22                          btree_data_sp2d, &
23                          btree_data_zp2d
24   USE dbcsr_block_operations, ONLY: block_copy_c, &
25                                     block_copy_d, &
26                                     block_copy_s, &
27                                     block_copy_z, &
28                                     dbcsr_data_copy, &
29                                     dbcsr_data_set
30   USE dbcsr_config, ONLY: default_resize_factor
31   USE dbcsr_data_methods, ONLY: &
32      dbcsr_data_clear_pointer, dbcsr_data_ensure_size, dbcsr_data_get_memory_type, &
33      dbcsr_data_get_size, dbcsr_data_get_size_referenced, dbcsr_data_get_type, dbcsr_data_hold, &
34      dbcsr_data_init, dbcsr_data_new, dbcsr_data_release, dbcsr_data_set_size_referenced, &
35      dbcsr_data_valid, dbcsr_get_data_p_c, dbcsr_get_data_p_d, dbcsr_get_data_p_s, &
36      dbcsr_get_data_p_z
37   USE dbcsr_data_operations, ONLY: dbcsr_data_copyall, &
38                                    dbcsr_sort_data, &
39                                    dbcsr_switch_data_area
40   USE dbcsr_dist_methods, ONLY: dbcsr_distribution_has_threads, &
41                                 dbcsr_distribution_hold, &
42                                 dbcsr_distribution_make_threads, &
43                                 dbcsr_distribution_ncols, &
44                                 dbcsr_distribution_nrows, &
45                                 dbcsr_distribution_release
46   USE dbcsr_index_operations, ONLY: dbcsr_addto_index_array, &
47                                     dbcsr_build_row_index, &
48                                     dbcsr_clearfrom_index_array, &
49                                     dbcsr_index_prune_deleted, &
50                                     dbcsr_make_dbcsr_index, &
51                                     dbcsr_make_index_exist, &
52                                     dbcsr_repoint_index, &
53                                     dbcsr_sort_indices
54   USE dbcsr_iterator_operations, ONLY: dbcsr_iterator_blocks_left, &
55                                        dbcsr_iterator_next_block, &
56                                        dbcsr_iterator_start, &
57                                        dbcsr_iterator_stop
58   USE dbcsr_mem_methods, ONLY: dbcsr_memtype_equal
59   USE dbcsr_methods, ONLY: &
60      dbcsr_col_block_sizes, dbcsr_distribution, dbcsr_get_data_memory_type, &
61      dbcsr_get_data_size_used, dbcsr_get_data_type, dbcsr_get_index_memory_type, &
62      dbcsr_get_matrix_type, dbcsr_get_replication_type, dbcsr_matrix_counter, &
63      dbcsr_max_col_size, dbcsr_max_row_size, dbcsr_mutable_destroy, dbcsr_mutable_init, &
64      dbcsr_mutable_instantiated, dbcsr_mutable_new, dbcsr_mutable_release, dbcsr_name, &
65      dbcsr_row_block_sizes, dbcsr_use_mutable, dbcsr_valid_index, dbcsr_wm_use_mutable
66   USE dbcsr_ptr_util, ONLY: ensure_array_size
67   USE dbcsr_toollib, ONLY: dbcsr_unpack_i8_2i4, &
68                            sort
69   USE dbcsr_string_utilities, ONLY: uppercase
70   USE dbcsr_types, ONLY: &
71      dbcsr_data_obj, dbcsr_distribution_obj, dbcsr_iterator, dbcsr_memtype_default, &
72      dbcsr_memtype_type, dbcsr_meta_size, dbcsr_num_slots, dbcsr_repl_col, dbcsr_repl_full, &
73      dbcsr_repl_none, dbcsr_repl_row, dbcsr_slot_blk_p, dbcsr_slot_col_i, dbcsr_slot_home_coli, &
74      dbcsr_slot_home_pcol, dbcsr_slot_home_prow, dbcsr_slot_home_rowi, dbcsr_slot_home_vpcol, &
75      dbcsr_slot_home_vprow, dbcsr_slot_nblkcols_local, dbcsr_slot_nblkcols_total, &
76      dbcsr_slot_nblkrows_local, dbcsr_slot_nblkrows_total, dbcsr_slot_nblks, &
77      dbcsr_slot_nfullcols_local, dbcsr_slot_nfullcols_total, dbcsr_slot_nfullrows_local, &
78      dbcsr_slot_nfullrows_total, dbcsr_slot_nze, dbcsr_slot_row_p, dbcsr_slot_size, dbcsr_type, &
79      dbcsr_type_antihermitian, dbcsr_type_antisymmetric, dbcsr_type_complex_4, &
80      dbcsr_type_complex_8, dbcsr_type_hermitian, dbcsr_type_no_symmetry, dbcsr_type_real_4, &
81      dbcsr_type_real_8, dbcsr_type_real_default, dbcsr_type_symmetric, dbcsr_work_type
82   USE dbcsr_dist_util, ONLY: convert_sizes_to_offsets, &
83                              dbcsr_calc_block_sizes, &
84                              dbcsr_verify_matrix, &
85                              meta_from_dist
86   USE dbcsr_kinds, ONLY: default_string_length, &
87                          int_8, &
88                          real_4, &
89                          real_8
90#include "base/dbcsr_base_uses.f90"
91
92!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads, omp_in_parallel
93
94   IMPLICIT NONE
95   PRIVATE
96
97   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dbcsr_work_operations'
98
99   LOGICAL, PARAMETER :: careful_mod = .FALSE.
100
101   PUBLIC :: dbcsr_create, dbcsr_work_create, dbcsr_finalize, dbcsr_special_finalize
102   PUBLIC :: dbcsr_add_wm_from_matrix, &
103             add_work_coordinate
104   PUBLIC :: dbcsr_work_destroy
105
106   INTERFACE dbcsr_create
107      MODULE PROCEDURE dbcsr_create_new, dbcsr_create_template
108   END INTERFACE
109
110   TYPE i_array_p
111      INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: p
112   END TYPE i_array_p
113
114CONTAINS
115
116   SUBROUTINE dbcsr_create_new(matrix, name, dist, matrix_type, &
117                               row_blk_size, col_blk_size, row_blk_size_obj, col_blk_size_obj, &
118                               nze, data_type, data_buffer, &
119                               data_memory_type, index_memory_type, &
120                               max_rbs, max_cbs, &
121                               row_blk_offset, col_blk_offset, &
122                               thread_dist, &
123                               reuse, reuse_arrays, mutable_work, make_index, replication_type)
124      !! Creates a matrix, allocating the essentials.
125      !!
126      !! The matrix itself is allocated, as well as the essential parts of
127      !! the index. When passed the nze argument, the data is also allocated
128      !! to that size.
129      !! see dbcsr_types.F
130
131      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
132         !! new matrix
133      CHARACTER(len=*), INTENT(IN)                       :: name
134      TYPE(dbcsr_distribution_obj), INTENT(IN)           :: dist
135         !! distribution_2d distribution
136      CHARACTER, INTENT(IN)                              :: matrix_type
137         !! 'N' for normal, 'T' for transposed, 'S' for symmetric, and 'A' for antisymmetric
138      INTEGER, DIMENSION(:), INTENT(IN), POINTER, &
139         CONTIGUOUS, OPTIONAL                            :: row_blk_size, col_blk_size
140      TYPE(array_i1d_obj), INTENT(IN), OPTIONAL          :: row_blk_size_obj, col_blk_size_obj
141      INTEGER, INTENT(IN), OPTIONAL                      :: nze, data_type
142         !! number of elements
143         !! type of data from 'rRcC' for single/double precision real/complex, default is 'R'
144      TYPE(dbcsr_data_obj), INTENT(IN), OPTIONAL         :: data_buffer
145      TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL     :: data_memory_type, index_memory_type
146         !! allocate indices and data using special memory
147         !! allocate indices using special memory
148      INTEGER, INTENT(IN), OPTIONAL                      :: max_rbs, max_cbs
149      TYPE(array_i1d_obj), INTENT(IN), OPTIONAL          :: row_blk_offset, col_blk_offset
150      TYPE(dbcsr_distribution_obj), INTENT(IN), OPTIONAL :: thread_dist
151      LOGICAL, INTENT(IN), OPTIONAL                      :: reuse, reuse_arrays, mutable_work, &
152                                                            make_index
153         !! reuses an existing matrix, default is to create a fresh one
154         !! uses the mutable data for working and not the append-only data; default is append-only
155      CHARACTER, INTENT(IN), OPTIONAL                    :: replication_type
156         !! replication to be used for this matrix; default is dbcsr_repl_none
157
158      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_create_new', &
159                                     routineP = moduleN//':'//routineN
160
161      CHARACTER                                          :: matrix_type_l
162      INTEGER                                            :: handle, my_nze
163      INTEGER, DIMENSION(:), POINTER, CONTIGUOUS         :: vec_col_blk_offset, vec_row_blk_offset
164      INTEGER, DIMENSION(dbcsr_meta_size)                :: new_meta
165      LOGICAL                                            :: hijack, my_make_index
166
167!   ---------------------------------------------------------------------------
168
169      MARK_USED(thread_dist) ! only used with OMP
170
171      CALL timeset(routineN, handle)
172
173      ! Reuse matrix only if has actually been allocated.
174      hijack = ASSOCIATED(matrix%index)
175      IF (PRESENT(reuse)) hijack = reuse
176
177      my_make_index = .TRUE.
178      IF (PRESENT(make_index)) my_make_index = make_index
179
180      IF (.NOT. hijack) THEN
181         matrix = dbcsr_type()
182         matrix%refcount = 1
183      ENDIF
184!$OMP     CRITICAL (crit_counter)
185      matrix%serial_number = dbcsr_matrix_counter
186      dbcsr_matrix_counter = dbcsr_matrix_counter + 1
187!$OMP     END CRITICAL (crit_counter)
188      ! Mark matrix index as having an invalid index.
189      matrix%valid = .FALSE.
190      matrix%name = name
191      ! Sets the type of matrix building/modifying work structures.
192      IF (PRESENT(mutable_work)) THEN
193         matrix%work_mutable = mutable_work
194      ELSE
195         matrix%work_mutable = .FALSE.
196      ENDIF
197      ! Sets the correct data type.
198      IF (PRESENT(data_type)) THEN
199         SELECT CASE (data_type)
200         CASE (dbcsr_type_real_4)
201            matrix%data_type = dbcsr_type_real_4
202         CASE (dbcsr_type_real_8)
203            matrix%data_type = dbcsr_type_real_8
204         CASE (dbcsr_type_complex_4)
205            matrix%data_type = dbcsr_type_complex_4
206         CASE (dbcsr_type_complex_8)
207            matrix%data_type = dbcsr_type_complex_8
208         CASE DEFAULT
209            DBCSR_ABORT("Invalid matrix type")
210         END SELECT
211      ELSE
212         matrix%data_type = dbcsr_type_real_default
213      ENDIF
214
215      matrix%data_memory_type = dbcsr_memtype_default
216      IF (PRESENT(data_memory_type)) &
217         matrix%data_memory_type = data_memory_type
218
219      matrix%index_memory_type = dbcsr_memtype_default
220      IF (PRESENT(index_memory_type)) &
221         matrix%index_memory_type = index_memory_type
222
223      IF (hijack) THEN
224         ! Release/deallocate elements that are replaced or not needed
225         ! by the new matrix. This is similar to what dbcsr_destroy
226         ! does, except that it keeps the index and data.
227         CALL array_release(matrix%row_blk_size)
228         CALL array_release(matrix%col_blk_size)
229         CALL array_release(matrix%row_blk_offset)
230         CALL array_release(matrix%col_blk_offset)
231         IF (matrix%has_local_rows) &
232            CALL array_release(matrix%local_rows)
233         IF (matrix%has_global_rows) &
234            CALL array_release(matrix%global_rows)
235         IF (matrix%has_local_cols) &
236            CALL array_release(matrix%local_cols)
237         IF (matrix%has_global_cols) &
238            CALL array_release(matrix%global_cols)
239         CALL dbcsr_distribution_release(matrix%dist)
240         IF (ASSOCIATED(matrix%wms)) THEN
241            CALL dbcsr_work_destroy_all(matrix)
242         ENDIF
243         CALL array_nullify(matrix%local_rows)
244         CALL array_nullify(matrix%global_rows)
245         CALL array_nullify(matrix%local_cols)
246         CALL array_nullify(matrix%global_cols)
247         !
248         IF (matrix%data_type /= matrix%data_area%d%data_type) &
249            DBCSR_ABORT("Inconsistent data type for the existing buffer.")
250         CALL dbcsr_data_set_size_referenced(matrix%data_area, 0)
251      ELSE
252         ! Invalidate index
253         NULLIFY (matrix%index)
254         ! Invalidate data
255         IF (PRESENT(data_buffer)) THEN
256            IF (.NOT. dbcsr_data_valid(data_buffer)) &
257               DBCSR_ABORT("Input data buffer not valid.")
258            IF (matrix%data_type /= data_buffer%d%data_type) &
259               DBCSR_ABORT("Input buffer data type different by matrix data type.")
260            matrix%data_memory_type = data_buffer%d%memory_type
261            matrix%data_area = data_buffer
262            CALL dbcsr_data_hold(matrix%data_area)
263         ELSE
264            CALL dbcsr_data_init(matrix%data_area)
265         ENDIF
266      ENDIF
267      ! These are always invalidated.
268      NULLIFY (matrix%row_p, matrix%col_i, matrix%blk_p, matrix%thr_c, &
269               matrix%coo_l)
270      IF (PRESENT(row_blk_size_obj)) THEN
271         matrix%row_blk_size = row_blk_size_obj
272         CALL array_hold(matrix%row_blk_size)
273      ELSEIF (PRESENT(row_blk_size)) THEN
274         CALL array_new(matrix%row_blk_size, row_blk_size, gift=reuse_arrays)
275      ELSE
276         DBCSR_ABORT("Missing row_blk_size")
277      ENDIF
278      IF (PRESENT(max_rbs)) THEN
279         matrix%max_rbs = max_rbs
280      ELSE IF (array_size(matrix%row_blk_size) .GT. 0) THEN
281         matrix%max_rbs = MAXVAL(array_data(matrix%row_blk_size))
282      ELSE
283         matrix%max_rbs = 0
284      ENDIF
285      IF (PRESENT(col_blk_size_obj)) THEN
286         matrix%col_blk_size = col_blk_size_obj
287         CALL array_hold(matrix%col_blk_size)
288      ELSEIF (PRESENT(col_blk_size)) THEN
289         CALL array_new(matrix%col_blk_size, col_blk_size, gift=reuse_arrays)
290      ELSE
291         DBCSR_ABORT("Missing col_blk_size")
292      ENDIF
293      IF (PRESENT(max_cbs)) THEN
294         matrix%max_cbs = max_cbs
295      ELSE IF (array_size(matrix%col_blk_size) .GT. 0) THEN
296         matrix%max_cbs = MAXVAL(array_data(matrix%col_blk_size))
297      ELSE
298         matrix%max_cbs = 0
299      ENDIF
300      !
301      IF (array_size(matrix%row_blk_size) /= dbcsr_distribution_nrows(dist)) &
302         DBCSR_ABORT("Number of blocked rows does match blocked row distribution.")
303      IF (array_size(matrix%col_blk_size) /= dbcsr_distribution_ncols(dist)) &
304         DBCSR_ABORT("Number of blocked columns does match blocked column distribution.")
305      ! initialize row/col offsets
306      IF (PRESENT(row_blk_offset)) THEN
307         IF (dbcsr_distribution_nrows(dist) + 1 /= array_size(row_blk_offset)) &
308            CALL dbcsr_abort(__LOCATION__, &
309                             "Number of blocked offset rows does match blocked row distribution.")
310         matrix%row_blk_offset = row_blk_offset
311         CALL array_hold(matrix%row_blk_offset)
312      ELSE
313         ALLOCATE (vec_row_blk_offset(array_size(matrix%row_blk_size) + 1))
314         CALL convert_sizes_to_offsets(array_data(matrix%row_blk_size), vec_row_blk_offset)
315         CALL array_new(matrix%row_blk_offset, vec_row_blk_offset, gift=.TRUE.)
316      ENDIF
317
318      IF (PRESENT(col_blk_offset)) THEN
319         IF (dbcsr_distribution_ncols(dist) + 1 /= array_size(col_blk_offset)) &
320            CALL dbcsr_abort(__LOCATION__, &
321                             "Number of blocked offset columns does match blocked column distribution.")
322         matrix%col_blk_offset = col_blk_offset
323         CALL array_hold(matrix%col_blk_offset)
324      ELSE
325         ALLOCATE (vec_col_blk_offset(array_size(matrix%col_blk_size) + 1))
326         CALL convert_sizes_to_offsets(array_data(matrix%col_blk_size), vec_col_blk_offset)
327         CALL array_new(matrix%col_blk_offset, vec_col_blk_offset, gift=.TRUE.)
328      ENDIF
329
330      matrix%dist = dist
331      CALL dbcsr_distribution_hold(matrix%dist)
332!$    IF (.NOT. dbcsr_distribution_has_threads(matrix%dist) .AND. PRESENT(thread_dist)) THEN
333!$       IF (dbcsr_distribution_has_threads(thread_dist)) THEN
334!$          matrix%dist%d%num_threads = thread_dist%d%num_threads
335!$          matrix%dist%d%has_thread_dist = .TRUE.
336!$          matrix%dist%d%thread_dist = thread_dist%d%thread_dist
337!$          CALL array_hold(matrix%dist%d%thread_dist)
338!$       ENDIF
339!$    ENDIF
340!$    IF (.NOT. dbcsr_distribution_has_threads(matrix%dist)) THEN
341!$       CALL dbcsr_distribution_make_threads(matrix%dist, &
342!$                                            array_data(matrix%row_blk_size))
343!$    ENDIF
344      ! Set up some data.
345      IF (my_make_index) THEN
346         CALL meta_from_dist(new_meta, dist, array_data(matrix%row_blk_size), &
347                             array_data(matrix%col_blk_size))
348         matrix%nblkrows_total = new_meta(dbcsr_slot_nblkrows_total)
349         matrix%nblkcols_total = new_meta(dbcsr_slot_nblkcols_total)
350         matrix%nfullrows_total = new_meta(dbcsr_slot_nfullrows_total)
351         matrix%nfullcols_total = new_meta(dbcsr_slot_nfullcols_total)
352         matrix%nblkrows_local = new_meta(dbcsr_slot_nblkrows_local)
353         matrix%nblkcols_local = new_meta(dbcsr_slot_nblkcols_local)
354         matrix%nfullrows_local = new_meta(dbcsr_slot_nfullrows_local)
355         matrix%nfullcols_local = new_meta(dbcsr_slot_nfullcols_local)
356      ENDIF
357      my_nze = 0; IF (PRESENT(nze)) my_nze = nze
358      matrix%nblks = 0
359      matrix%nze = 0
360
361      IF (PRESENT(replication_type)) THEN
362         IF (replication_type .NE. dbcsr_repl_none &
363             .AND. replication_type .NE. dbcsr_repl_full &
364             .AND. replication_type .NE. dbcsr_repl_row &
365             .AND. replication_type .NE. dbcsr_repl_col) &
366            DBCSR_ABORT("Invalid replication type '"//replication_type//"'")
367         IF (replication_type .EQ. dbcsr_repl_row .OR. replication_type .EQ. dbcsr_repl_col) &
368            DBCSR_WARN("Row and column replication not fully supported")
369         matrix%replication_type = replication_type
370      ELSE
371         matrix%replication_type = dbcsr_repl_none
372      ENDIF
373      !
374      ! Setup a matrix from scratch
375      IF (.NOT. hijack) THEN
376         IF (.NOT. PRESENT(data_buffer)) THEN
377            CALL dbcsr_data_new(matrix%data_area, matrix%data_type, my_nze, &
378                                memory_type=matrix%data_memory_type)
379            CALL dbcsr_data_set_size_referenced(matrix%data_area, 0)
380         ENDIF
381         !
382         IF (my_make_index) THEN
383            NULLIFY (matrix%index)
384            CALL ensure_array_size(matrix%index, lb=1, ub=dbcsr_num_slots, &
385                                   zero_pad=.TRUE., memory_type=matrix%index_memory_type)
386         ENDIF
387      ENDIF
388      IF (my_make_index) THEN
389         IF (LBOUND(matrix%index, 1) .GT. 1 &
390             .OR. UBOUND(matrix%index, 1) .LT. dbcsr_num_slots) &
391            DBCSR_ABORT("Index is not large enough")
392         matrix%index(1:dbcsr_num_slots) = 0
393         matrix%index(1:dbcsr_meta_size) = new_meta(1:dbcsr_meta_size)
394         matrix%index(dbcsr_slot_size) = dbcsr_num_slots
395      ENDIF
396      !
397      matrix%symmetry = .FALSE.
398      matrix%negate_real = .FALSE.
399      matrix%negate_imaginary = .FALSE.
400      !matrix%transpose = .FALSE.
401      matrix_type_l = matrix_type
402      CALL uppercase(matrix_type_l)
403      SELECT CASE (matrix_type_l)
404      CASE (dbcsr_type_no_symmetry)
405      CASE (dbcsr_type_symmetric)
406         matrix%symmetry = .TRUE.
407      CASE (dbcsr_type_antisymmetric)
408         matrix%symmetry = .TRUE.
409         matrix%negate_real = .TRUE.
410         matrix%negate_imaginary = .TRUE.
411      CASE (dbcsr_type_hermitian)
412         matrix%symmetry = .TRUE.
413         matrix%negate_imaginary = .TRUE.
414      CASE (dbcsr_type_antihermitian)
415         matrix%symmetry = .TRUE.
416         matrix%negate_real = .TRUE.
417      CASE DEFAULT
418         DBCSR_ABORT("Invalid matrix type.")
419      END SELECT
420      matrix%bcsc = .FALSE.
421      matrix%local_indexing = .FALSE.
422      matrix%list_indexing = .FALSE.
423      CALL array_nullify(matrix%local_rows)
424      CALL array_nullify(matrix%global_rows)
425      CALL array_nullify(matrix%local_cols)
426      CALL array_nullify(matrix%global_cols)
427      matrix%has_local_rows = .FALSE.
428      matrix%has_global_rows = .FALSE.
429      matrix%has_local_cols = .FALSE.
430      matrix%has_global_cols = .FALSE.
431      IF (my_make_index) THEN
432         CALL dbcsr_make_index_exist(matrix)
433      ENDIF
434      matrix%valid = .TRUE.
435      CALL timestop(handle)
436   END SUBROUTINE dbcsr_create_new
437
438   SUBROUTINE dbcsr_create_template(matrix, template, name, dist, matrix_type, &
439                                    row_blk_size, col_blk_size, row_blk_size_obj, col_blk_size_obj, &
440                                    nze, data_type, &
441                                    data_buffer, data_memory_type, index_memory_type, &
442                                    max_rbs, max_cbs, &
443                                    row_blk_offset, col_blk_offset, &
444                                    reuse_arrays, mutable_work, make_index, replication_type)
445      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
446      TYPE(dbcsr_type), INTENT(IN)                       :: template
447      CHARACTER(len=*), INTENT(IN), OPTIONAL             :: name
448      TYPE(dbcsr_distribution_obj), INTENT(IN), OPTIONAL :: dist
449      CHARACTER, INTENT(IN), OPTIONAL                    :: matrix_type
450      INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL, &
451         POINTER, CONTIGUOUS                             :: row_blk_size, col_blk_size
452      TYPE(array_i1d_obj), INTENT(IN), OPTIONAL          :: row_blk_size_obj, col_blk_size_obj
453      INTEGER, INTENT(IN), OPTIONAL                      :: nze, data_type
454      TYPE(dbcsr_data_obj), INTENT(IN), OPTIONAL         :: data_buffer
455      TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL     :: data_memory_type, index_memory_type
456      INTEGER, INTENT(IN), OPTIONAL                      :: max_rbs, max_cbs
457      TYPE(array_i1d_obj), INTENT(IN), OPTIONAL          :: row_blk_offset, col_blk_offset
458      LOGICAL, INTENT(IN), OPTIONAL                      :: reuse_arrays, mutable_work, make_index
459      CHARACTER, INTENT(IN), OPTIONAL                    :: replication_type
460
461      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_create_template', &
462                                     routineP = moduleN//':'//routineN
463
464      CHARACTER                                          :: new_matrix_type, new_replication_type
465      CHARACTER(len=default_string_length)               :: new_name
466      INTEGER                                            :: new_data_type, new_max_cbs, new_max_rbs
467      LOGICAL                                            :: my_make_index, new_mutable_work
468      TYPE(array_i1d_obj)                                :: new_col_blk_offset, new_row_blk_offset
469      TYPE(dbcsr_distribution_obj)                       :: new_dist
470      TYPE(dbcsr_memtype_type)                           :: new_data_memory_type, &
471                                                            new_index_memory_type
472
473      INTEGER, DIMENSION(:), POINTER, CONTIGUOUS         :: blk_size
474
475!   ---------------------------------------------------------------------------
476
477      IF (PRESENT(name)) THEN
478         new_name = TRIM(name)
479      ELSE
480         new_name = TRIM(dbcsr_name(template))
481      ENDIF
482      IF (PRESENT(dist)) THEN
483         new_dist = dist
484      ELSE
485         new_dist = dbcsr_distribution(template)
486      ENDIF
487      IF (PRESENT(matrix_type)) THEN
488         new_matrix_type = matrix_type
489      ELSE
490         new_matrix_type = dbcsr_get_matrix_type(template)
491      ENDIF
492      !
493      IF ((PRESENT(row_blk_size) .NEQV. PRESENT(col_blk_size)) .OR. &
494          (PRESENT(row_blk_size_obj) .NEQV. PRESENT(col_blk_size_obj))) THEN
495         DBCSR_ABORT("Both row_blk_size and col_blk_size must be provided")
496      ENDIF
497      !
498      IF (PRESENT(max_rbs)) new_max_rbs = max_rbs
499      IF (PRESENT(row_blk_offset)) new_row_blk_offset = row_blk_offset
500      NULLIFY (blk_size)
501      IF (PRESENT(row_blk_size_obj)) THEN
502         blk_size => array_data(row_blk_size_obj)
503      ELSEIF (PRESENT(row_blk_size)) THEN
504         blk_size => row_blk_size
505      ENDIF
506      IF (ASSOCIATED(blk_size)) THEN
507         IF (.NOT. PRESENT(max_rbs)) &
508            new_max_rbs = MAXVAL(blk_size)
509      ELSE
510         IF (.NOT. PRESENT(max_rbs)) &
511            new_max_rbs = dbcsr_max_row_size(template)
512         IF (.NOT. PRESENT(row_blk_offset)) &
513            new_row_blk_offset = template%row_blk_offset
514      ENDIF
515      !
516      IF (PRESENT(max_cbs)) new_max_cbs = max_cbs
517      IF (PRESENT(col_blk_offset)) new_col_blk_offset = col_blk_offset
518      NULLIFY (blk_size)
519      IF (PRESENT(col_blk_size_obj)) THEN
520         blk_size => array_data(col_blk_size_obj)
521      ELSEIF (PRESENT(col_blk_size)) THEN
522         blk_size => col_blk_size
523      ENDIF
524      IF (ASSOCIATED(blk_size)) THEN
525         IF (.NOT. PRESENT(max_cbs)) &
526            new_max_cbs = MAXVAL(blk_size)
527      ELSE
528         IF (.NOT. PRESENT(max_cbs)) &
529            new_max_cbs = dbcsr_max_col_size(template)
530         IF (.NOT. PRESENT(col_blk_offset)) &
531            new_col_blk_offset = template%col_blk_offset
532      ENDIF
533      IF (PRESENT(data_type)) THEN
534         new_data_type = data_type
535      ELSE
536         new_data_type = dbcsr_get_data_type(template)
537      ENDIF
538      IF (PRESENT(data_memory_type)) THEN
539         new_data_memory_type = data_memory_type
540      ELSE
541         new_data_memory_type = dbcsr_get_data_memory_type(template)
542      ENDIF
543      IF (PRESENT(index_memory_type)) THEN
544         new_index_memory_type = index_memory_type
545      ELSE
546         new_index_memory_type = dbcsr_get_index_memory_type(template)
547      ENDIF
548      IF (PRESENT(replication_type)) THEN
549         new_replication_type = replication_type
550      ELSE
551         new_replication_type = dbcsr_get_replication_type(template)
552      ENDIF
553      IF (PRESENT(mutable_work)) THEN
554         new_mutable_work = mutable_work
555      ELSE
556         new_mutable_work = dbcsr_use_mutable(template)
557      ENDIF
558      IF (PRESENT(row_blk_size_obj)) THEN
559         CALL dbcsr_create(matrix, name=new_name, dist=new_dist, &
560                           matrix_type=new_matrix_type, &
561                           row_blk_size_obj=row_blk_size_obj, &
562                           col_blk_size_obj=col_blk_size_obj, &
563                           nze=nze, &
564                           data_type=new_data_type, &
565                           data_buffer=data_buffer, &
566                           data_memory_type=new_data_memory_type, &
567                           index_memory_type=new_index_memory_type, &
568                           max_rbs=new_max_rbs, max_cbs=new_max_cbs, &
569                           row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset, &
570                           reuse_arrays=reuse_arrays, &
571                           mutable_work=new_mutable_work, &
572                           make_index=make_index, &
573                           replication_type=new_replication_type)
574      ELSEIF (PRESENT(row_blk_size)) THEN
575         CALL dbcsr_create(matrix, name=new_name, dist=new_dist, &
576                           matrix_type=new_matrix_type, &
577                           row_blk_size=row_blk_size, &
578                           col_blk_size=col_blk_size, &
579                           nze=nze, &
580                           data_type=new_data_type, &
581                           data_buffer=data_buffer, &
582                           data_memory_type=new_data_memory_type, &
583                           index_memory_type=new_index_memory_type, &
584                           max_rbs=new_max_rbs, max_cbs=new_max_cbs, &
585                           row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset, &
586                           reuse_arrays=reuse_arrays, &
587                           mutable_work=new_mutable_work, &
588                           make_index=make_index, &
589                           replication_type=new_replication_type)
590      ELSE
591         CALL dbcsr_create(matrix, name=new_name, dist=new_dist, &
592                           matrix_type=new_matrix_type, &
593                           row_blk_size_obj=template%row_blk_size, &
594                           col_blk_size_obj=template%col_blk_size, &
595                           nze=nze, &
596                           data_type=new_data_type, &
597                           data_buffer=data_buffer, &
598                           data_memory_type=new_data_memory_type, &
599                           index_memory_type=new_index_memory_type, &
600                           max_rbs=new_max_rbs, max_cbs=new_max_cbs, &
601                           row_blk_offset=new_row_blk_offset, col_blk_offset=new_col_blk_offset, &
602                           thread_dist=dbcsr_distribution(template), &
603                           reuse_arrays=reuse_arrays, &
604                           mutable_work=new_mutable_work, &
605                           make_index=make_index, &
606                           replication_type=new_replication_type)
607      ENDIF
608      ! Copy stuff from the meta-array.  These are not normally needed,
609      ! but have to be here for creating matrices from "image" matrices.
610      my_make_index = .TRUE.
611      IF (PRESENT(make_index)) my_make_index = make_index
612      IF (my_make_index) THEN
613         matrix%index(dbcsr_slot_home_prow) = template%index(dbcsr_slot_home_prow)
614         matrix%index(dbcsr_slot_home_rowi) = template%index(dbcsr_slot_home_rowi)
615         matrix%index(dbcsr_slot_home_pcol) = template%index(dbcsr_slot_home_pcol)
616         matrix%index(dbcsr_slot_home_coli) = template%index(dbcsr_slot_home_coli)
617         matrix%index(dbcsr_slot_home_vprow) = template%index(dbcsr_slot_home_vprow)
618         matrix%index(dbcsr_slot_home_vpcol) = template%index(dbcsr_slot_home_vpcol)
619      ENDIF
620      IF (PRESENT(row_blk_size) .AND. .NOT. PRESENT(row_blk_offset)) THEN
621         CALL array_release(new_row_blk_offset)
622      ENDIF
623      IF (PRESENT(col_blk_size) .AND. .NOT. PRESENT(col_blk_offset)) THEN
624         CALL array_release(new_col_blk_offset)
625      ENDIF
626
627   END SUBROUTINE dbcsr_create_template
628
629   SUBROUTINE dbcsr_init_wm(wm, data_type, nblks_guess, sizedata_guess, memory_type)
630      !! Initializes one work matrix
631
632      TYPE(dbcsr_work_type), INTENT(OUT)                 :: wm
633         !! initialized work matrix
634      INTEGER, INTENT(IN)                                :: data_type
635      INTEGER, INTENT(IN), OPTIONAL                      :: nblks_guess, sizedata_guess
636         !! estimated number of blocks
637         !! estimated size of data
638      TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL     :: memory_type
639
640      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_init_wm', routineP = moduleN//':'//routineN
641
642      INTEGER                                            :: handle, nblks, stat
643
644!   ---------------------------------------------------------------------------
645
646      IF (careful_mod) CALL timeset(routineN, handle)
647      wm%lastblk = 0
648      wm%datasize = 0
649      ! Index
650      IF (PRESENT(nblks_guess)) THEN
651         nblks = nblks_guess
652         IF (nblks_guess < 0) &
653            DBCSR_ABORT("Can not have negative block count.")
654         ALLOCATE (wm%row_i(nblks), stat=stat)
655         IF (stat /= 0) DBCSR_ABORT("wm%row_i")
656         ALLOCATE (wm%col_i(nblks), stat=stat)
657         IF (stat /= 0) DBCSR_ABORT("wm%col_i")
658         ALLOCATE (wm%blk_p(nblks), stat=stat)
659         IF (stat /= 0) DBCSR_ABORT("wm%blk_p")
660      ELSE
661         NULLIFY (wm%row_i, wm%col_i, wm%blk_p)
662         !nblks = CEILING (REAL (matrix%nblkrows_local * matrix%nblkcols_local)&
663         !     / REAL (dbcsr_mp_numnodes (dbcsr_distribution_mp (matrix%dist))))
664      ENDIF
665      ! Data
666      CALL dbcsr_data_init(wm%data_area)
667      IF (PRESENT(sizedata_guess)) THEN
668         IF (sizedata_guess < 0) &
669            DBCSR_ABORT("Can not have negative data size.")
670         CALL dbcsr_data_new(wm%data_area, data_type, &
671                             data_size=sizedata_guess, memory_type=memory_type)
672      ELSE
673         CALL dbcsr_data_new(wm%data_area, data_type, memory_type=memory_type)
674      ENDIF
675      CALL dbcsr_mutable_init(wm%mutable)
676      IF (careful_mod) CALL timestop(handle)
677   END SUBROUTINE dbcsr_init_wm
678
679   SUBROUTINE dbcsr_work_create(matrix, nblks_guess, sizedata_guess, n, &
680                                work_mutable, memory_type)
681      !! Creates a the working matrix(es) for a DBCSR matrix.
682
683      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
684         !! new matrix
685      INTEGER, INTENT(IN), OPTIONAL                      :: nblks_guess, sizedata_guess, n
686         !! estimated number of blocks
687         !! estimated size of data
688         !! number work matrices to create, default is 1
689      LOGICAL, INTENT(in), OPTIONAL                      :: work_mutable
690         !! use mutable work type, default is what was specified in create
691      TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL     :: memory_type
692
693      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_work_create', &
694                                     routineP = moduleN//':'//routineN
695
696      INTEGER                                            :: handle, iw, nw, ow
697      LOGICAL                                            :: wms_new, wms_realloc
698      TYPE(dbcsr_work_type), DIMENSION(:), POINTER       :: wms
699
700!   ---------------------------------------------------------------------------
701
702      CALL timeset(routineN, handle)
703      IF (PRESENT(n)) THEN
704         nw = n
705      ELSE
706         nw = 1
707!$       IF (omp_in_parallel()) THEN
708!$          nw = omp_get_num_threads()
709!$       ELSE
710!$          nw = omp_get_max_threads()
711!$       ENDIF
712      ENDIF
713!$OMP     MASTER
714      wms_new = .NOT. ASSOCIATED(matrix%wms)
715      wms_realloc = .FALSE.
716      IF (ASSOCIATED(matrix%wms)) THEN
717         ow = SIZE(matrix%wms)
718         IF (ow .LT. nw) &
719            DBCSR_WARN("Number of work matrices less than threads.")
720         IF (ow .LT. nw) wms_realloc = .TRUE.
721      ENDIF
722      IF (PRESENT(work_mutable)) THEN
723         matrix%work_mutable = work_mutable
724      ENDIF
725      IF (wms_realloc) THEN
726         ALLOCATE (wms(nw))
727         wms(1:ow) = matrix%wms(1:ow)
728         DEALLOCATE (matrix%wms)
729         matrix%wms => wms
730         DO iw = ow + 1, nw
731            CALL dbcsr_init_wm(matrix%wms(iw), matrix%data_type, &
732                               nblks_guess=nblks_guess, sizedata_guess=sizedata_guess, &
733                               memory_type=memory_type)
734            IF (matrix%work_mutable) &
735               CALL dbcsr_mutable_new(matrix%wms(iw)%mutable, &
736                                      dbcsr_get_data_type(matrix))
737         END DO
738      ENDIF
739      IF (wms_new) THEN
740         ALLOCATE (matrix%wms(nw))
741         DO iw = 1, nw
742            CALL dbcsr_init_wm(matrix%wms(iw), matrix%data_type, &
743                               nblks_guess=nblks_guess, sizedata_guess=sizedata_guess, &
744                               memory_type=memory_type)
745            IF (matrix%work_mutable) &
746               CALL dbcsr_mutable_new(matrix%wms(iw)%mutable, &
747                                      dbcsr_get_data_type(matrix))
748         END DO
749      ENDIF
750      matrix%valid = .FALSE.
751!$OMP     END MASTER
752      CALL timestop(handle)
753   END SUBROUTINE dbcsr_work_create
754
755   SUBROUTINE dbcsr_finalize(matrix, reshuffle)
756      !! Creates the final dbcsr_type matrix from the working matrix.
757      !! Work matrices (array or tree-based) are merged into the base DBCSR matrix.
758      !! If a matrix is marked as having a valid index, then nothing is done.
759      !! Deleted blocks are pruned from the index.
760
761      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
762         !! final matrix
763      LOGICAL, INTENT(IN), OPTIONAL                      :: reshuffle
764         !! whether the data should be reshuffled, default is false
765
766      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_finalize', routineP = moduleN//':'//routineN
767      LOGICAL, PARAMETER                                 :: dbg = .FALSE.
768
769      INTEGER                                            :: handle, i, nblks, nwms, start_offset
770      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: empty_row_p
771      INTEGER, DIMENSION(:), POINTER, SAVE               :: old_blk_p, old_col_i, old_row_p
772      LOGICAL                                            :: can_quick, fake_row_p, sort_data, spawn
773
774!   ---------------------------------------------------------------------------
775
776      CALL timeset(routineN, handle)
777
778      NULLIFY (old_blk_p, old_col_i, old_row_p)
779
780!$OMP     BARRIER
781      ! If the matrix is not marked as dirty then skip the work.
782      IF (dbcsr_valid_index(matrix)) THEN
783         !"No need to finalize a valid matrix, skipping."
784         !
785         ! A matrix with a valid index should not have associated work
786         ! arrays.  This may happen when this routine is called on a
787         ! matrix that was not changed.
788!$OMP        BARRIER
789!$OMP        MASTER
790         IF (ASSOCIATED(matrix%wms)) &
791            CALL dbcsr_work_destroy_all(matrix)
792         matrix%valid = .TRUE.
793!$OMP        END MASTER
794!$OMP        BARRIER
795         CALL timestop(handle)
796         RETURN
797      ENDIF
798      !
799      ! If possible, data copying is avoided.
800      IF (PRESENT(reshuffle)) THEN
801         sort_data = reshuffle
802      ELSE
803         sort_data = .FALSE.
804      ENDIF
805      !
806      ! Now make sure that a valid row_p exists. Also clear the row_p if
807      ! the matrix is declared to have 0 blocks.
808!$OMP     MASTER
809      fake_row_p = .NOT. ASSOCIATED(matrix%row_p)
810      IF (ASSOCIATED(matrix%row_p)) THEN
811         fake_row_p = SIZE(matrix%row_p) .LE. 1
812      ENDIF
813      fake_row_p = fake_row_p .OR. matrix%nblks .EQ. 0
814!$OMP     END MASTER
815      !
816      ! See where data will be appended in the main data
817      ! area. Alternatively, set to the start if the matrix is declared
818      ! to have no data. (This value is ignored if reshuffle is true
819      ! because the main data area is always new.)
820      start_offset = matrix%nze
821      i = dbcsr_get_data_size_used(matrix)
822!$OMP     MASTER
823      matrix%nze = 0
824!$OMP     END MASTER
825!$OMP     BARRIER
826!$OMP     ATOMIC
827      matrix%nze = matrix%nze + i
828!$OMP     BARRIER
829      IF (dbg) THEN
830         WRITE (*, *) routineN//" sizes", matrix%nze, i, &
831            dbcsr_data_get_size_referenced(matrix%data_area), &
832            dbcsr_data_get_size(matrix%data_area)
833      ENDIF
834      IF (.FALSE. .AND. dbcsr_data_get_size_referenced(matrix%data_area) .NE. &
835          matrix%nze) THEN
836         IF (matrix%nze .NE. dbcsr_data_get_size_referenced(matrix%data_area)) &
837            DBCSR_WARN("Should reshuffle.")
838         IF (ASSOCIATED(matrix%wms)) THEN
839            sort_data = .NOT. dbcsr_wm_use_mutable(matrix%wms(1))
840         ENDIF
841      ENDIF
842      IF (sort_data .AND. matrix%nze .GT. 0) THEN
843         CALL dbcsr_add_wm_from_matrix(matrix)
844         matrix%nze = 0
845!$OMP        MASTER
846         fake_row_p = .TRUE.
847!$OMP        END MASTER
848      ENDIF
849      start_offset = dbcsr_data_get_size_referenced(matrix%data_area) + 1
850      IF (matrix%nze .EQ. 0) start_offset = 1
851!$OMP     MASTER
852      matrix%index(dbcsr_slot_nze) = matrix%nze
853      IF (fake_row_p) THEN
854         ALLOCATE (empty_row_p(matrix%nblkrows_total + 1))
855         empty_row_p(:) = 0
856         CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, &
857                                      DATA=empty_row_p, extra=0)
858         CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, &
859                                      reservation=0)
860         CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, &
861                                      reservation=0)
862         CALL dbcsr_repoint_index(matrix)
863      ENDIF
864!$OMP     END MASTER
865      !
866!$OMP     BARRIER
867      can_quick = can_quickly_finalize(matrix)
868!$OMP     BARRIER
869      ! If the matrix, work matrices, and environment fit several
870      ! criteria, then a quick O(1) finalization is performed.
871      IF (can_quick .AND. .NOT. sort_data) THEN
872         CALL quick_finalize(matrix)
873      ELSE
874         !
875!$OMP        MASTER
876         !
877         ! Create work matrices if not yet existing
878         IF (.NOT. ASSOCIATED(matrix%wms)) THEN
879            nwms = 1
880!$          nwms = omp_get_num_threads()
881            CALL dbcsr_work_create(matrix, n=nwms)
882         ENDIF
883!$OMP        END MASTER
884!$OMP        BARRIER
885         !
886         ! Ensure index arrays at least exist.
887!$OMP        DO SCHEDULE (STATIC, 1)
888         DO i = 1, SIZE(matrix%wms)
889            IF (.NOT. ASSOCIATED(matrix%wms(i)%row_i)) THEN
890               CALL ensure_array_size(matrix%wms(i)%row_i, ub=0)
891            ENDIF
892            IF (.NOT. ASSOCIATED(matrix%wms(i)%col_i)) THEN
893               CALL ensure_array_size(matrix%wms(i)%col_i, ub=0)
894            ENDIF
895            IF (.NOT. ASSOCIATED(matrix%wms(i)%blk_p)) THEN
896               CALL ensure_array_size(matrix%wms(i)%blk_p, ub=0)
897            ENDIF
898         ENDDO
899!$OMP        ENDDO
900         !
901         ! Check for deleted blocks
902!$OMP        MASTER
903         nblks = matrix%row_p(matrix%nblkrows_total + 1)
904         IF (ANY(matrix%blk_p(1:nblks) .EQ. 0)) THEN
905            CALL dbcsr_index_prune_deleted(matrix)
906         ENDIF
907         old_row_p => matrix%row_p
908         old_col_i => matrix%col_i
909         old_blk_p => matrix%blk_p
910!$OMP        END MASTER
911         !
912!$OMP        BARRIER
913         ! Check to see if we will need to create a parallel environment
914         ! (needed when there are multiple work matrices but we are not
915         ! in an OpenMP parallel section.)
916         !
917         ! A parallel section is created is used when the matrix has
918         ! more work matrices. It's a shortcut when the finalize is
919         ! called from a non-parallel environment whereas the matrix was
920         ! built/modified in a parallel environment
921         nwms = SIZE(matrix%wms)
922         spawn = .FALSE.
923!$       IF (.NOT. OMP_IN_PARALLEL()) THEN
924!$          IF (nwms .GT. 1) spawn = .TRUE.
925!$       ENDIF
926         IF (spawn) THEN
927!$OMP           PARALLEL IF (spawn) &
928!$OMP                    DEFAULT (NONE) &
929!$OMP                    SHARED (matrix, old_row_p, old_col_i, old_blk_p,&
930!$OMP                            start_offset, sort_data)
931            CALL dbcsr_merge_all(matrix, &
932                                 old_row_p, old_col_i, old_blk_p, &
933                                 sort_data=sort_data)
934!$OMP           END PARALLEL
935         ELSE
936            CALL dbcsr_merge_all(matrix, &
937                                 old_row_p, old_col_i, old_blk_p, &
938                                 sort_data=sort_data)
939         ENDIF
940      ENDIF
941!$OMP BARRIER
942!$OMP MASTER
943      ! Clean up.
944      IF (ASSOCIATED(matrix%wms)) THEN
945         CALL dbcsr_work_destroy_all(matrix)
946      ENDIF
947      matrix%valid = .TRUE.
948!$OMP END MASTER
949!$OMP BARRIER
950      IF (dbg) THEN
951!$OMP        SINGLE
952         CALL dbcsr_verify_matrix(matrix)
953!$OMP        END SINGLE
954      ENDIF
955!$OMP MASTER
956      IF (fake_row_p) THEN
957         DEALLOCATE (empty_row_p)
958      ENDIF
959!$OMP END MASTER
960!$OMP BARRIER
961      CALL timestop(handle)
962   END SUBROUTINE dbcsr_finalize
963
964   SUBROUTINE dbcsr_special_finalize(matrix, reshuffle)
965      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
966      LOGICAL, INTENT(IN), OPTIONAL                      :: reshuffle
967
968      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_special_finalize', &
969                                     routineP = moduleN//':'//routineN
970      LOGICAL, PARAMETER                                 :: dbg = .FALSE.
971
972      INTEGER                                            :: handle
973      LOGICAL                                            :: can_quick, sort_data
974
975!   ---------------------------------------------------------------------------
976
977      CALL timeset(routineN, handle)
978
979      IF (matrix%nblks .NE. 0) &
980         DBCSR_ABORT("Optimized finalize requires empty matrix.")
981      IF (dbcsr_valid_index(matrix)) &
982         DBCSR_ABORT("Optimized finalize requires invalid matrix.")
983      IF (.NOT. ASSOCIATED(matrix%wms)) &
984         DBCSR_ABORT("Optimized finalize requires work matrices exist.")
985      IF (SIZE(matrix%wms) .NE. 1) &
986         DBCSR_ABORT("Optimized finalize requires single work matrix")
987
988      !
989      ! If possible, data copying is avoided.
990      IF (PRESENT(reshuffle)) THEN
991         sort_data = reshuffle
992      ELSE
993         sort_data = .FALSE.
994      ENDIF
995!$OMP     BARRIER
996      can_quick = can_quickly_finalize(matrix)
997!$OMP     BARRIER
998
999      ! If the matrix, work matrices, and environment fit several
1000      ! criteria, then a quick O(1) finalization is performed.
1001      IF (can_quick .AND. .NOT. sort_data) THEN
1002         CALL quick_finalize(matrix)
1003      ELSE
1004         IF (.NOT. sort_data) &
1005            DBCSR_ABORT("merge_single_wm only supports data sorting")
1006         !
1007         ! Ensure needed index arrays at least exist.
1008!$OMP        MASTER
1009         !
1010         IF (.NOT. ASSOCIATED(matrix%wms(1)%row_i)) THEN
1011            CALL ensure_array_size(matrix%wms(1)%row_i, ub=0)
1012         ENDIF
1013         IF (.NOT. ASSOCIATED(matrix%wms(1)%col_i)) THEN
1014            CALL ensure_array_size(matrix%wms(1)%col_i, ub=0)
1015         ENDIF
1016         IF (.NOT. ASSOCIATED(matrix%wms(1)%blk_p)) THEN
1017            CALL ensure_array_size(matrix%wms(1)%blk_p, ub=0)
1018         ENDIF
1019         !
1020!$OMP        END MASTER
1021!$OMP        BARRIER
1022         !
1023!$OMP        PARALLEL DEFAULT (NONE), SHARED(matrix)
1024         CALL dbcsr_merge_single_wm(matrix)
1025!$OMP        END PARALLEL
1026
1027      ENDIF
1028
1029!$OMP     MASTER
1030      ! Clean up.
1031      IF (ASSOCIATED(matrix%wms)) THEN
1032         CALL dbcsr_work_destroy_all(matrix)
1033      ENDIF
1034      matrix%valid = .TRUE.
1035!$OMP     END MASTER
1036!$OMP     BARRIER
1037      IF (dbg) THEN
1038!$OMP        SINGLE
1039         CALL dbcsr_verify_matrix(matrix)
1040!$OMP        END SINGLE
1041      ENDIF
1042!$OMP     BARRIER
1043      CALL timestop(handle)
1044   END SUBROUTINE dbcsr_special_finalize
1045
1046   FUNCTION can_quickly_finalize(matrix) RESULT(quick)
1047      !! Checks whether the matrix can be finalized with minimal copying.
1048
1049      TYPE(dbcsr_type), INTENT(IN)                       :: matrix
1050         !! matrix to check
1051      LOGICAL                                            :: quick
1052         !! whether matrix can be quickly finalized
1053
1054!   ---------------------------------------------------------------------------
1055
1056      IF (ASSOCIATED(matrix%wms)) THEN
1057         quick = matrix%nblks .EQ. 0
1058         quick = quick .AND. SIZE(matrix%wms) .EQ. 1 .AND. &
1059                 .NOT. dbcsr_wm_use_mutable(matrix%wms(1))
1060         IF (quick) THEN
1061            quick = quick .AND. &
1062                    dbcsr_memtype_equal( &
1063                    dbcsr_data_get_memory_type(matrix%wms(1)%data_area), &
1064                    dbcsr_data_get_memory_type(matrix%data_area))
1065            quick = quick .AND. &
1066                    ASSOCIATED(matrix%wms(1)%row_i)
1067            quick = quick .AND. &
1068                    (matrix%wms(1)%datasize_after_filtering .LT. 0 .OR. &
1069                     matrix%wms(1)%datasize .EQ. matrix%wms(1)%datasize_after_filtering)
1070         ENDIF
1071      ELSE
1072         quick = .FALSE.
1073      ENDIF
1074   END FUNCTION can_quickly_finalize
1075
1076   SUBROUTINE quick_finalize(matrix)
1077      !! Performs quick finalization of matrix
1078      !! The data area from the work matrix is accepted as the new matrix's data
1079      !! area and the index is built from the work matrix.
1080
1081      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
1082         !! matrix to finalize
1083
1084      CHARACTER(len=*), PARAMETER :: routineN = 'quick_finalize', routineP = moduleN//':'//routineN
1085
1086      INTEGER                                            :: error_handle, nblks, nrows
1087
1088!   ---------------------------------------------------------------------------
1089
1090      CALL timeset(routineN, error_handle)
1091!$OMP     SECTIONS
1092!$OMP     SECTION
1093      nblks = matrix%wms(1)%lastblk
1094      nrows = matrix%nblkrows_total
1095      CALL dbcsr_sort_indices(nblks, &
1096                              matrix%wms(1)%row_i, &
1097                              matrix%wms(1)%col_i, &
1098                              matrix%wms(1)%blk_p)
1099      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p)
1100      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i)
1101      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p)
1102      CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, &
1103                                   reservation=nrows + 1, extra=2*nblks)
1104      CALL dbcsr_make_dbcsr_index(matrix%row_p, matrix%wms(1)%row_i, &
1105                                  nrows, nblks)
1106      CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, &
1107                                   DATA=matrix%wms(1)%col_i(1:nblks))
1108      CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, &
1109                                   DATA=matrix%wms(1)%blk_p(1:nblks))
1110      matrix%nblks = nblks
1111      matrix%nze = matrix%wms(1)%datasize
1112      matrix%index(dbcsr_slot_nblks) = nblks
1113      matrix%index(dbcsr_slot_nze) = matrix%wms(1)%datasize
1114      CALL dbcsr_repoint_index(matrix)
1115!$OMP     SECTION
1116      CALL dbcsr_switch_data_area(matrix, matrix%wms(1)%data_area)
1117!$OMP     END SECTIONS
1118      CALL timestop(error_handle)
1119   END SUBROUTINE quick_finalize
1120
1121   SUBROUTINE dbcsr_add_wm_from_matrix(matrix, limits)
1122      !! Creates a work matrix from the data present in a finalized matrix.
1123
1124      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
1125         !! DBCSR matrix
1126      INTEGER, DIMENSION(4), INTENT(IN), OPTIONAL        :: limits
1127         !! the limits to use for copying
1128
1129      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_add_wm_from_matrix', &
1130                                     routineP = moduleN//':'//routineN
1131
1132      INTEGER                                            :: handle, ithread, nthreads, nwms, &
1133                                                            old_nwms, size_used
1134      LOGICAL                                            :: preexists
1135
1136!   ---------------------------------------------------------------------------
1137
1138      CALL timeset(routineN, handle)
1139!$OMP     BARRIER
1140      preexists = ASSOCIATED(matrix%wms)
1141      IF (preexists) THEN
1142         old_nwms = SIZE(matrix%wms)
1143         IF (old_nwms .EQ. 0) &
1144            DBCSR_WARN("Nonexisting work matrices?!")
1145      ELSE
1146         old_nwms = 0
1147      ENDIF
1148      nthreads = 1; ithread = 0
1149!$    nthreads = OMP_GET_NUM_THREADS(); ithread = OMP_GET_THREAD_NUM()
1150      IF (nthreads .GT. 1) THEN
1151         IF (old_nwms /= nthreads .AND. old_nwms /= 0) &
1152            DBCSR_ABORT("Number of work matrices and threads do not match")
1153      ENDIF
1154      nwms = MAX(1, old_nwms)
1155!$    nwms = MAX(nwms, nthreads)
1156!$OMP     BARRIER
1157!$OMP     MASTER
1158      IF (.NOT. ASSOCIATED(matrix%wms)) THEN
1159         CALL dbcsr_work_create(matrix, &
1160                                INT(matrix%nblks*default_resize_factor/nwms), &
1161                                INT(matrix%nze*default_resize_factor/nwms), &
1162                                n=nwms, work_mutable=.FALSE.)
1163      ENDIF
1164!$OMP     END MASTER
1165!$OMP     BARRIER
1166      size_used = matrix%nze
1167      CALL dbcsr_fill_wm_from_matrix(matrix%wms, matrix, size_used, &
1168                                     limits=limits)
1169!$OMP     BARRIER
1170      CALL timestop(handle)
1171   END SUBROUTINE dbcsr_add_wm_from_matrix
1172
1173   SUBROUTINE dbcsr_fill_wm_from_matrix(wm, matrix, size_used, limits)
1174      !! Fills index and data of the work matrix from the
1175      !! previously-finalized one.
1176      !!
1177      !! limits
1178      !! The limits is a 4-tuple
1179      !! (lower_row, higher_row, lower_column, higher_column).
1180
1181      TYPE(dbcsr_work_type), DIMENSION(:), INTENT(INOUT) :: wm
1182         !! the work matrix to fill
1183      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
1184         !! DBCSR matrix
1185      INTEGER, INTENT(IN)                                :: size_used
1186      INTEGER, DIMENSION(4), INTENT(IN), OPTIONAL        :: limits
1187         !! only fills blocks within this range
1188
1189      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_fill_wm_from_matrix', &
1190                                     routineP = moduleN//':'//routineN
1191
1192      INTEGER                                            :: blk, blk_p, col, handle, ithread, &
1193                                                            nthreads, nwms, nze, row, wblk_p, &
1194                                                            which_wm, wm_first, wm_last
1195      LOGICAL                                            :: careful, limit, mt, tr
1196      LOGICAL, SAVE                                      :: mutable
1197      TYPE(dbcsr_data_obj)                               :: data_block
1198      TYPE(dbcsr_iterator)                               :: iter
1199
1200!   ---------------------------------------------------------------------------
1201
1202      CALL timeset(routineN, handle)
1203      nwms = SIZE(matrix%wms)
1204      mt = .FALSE.
1205!$    IF (nwms .GT. 1) mt = omp_get_num_threads() .GT. 1
1206      ithread = 0; nthreads = 1
1207!$    ithread = omp_get_thread_num()
1208!$    nthreads = omp_get_num_threads()
1209      limit = PRESENT(limits)
1210      careful = size_used + size_used/8 &
1211                .LT. dbcsr_data_get_size_referenced(matrix%data_area)
1212      CALL dbcsr_data_init(data_block)
1213      CALL dbcsr_data_new(data_block, dbcsr_data_get_type(matrix%data_area))
1214      IF (mt) THEN
1215         wm_first = ithread + 1
1216         wm_last = ithread + 1
1217      ELSE
1218         wm_first = 1
1219         wm_last = nwms
1220      ENDIF
1221      ! Prepares the work matrices to accept the main data.
1222!$OMP     MASTER
1223      mutable = .FALSE.
1224      DO which_wm = 1, nwms
1225         mutable = mutable .OR. dbcsr_wm_use_mutable(wm(which_wm))
1226      ENDDO
1227!$OMP     END MASTER
1228!$OMP     BARRIER
1229      DO which_wm = wm_first, wm_last
1230         IF (dbcsr_wm_use_mutable(wm(which_wm))) &
1231            DBCSR_ABORT("Adding main matrix into mutable not supported.")
1232         IF (mutable) THEN
1233            IF (.NOT. dbcsr_mutable_instantiated(wm(which_wm)%mutable)) THEN
1234               CALL dbcsr_mutable_new(wm(which_wm)%mutable, matrix%data_type)
1235            ENDIF
1236         ELSE
1237            ! We don't know how much data we'll get so we have to be generous.
1238            CALL dbcsr_data_ensure_size(wm(which_wm)%data_area, &
1239                                        size_used/nwms)
1240            CALL dbcsr_data_set_size_referenced(wm(which_wm)%data_area, 0)
1241         ENDIF
1242      ENDDO
1243      ! Now copy the data
1244      CALL dbcsr_iterator_start(iter, matrix, shared=mt, &
1245                                contiguous_pointers=.FALSE., read_only=.TRUE.)
1246      DO WHILE (dbcsr_iterator_blocks_left(iter))
1247         CALL dbcsr_iterator_next_block(iter, row, col, data_block, &
1248                                        transposed=tr, block_number=blk)
1249         IF (limit) THEN
1250            IF (.NOT. within_limits(row, col, limits)) CYCLE
1251         ENDIF
1252         blk_p = matrix%blk_p(blk)
1253         which_wm = ithread + 1
1254         wblk_p = SIGN(wm(which_wm)%datasize + 1, blk_p)
1255         nze = dbcsr_data_get_size(data_block)
1256         IF (mt .OR. limit .OR. careful .OR. mutable) THEN
1257            ! The data gets copied block by block so the block pointers
1258            ! are ordered accordingly.
1259            IF (.NOT. mutable) THEN
1260               CALL add_work_coordinate(wm(which_wm), row, col, wblk_p)
1261               CALL dbcsr_data_ensure_size(wm(which_wm)%data_area, &
1262                                           ABS(wblk_p) + nze - 1, factor=default_resize_factor)
1263               CALL dbcsr_data_set_size_referenced(wm(which_wm)%data_area, &
1264                                                   ABS(wblk_p) + nze - 1)
1265               CALL dbcsr_data_set(wm(which_wm)%data_area, &
1266                                   lb=ABS(wblk_p), &
1267                                   data_size=nze, &
1268                                   src=data_block, source_lb=1)
1269            ENDIF
1270         ELSE
1271            ! The data gets copied all at once so the block pointers
1272            ! should remain the same as they were.
1273            CALL add_work_coordinate(wm(which_wm), row, col, blk_p)
1274         ENDIF
1275         IF (.NOT. mutable) &
1276            wm(which_wm)%datasize = wm(which_wm)%datasize + nze
1277      ENDDO
1278      CALL dbcsr_iterator_stop(iter)
1279      CALL dbcsr_data_clear_pointer(data_block)
1280      CALL dbcsr_data_release(data_block)
1281      ! Copy all blocks at once
1282      IF (.NOT. mt .AND. .NOT. limit .AND. .NOT. careful .AND. .NOT. mutable) THEN
1283         DO which_wm = 1, nwms
1284            CALL dbcsr_data_ensure_size(wm(which_wm)%data_area, &
1285                                        dbcsr_data_get_size_referenced(matrix%data_area))
1286            CALL dbcsr_data_copyall(wm(which_wm)%data_area, matrix%data_area)
1287            wm(which_wm)%datasize = size_used
1288         ENDDO
1289      ENDIF
1290      CALL timestop(handle)
1291   END SUBROUTINE dbcsr_fill_wm_from_matrix
1292
1293   PURE FUNCTION within_limits(row, column, limits)
1294      !! Checks whether a point is within bounds
1295      !! \return whether the point is within the bounds
1296
1297      INTEGER, INTENT(IN)                                :: row, column
1298         !! point to check
1299         !! point to check
1300      INTEGER, DIMENSION(4), INTENT(IN)                  :: limits
1301         !! limits (low_row, high_row, low_col, high_col)
1302      LOGICAL                                            :: within_limits
1303
1304      within_limits = row .GE. limits(1) .AND. row .LE. limits(2) .AND. &
1305                      column .GE. limits(3) .AND. column .LE. limits(4)
1306   END FUNCTION within_limits
1307
1308   SUBROUTINE dbcsr_work_destroy(wm)
1309      !! Deallocates and destroys a work matrix.
1310
1311      TYPE(dbcsr_work_type), INTENT(INOUT)               :: wm
1312         !! work matrix
1313
1314      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_work_destroy', &
1315                                     routineP = moduleN//':'//routineN
1316
1317      INTEGER                                            :: handle
1318
1319!   ---------------------------------------------------------------------------
1320
1321      IF (careful_mod) CALL timeset(routineN, handle)
1322
1323      IF (ASSOCIATED(wm%row_i)) THEN
1324         DEALLOCATE (wm%row_i)
1325      ENDIF
1326      IF (ASSOCIATED(wm%col_i)) THEN
1327         DEALLOCATE (wm%col_i)
1328      ENDIF
1329      IF (ASSOCIATED(wm%blk_p)) THEN
1330         DEALLOCATE (wm%blk_p)
1331      ENDIF
1332      CALL dbcsr_data_release(wm%data_area)
1333      CALL dbcsr_mutable_destroy(wm%mutable)
1334
1335      IF (careful_mod) CALL timestop(handle)
1336
1337   END SUBROUTINE dbcsr_work_destroy
1338
1339   SUBROUTINE dbcsr_work_destroy_all(m)
1340      !! Deallocates and destroys a work matrix.
1341      TYPE(dbcsr_type), INTENT(INOUT)                    :: m
1342
1343      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_work_destroy_all', &
1344                                     routineP = moduleN//':'//routineN
1345
1346      INTEGER                                            :: handle, i
1347
1348!   ---------------------------------------------------------------------------
1349
1350      CALL timeset(routineN, handle)
1351
1352      IF (.NOT. ASSOCIATED(m%wms)) &
1353         DBCSR_WARN("Want to destroy nonexisting work matrices.")
1354      IF (ASSOCIATED(m%wms)) THEN
1355         DO i = 1, SIZE(m%wms)
1356            CALL dbcsr_work_destroy(m%wms(i))
1357         ENDDO
1358         DEALLOCATE (m%wms)
1359      ENDIF
1360
1361      CALL timestop(handle)
1362
1363   END SUBROUTINE dbcsr_work_destroy_all
1364
1365   SUBROUTINE add_work_coordinate(matrix, row, col, blk, index)
1366      !! Adds a coordinate (or other data) into a work matrix's row_i and
1367      !! col_i arrays and returns its position.
1368      !! @note  Uses the matrix%lastblk to keep track of the current position.
1369
1370      TYPE(dbcsr_work_type), INTENT(INOUT)               :: matrix
1371         !! work matrix
1372      INTEGER, INTENT(IN)                                :: row, col
1373         !! row data to add
1374         !! col data to add
1375      INTEGER, INTENT(IN), OPTIONAL                      :: blk
1376         !! block pointer to add
1377      INTEGER, INTENT(OUT), OPTIONAL                     :: index
1378         !! saved position
1379
1380      CHARACTER(len=*), PARAMETER :: routineN = 'add_work_coordinate', &
1381                                     routineP = moduleN//':'//routineN
1382
1383      INTEGER                                            :: handle
1384
1385!   ---------------------------------------------------------------------------
1386
1387      IF (careful_mod) CALL timeset(routineN, handle)
1388      matrix%lastblk = matrix%lastblk + 1
1389      CALL ensure_array_size(matrix%row_i, ub=matrix%lastblk, &
1390                             factor=default_resize_factor)
1391      CALL ensure_array_size(matrix%col_i, ub=matrix%lastblk, &
1392                             factor=default_resize_factor)
1393      matrix%row_i(matrix%lastblk) = row
1394      matrix%col_i(matrix%lastblk) = col
1395      IF (PRESENT(blk)) THEN
1396         CALL ensure_array_size(matrix%blk_p, ub=matrix%lastblk, &
1397                                factor=default_resize_factor)
1398         matrix%blk_p(matrix%lastblk) = blk
1399      ENDIF
1400      IF (PRESENT(index)) index = matrix%lastblk
1401      IF (careful_mod) CALL timestop(handle)
1402   END SUBROUTINE add_work_coordinate
1403
1404   SUBROUTINE dbcsr_merge_all(matrix, old_row_p, old_col_i, old_blk_p, &
1405                              sort_data)
1406      !! Merge data from matrix and work matrices into the final matrix.
1407
1408      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
1409         !! matrix to work on
1410      INTEGER, DIMENSION(*), INTENT(IN)                  :: old_row_p, old_col_i, old_blk_p
1411      LOGICAL, INTENT(IN)                                :: sort_data
1412         !! whether data will be fully sorted
1413
1414      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_merge_all', &
1415                                     routineP = moduleN//':'//routineN
1416      LOGICAL, PARAMETER                                 :: dbg = .FALSE.
1417
1418      INTEGER                                            :: handle, my_row_count, nblks, &
1419                                                            nrows, nwms, row, t, &
1420                                                            which_wm, wm_datasize
1421      INTEGER, ALLOCATABLE, DIMENSION(:), SAVE           :: all_data_offsets, all_data_sizes, &
1422                                                            new_blk_p_sorted, new_blk_sizes, &
1423                                                            new_row_p
1424      INTEGER, ALLOCATABLE, DIMENSION(:), SAVE, TARGET   :: blk_d, new_blk_p, new_col_i
1425      INTEGER, DIMENSION(:), CONTIGUOUS, POINTER         :: my_row_p
1426      INTEGER, DIMENSION(:), POINTER, CONTIGUOUS, SAVE   :: cbs, rbs
1427      INTEGER, SAVE                                      :: max_row_count, new_nblks = 0, new_nze = 0
1428      TYPE(dbcsr_data_obj), ALLOCATABLE, DIMENSION(:), &
1429         SAVE                                            :: all_data_areas
1430      TYPE(dbcsr_work_type), POINTER                     :: wm
1431      TYPE(i_array_p), DIMENSION(:), POINTER, SAVE       :: all_blk_p, all_col_i, all_row_p
1432
1433!   ---------------------------------------------------------------------------
1434
1435      CALL timeset(routineN, handle)
1436
1437      ! Outline:
1438      ! Each thread has a work matrix.  These must be merged and made
1439      ! into a new index.  If sort_data is False, then the data areas
1440      ! are simply appended.  This is probably quicker but the data
1441      ! blocks are not in order and the data size may expand beyond
1442      ! what is needed.  If sort_data is True, then data blocks are
1443      ! sorted in order.
1444
1445      IF (dbg) WRITE (*, *) routineN//" starting, sort?", sort_data
1446!$OMP     BARRIER
1447      nrows = matrix%nblkrows_total
1448      nwms = SIZE(matrix%wms)
1449!$    IF (nwms /= OMP_GET_NUM_THREADS()) &
1450!$       DBCSR_ABORT("Number of threads does not match number of work matrices.")
1451      which_wm = 1
1452!$    which_wm = OMP_GET_THREAD_NUM() + 1
1453      wm => matrix%wms(which_wm)
1454      !
1455      ! Currently B-tree-based work matrices are converted to array form.
1456      IF (dbcsr_wm_use_mutable(wm)) THEN
1457         SELECT CASE (wm%mutable%m%data_type)
1458         CASE (dbcsr_type_real_4)
1459            CALL tree_to_linear_s(wm)
1460         CASE (dbcsr_type_real_8)
1461            CALL tree_to_linear_d(wm)
1462         CASE (dbcsr_type_complex_4)
1463            CALL tree_to_linear_c(wm)
1464         CASE (dbcsr_type_complex_8)
1465            CALL tree_to_linear_z(wm)
1466         CASE default
1467            DBCSR_ABORT("Invalid data type")
1468         END SELECT
1469      ENDIF
1470      !
1471      ! Initializations and some counts from the threads are summed.
1472      !
1473!$OMP     MASTER
1474      new_nblks = old_row_p(nrows + 1)
1475      new_nze = matrix%nze
1476      ALLOCATE (all_row_p(nwms))
1477      ALLOCATE (all_col_i(nwms))
1478      ALLOCATE (all_blk_p(nwms))
1479      ALLOCATE (all_data_sizes(0:nwms))
1480      ALLOCATE (all_data_offsets(nwms))
1481      IF (sort_data) ALLOCATE (all_data_areas(0:nwms))
1482      IF (sort_data) THEN
1483         CALL dbcsr_data_init(all_data_areas(0))
1484         all_data_areas(0) = matrix%data_area
1485!$OMP        CRITICAL (crit_data)
1486         CALL dbcsr_data_hold(all_data_areas(0))
1487!$OMP        END CRITICAL (crit_data)
1488      ENDIF
1489      ! The following is valid because the data actually referenced
1490      ! by blocks is explicitly calculated in dbcsr_finalize.
1491      all_data_sizes(0) = matrix%nze
1492      rbs => array_data(matrix%row_blk_size)
1493      cbs => array_data(matrix%col_blk_size)
1494!$OMP     END MASTER
1495      !
1496!$OMP     BARRIER
1497      IF (sort_data .AND. wm%datasize_after_filtering .GE. 0) THEN
1498         wm_datasize = wm%datasize_after_filtering
1499      ELSE
1500         wm_datasize = wm%datasize
1501      ENDIF
1502      nblks = wm%lastblk
1503!$OMP     ATOMIC
1504      new_nblks = new_nblks + nblks
1505!$OMP     ATOMIC
1506      new_nze = new_nze + wm_datasize
1507!$OMP     BARRIER
1508      !
1509!$OMP     MASTER
1510      ALLOCATE (new_row_p(nrows + 1))
1511      ALLOCATE (new_col_i(new_nblks))
1512      ALLOCATE (new_blk_p(new_nblks))
1513      IF (sort_data) THEN
1514         ALLOCATE (blk_d(new_nblks))
1515      ELSE
1516         ALLOCATE (blk_d(1))
1517      ENDIF
1518!$OMP     END MASTER
1519      !
1520      ! Each thread creates a row_p index for its new blocks.  This gives the
1521      ! number of new blocks per thread per row.
1522      CALL dbcsr_sort_indices(nblks, wm%row_i, wm%col_i, wm%blk_p)
1523      ALLOCATE (my_row_p(nrows + 1))
1524      CALL dbcsr_make_dbcsr_index(my_row_p, wm%row_i, nrows, nblks)
1525      !
1526      ! The threads must share their row_p, col_i, blk_p, and data areas
1527      ! among themselves.
1528      all_row_p(which_wm)%p => my_row_p
1529      all_data_sizes(which_wm) = wm_datasize
1530      IF (.NOT. sort_data) THEN
1531         IF (wm_datasize > dbcsr_data_get_size_referenced(wm%data_area)) &
1532            DBCSR_ABORT("Data size mismatch.")
1533      ENDIF
1534      all_col_i(which_wm)%p => wm%col_i
1535      all_blk_p(which_wm)%p => wm%blk_p
1536!$OMP     BARRIER
1537      IF (sort_data) THEN
1538!$OMP        MASTER
1539         all_data_offsets(:) = 0
1540!$OMP        END MASTER
1541         CALL dbcsr_data_init(all_data_areas(which_wm))
1542         all_data_areas(which_wm) = wm%data_area
1543!$OMP        CRITICAL (crit_data)
1544         CALL dbcsr_data_hold(all_data_areas(which_wm))
1545!$OMP        END CRITICAL (crit_data)
1546      ELSE
1547!$OMP        MASTER
1548         all_data_offsets(1) = all_data_sizes(0)
1549         DO t = 2, nwms
1550            all_data_offsets(t) = all_data_offsets(t - 1) + all_data_sizes(t - 1)
1551         ENDDO
1552!$OMP        END MASTER
1553      ENDIF
1554      !
1555      ! The new row counts are created, then the new row_p index is created.
1556      !
1557!$OMP     DO
1558      DO row = 1, nrows
1559         my_row_count = old_row_p(row + 1) - old_row_p(row)
1560         DO t = 1, nwms
1561            my_row_count = my_row_count &
1562                 &       + all_row_p(t)%p(row + 1) - all_row_p(t)%p(row)
1563         ENDDO
1564         new_row_p(row) = my_row_count
1565      ENDDO
1566!$OMP     END DO
1567!$OMP     MASTER
1568      max_row_count = MAXVAL(new_row_p(1:nrows))
1569      CALL dbcsr_build_row_index(new_row_p, nrows)
1570!$OMP     END MASTER
1571!$OMP     BARRIER
1572      !
1573      ! The new index is built
1574      CALL merge_index(new_row_p, new_col_i, new_blk_p, &
1575                       blk_d, &
1576                       old_row_p, old_col_i, old_blk_p, &
1577                       all_row_p, all_col_i, all_blk_p, &
1578                       all_data_offsets, nwms, nrows, max_row_count, sort_data)
1579      !
1580      ! The data is then merged in one of two ways.
1581      !
1582!$OMP     MASTER
1583      IF (sort_data) THEN
1584         ! The matrix gets a fresh data area.  Blocks from the work
1585         ! matrices will be copied into it in order.
1586         !
1587!$OMP        CRITICAL (crit_data)
1588         CALL dbcsr_data_release(matrix%data_area)
1589         CALL dbcsr_data_init(matrix%data_area)
1590!$OMP        END CRITICAL (crit_data)
1591         CALL dbcsr_data_new(matrix%data_area, &
1592                             data_size=new_nze, &
1593                             data_type=dbcsr_data_get_type(all_data_areas(0)), &
1594                             memory_type=dbcsr_data_get_memory_type(all_data_areas(0)))
1595         ALLOCATE (new_blk_p_sorted(new_nblks))
1596         ALLOCATE (new_blk_sizes(new_nblks))
1597      ELSE
1598         ! Data stored in the work matrices will be just appended to the
1599         ! current data area.
1600         CALL dbcsr_data_ensure_size(matrix%data_area, new_nze)
1601      ENDIF
1602!$OMP     END MASTER
1603!$OMP     BARRIER
1604      IF (sort_data) THEN
1605         CALL dbcsr_calc_block_sizes(new_blk_sizes, &
1606                                     new_row_p, new_col_i, rbs, cbs)
1607         CALL dbcsr_sort_data(new_blk_p_sorted, new_blk_p, &
1608                              new_blk_sizes, dsts=matrix%data_area, &
1609                              src=all_data_areas(0), &
1610                              srcs=all_data_areas, old_blk_d=blk_d)
1611      ELSE
1612         IF (all_data_sizes(which_wm) .GT. 0) THEN
1613            CALL dbcsr_data_copy(dst=matrix%data_area, &
1614                                 dst_lb=(/all_data_offsets(which_wm) + 1/), &
1615                                 dst_sizes=(/all_data_sizes(which_wm)/), &
1616                                 src=wm%data_area, &
1617                                 src_lb=(/1/), &
1618                                 src_sizes=(/all_data_sizes(which_wm)/))
1619         ENDIF
1620      ENDIF
1621      !
1622      ! Creates a new index array.
1623      !
1624!$OMP     BARRIER
1625!$OMP     MASTER
1626      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p)
1627      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i)
1628      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p)
1629      CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, &
1630                                   DATA=new_row_p(1:nrows + 1), extra=new_nblks*2)
1631      CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, &
1632                                   DATA=new_col_i(1:new_nblks))
1633      IF (sort_data) THEN
1634         CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, &
1635                                      DATA=new_blk_p_sorted)
1636      ELSE
1637         CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, &
1638                                      DATA=new_blk_p)
1639      ENDIF
1640      matrix%nblks = new_nblks
1641      matrix%nze = new_nze
1642      matrix%index(dbcsr_slot_nblks) = matrix%nblks
1643      matrix%index(dbcsr_slot_nze) = matrix%nze
1644      CALL dbcsr_repoint_index(matrix)
1645!$OMP     END MASTER
1646      !
1647!$OMP     BARRIER
1648      !
1649      DEALLOCATE (my_row_p)
1650      IF (sort_data) THEN
1651!$OMP        MASTER
1652!$OMP        CRITICAL (crit_data)
1653         CALL dbcsr_data_release(all_data_areas(0))
1654!$OMP        END CRITICAL (crit_data)
1655         DO which_wm = 1, nwms
1656!$OMP           CRITICAL (crit_data)
1657            CALL dbcsr_data_release(all_data_areas(which_wm))
1658!$OMP           END CRITICAL (crit_data)
1659         ENDDO
1660!$OMP        END MASTER
1661      ENDIF
1662!$OMP     BARRIER
1663!$OMP     MASTER
1664      DEALLOCATE (all_row_p)
1665      DEALLOCATE (all_col_i)
1666      DEALLOCATE (all_blk_p)
1667      DEALLOCATE (new_row_p)
1668      DEALLOCATE (new_col_i)
1669      DEALLOCATE (new_blk_p)
1670      DEALLOCATE (all_data_sizes)
1671      DEALLOCATE (all_data_offsets)
1672      IF (sort_data) THEN
1673         DEALLOCATE (all_data_areas)
1674         DEALLOCATE (new_blk_sizes)
1675         DEALLOCATE (new_blk_p_sorted)
1676      ENDIF
1677      DEALLOCATE (blk_d)
1678!$OMP     END MASTER
1679!$OMP     BARRIER
1680      IF (dbg) WRITE (*, *) routineN//" stopped"
1681      CALL timestop(handle)
1682   END SUBROUTINE dbcsr_merge_all
1683
1684   SUBROUTINE dbcsr_merge_single_wm(matrix)
1685      !! Sort data from the WM into the final matrix, based closely on dbcsr_merge_all
1686
1687      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
1688         !! matrix to work on
1689
1690      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_merge_single_wm', &
1691                                     routineP = moduleN//':'//routineN
1692      LOGICAL, PARAMETER                                 :: dbg = .FALSE.
1693
1694      INTEGER                                            :: handle, nblks, nrows
1695      INTEGER, ALLOCATABLE, DIMENSION(:), SAVE           :: new_blk_p_sorted, new_blk_sizes, &
1696                                                            new_row_p
1697      INTEGER, ALLOCATABLE, DIMENSION(:), SAVE, TARGET   :: blk_d
1698      INTEGER, DIMENSION(:), POINTER, CONTIGUOUS, SAVE   :: cbs, rbs
1699      TYPE(dbcsr_work_type), POINTER                     :: wm
1700
1701!   ---------------------------------------------------------------------------
1702
1703      CALL timeset(routineN, handle)
1704
1705      ! Outline:
1706      ! There is a single work matrix.  Data blocks are sorted and copied
1707      ! into the matrix data area (which is empty).  The index is made consistent
1708
1709      nrows = matrix%nblkrows_total
1710      wm => matrix%wms(1)
1711      nblks = wm%lastblk
1712      IF (dbcsr_wm_use_mutable(wm)) &
1713         DBCSR_ABORT("Number of threads does not match number of work matrices.")
1714      !
1715      ! The following is valid because the data actually referenced
1716      ! by blocks is explicitly calculated in dbcsr_finalize.
1717!$OMP     MASTER
1718      rbs => array_data(matrix%row_blk_size)
1719      cbs => array_data(matrix%col_blk_size)
1720      !
1721      ! Initializations
1722      !
1723      ALLOCATE (new_row_p(nrows + 1))
1724      ALLOCATE (blk_d(nblks))
1725      ALLOCATE (new_blk_p_sorted(nblks))
1726      ALLOCATE (new_blk_sizes(nblks))
1727      !
1728      ! Master thread creates a row_p index for the (sorted) blocks.
1729      CALL dbcsr_sort_indices(nblks, wm%row_i, wm%col_i, wm%blk_p)
1730      CALL dbcsr_make_dbcsr_index(new_row_p, wm%row_i, nrows, nblks)
1731      !
1732      !
1733      ! The matrix data area is resized.  Blocks from the work
1734      ! matrices will be copied into it in order.
1735      !
1736      CALL dbcsr_data_ensure_size(matrix%data_area, wm%datasize, nocopy=.TRUE.)
1737!$OMP     END MASTER
1738!$OMP     BARRIER
1739      CALL dbcsr_calc_block_sizes(new_blk_sizes, &
1740                                  new_row_p, wm%col_i, rbs, cbs)
1741      CALL dbcsr_sort_data(new_blk_p_sorted, wm%blk_p, &
1742                           new_blk_sizes, dsts=matrix%data_area, &
1743                           src=wm%data_area)
1744      !
1745      ! Creates a new index array.
1746      !
1747!$OMP     BARRIER
1748!$OMP     MASTER
1749      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p)
1750      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i)
1751      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p)
1752      CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, &
1753                                   DATA=new_row_p(1:nrows + 1), extra=nblks*2)
1754      CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, &
1755                                   DATA=wm%col_i(1:nblks))
1756      CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, &
1757                                   DATA=new_blk_p_sorted)
1758      matrix%nblks = nblks
1759      matrix%nze = wm%datasize
1760      matrix%index(dbcsr_slot_nblks) = matrix%nblks
1761      matrix%index(dbcsr_slot_nze) = matrix%nze
1762      CALL dbcsr_repoint_index(matrix)
1763      DEALLOCATE (new_row_p)
1764      DEALLOCATE (new_blk_sizes)
1765      DEALLOCATE (new_blk_p_sorted)
1766      DEALLOCATE (blk_d)
1767!$OMP     END MASTER
1768      IF (dbg) WRITE (*, *) routineN//" stopped"
1769      CALL timestop(handle)
1770   END SUBROUTINE dbcsr_merge_single_wm
1771
1772   SUBROUTINE merge_index(new_row_p, new_col_i, new_blk_p, &
1773      !! Builds a new index from several work matrices.
1774                          blk_d, old_row_p, old_col_i, old_blk_p, &
1775                          all_row_p, all_col_i, all_blk_p, &
1776                          all_data_offsets, nwms, nrows, max_row_count, sort_data)
1777      INTEGER, DIMENSION(*), INTENT(IN)                  :: new_row_p
1778      INTEGER, DIMENSION(*), INTENT(OUT), TARGET         :: new_col_i, new_blk_p
1779      INTEGER, DIMENSION(*), INTENT(IN), TARGET          :: blk_d
1780      INTEGER, DIMENSION(*), INTENT(IN)                  :: old_row_p, old_col_i, old_blk_p
1781      TYPE(i_array_p), DIMENSION(*), INTENT(IN)          :: all_row_p, all_col_i, all_blk_p
1782      INTEGER, DIMENSION(*), INTENT(IN)                  :: all_data_offsets
1783      INTEGER, INTENT(IN)                                :: nwms, nrows, max_row_count
1784      LOGICAL, INTENT(IN)                                :: sort_data
1785
1786      CHARACTER(len=*), PARAMETER :: routineN = 'merge_index', routineP = moduleN//':'//routineN
1787
1788      INTEGER                                            :: blk1, blk2, error_handle, first_row_blk, &
1789                                                            last_row_blk, row, src_blk_1, &
1790                                                            src_blk_2, t
1791      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: blk_p_buff, tmp_arr
1792      INTEGER, DIMENSION(:), POINTER, CONTIGUOUS         :: blk_span, col_span, d_span
1793
1794!   ---------------------------------------------------------------------------
1795
1796      CALL timeset(routineN, error_handle)
1797      !
1798      ALLOCATE (tmp_arr(max_row_count))
1799      ALLOCATE (blk_p_buff(max_row_count))
1800      !
1801!$OMP     DO
1802      DO row = 1, nrows
1803         first_row_blk = new_row_p(row) + 1
1804         last_row_blk = new_row_p(row + 1)
1805         col_span => new_col_i(first_row_blk:last_row_blk)
1806         blk_span => new_blk_p(first_row_blk:last_row_blk)
1807         IF (sort_data) d_span => blk_d(first_row_blk:last_row_blk)
1808         src_blk_1 = old_row_p(row) + 1
1809         src_blk_2 = old_row_p(row + 1)
1810         blk1 = 1
1811         blk2 = blk1 + (src_blk_2 - src_blk_1 + 1) - 1
1812         col_span(blk1:blk2) = old_col_i(src_blk_1:src_blk_2)
1813         blk_span(blk1:blk2) = old_blk_p(src_blk_1:src_blk_2)
1814         IF (sort_data) THEN
1815            d_span(blk1:blk2) = 1
1816            DO t = 1, nwms
1817               src_blk_1 = all_row_p(t)%p(row) + 1
1818               src_blk_2 = all_row_p(t)%p(row + 1)
1819               blk1 = blk2 + 1
1820               blk2 = blk1 + (src_blk_2 - src_blk_1 + 1) - 1
1821               col_span(blk1:blk2) = all_col_i(t)%p(src_blk_1:src_blk_2)
1822               blk_span(blk1:blk2) = all_blk_p(t)%p(src_blk_1:src_blk_2)
1823               d_span(blk1:blk2) = t + 1
1824            ENDDO
1825         ELSE
1826            DO t = 1, nwms
1827               src_blk_1 = all_row_p(t)%p(row) + 1
1828               src_blk_2 = all_row_p(t)%p(row + 1)
1829               blk1 = blk2 + 1
1830               blk2 = blk1 + (src_blk_2 - src_blk_1 + 1) - 1
1831               col_span(blk1:blk2) = all_col_i(t)%p(src_blk_1:src_blk_2)
1832               blk_span(blk1:blk2) = all_blk_p(t)%p(src_blk_1:src_blk_2) &
1833                    &              + all_data_offsets(t)
1834            ENDDO
1835         ENDIF
1836         CALL sort(col_span, SIZE(col_span), tmp_arr)
1837         blk_p_buff(1:SIZE(blk_span)) = blk_span(:)
1838         blk_span(1:SIZE(blk_span)) = blk_p_buff(tmp_arr(1:SIZE(blk_span)))
1839      ENDDO
1840!$OMP     END DO
1841      !
1842      DEALLOCATE (tmp_arr)
1843      DEALLOCATE (blk_p_buff)
1844      !
1845      CALL timestop(error_handle)
1846   END SUBROUTINE merge_index
1847
1848#:include '../data/dbcsr.fypp'
1849#:for n, nametype1, base1, prec1, kind1, type1, dkind1 in inst_params_float
1850   SUBROUTINE tree_to_linear_${nametype1}$ (wm)
1851     !! Converts mutable data to linear (array) type.
1852
1853      USE dbcsr_btree, &
1854         ONLY: btree_2d_data_${nametype1}$ => btree_data_${nametype1}$p2d, &
1855               btree_destroy_${nametype1}$ => btree_delete, &
1856               btree_size_${nametype1}$ => btree_get_entries
1857      TYPE(dbcsr_work_type), INTENT(INOUT)     :: wm
1858        !! work matrix to convert
1859
1860      CHARACTER(len=*), PARAMETER :: routineN = 'tree_to_linear_${nametype1}$', &
1861                                     routineP = moduleN//':'//routineN
1862
1863      INTEGER                                  :: blk, blk_p, treesize, &
1864                                                  error_handler, needed_size
1865      INTEGER(KIND=int_8), ALLOCATABLE, &
1866         DIMENSION(:)                           :: keys
1867      ${type1}$, DIMENSION(:), POINTER, CONTIGUOUS :: target_data
1868      ${type1}$, DIMENSION(:, :), POINTER        :: block_2d
1869      TYPE(btree_2d_data_${nametype1}$), ALLOCATABLE, &
1870         DIMENSION(:)                           :: values
1871
1872!   ---------------------------------------------------------------------------
1873
1874      CALL timeset(routineN, error_handler)
1875      ! srt = .TRUE. ! Not needed because of the copy
1876      treesize = btree_size_${nametype1}$ (wm%mutable%m%btree_${nametype1}$)
1877      IF (wm%lastblk .NE. treesize) &
1878         DBCSR_ABORT("Mismatch in number of blocks")
1879      ALLOCATE (keys(treesize), values(treesize))
1880      CALL btree_destroy_${nametype1}$ (wm%mutable%m%btree_${nametype1}$, keys, values)
1881      CALL ensure_array_size(wm%row_i, ub=treesize)
1882      CALL ensure_array_size(wm%col_i, ub=treesize)
1883      CALL dbcsr_unpack_i8_2i4(keys, wm%row_i, &
1884                               wm%col_i)
1885      ! For now we also fill the data, sloooowly, but this should
1886      ! be avoided and the data should be copied directly from the
1887      ! source in the subroutine's main loop.
1888      CALL ensure_array_size(wm%blk_p, ub=treesize)
1889      needed_size = 0
1890      DO blk = 1, treesize
1891         block_2d => values(blk)%p
1892         needed_size = needed_size + SIZE(block_2d)
1893      ENDDO
1894      wm%datasize = needed_size
1895      CALL dbcsr_data_ensure_size(wm%data_area, &
1896                                  wm%datasize)
1897      target_data => dbcsr_get_data_p_${nametype1}$ (wm%data_area)
1898      blk_p = 1
1899      DO blk = 1, treesize
1900         block_2d => values(blk)%p
1901         IF (.NOT. values(blk)%tr) THEN
1902            wm%blk_p(blk) = blk_p
1903         ELSE
1904            wm%blk_p(blk) = -blk_p
1905         ENDIF
1906         CALL block_copy_${nametype1}$ (target_data, block_2d, &
1907                                        SIZE(block_2d), blk_p, 1)
1908         blk_p = blk_p + SIZE(block_2d)
1909         DEALLOCATE (block_2d)
1910      ENDDO
1911      DEALLOCATE (keys, values)
1912      CALL dbcsr_mutable_release(wm%mutable)
1913      CALL timestop(error_handler)
1914   END SUBROUTINE tree_to_linear_${nametype1}$
1915
1916#:endfor
1917
1918END MODULE dbcsr_work_operations
1919