1!--------------------------------------------------------------------------------------------------!
2! Copyright (C) by the DBCSR developers group - All rights reserved                                !
3! This file is part of the DBCSR library.                                                          !
4!                                                                                                  !
5! For information on the license, see the LICENSE file.                                            !
6! For further information please visit https://dbcsr.cp2k.org                                      !
7! SPDX-License-Identifier: GPL-2.0+                                                                !
8!--------------------------------------------------------------------------------------------------!
9
10MODULE dbcsr_work_operations
11   !! DBCSR work matrix utilities
12
13   USE dbcsr_array_types, ONLY: array_data, &
14                                array_hold, &
15                                array_i1d_obj, &
16                                array_new, &
17                                array_nullify, &
18                                array_release, &
19                                array_size
20   USE dbcsr_btree, ONLY: btree_data_cp2d, &
21                          btree_data_dp2d, &
22                          btree_data_sp2d, &
23                          btree_data_zp2d
24   USE dbcsr_block_operations, ONLY: block_copy_c, &
25                                     block_copy_d, &
26                                     block_copy_s, &
27                                     block_copy_z, &
28                                     dbcsr_data_copy, &
29                                     dbcsr_data_set
30   USE dbcsr_config, ONLY: default_resize_factor
31   USE dbcsr_data_methods, ONLY: &
32      dbcsr_data_clear_pointer, dbcsr_data_ensure_size, dbcsr_data_get_memory_type, &
33      dbcsr_data_get_size, dbcsr_data_get_size_referenced, dbcsr_data_get_type, dbcsr_data_hold, &
34      dbcsr_data_init, dbcsr_data_new, dbcsr_data_release, dbcsr_data_set_size_referenced, &
35      dbcsr_data_valid, dbcsr_get_data_p_c, dbcsr_get_data_p_d, dbcsr_get_data_p_s, &
36      dbcsr_get_data_p_z
37   USE dbcsr_data_operations, ONLY: dbcsr_data_copyall, &
38                                    dbcsr_sort_data, &
39                                    dbcsr_switch_data_area
40   USE dbcsr_dist_methods, ONLY: dbcsr_distribution_has_threads, &
41                                 dbcsr_distribution_hold, &
42                                 dbcsr_distribution_make_threads, &
43                                 dbcsr_distribution_ncols, &
44                                 dbcsr_distribution_nrows, &
45                                 dbcsr_distribution_release
46   USE dbcsr_index_operations, ONLY: dbcsr_addto_index_array, &
47                                     dbcsr_build_row_index, &
48                                     dbcsr_clearfrom_index_array, &
49                                     dbcsr_index_prune_deleted, &
50                                     dbcsr_make_dbcsr_index, &
51                                     dbcsr_make_index_exist, &
52                                     dbcsr_repoint_index, &
53                                     dbcsr_sort_indices
54   USE dbcsr_iterator_operations, ONLY: dbcsr_iterator_blocks_left, &
55                                        dbcsr_iterator_next_block, &
56                                        dbcsr_iterator_start, &
57                                        dbcsr_iterator_stop
58   USE dbcsr_mem_methods, ONLY: dbcsr_memtype_equal
59   USE dbcsr_methods, ONLY: &
60      dbcsr_col_block_sizes, dbcsr_distribution, dbcsr_get_data_memory_type, &
61      dbcsr_get_data_size_used, dbcsr_get_data_type, dbcsr_get_index_memory_type, &
62      dbcsr_get_matrix_type, dbcsr_get_replication_type, dbcsr_matrix_counter, &
63      dbcsr_max_col_size, dbcsr_max_row_size, dbcsr_mutable_destroy, dbcsr_mutable_init, &
64      dbcsr_mutable_instantiated, dbcsr_mutable_new, dbcsr_mutable_release, dbcsr_name, &
65      dbcsr_row_block_sizes, dbcsr_use_mutable, dbcsr_valid_index, dbcsr_wm_use_mutable
66   USE dbcsr_ptr_util, ONLY: ensure_array_size
67   USE dbcsr_toollib, ONLY: dbcsr_unpack_i8_2i4, &
68                            sort
69   USE dbcsr_string_utilities, ONLY: uppercase
70   USE dbcsr_types, ONLY: &
71      dbcsr_data_obj, dbcsr_distribution_obj, dbcsr_iterator, dbcsr_memtype_default, &
72      dbcsr_memtype_type, dbcsr_meta_size, dbcsr_num_slots, dbcsr_repl_col, dbcsr_repl_full, &
73      dbcsr_repl_none, dbcsr_repl_row, dbcsr_slot_blk_p, dbcsr_slot_col_i, dbcsr_slot_home_coli, &
74      dbcsr_slot_home_pcol, dbcsr_slot_home_prow, dbcsr_slot_home_rowi, dbcsr_slot_home_vpcol, &
75      dbcsr_slot_home_vprow, dbcsr_slot_nblkcols_local, dbcsr_slot_nblkcols_total, &
76      dbcsr_slot_nblkrows_local, dbcsr_slot_nblkrows_total, dbcsr_slot_nblks, &
77      dbcsr_slot_nfullcols_local, dbcsr_slot_nfullcols_total, dbcsr_slot_nfullrows_local, &
78      dbcsr_slot_nfullrows_total, dbcsr_slot_nze, dbcsr_slot_row_p, dbcsr_slot_size, dbcsr_type, &
79      dbcsr_type_antihermitian, dbcsr_type_antisymmetric, dbcsr_type_complex_4, &
80      dbcsr_type_complex_8, dbcsr_type_hermitian, dbcsr_type_no_symmetry, dbcsr_type_real_4, &
81      dbcsr_type_real_8, dbcsr_type_real_default, dbcsr_type_symmetric, dbcsr_work_type
82   USE dbcsr_dist_util, ONLY: convert_sizes_to_offsets, &
83                              dbcsr_calc_block_sizes, &
84                              dbcsr_verify_matrix, &
85                              meta_from_dist
86   USE dbcsr_kinds, ONLY: default_string_length, &
87                          int_8, &
88                          real_4, &
89                          real_8
90#include "base/dbcsr_base_uses.f90"
91
92!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads, omp_in_parallel
93
94   IMPLICIT NONE
95   PRIVATE
96
97   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dbcsr_work_operations'
98
99   LOGICAL, PARAMETER :: careful_mod = .FALSE.
100
101   PUBLIC :: dbcsr_create, dbcsr_work_create, dbcsr_finalize, dbcsr_special_finalize
102   PUBLIC :: dbcsr_add_wm_from_matrix, &
103             add_work_coordinate
104   PUBLIC :: dbcsr_work_destroy
105
106   INTERFACE dbcsr_create
107      MODULE PROCEDURE dbcsr_create_new, dbcsr_create_template
108   END INTERFACE
109
110   TYPE i_array_p
111      INTEGER, DIMENSION(:), POINTER, CONTIGUOUS :: p
112   END TYPE i_array_p
113
114CONTAINS
115
116   SUBROUTINE dbcsr_create_new(matrix, name, dist, matrix_type, &
117                               row_blk_size, col_blk_size, row_blk_size_obj, col_blk_size_obj, &
118                               nze, data_type, data_buffer, &
119                               data_memory_type, index_memory_type, &
120                               max_rbs, max_cbs, &
121                               row_blk_offset, col_blk_offset, &
122                               thread_dist, &
123                               reuse, reuse_arrays, mutable_work, make_index, replication_type)
124      !! Creates a matrix, allocating the essentials.
125      !!
126      !! The matrix itself is allocated, as well as the essential parts of
127      !! the index. When passed the nze argument, the data is also allocated
128      !! to that size.
129      !! see dbcsr_types.F
130
131      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
132         !! new matrix
133      CHARACTER(len=*), INTENT(IN)                       :: name
134      TYPE(dbcsr_distribution_obj), INTENT(IN)           :: dist
135         !! distribution_2d distribution
136      CHARACTER, INTENT(IN)                              :: matrix_type
137         !! 'N' for normal, 'T' for transposed, 'S' for symmetric, and 'A' for antisymmetric
138      INTEGER, DIMENSION(:), INTENT(IN), POINTER, &
139         CONTIGUOUS, OPTIONAL                            :: row_blk_size, col_blk_size
140      TYPE(array_i1d_obj), INTENT(IN), OPTIONAL          :: row_blk_size_obj, col_blk_size_obj
141      INTEGER, INTENT(IN), OPTIONAL                      :: nze, data_type
142         !! number of elements
143         !! type of data from 'rRcC' for single/double precision real/complex, default is 'R'
144      TYPE(dbcsr_data_obj), INTENT(IN), OPTIONAL         :: data_buffer
145      TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL     :: data_memory_type, index_memory_type
146         !! allocate indices and data using special memory
147         !! allocate indices using special memory
148      INTEGER, INTENT(IN), OPTIONAL                      :: max_rbs, max_cbs
149      TYPE(array_i1d_obj), INTENT(IN), OPTIONAL          :: row_blk_offset, col_blk_offset
150      TYPE(dbcsr_distribution_obj), INTENT(IN), OPTIONAL :: thread_dist
151      LOGICAL, INTENT(IN), OPTIONAL                      :: reuse, reuse_arrays, mutable_work, &
152                                                            make_index
153         !! reuses an existing matrix, default is to create a fresh one
154         !! uses the mutable data for working and not the append-only data; default is append-only
155      CHARACTER, INTENT(IN), OPTIONAL                    :: replication_type
156         !! replication to be used for this matrix; default is dbcsr_repl_none
157
158      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_create_new'
159
160      CHARACTER                                          :: matrix_type_l
161      INTEGER                                            :: handle, my_nze
162      INTEGER, DIMENSION(:), POINTER, CONTIGUOUS         :: vec_col_blk_offset, vec_row_blk_offset
163      INTEGER, DIMENSION(dbcsr_meta_size)                :: new_meta
164      LOGICAL                                            :: hijack, my_make_index
165
166!   ---------------------------------------------------------------------------
167
168      MARK_USED(thread_dist) ! only used with OMP
169
170      CALL timeset(routineN, handle)
171
172      ! Reuse matrix only if has actually been allocated.
173      hijack = ASSOCIATED(matrix%index)
174      IF (PRESENT(reuse)) hijack = reuse
175
176      my_make_index = .TRUE.
177      IF (PRESENT(make_index)) my_make_index = make_index
178
179      IF (.NOT. hijack) THEN
180         matrix = dbcsr_type()
181         matrix%refcount = 1
182      ENDIF
183!$OMP     CRITICAL (crit_counter)
184      matrix%serial_number = dbcsr_matrix_counter
185      dbcsr_matrix_counter = dbcsr_matrix_counter + 1
186!$OMP     END CRITICAL (crit_counter)
187      ! Mark matrix index as having an invalid index.
188      matrix%valid = .FALSE.
189      matrix%name = name
190      ! Sets the type of matrix building/modifying work structures.
191      IF (PRESENT(mutable_work)) THEN
192         matrix%work_mutable = mutable_work
193      ELSE
194         matrix%work_mutable = .FALSE.
195      ENDIF
196      ! Sets the correct data type.
197      IF (PRESENT(data_type)) THEN
198         SELECT CASE (data_type)
199         CASE (dbcsr_type_real_4)
200            matrix%data_type = dbcsr_type_real_4
201         CASE (dbcsr_type_real_8)
202            matrix%data_type = dbcsr_type_real_8
203         CASE (dbcsr_type_complex_4)
204            matrix%data_type = dbcsr_type_complex_4
205         CASE (dbcsr_type_complex_8)
206            matrix%data_type = dbcsr_type_complex_8
207         CASE DEFAULT
208            DBCSR_ABORT("Invalid matrix type")
209         END SELECT
210      ELSE
211         matrix%data_type = dbcsr_type_real_default
212      ENDIF
213
214      matrix%data_memory_type = dbcsr_memtype_default
215      IF (PRESENT(data_memory_type)) &
216         matrix%data_memory_type = data_memory_type
217
218      matrix%index_memory_type = dbcsr_memtype_default
219      IF (PRESENT(index_memory_type)) &
220         matrix%index_memory_type = index_memory_type
221
222      IF (hijack) THEN
223         ! Release/deallocate elements that are replaced or not needed
224         ! by the new matrix. This is similar to what dbcsr_destroy
225         ! does, except that it keeps the index and data.
226         CALL array_release(matrix%row_blk_size)
227         CALL array_release(matrix%col_blk_size)
228         CALL array_release(matrix%row_blk_offset)
229         CALL array_release(matrix%col_blk_offset)
230         IF (matrix%has_local_rows) &
231            CALL array_release(matrix%local_rows)
232         IF (matrix%has_global_rows) &
233            CALL array_release(matrix%global_rows)
234         IF (matrix%has_local_cols) &
235            CALL array_release(matrix%local_cols)
236         IF (matrix%has_global_cols) &
237            CALL array_release(matrix%global_cols)
238         CALL dbcsr_distribution_release(matrix%dist)
239         IF (ASSOCIATED(matrix%wms)) THEN
240            CALL dbcsr_work_destroy_all(matrix)
241         ENDIF
242         CALL array_nullify(matrix%local_rows)
243         CALL array_nullify(matrix%global_rows)
244         CALL array_nullify(matrix%local_cols)
245         CALL array_nullify(matrix%global_cols)
246         !
247         IF (matrix%data_type /= matrix%data_area%d%data_type) &
248            DBCSR_ABORT("Inconsistent data type for the existing buffer.")
249         CALL dbcsr_data_set_size_referenced(matrix%data_area, 0)
250      ELSE
251         ! Invalidate index
252         NULLIFY (matrix%index)
253         ! Invalidate data
254         IF (PRESENT(data_buffer)) THEN
255            IF (.NOT. dbcsr_data_valid(data_buffer)) &
256               DBCSR_ABORT("Input data buffer not valid.")
257            IF (matrix%data_type /= data_buffer%d%data_type) &
258               DBCSR_ABORT("Input buffer data type different by matrix data type.")
259            matrix%data_memory_type = data_buffer%d%memory_type
260            matrix%data_area = data_buffer
261            CALL dbcsr_data_hold(matrix%data_area)
262         ELSE
263            CALL dbcsr_data_init(matrix%data_area)
264         ENDIF
265      ENDIF
266      ! These are always invalidated.
267      NULLIFY (matrix%row_p, matrix%col_i, matrix%blk_p, matrix%thr_c, &
268               matrix%coo_l)
269      IF (PRESENT(row_blk_size_obj)) THEN
270         matrix%row_blk_size = row_blk_size_obj
271         CALL array_hold(matrix%row_blk_size)
272      ELSEIF (PRESENT(row_blk_size)) THEN
273         CALL array_new(matrix%row_blk_size, row_blk_size, gift=reuse_arrays)
274      ELSE
275         DBCSR_ABORT("Missing row_blk_size")
276      ENDIF
277      IF (PRESENT(max_rbs)) THEN
278         matrix%max_rbs = max_rbs
279      ELSE IF (array_size(matrix%row_blk_size) .GT. 0) THEN
280         matrix%max_rbs = MAXVAL(array_data(matrix%row_blk_size))
281      ELSE
282         matrix%max_rbs = 0
283      ENDIF
284      IF (PRESENT(col_blk_size_obj)) THEN
285         matrix%col_blk_size = col_blk_size_obj
286         CALL array_hold(matrix%col_blk_size)
287      ELSEIF (PRESENT(col_blk_size)) THEN
288         CALL array_new(matrix%col_blk_size, col_blk_size, gift=reuse_arrays)
289      ELSE
290         DBCSR_ABORT("Missing col_blk_size")
291      ENDIF
292      IF (PRESENT(max_cbs)) THEN
293         matrix%max_cbs = max_cbs
294      ELSE IF (array_size(matrix%col_blk_size) .GT. 0) THEN
295         matrix%max_cbs = MAXVAL(array_data(matrix%col_blk_size))
296      ELSE
297         matrix%max_cbs = 0
298      ENDIF
299      !
300      IF (array_size(matrix%row_blk_size) /= dbcsr_distribution_nrows(dist)) &
301         DBCSR_ABORT("Number of blocked rows does match blocked row distribution.")
302      IF (array_size(matrix%col_blk_size) /= dbcsr_distribution_ncols(dist)) &
303         DBCSR_ABORT("Number of blocked columns does match blocked column distribution.")
304      ! initialize row/col offsets
305      IF (PRESENT(row_blk_offset)) THEN
306         IF (dbcsr_distribution_nrows(dist) + 1 /= array_size(row_blk_offset)) &
307            CALL dbcsr_abort(__LOCATION__, &
308                             "Number of blocked offset rows does match blocked row distribution.")
309         matrix%row_blk_offset = row_blk_offset
310         CALL array_hold(matrix%row_blk_offset)
311      ELSE
312         ALLOCATE (vec_row_blk_offset(array_size(matrix%row_blk_size) + 1))
313         CALL convert_sizes_to_offsets(array_data(matrix%row_blk_size), vec_row_blk_offset)
314         CALL array_new(matrix%row_blk_offset, vec_row_blk_offset, gift=.TRUE.)
315      ENDIF
316
317      IF (PRESENT(col_blk_offset)) THEN
318         IF (dbcsr_distribution_ncols(dist) + 1 /= array_size(col_blk_offset)) &
319            CALL dbcsr_abort(__LOCATION__, &
320                             "Number of blocked offset columns does match blocked column distribution.")
321         matrix%col_blk_offset = col_blk_offset
322         CALL array_hold(matrix%col_blk_offset)
323      ELSE
324         ALLOCATE (vec_col_blk_offset(array_size(matrix%col_blk_size) + 1))
325         CALL convert_sizes_to_offsets(array_data(matrix%col_blk_size), vec_col_blk_offset)
326         CALL array_new(matrix%col_blk_offset, vec_col_blk_offset, gift=.TRUE.)
327      ENDIF
328
329      matrix%dist = dist
330      CALL dbcsr_distribution_hold(matrix%dist)
331!$    IF (.NOT. dbcsr_distribution_has_threads(matrix%dist) .AND. PRESENT(thread_dist)) THEN
332!$       IF (dbcsr_distribution_has_threads(thread_dist)) THEN
333!$          matrix%dist%d%num_threads = thread_dist%d%num_threads
334!$          matrix%dist%d%has_thread_dist = .TRUE.
335!$          matrix%dist%d%thread_dist = thread_dist%d%thread_dist
336!$          CALL array_hold(matrix%dist%d%thread_dist)
337!$       ENDIF
338!$    ENDIF
339!$    IF (.NOT. dbcsr_distribution_has_threads(matrix%dist)) THEN
340!$       CALL dbcsr_distribution_make_threads(matrix%dist, &
341!$                                            array_data(matrix%row_blk_size))
342!$    ENDIF
343      ! Set up some data.
344      IF (my_make_index) THEN
345         CALL meta_from_dist(new_meta, dist, array_data(matrix%row_blk_size), &
346                             array_data(matrix%col_blk_size))
347         matrix%nblkrows_total = new_meta(dbcsr_slot_nblkrows_total)
348         matrix%nblkcols_total = new_meta(dbcsr_slot_nblkcols_total)
349         matrix%nfullrows_total = new_meta(dbcsr_slot_nfullrows_total)
350         matrix%nfullcols_total = new_meta(dbcsr_slot_nfullcols_total)
351         matrix%nblkrows_local = new_meta(dbcsr_slot_nblkrows_local)
352         matrix%nblkcols_local = new_meta(dbcsr_slot_nblkcols_local)
353         matrix%nfullrows_local = new_meta(dbcsr_slot_nfullrows_local)
354         matrix%nfullcols_local = new_meta(dbcsr_slot_nfullcols_local)
355      ENDIF
356      my_nze = 0; IF (PRESENT(nze)) my_nze = nze
357      matrix%nblks = 0
358      matrix%nze = 0
359
360      IF (PRESENT(replication_type)) THEN
361         IF (replication_type .NE. dbcsr_repl_none &
362             .AND. replication_type .NE. dbcsr_repl_full &
363             .AND. replication_type .NE. dbcsr_repl_row &
364             .AND. replication_type .NE. dbcsr_repl_col) &
365            DBCSR_ABORT("Invalid replication type '"//replication_type//"'")
366         IF (replication_type .EQ. dbcsr_repl_row .OR. replication_type .EQ. dbcsr_repl_col) &
367            DBCSR_WARN("Row and column replication not fully supported")
368         matrix%replication_type = replication_type
369      ELSE
370         matrix%replication_type = dbcsr_repl_none
371      ENDIF
372      !
373      ! Setup a matrix from scratch
374      IF (.NOT. hijack) THEN
375         IF (.NOT. PRESENT(data_buffer)) THEN
376            CALL dbcsr_data_new(matrix%data_area, matrix%data_type, my_nze, &
377                                memory_type=matrix%data_memory_type)
378            CALL dbcsr_data_set_size_referenced(matrix%data_area, 0)
379         ENDIF
380         !
381         IF (my_make_index) THEN
382            NULLIFY (matrix%index)
383            CALL ensure_array_size(matrix%index, lb=1, ub=dbcsr_num_slots, &
384                                   zero_pad=.TRUE., memory_type=matrix%index_memory_type)
385         ENDIF
386      ENDIF
387      IF (my_make_index) THEN
388         IF (LBOUND(matrix%index, 1) .GT. 1 &
389             .OR. UBOUND(matrix%index, 1) .LT. dbcsr_num_slots) &
390            DBCSR_ABORT("Index is not large enough")
391         matrix%index(1:dbcsr_num_slots) = 0
392         matrix%index(1:dbcsr_meta_size) = new_meta(1:dbcsr_meta_size)
393         matrix%index(dbcsr_slot_size) = dbcsr_num_slots
394      ENDIF
395      !
396      matrix%symmetry = .FALSE.
397      matrix%negate_real = .FALSE.
398      matrix%negate_imaginary = .FALSE.
399      !matrix%transpose = .FALSE.
400      matrix_type_l = matrix_type
401      CALL uppercase(matrix_type_l)
402      SELECT CASE (matrix_type_l)
403      CASE (dbcsr_type_no_symmetry)
404      CASE (dbcsr_type_symmetric)
405         matrix%symmetry = .TRUE.
406      CASE (dbcsr_type_antisymmetric)
407         matrix%symmetry = .TRUE.
408         matrix%negate_real = .TRUE.
409         matrix%negate_imaginary = .TRUE.
410      CASE (dbcsr_type_hermitian)
411         matrix%symmetry = .TRUE.
412         matrix%negate_imaginary = .TRUE.
413      CASE (dbcsr_type_antihermitian)
414         matrix%symmetry = .TRUE.
415         matrix%negate_real = .TRUE.
416      CASE DEFAULT
417         DBCSR_ABORT("Invalid matrix type.")
418      END SELECT
419      matrix%bcsc = .FALSE.
420      matrix%local_indexing = .FALSE.
421      matrix%list_indexing = .FALSE.
422      CALL array_nullify(matrix%local_rows)
423      CALL array_nullify(matrix%global_rows)
424      CALL array_nullify(matrix%local_cols)
425      CALL array_nullify(matrix%global_cols)
426      matrix%has_local_rows = .FALSE.
427      matrix%has_global_rows = .FALSE.
428      matrix%has_local_cols = .FALSE.
429      matrix%has_global_cols = .FALSE.
430      IF (my_make_index) THEN
431         CALL dbcsr_make_index_exist(matrix)
432      ENDIF
433      matrix%valid = .TRUE.
434      CALL timestop(handle)
435   END SUBROUTINE dbcsr_create_new
436
437   SUBROUTINE dbcsr_create_template(matrix, template, name, dist, matrix_type, &
438                                    row_blk_size, col_blk_size, row_blk_size_obj, col_blk_size_obj, &
439                                    nze, data_type, &
440                                    data_buffer, data_memory_type, index_memory_type, &
441                                    max_rbs, max_cbs, &
442                                    row_blk_offset, col_blk_offset, &
443                                    reuse_arrays, mutable_work, make_index, replication_type)
444      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
445      TYPE(dbcsr_type), INTENT(IN)                       :: template
446      CHARACTER(len=*), INTENT(IN), OPTIONAL             :: name
447      TYPE(dbcsr_distribution_obj), INTENT(IN), OPTIONAL :: dist
448      CHARACTER, INTENT(IN), OPTIONAL                    :: matrix_type
449      INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL, &
450         POINTER, CONTIGUOUS                             :: row_blk_size, col_blk_size
451      TYPE(array_i1d_obj), INTENT(IN), OPTIONAL          :: row_blk_size_obj, col_blk_size_obj
452      INTEGER, INTENT(IN), OPTIONAL                      :: nze, data_type
453      TYPE(dbcsr_data_obj), INTENT(IN), OPTIONAL         :: data_buffer
454      TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL     :: data_memory_type, index_memory_type
455      INTEGER, INTENT(IN), OPTIONAL                      :: max_rbs, max_cbs
456      TYPE(array_i1d_obj), INTENT(IN), OPTIONAL          :: row_blk_offset, col_blk_offset
457      LOGICAL, INTENT(IN), OPTIONAL                      :: reuse_arrays, mutable_work, make_index
458      CHARACTER, INTENT(IN), OPTIONAL                    :: replication_type
459
460      CHARACTER                                          :: new_matrix_type, new_replication_type
461      CHARACTER(len=default_string_length)               :: new_name
462      INTEGER                                            :: new_data_type, new_max_cbs, new_max_rbs
463      LOGICAL                                            :: my_make_index, new_mutable_work
464      TYPE(array_i1d_obj)                                :: new_col_blk_offset, new_row_blk_offset
465      TYPE(dbcsr_distribution_obj)                       :: new_dist
466      TYPE(dbcsr_memtype_type)                           :: new_data_memory_type, &
467                                                            new_index_memory_type
468
469      INTEGER, DIMENSION(:), POINTER, CONTIGUOUS         :: blk_size
470
471!   ---------------------------------------------------------------------------
472
473      IF (PRESENT(name)) THEN
474         new_name = TRIM(name)
475      ELSE
476         new_name = TRIM(dbcsr_name(template))
477      ENDIF
478      IF (PRESENT(dist)) THEN
479         new_dist = dist
480      ELSE
481         new_dist = dbcsr_distribution(template)
482      ENDIF
483      IF (PRESENT(matrix_type)) THEN
484         new_matrix_type = matrix_type
485      ELSE
486         new_matrix_type = dbcsr_get_matrix_type(template)
487      ENDIF
488      !
489      IF ((PRESENT(row_blk_size) .NEQV. PRESENT(col_blk_size)) .OR. &
490          (PRESENT(row_blk_size_obj) .NEQV. PRESENT(col_blk_size_obj))) THEN
491         DBCSR_ABORT("Both row_blk_size and col_blk_size must be provided")
492      ENDIF
493      !
494      IF (PRESENT(max_rbs)) new_max_rbs = max_rbs
495      IF (PRESENT(row_blk_offset)) new_row_blk_offset = row_blk_offset
496      NULLIFY (blk_size)
497      IF (PRESENT(row_blk_size_obj)) THEN
498         blk_size => array_data(row_blk_size_obj)
499      ELSEIF (PRESENT(row_blk_size)) THEN
500         blk_size => row_blk_size
501      ENDIF
502      IF (ASSOCIATED(blk_size)) THEN
503         IF (.NOT. PRESENT(max_rbs)) &
504            new_max_rbs = MAXVAL(blk_size)
505      ELSE
506         IF (.NOT. PRESENT(max_rbs)) &
507            new_max_rbs = dbcsr_max_row_size(template)
508         IF (.NOT. PRESENT(row_blk_offset)) &
509            new_row_blk_offset = template%row_blk_offset
510      ENDIF
511      !
512      IF (PRESENT(max_cbs)) new_max_cbs = max_cbs
513      IF (PRESENT(col_blk_offset)) new_col_blk_offset = col_blk_offset
514      NULLIFY (blk_size)
515      IF (PRESENT(col_blk_size_obj)) THEN
516         blk_size => array_data(col_blk_size_obj)
517      ELSEIF (PRESENT(col_blk_size)) THEN
518         blk_size => col_blk_size
519      ENDIF
520      IF (ASSOCIATED(blk_size)) THEN
521         IF (.NOT. PRESENT(max_cbs)) &
522            new_max_cbs = MAXVAL(blk_size)
523      ELSE
524         IF (.NOT. PRESENT(max_cbs)) &
525            new_max_cbs = dbcsr_max_col_size(template)
526         IF (.NOT. PRESENT(col_blk_offset)) &
527            new_col_blk_offset = template%col_blk_offset
528      ENDIF
529      IF (PRESENT(data_type)) THEN
530         new_data_type = data_type
531      ELSE
532         new_data_type = dbcsr_get_data_type(template)
533      ENDIF
534      IF (PRESENT(data_memory_type)) THEN
535         new_data_memory_type = data_memory_type
536      ELSE
537         new_data_memory_type = dbcsr_get_data_memory_type(template)
538      ENDIF
539      IF (PRESENT(index_memory_type)) THEN
540         new_index_memory_type = index_memory_type
541      ELSE
542         new_index_memory_type = dbcsr_get_index_memory_type(template)
543      ENDIF
544      IF (PRESENT(replication_type)) THEN
545         new_replication_type = replication_type
546      ELSE
547         new_replication_type = dbcsr_get_replication_type(template)
548      ENDIF
549      IF (PRESENT(mutable_work)) THEN
550         new_mutable_work = mutable_work
551      ELSE
552         new_mutable_work = dbcsr_use_mutable(template)
553      ENDIF
554      IF (PRESENT(row_blk_size_obj)) THEN
555         CALL dbcsr_create(matrix, name=new_name, dist=new_dist, &
556                           matrix_type=new_matrix_type, &
557                           row_blk_size_obj=row_blk_size_obj, &
558                           col_blk_size_obj=col_blk_size_obj, &
559                           nze=nze, &
560                           data_type=new_data_type, &
561                           data_buffer=data_buffer, &
562                           data_memory_type=new_data_memory_type, &
563                           index_memory_type=new_index_memory_type, &
564                           max_rbs=new_max_rbs, max_cbs=new_max_cbs, &
565                           row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset, &
566                           reuse_arrays=reuse_arrays, &
567                           mutable_work=new_mutable_work, &
568                           make_index=make_index, &
569                           replication_type=new_replication_type)
570      ELSEIF (PRESENT(row_blk_size)) THEN
571         CALL dbcsr_create(matrix, name=new_name, dist=new_dist, &
572                           matrix_type=new_matrix_type, &
573                           row_blk_size=row_blk_size, &
574                           col_blk_size=col_blk_size, &
575                           nze=nze, &
576                           data_type=new_data_type, &
577                           data_buffer=data_buffer, &
578                           data_memory_type=new_data_memory_type, &
579                           index_memory_type=new_index_memory_type, &
580                           max_rbs=new_max_rbs, max_cbs=new_max_cbs, &
581                           row_blk_offset=row_blk_offset, col_blk_offset=col_blk_offset, &
582                           reuse_arrays=reuse_arrays, &
583                           mutable_work=new_mutable_work, &
584                           make_index=make_index, &
585                           replication_type=new_replication_type)
586      ELSE
587         CALL dbcsr_create(matrix, name=new_name, dist=new_dist, &
588                           matrix_type=new_matrix_type, &
589                           row_blk_size_obj=template%row_blk_size, &
590                           col_blk_size_obj=template%col_blk_size, &
591                           nze=nze, &
592                           data_type=new_data_type, &
593                           data_buffer=data_buffer, &
594                           data_memory_type=new_data_memory_type, &
595                           index_memory_type=new_index_memory_type, &
596                           max_rbs=new_max_rbs, max_cbs=new_max_cbs, &
597                           row_blk_offset=new_row_blk_offset, col_blk_offset=new_col_blk_offset, &
598                           thread_dist=dbcsr_distribution(template), &
599                           reuse_arrays=reuse_arrays, &
600                           mutable_work=new_mutable_work, &
601                           make_index=make_index, &
602                           replication_type=new_replication_type)
603      ENDIF
604      ! Copy stuff from the meta-array.  These are not normally needed,
605      ! but have to be here for creating matrices from "image" matrices.
606      my_make_index = .TRUE.
607      IF (PRESENT(make_index)) my_make_index = make_index
608      IF (my_make_index) THEN
609         matrix%index(dbcsr_slot_home_prow) = template%index(dbcsr_slot_home_prow)
610         matrix%index(dbcsr_slot_home_rowi) = template%index(dbcsr_slot_home_rowi)
611         matrix%index(dbcsr_slot_home_pcol) = template%index(dbcsr_slot_home_pcol)
612         matrix%index(dbcsr_slot_home_coli) = template%index(dbcsr_slot_home_coli)
613         matrix%index(dbcsr_slot_home_vprow) = template%index(dbcsr_slot_home_vprow)
614         matrix%index(dbcsr_slot_home_vpcol) = template%index(dbcsr_slot_home_vpcol)
615      ENDIF
616      IF (PRESENT(row_blk_size) .AND. .NOT. PRESENT(row_blk_offset)) THEN
617         CALL array_release(new_row_blk_offset)
618      ENDIF
619      IF (PRESENT(col_blk_size) .AND. .NOT. PRESENT(col_blk_offset)) THEN
620         CALL array_release(new_col_blk_offset)
621      ENDIF
622
623   END SUBROUTINE dbcsr_create_template
624
625   SUBROUTINE dbcsr_init_wm(wm, data_type, nblks_guess, sizedata_guess, memory_type)
626      !! Initializes one work matrix
627
628      TYPE(dbcsr_work_type), INTENT(OUT)                 :: wm
629         !! initialized work matrix
630      INTEGER, INTENT(IN)                                :: data_type
631      INTEGER, INTENT(IN), OPTIONAL                      :: nblks_guess, sizedata_guess
632         !! estimated number of blocks
633         !! estimated size of data
634      TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL     :: memory_type
635
636      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_init_wm'
637      INTEGER                                            :: handle, nblks, stat
638
639!   ---------------------------------------------------------------------------
640
641      IF (careful_mod) CALL timeset(routineN, handle)
642      wm%lastblk = 0
643      wm%datasize = 0
644      ! Index
645      IF (PRESENT(nblks_guess)) THEN
646         nblks = nblks_guess
647         IF (nblks_guess < 0) &
648            DBCSR_ABORT("Can not have negative block count.")
649         ALLOCATE (wm%row_i(nblks), stat=stat)
650         IF (stat /= 0) DBCSR_ABORT("wm%row_i")
651         ALLOCATE (wm%col_i(nblks), stat=stat)
652         IF (stat /= 0) DBCSR_ABORT("wm%col_i")
653         ALLOCATE (wm%blk_p(nblks), stat=stat)
654         IF (stat /= 0) DBCSR_ABORT("wm%blk_p")
655      ELSE
656         NULLIFY (wm%row_i, wm%col_i, wm%blk_p)
657         !nblks = CEILING (REAL (matrix%nblkrows_local * matrix%nblkcols_local)&
658         !     / REAL (dbcsr_mp_numnodes (dbcsr_distribution_mp (matrix%dist))))
659      ENDIF
660      ! Data
661      CALL dbcsr_data_init(wm%data_area)
662      IF (PRESENT(sizedata_guess)) THEN
663         IF (sizedata_guess < 0) &
664            DBCSR_ABORT("Can not have negative data size.")
665         CALL dbcsr_data_new(wm%data_area, data_type, &
666                             data_size=sizedata_guess, memory_type=memory_type)
667      ELSE
668         CALL dbcsr_data_new(wm%data_area, data_type, memory_type=memory_type)
669      ENDIF
670      CALL dbcsr_mutable_init(wm%mutable)
671      IF (careful_mod) CALL timestop(handle)
672   END SUBROUTINE dbcsr_init_wm
673
674   SUBROUTINE dbcsr_work_create(matrix, nblks_guess, sizedata_guess, n, &
675                                work_mutable, memory_type)
676      !! Creates a the working matrix(es) for a DBCSR matrix.
677
678      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
679         !! new matrix
680      INTEGER, INTENT(IN), OPTIONAL                      :: nblks_guess, sizedata_guess, n
681         !! estimated number of blocks
682         !! estimated size of data
683         !! number work matrices to create, default is 1
684      LOGICAL, INTENT(in), OPTIONAL                      :: work_mutable
685         !! use mutable work type, default is what was specified in create
686      TYPE(dbcsr_memtype_type), INTENT(IN), OPTIONAL     :: memory_type
687
688      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_work_create'
689
690      INTEGER                                            :: handle, iw, nw, ow
691      LOGICAL                                            :: wms_new, wms_realloc
692      TYPE(dbcsr_work_type), DIMENSION(:), POINTER       :: wms
693
694!   ---------------------------------------------------------------------------
695
696      CALL timeset(routineN, handle)
697      IF (PRESENT(n)) THEN
698         nw = n
699      ELSE
700         nw = 1
701!$       IF (omp_in_parallel()) THEN
702!$          nw = omp_get_num_threads()
703!$       ELSE
704!$          nw = omp_get_max_threads()
705!$       ENDIF
706      ENDIF
707!$OMP     MASTER
708      wms_new = .NOT. ASSOCIATED(matrix%wms)
709      wms_realloc = .FALSE.
710      IF (ASSOCIATED(matrix%wms)) THEN
711         ow = SIZE(matrix%wms)
712         IF (ow .LT. nw) &
713            DBCSR_WARN("Number of work matrices less than threads.")
714         IF (ow .LT. nw) wms_realloc = .TRUE.
715      ENDIF
716      IF (PRESENT(work_mutable)) THEN
717         matrix%work_mutable = work_mutable
718      ENDIF
719      IF (wms_realloc) THEN
720         ALLOCATE (wms(nw))
721         wms(1:ow) = matrix%wms(1:ow)
722         DEALLOCATE (matrix%wms)
723         matrix%wms => wms
724         DO iw = ow + 1, nw
725            CALL dbcsr_init_wm(matrix%wms(iw), matrix%data_type, &
726                               nblks_guess=nblks_guess, sizedata_guess=sizedata_guess, &
727                               memory_type=memory_type)
728            IF (matrix%work_mutable) &
729               CALL dbcsr_mutable_new(matrix%wms(iw)%mutable, &
730                                      dbcsr_get_data_type(matrix))
731         END DO
732      ENDIF
733      IF (wms_new) THEN
734         ALLOCATE (matrix%wms(nw))
735         DO iw = 1, nw
736            CALL dbcsr_init_wm(matrix%wms(iw), matrix%data_type, &
737                               nblks_guess=nblks_guess, sizedata_guess=sizedata_guess, &
738                               memory_type=memory_type)
739            IF (matrix%work_mutable) &
740               CALL dbcsr_mutable_new(matrix%wms(iw)%mutable, &
741                                      dbcsr_get_data_type(matrix))
742         END DO
743      ENDIF
744      matrix%valid = .FALSE.
745!$OMP     END MASTER
746      CALL timestop(handle)
747   END SUBROUTINE dbcsr_work_create
748
749   SUBROUTINE dbcsr_finalize(matrix, reshuffle)
750      !! Creates the final dbcsr_type matrix from the working matrix.
751      !! Work matrices (array or tree-based) are merged into the base DBCSR matrix.
752      !! If a matrix is marked as having a valid index, then nothing is done.
753      !! Deleted blocks are pruned from the index.
754
755      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
756         !! final matrix
757      LOGICAL, INTENT(IN), OPTIONAL                      :: reshuffle
758         !! whether the data should be reshuffled, default is false
759
760      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_finalize'
761      LOGICAL, PARAMETER                                 :: dbg = .FALSE.
762
763      INTEGER                                            :: handle, i, nblks, nwms, start_offset
764      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: empty_row_p
765      INTEGER, DIMENSION(:), POINTER, SAVE               :: old_blk_p, old_col_i, old_row_p
766      LOGICAL                                            :: can_quick, fake_row_p, sort_data, spawn
767
768!   ---------------------------------------------------------------------------
769
770      CALL timeset(routineN, handle)
771
772      NULLIFY (old_blk_p, old_col_i, old_row_p)
773
774!$OMP     BARRIER
775      ! If the matrix is not marked as dirty then skip the work.
776      IF (dbcsr_valid_index(matrix)) THEN
777         !"No need to finalize a valid matrix, skipping."
778         !
779         ! A matrix with a valid index should not have associated work
780         ! arrays.  This may happen when this routine is called on a
781         ! matrix that was not changed.
782!$OMP        BARRIER
783!$OMP        MASTER
784         IF (ASSOCIATED(matrix%wms)) &
785            CALL dbcsr_work_destroy_all(matrix)
786         matrix%valid = .TRUE.
787!$OMP        END MASTER
788!$OMP        BARRIER
789         CALL timestop(handle)
790         RETURN
791      ENDIF
792      !
793      ! If possible, data copying is avoided.
794      IF (PRESENT(reshuffle)) THEN
795         sort_data = reshuffle
796      ELSE
797         sort_data = .FALSE.
798      ENDIF
799      !
800      ! Now make sure that a valid row_p exists. Also clear the row_p if
801      ! the matrix is declared to have 0 blocks.
802!$OMP     MASTER
803      fake_row_p = .NOT. ASSOCIATED(matrix%row_p)
804      IF (ASSOCIATED(matrix%row_p)) THEN
805         fake_row_p = SIZE(matrix%row_p) .LE. 1
806      ENDIF
807      fake_row_p = fake_row_p .OR. matrix%nblks .EQ. 0
808!$OMP     END MASTER
809      !
810      ! See where data will be appended in the main data
811      ! area. Alternatively, set to the start if the matrix is declared
812      ! to have no data. (This value is ignored if reshuffle is true
813      ! because the main data area is always new.)
814      start_offset = matrix%nze
815      i = dbcsr_get_data_size_used(matrix)
816!$OMP     MASTER
817      matrix%nze = 0
818!$OMP     END MASTER
819!$OMP     BARRIER
820!$OMP     ATOMIC
821      matrix%nze = matrix%nze + i
822!$OMP     BARRIER
823      IF (dbg) THEN
824         WRITE (*, *) routineN//" sizes", matrix%nze, i, &
825            dbcsr_data_get_size_referenced(matrix%data_area), &
826            dbcsr_data_get_size(matrix%data_area)
827      ENDIF
828      IF (.FALSE. .AND. dbcsr_data_get_size_referenced(matrix%data_area) .NE. &
829          matrix%nze) THEN
830         IF (matrix%nze .NE. dbcsr_data_get_size_referenced(matrix%data_area)) &
831            DBCSR_WARN("Should reshuffle.")
832         IF (ASSOCIATED(matrix%wms)) THEN
833            sort_data = .NOT. dbcsr_wm_use_mutable(matrix%wms(1))
834         ENDIF
835      ENDIF
836      IF (sort_data .AND. matrix%nze .GT. 0) THEN
837         CALL dbcsr_add_wm_from_matrix(matrix)
838         matrix%nze = 0
839!$OMP        MASTER
840         fake_row_p = .TRUE.
841!$OMP        END MASTER
842      ENDIF
843      start_offset = dbcsr_data_get_size_referenced(matrix%data_area) + 1
844      IF (matrix%nze .EQ. 0) start_offset = 1
845!$OMP     MASTER
846      matrix%index(dbcsr_slot_nze) = matrix%nze
847      IF (fake_row_p) THEN
848         ALLOCATE (empty_row_p(matrix%nblkrows_total + 1))
849         empty_row_p(:) = 0
850         CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, &
851                                      DATA=empty_row_p, extra=0)
852         CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, &
853                                      reservation=0)
854         CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, &
855                                      reservation=0)
856         CALL dbcsr_repoint_index(matrix)
857      ENDIF
858!$OMP     END MASTER
859      !
860!$OMP     BARRIER
861      can_quick = can_quickly_finalize(matrix)
862!$OMP     BARRIER
863      ! If the matrix, work matrices, and environment fit several
864      ! criteria, then a quick O(1) finalization is performed.
865      IF (can_quick .AND. .NOT. sort_data) THEN
866         CALL quick_finalize(matrix)
867      ELSE
868         !
869!$OMP        MASTER
870         !
871         ! Create work matrices if not yet existing
872         IF (.NOT. ASSOCIATED(matrix%wms)) THEN
873            nwms = 1
874!$          nwms = omp_get_num_threads()
875            CALL dbcsr_work_create(matrix, n=nwms)
876         ENDIF
877!$OMP        END MASTER
878!$OMP        BARRIER
879         !
880         ! Ensure index arrays at least exist.
881!$OMP        DO SCHEDULE (STATIC, 1)
882         DO i = 1, SIZE(matrix%wms)
883            IF (.NOT. ASSOCIATED(matrix%wms(i)%row_i)) THEN
884               CALL ensure_array_size(matrix%wms(i)%row_i, ub=0)
885            ENDIF
886            IF (.NOT. ASSOCIATED(matrix%wms(i)%col_i)) THEN
887               CALL ensure_array_size(matrix%wms(i)%col_i, ub=0)
888            ENDIF
889            IF (.NOT. ASSOCIATED(matrix%wms(i)%blk_p)) THEN
890               CALL ensure_array_size(matrix%wms(i)%blk_p, ub=0)
891            ENDIF
892         ENDDO
893!$OMP        ENDDO
894         !
895         ! Check for deleted blocks
896!$OMP        MASTER
897         nblks = matrix%row_p(matrix%nblkrows_total + 1)
898         IF (ANY(matrix%blk_p(1:nblks) .EQ. 0)) THEN
899            CALL dbcsr_index_prune_deleted(matrix)
900         ENDIF
901         old_row_p => matrix%row_p
902         old_col_i => matrix%col_i
903         old_blk_p => matrix%blk_p
904!$OMP        END MASTER
905         !
906!$OMP        BARRIER
907         ! Check to see if we will need to create a parallel environment
908         ! (needed when there are multiple work matrices but we are not
909         ! in an OpenMP parallel section.)
910         !
911         ! A parallel section is created is used when the matrix has
912         ! more work matrices. It's a shortcut when the finalize is
913         ! called from a non-parallel environment whereas the matrix was
914         ! built/modified in a parallel environment
915         nwms = SIZE(matrix%wms)
916         spawn = .FALSE.
917!$       IF (.NOT. OMP_IN_PARALLEL()) THEN
918!$          IF (nwms .GT. 1) spawn = .TRUE.
919!$       ENDIF
920         IF (spawn) THEN
921!$OMP           PARALLEL IF (spawn) &
922!$OMP                    DEFAULT (NONE) &
923!$OMP                    SHARED (matrix, old_row_p, old_col_i, old_blk_p,&
924!$OMP                            start_offset, sort_data)
925            CALL dbcsr_merge_all(matrix, &
926                                 old_row_p, old_col_i, old_blk_p, &
927                                 sort_data=sort_data)
928!$OMP           END PARALLEL
929         ELSE
930            CALL dbcsr_merge_all(matrix, &
931                                 old_row_p, old_col_i, old_blk_p, &
932                                 sort_data=sort_data)
933         ENDIF
934      ENDIF
935!$OMP BARRIER
936!$OMP MASTER
937      ! Clean up.
938      IF (ASSOCIATED(matrix%wms)) THEN
939         CALL dbcsr_work_destroy_all(matrix)
940      ENDIF
941      matrix%valid = .TRUE.
942!$OMP END MASTER
943!$OMP BARRIER
944      IF (dbg) THEN
945!$OMP        SINGLE
946         CALL dbcsr_verify_matrix(matrix)
947!$OMP        END SINGLE
948      ENDIF
949!$OMP MASTER
950      IF (fake_row_p) THEN
951         DEALLOCATE (empty_row_p)
952      ENDIF
953!$OMP END MASTER
954!$OMP BARRIER
955      CALL timestop(handle)
956   END SUBROUTINE dbcsr_finalize
957
958   SUBROUTINE dbcsr_special_finalize(matrix, reshuffle)
959      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
960      LOGICAL, INTENT(IN), OPTIONAL                      :: reshuffle
961
962      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_special_finalize'
963      LOGICAL, PARAMETER                                 :: dbg = .FALSE.
964
965      INTEGER                                            :: handle
966      LOGICAL                                            :: can_quick, sort_data
967
968!   ---------------------------------------------------------------------------
969
970      CALL timeset(routineN, handle)
971
972      IF (matrix%nblks .NE. 0) &
973         DBCSR_ABORT("Optimized finalize requires empty matrix.")
974      IF (dbcsr_valid_index(matrix)) &
975         DBCSR_ABORT("Optimized finalize requires invalid matrix.")
976      IF (.NOT. ASSOCIATED(matrix%wms)) &
977         DBCSR_ABORT("Optimized finalize requires work matrices exist.")
978      IF (SIZE(matrix%wms) .NE. 1) &
979         DBCSR_ABORT("Optimized finalize requires single work matrix")
980
981      !
982      ! If possible, data copying is avoided.
983      IF (PRESENT(reshuffle)) THEN
984         sort_data = reshuffle
985      ELSE
986         sort_data = .FALSE.
987      ENDIF
988!$OMP     BARRIER
989      can_quick = can_quickly_finalize(matrix)
990!$OMP     BARRIER
991
992      ! If the matrix, work matrices, and environment fit several
993      ! criteria, then a quick O(1) finalization is performed.
994      IF (can_quick .AND. .NOT. sort_data) THEN
995         CALL quick_finalize(matrix)
996      ELSE
997         IF (.NOT. sort_data) &
998            DBCSR_ABORT("merge_single_wm only supports data sorting")
999         !
1000         ! Ensure needed index arrays at least exist.
1001!$OMP        MASTER
1002         !
1003         IF (.NOT. ASSOCIATED(matrix%wms(1)%row_i)) THEN
1004            CALL ensure_array_size(matrix%wms(1)%row_i, ub=0)
1005         ENDIF
1006         IF (.NOT. ASSOCIATED(matrix%wms(1)%col_i)) THEN
1007            CALL ensure_array_size(matrix%wms(1)%col_i, ub=0)
1008         ENDIF
1009         IF (.NOT. ASSOCIATED(matrix%wms(1)%blk_p)) THEN
1010            CALL ensure_array_size(matrix%wms(1)%blk_p, ub=0)
1011         ENDIF
1012         !
1013!$OMP        END MASTER
1014!$OMP        BARRIER
1015         !
1016!$OMP        PARALLEL DEFAULT (NONE), SHARED(matrix)
1017         CALL dbcsr_merge_single_wm(matrix)
1018!$OMP        END PARALLEL
1019
1020      ENDIF
1021
1022!$OMP     MASTER
1023      ! Clean up.
1024      IF (ASSOCIATED(matrix%wms)) THEN
1025         CALL dbcsr_work_destroy_all(matrix)
1026      ENDIF
1027      matrix%valid = .TRUE.
1028!$OMP     END MASTER
1029!$OMP     BARRIER
1030      IF (dbg) THEN
1031!$OMP        SINGLE
1032         CALL dbcsr_verify_matrix(matrix)
1033!$OMP        END SINGLE
1034      ENDIF
1035!$OMP     BARRIER
1036      CALL timestop(handle)
1037   END SUBROUTINE dbcsr_special_finalize
1038
1039   FUNCTION can_quickly_finalize(matrix) RESULT(quick)
1040      !! Checks whether the matrix can be finalized with minimal copying.
1041
1042      TYPE(dbcsr_type), INTENT(IN)                       :: matrix
1043         !! matrix to check
1044      LOGICAL                                            :: quick
1045         !! whether matrix can be quickly finalized
1046
1047!   ---------------------------------------------------------------------------
1048
1049      IF (ASSOCIATED(matrix%wms)) THEN
1050         quick = matrix%nblks .EQ. 0
1051         quick = quick .AND. SIZE(matrix%wms) .EQ. 1 .AND. &
1052                 .NOT. dbcsr_wm_use_mutable(matrix%wms(1))
1053         IF (quick) THEN
1054            quick = quick .AND. &
1055                    dbcsr_memtype_equal( &
1056                    dbcsr_data_get_memory_type(matrix%wms(1)%data_area), &
1057                    dbcsr_data_get_memory_type(matrix%data_area))
1058            quick = quick .AND. &
1059                    ASSOCIATED(matrix%wms(1)%row_i)
1060            quick = quick .AND. &
1061                    (matrix%wms(1)%datasize_after_filtering .LT. 0 .OR. &
1062                     matrix%wms(1)%datasize .EQ. matrix%wms(1)%datasize_after_filtering)
1063         ENDIF
1064      ELSE
1065         quick = .FALSE.
1066      ENDIF
1067   END FUNCTION can_quickly_finalize
1068
1069   SUBROUTINE quick_finalize(matrix)
1070      !! Performs quick finalization of matrix
1071      !! The data area from the work matrix is accepted as the new matrix's data
1072      !! area and the index is built from the work matrix.
1073
1074      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
1075         !! matrix to finalize
1076
1077      CHARACTER(len=*), PARAMETER :: routineN = 'quick_finalize'
1078      INTEGER                                            :: error_handle, nblks, nrows
1079
1080!   ---------------------------------------------------------------------------
1081
1082      CALL timeset(routineN, error_handle)
1083!$OMP     SECTIONS
1084!$OMP     SECTION
1085      nblks = matrix%wms(1)%lastblk
1086      nrows = matrix%nblkrows_total
1087      CALL dbcsr_sort_indices(nblks, &
1088                              matrix%wms(1)%row_i, &
1089                              matrix%wms(1)%col_i, &
1090                              matrix%wms(1)%blk_p)
1091      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p)
1092      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i)
1093      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p)
1094      CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, &
1095                                   reservation=nrows + 1, extra=2*nblks)
1096      CALL dbcsr_make_dbcsr_index(matrix%row_p, matrix%wms(1)%row_i, &
1097                                  nrows, nblks)
1098      CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, &
1099                                   DATA=matrix%wms(1)%col_i(1:nblks))
1100      CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, &
1101                                   DATA=matrix%wms(1)%blk_p(1:nblks))
1102      matrix%nblks = nblks
1103      matrix%nze = matrix%wms(1)%datasize
1104      matrix%index(dbcsr_slot_nblks) = nblks
1105      matrix%index(dbcsr_slot_nze) = matrix%wms(1)%datasize
1106      CALL dbcsr_repoint_index(matrix)
1107!$OMP     SECTION
1108      CALL dbcsr_switch_data_area(matrix, matrix%wms(1)%data_area)
1109!$OMP     END SECTIONS
1110      CALL timestop(error_handle)
1111   END SUBROUTINE quick_finalize
1112
1113   SUBROUTINE dbcsr_add_wm_from_matrix(matrix, limits)
1114      !! Creates a work matrix from the data present in a finalized matrix.
1115
1116      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
1117         !! DBCSR matrix
1118      INTEGER, DIMENSION(4), INTENT(IN), OPTIONAL        :: limits
1119         !! the limits to use for copying
1120
1121      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_add_wm_from_matrix'
1122
1123      INTEGER                                            :: handle, ithread, nthreads, nwms, &
1124                                                            old_nwms, size_used
1125      LOGICAL                                            :: preexists
1126
1127!   ---------------------------------------------------------------------------
1128
1129      CALL timeset(routineN, handle)
1130!$OMP     BARRIER
1131      preexists = ASSOCIATED(matrix%wms)
1132      IF (preexists) THEN
1133         old_nwms = SIZE(matrix%wms)
1134         IF (old_nwms .EQ. 0) &
1135            DBCSR_WARN("Nonexisting work matrices?!")
1136      ELSE
1137         old_nwms = 0
1138      ENDIF
1139      nthreads = 1; ithread = 0
1140!$    nthreads = OMP_GET_NUM_THREADS(); ithread = OMP_GET_THREAD_NUM()
1141      IF (nthreads .GT. 1) THEN
1142         IF (old_nwms /= nthreads .AND. old_nwms /= 0) &
1143            DBCSR_ABORT("Number of work matrices and threads do not match")
1144      ENDIF
1145      nwms = MAX(1, old_nwms)
1146!$    nwms = MAX(nwms, nthreads)
1147!$OMP     BARRIER
1148!$OMP     MASTER
1149      IF (.NOT. ASSOCIATED(matrix%wms)) THEN
1150         CALL dbcsr_work_create(matrix, &
1151                                INT(matrix%nblks*default_resize_factor/nwms), &
1152                                INT(matrix%nze*default_resize_factor/nwms), &
1153                                n=nwms, work_mutable=.FALSE.)
1154      ENDIF
1155!$OMP     END MASTER
1156!$OMP     BARRIER
1157      size_used = matrix%nze
1158      CALL dbcsr_fill_wm_from_matrix(matrix%wms, matrix, size_used, &
1159                                     limits=limits)
1160!$OMP     BARRIER
1161      CALL timestop(handle)
1162   END SUBROUTINE dbcsr_add_wm_from_matrix
1163
1164   SUBROUTINE dbcsr_fill_wm_from_matrix(wm, matrix, size_used, limits)
1165      !! Fills index and data of the work matrix from the
1166      !! previously-finalized one.
1167      !!
1168      !! limits
1169      !! The limits is a 4-tuple
1170      !! (lower_row, higher_row, lower_column, higher_column).
1171
1172      TYPE(dbcsr_work_type), DIMENSION(:), INTENT(INOUT) :: wm
1173         !! the work matrix to fill
1174      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
1175         !! DBCSR matrix
1176      INTEGER, INTENT(IN)                                :: size_used
1177      INTEGER, DIMENSION(4), INTENT(IN), OPTIONAL        :: limits
1178         !! only fills blocks within this range
1179
1180      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_fill_wm_from_matrix'
1181
1182      INTEGER                                            :: blk, blk_p, col, handle, ithread, &
1183                                                            nthreads, nwms, nze, row, wblk_p, &
1184                                                            which_wm, wm_first, wm_last
1185      LOGICAL                                            :: careful, limit, mt, tr
1186      LOGICAL, SAVE                                      :: mutable
1187      TYPE(dbcsr_data_obj)                               :: data_block
1188      TYPE(dbcsr_iterator)                               :: iter
1189
1190!   ---------------------------------------------------------------------------
1191
1192      CALL timeset(routineN, handle)
1193      nwms = SIZE(matrix%wms)
1194      mt = .FALSE.
1195!$    IF (nwms .GT. 1) mt = omp_get_num_threads() .GT. 1
1196      ithread = 0; nthreads = 1
1197!$    ithread = omp_get_thread_num()
1198!$    nthreads = omp_get_num_threads()
1199      limit = PRESENT(limits)
1200      careful = size_used + size_used/8 &
1201                .LT. dbcsr_data_get_size_referenced(matrix%data_area)
1202      CALL dbcsr_data_init(data_block)
1203      CALL dbcsr_data_new(data_block, dbcsr_data_get_type(matrix%data_area))
1204      IF (mt) THEN
1205         wm_first = ithread + 1
1206         wm_last = ithread + 1
1207      ELSE
1208         wm_first = 1
1209         wm_last = nwms
1210      ENDIF
1211      ! Prepares the work matrices to accept the main data.
1212!$OMP     MASTER
1213      mutable = .FALSE.
1214      DO which_wm = 1, nwms
1215         mutable = mutable .OR. dbcsr_wm_use_mutable(wm(which_wm))
1216      ENDDO
1217!$OMP     END MASTER
1218!$OMP     BARRIER
1219      DO which_wm = wm_first, wm_last
1220         IF (dbcsr_wm_use_mutable(wm(which_wm))) &
1221            DBCSR_ABORT("Adding main matrix into mutable not supported.")
1222         IF (mutable) THEN
1223            IF (.NOT. dbcsr_mutable_instantiated(wm(which_wm)%mutable)) THEN
1224               CALL dbcsr_mutable_new(wm(which_wm)%mutable, matrix%data_type)
1225            ENDIF
1226         ELSE
1227            ! We don't know how much data we'll get so we have to be generous.
1228            CALL dbcsr_data_ensure_size(wm(which_wm)%data_area, &
1229                                        size_used/nwms)
1230            CALL dbcsr_data_set_size_referenced(wm(which_wm)%data_area, 0)
1231         ENDIF
1232      ENDDO
1233      ! Now copy the data
1234      CALL dbcsr_iterator_start(iter, matrix, shared=mt, &
1235                                contiguous_pointers=.FALSE., read_only=.TRUE.)
1236      DO WHILE (dbcsr_iterator_blocks_left(iter))
1237         CALL dbcsr_iterator_next_block(iter, row, col, data_block, &
1238                                        transposed=tr, block_number=blk)
1239         IF (limit) THEN
1240            IF (.NOT. within_limits(row, col, limits)) CYCLE
1241         ENDIF
1242         blk_p = matrix%blk_p(blk)
1243         which_wm = ithread + 1
1244         wblk_p = SIGN(wm(which_wm)%datasize + 1, blk_p)
1245         nze = dbcsr_data_get_size(data_block)
1246         IF (mt .OR. limit .OR. careful .OR. mutable) THEN
1247            ! The data gets copied block by block so the block pointers
1248            ! are ordered accordingly.
1249            IF (.NOT. mutable) THEN
1250               CALL add_work_coordinate(wm(which_wm), row, col, wblk_p)
1251               CALL dbcsr_data_ensure_size(wm(which_wm)%data_area, &
1252                                           ABS(wblk_p) + nze - 1, factor=default_resize_factor)
1253               CALL dbcsr_data_set_size_referenced(wm(which_wm)%data_area, &
1254                                                   ABS(wblk_p) + nze - 1)
1255               CALL dbcsr_data_set(wm(which_wm)%data_area, &
1256                                   lb=ABS(wblk_p), &
1257                                   data_size=nze, &
1258                                   src=data_block, source_lb=1)
1259            ENDIF
1260         ELSE
1261            ! The data gets copied all at once so the block pointers
1262            ! should remain the same as they were.
1263            CALL add_work_coordinate(wm(which_wm), row, col, blk_p)
1264         ENDIF
1265         IF (.NOT. mutable) &
1266            wm(which_wm)%datasize = wm(which_wm)%datasize + nze
1267      ENDDO
1268      CALL dbcsr_iterator_stop(iter)
1269      CALL dbcsr_data_clear_pointer(data_block)
1270      CALL dbcsr_data_release(data_block)
1271      ! Copy all blocks at once
1272      IF (.NOT. mt .AND. .NOT. limit .AND. .NOT. careful .AND. .NOT. mutable) THEN
1273         DO which_wm = 1, nwms
1274            CALL dbcsr_data_ensure_size(wm(which_wm)%data_area, &
1275                                        dbcsr_data_get_size_referenced(matrix%data_area))
1276            CALL dbcsr_data_copyall(wm(which_wm)%data_area, matrix%data_area)
1277            wm(which_wm)%datasize = size_used
1278         ENDDO
1279      ENDIF
1280      CALL timestop(handle)
1281   END SUBROUTINE dbcsr_fill_wm_from_matrix
1282
1283   PURE FUNCTION within_limits(row, column, limits)
1284      !! Checks whether a point is within bounds
1285      !! \return whether the point is within the bounds
1286
1287      INTEGER, INTENT(IN)                                :: row, column
1288         !! point to check
1289         !! point to check
1290      INTEGER, DIMENSION(4), INTENT(IN)                  :: limits
1291         !! limits (low_row, high_row, low_col, high_col)
1292      LOGICAL                                            :: within_limits
1293
1294      within_limits = row .GE. limits(1) .AND. row .LE. limits(2) .AND. &
1295                      column .GE. limits(3) .AND. column .LE. limits(4)
1296   END FUNCTION within_limits
1297
1298   SUBROUTINE dbcsr_work_destroy(wm)
1299      !! Deallocates and destroys a work matrix.
1300
1301      TYPE(dbcsr_work_type), INTENT(INOUT)               :: wm
1302         !! work matrix
1303
1304      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_work_destroy'
1305
1306      INTEGER                                            :: handle
1307
1308!   ---------------------------------------------------------------------------
1309
1310      IF (careful_mod) CALL timeset(routineN, handle)
1311
1312      IF (ASSOCIATED(wm%row_i)) THEN
1313         DEALLOCATE (wm%row_i)
1314      ENDIF
1315      IF (ASSOCIATED(wm%col_i)) THEN
1316         DEALLOCATE (wm%col_i)
1317      ENDIF
1318      IF (ASSOCIATED(wm%blk_p)) THEN
1319         DEALLOCATE (wm%blk_p)
1320      ENDIF
1321      CALL dbcsr_data_release(wm%data_area)
1322      CALL dbcsr_mutable_destroy(wm%mutable)
1323
1324      IF (careful_mod) CALL timestop(handle)
1325
1326   END SUBROUTINE dbcsr_work_destroy
1327
1328   SUBROUTINE dbcsr_work_destroy_all(m)
1329      !! Deallocates and destroys a work matrix.
1330      TYPE(dbcsr_type), INTENT(INOUT)                    :: m
1331
1332      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_work_destroy_all'
1333
1334      INTEGER                                            :: handle, i
1335
1336!   ---------------------------------------------------------------------------
1337
1338      CALL timeset(routineN, handle)
1339
1340      IF (.NOT. ASSOCIATED(m%wms)) &
1341         DBCSR_WARN("Want to destroy nonexisting work matrices.")
1342      IF (ASSOCIATED(m%wms)) THEN
1343         DO i = 1, SIZE(m%wms)
1344            CALL dbcsr_work_destroy(m%wms(i))
1345         ENDDO
1346         DEALLOCATE (m%wms)
1347      ENDIF
1348
1349      CALL timestop(handle)
1350
1351   END SUBROUTINE dbcsr_work_destroy_all
1352
1353   SUBROUTINE add_work_coordinate(matrix, row, col, blk, index)
1354      !! Adds a coordinate (or other data) into a work matrix's row_i and
1355      !! col_i arrays and returns its position.
1356      !! @note  Uses the matrix%lastblk to keep track of the current position.
1357
1358      TYPE(dbcsr_work_type), INTENT(INOUT)               :: matrix
1359         !! work matrix
1360      INTEGER, INTENT(IN)                                :: row, col
1361         !! row data to add
1362         !! col data to add
1363      INTEGER, INTENT(IN), OPTIONAL                      :: blk
1364         !! block pointer to add
1365      INTEGER, INTENT(OUT), OPTIONAL                     :: index
1366         !! saved position
1367
1368      CHARACTER(len=*), PARAMETER :: routineN = 'add_work_coordinate'
1369
1370      INTEGER                                            :: handle
1371
1372!   ---------------------------------------------------------------------------
1373
1374      IF (careful_mod) CALL timeset(routineN, handle)
1375      matrix%lastblk = matrix%lastblk + 1
1376      CALL ensure_array_size(matrix%row_i, ub=matrix%lastblk, &
1377                             factor=default_resize_factor)
1378      CALL ensure_array_size(matrix%col_i, ub=matrix%lastblk, &
1379                             factor=default_resize_factor)
1380      matrix%row_i(matrix%lastblk) = row
1381      matrix%col_i(matrix%lastblk) = col
1382      IF (PRESENT(blk)) THEN
1383         CALL ensure_array_size(matrix%blk_p, ub=matrix%lastblk, &
1384                                factor=default_resize_factor)
1385         matrix%blk_p(matrix%lastblk) = blk
1386      ENDIF
1387      IF (PRESENT(index)) index = matrix%lastblk
1388      IF (careful_mod) CALL timestop(handle)
1389   END SUBROUTINE add_work_coordinate
1390
1391   SUBROUTINE dbcsr_merge_all(matrix, old_row_p, old_col_i, old_blk_p, &
1392                              sort_data)
1393      !! Merge data from matrix and work matrices into the final matrix.
1394
1395      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
1396         !! matrix to work on
1397      INTEGER, DIMENSION(*), INTENT(IN)                  :: old_row_p, old_col_i, old_blk_p
1398      LOGICAL, INTENT(IN)                                :: sort_data
1399         !! whether data will be fully sorted
1400
1401      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_merge_all'
1402      LOGICAL, PARAMETER                                 :: dbg = .FALSE.
1403
1404      INTEGER                                            :: handle, my_row_count, nblks, &
1405                                                            nrows, nwms, row, t, &
1406                                                            which_wm, wm_datasize
1407      INTEGER, ALLOCATABLE, DIMENSION(:), SAVE           :: all_data_offsets, all_data_sizes, &
1408                                                            new_blk_p_sorted, new_blk_sizes, &
1409                                                            new_row_p
1410      INTEGER, ALLOCATABLE, DIMENSION(:), SAVE, TARGET   :: blk_d, new_blk_p, new_col_i
1411      INTEGER, DIMENSION(:), CONTIGUOUS, POINTER         :: my_row_p
1412      INTEGER, DIMENSION(:), POINTER, CONTIGUOUS, SAVE   :: cbs, rbs
1413      INTEGER, SAVE                                      :: max_row_count, new_nblks = 0, new_nze = 0
1414      TYPE(dbcsr_data_obj), ALLOCATABLE, DIMENSION(:), &
1415         SAVE                                            :: all_data_areas
1416      TYPE(dbcsr_work_type), POINTER                     :: wm
1417      TYPE(i_array_p), DIMENSION(:), POINTER, SAVE       :: all_blk_p, all_col_i, all_row_p
1418
1419!   ---------------------------------------------------------------------------
1420
1421      CALL timeset(routineN, handle)
1422
1423      ! Outline:
1424      ! Each thread has a work matrix.  These must be merged and made
1425      ! into a new index.  If sort_data is False, then the data areas
1426      ! are simply appended.  This is probably quicker but the data
1427      ! blocks are not in order and the data size may expand beyond
1428      ! what is needed.  If sort_data is True, then data blocks are
1429      ! sorted in order.
1430
1431      IF (dbg) WRITE (*, *) routineN//" starting, sort?", sort_data
1432!$OMP     BARRIER
1433      nrows = matrix%nblkrows_total
1434      nwms = SIZE(matrix%wms)
1435!$    IF (nwms /= OMP_GET_NUM_THREADS()) &
1436!$       DBCSR_ABORT("Number of threads does not match number of work matrices.")
1437      which_wm = 1
1438!$    which_wm = OMP_GET_THREAD_NUM() + 1
1439      wm => matrix%wms(which_wm)
1440      !
1441      ! Currently B-tree-based work matrices are converted to array form.
1442      IF (dbcsr_wm_use_mutable(wm)) THEN
1443         SELECT CASE (wm%mutable%m%data_type)
1444         CASE (dbcsr_type_real_4)
1445            CALL tree_to_linear_s(wm)
1446         CASE (dbcsr_type_real_8)
1447            CALL tree_to_linear_d(wm)
1448         CASE (dbcsr_type_complex_4)
1449            CALL tree_to_linear_c(wm)
1450         CASE (dbcsr_type_complex_8)
1451            CALL tree_to_linear_z(wm)
1452         CASE default
1453            DBCSR_ABORT("Invalid data type")
1454         END SELECT
1455      ENDIF
1456      !
1457      ! Initializations and some counts from the threads are summed.
1458      !
1459!$OMP     MASTER
1460      new_nblks = old_row_p(nrows + 1)
1461      new_nze = matrix%nze
1462      ALLOCATE (all_row_p(nwms))
1463      ALLOCATE (all_col_i(nwms))
1464      ALLOCATE (all_blk_p(nwms))
1465      ALLOCATE (all_data_sizes(0:nwms))
1466      ALLOCATE (all_data_offsets(nwms))
1467      IF (sort_data) ALLOCATE (all_data_areas(0:nwms))
1468      IF (sort_data) THEN
1469         CALL dbcsr_data_init(all_data_areas(0))
1470         all_data_areas(0) = matrix%data_area
1471!$OMP        CRITICAL (crit_data)
1472         CALL dbcsr_data_hold(all_data_areas(0))
1473!$OMP        END CRITICAL (crit_data)
1474      ENDIF
1475      ! The following is valid because the data actually referenced
1476      ! by blocks is explicitly calculated in dbcsr_finalize.
1477      all_data_sizes(0) = matrix%nze
1478      rbs => array_data(matrix%row_blk_size)
1479      cbs => array_data(matrix%col_blk_size)
1480!$OMP     END MASTER
1481      !
1482!$OMP     BARRIER
1483      IF (sort_data .AND. wm%datasize_after_filtering .GE. 0) THEN
1484         wm_datasize = wm%datasize_after_filtering
1485      ELSE
1486         wm_datasize = wm%datasize
1487      ENDIF
1488      nblks = wm%lastblk
1489!$OMP     ATOMIC
1490      new_nblks = new_nblks + nblks
1491!$OMP     ATOMIC
1492      new_nze = new_nze + wm_datasize
1493!$OMP     BARRIER
1494      !
1495!$OMP     MASTER
1496      ALLOCATE (new_row_p(nrows + 1))
1497      ALLOCATE (new_col_i(new_nblks))
1498      ALLOCATE (new_blk_p(new_nblks))
1499      IF (sort_data) THEN
1500         ALLOCATE (blk_d(new_nblks))
1501      ELSE
1502         ALLOCATE (blk_d(1))
1503      ENDIF
1504!$OMP     END MASTER
1505      !
1506      ! Each thread creates a row_p index for its new blocks.  This gives the
1507      ! number of new blocks per thread per row.
1508      CALL dbcsr_sort_indices(nblks, wm%row_i, wm%col_i, wm%blk_p)
1509      ALLOCATE (my_row_p(nrows + 1))
1510      CALL dbcsr_make_dbcsr_index(my_row_p, wm%row_i, nrows, nblks)
1511      !
1512      ! The threads must share their row_p, col_i, blk_p, and data areas
1513      ! among themselves.
1514      all_row_p(which_wm)%p => my_row_p
1515      all_data_sizes(which_wm) = wm_datasize
1516      IF (.NOT. sort_data) THEN
1517         IF (wm_datasize > dbcsr_data_get_size_referenced(wm%data_area)) &
1518            DBCSR_ABORT("Data size mismatch.")
1519      ENDIF
1520      all_col_i(which_wm)%p => wm%col_i
1521      all_blk_p(which_wm)%p => wm%blk_p
1522!$OMP     BARRIER
1523      IF (sort_data) THEN
1524!$OMP        MASTER
1525         all_data_offsets(:) = 0
1526!$OMP        END MASTER
1527         CALL dbcsr_data_init(all_data_areas(which_wm))
1528         all_data_areas(which_wm) = wm%data_area
1529!$OMP        CRITICAL (crit_data)
1530         CALL dbcsr_data_hold(all_data_areas(which_wm))
1531!$OMP        END CRITICAL (crit_data)
1532      ELSE
1533!$OMP        MASTER
1534         all_data_offsets(1) = all_data_sizes(0)
1535         DO t = 2, nwms
1536            all_data_offsets(t) = all_data_offsets(t - 1) + all_data_sizes(t - 1)
1537         ENDDO
1538!$OMP        END MASTER
1539      ENDIF
1540      !
1541      ! The new row counts are created, then the new row_p index is created.
1542      !
1543!$OMP     DO
1544      DO row = 1, nrows
1545         my_row_count = old_row_p(row + 1) - old_row_p(row)
1546         DO t = 1, nwms
1547            my_row_count = my_row_count &
1548                 &       + all_row_p(t)%p(row + 1) - all_row_p(t)%p(row)
1549         ENDDO
1550         new_row_p(row) = my_row_count
1551      ENDDO
1552!$OMP     END DO
1553!$OMP     MASTER
1554      max_row_count = MAXVAL(new_row_p(1:nrows))
1555      CALL dbcsr_build_row_index(new_row_p, nrows)
1556!$OMP     END MASTER
1557!$OMP     BARRIER
1558      !
1559      ! The new index is built
1560      CALL merge_index(new_row_p, new_col_i, new_blk_p, &
1561                       blk_d, &
1562                       old_row_p, old_col_i, old_blk_p, &
1563                       all_row_p, all_col_i, all_blk_p, &
1564                       all_data_offsets, nwms, nrows, max_row_count, sort_data)
1565      !
1566      ! The data is then merged in one of two ways.
1567      !
1568!$OMP     MASTER
1569      IF (sort_data) THEN
1570         ! The matrix gets a fresh data area.  Blocks from the work
1571         ! matrices will be copied into it in order.
1572         !
1573!$OMP        CRITICAL (crit_data)
1574         CALL dbcsr_data_release(matrix%data_area)
1575         CALL dbcsr_data_init(matrix%data_area)
1576!$OMP        END CRITICAL (crit_data)
1577         CALL dbcsr_data_new(matrix%data_area, &
1578                             data_size=new_nze, &
1579                             data_type=dbcsr_data_get_type(all_data_areas(0)), &
1580                             memory_type=dbcsr_data_get_memory_type(all_data_areas(0)))
1581         ALLOCATE (new_blk_p_sorted(new_nblks))
1582         ALLOCATE (new_blk_sizes(new_nblks))
1583      ELSE
1584         ! Data stored in the work matrices will be just appended to the
1585         ! current data area.
1586         CALL dbcsr_data_ensure_size(matrix%data_area, new_nze)
1587      ENDIF
1588!$OMP     END MASTER
1589!$OMP     BARRIER
1590      IF (sort_data) THEN
1591         CALL dbcsr_calc_block_sizes(new_blk_sizes, &
1592                                     new_row_p, new_col_i, rbs, cbs)
1593         CALL dbcsr_sort_data(new_blk_p_sorted, new_blk_p, &
1594                              new_blk_sizes, dsts=matrix%data_area, &
1595                              src=all_data_areas(0), &
1596                              srcs=all_data_areas, old_blk_d=blk_d)
1597      ELSE
1598         IF (all_data_sizes(which_wm) .GT. 0) THEN
1599            CALL dbcsr_data_copy(dst=matrix%data_area, &
1600                                 dst_lb=(/all_data_offsets(which_wm) + 1/), &
1601                                 dst_sizes=(/all_data_sizes(which_wm)/), &
1602                                 src=wm%data_area, &
1603                                 src_lb=(/1/), &
1604                                 src_sizes=(/all_data_sizes(which_wm)/))
1605         ENDIF
1606      ENDIF
1607      !
1608      ! Creates a new index array.
1609      !
1610!$OMP     BARRIER
1611!$OMP     MASTER
1612      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p)
1613      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i)
1614      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p)
1615      CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, &
1616                                   DATA=new_row_p(1:nrows + 1), extra=new_nblks*2)
1617      CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, &
1618                                   DATA=new_col_i(1:new_nblks))
1619      IF (sort_data) THEN
1620         CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, &
1621                                      DATA=new_blk_p_sorted)
1622      ELSE
1623         CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, &
1624                                      DATA=new_blk_p)
1625      ENDIF
1626      matrix%nblks = new_nblks
1627      matrix%nze = new_nze
1628      matrix%index(dbcsr_slot_nblks) = matrix%nblks
1629      matrix%index(dbcsr_slot_nze) = matrix%nze
1630      CALL dbcsr_repoint_index(matrix)
1631!$OMP     END MASTER
1632      !
1633!$OMP     BARRIER
1634      !
1635      DEALLOCATE (my_row_p)
1636      IF (sort_data) THEN
1637!$OMP        MASTER
1638!$OMP        CRITICAL (crit_data)
1639         CALL dbcsr_data_release(all_data_areas(0))
1640!$OMP        END CRITICAL (crit_data)
1641         DO which_wm = 1, nwms
1642!$OMP           CRITICAL (crit_data)
1643            CALL dbcsr_data_release(all_data_areas(which_wm))
1644!$OMP           END CRITICAL (crit_data)
1645         ENDDO
1646!$OMP        END MASTER
1647      ENDIF
1648!$OMP     BARRIER
1649!$OMP     MASTER
1650      DEALLOCATE (all_row_p)
1651      DEALLOCATE (all_col_i)
1652      DEALLOCATE (all_blk_p)
1653      DEALLOCATE (new_row_p)
1654      DEALLOCATE (new_col_i)
1655      DEALLOCATE (new_blk_p)
1656      DEALLOCATE (all_data_sizes)
1657      DEALLOCATE (all_data_offsets)
1658      IF (sort_data) THEN
1659         DEALLOCATE (all_data_areas)
1660         DEALLOCATE (new_blk_sizes)
1661         DEALLOCATE (new_blk_p_sorted)
1662      ENDIF
1663      DEALLOCATE (blk_d)
1664!$OMP     END MASTER
1665!$OMP     BARRIER
1666      IF (dbg) WRITE (*, *) routineN//" stopped"
1667      CALL timestop(handle)
1668   END SUBROUTINE dbcsr_merge_all
1669
1670   SUBROUTINE dbcsr_merge_single_wm(matrix)
1671      !! Sort data from the WM into the final matrix, based closely on dbcsr_merge_all
1672
1673      TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
1674         !! matrix to work on
1675
1676      CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_merge_single_wm'
1677      LOGICAL, PARAMETER                                 :: dbg = .FALSE.
1678
1679      INTEGER                                            :: handle, nblks, nrows
1680      INTEGER, ALLOCATABLE, DIMENSION(:), SAVE           :: new_blk_p_sorted, new_blk_sizes, &
1681                                                            new_row_p
1682      INTEGER, ALLOCATABLE, DIMENSION(:), SAVE, TARGET   :: blk_d
1683      INTEGER, DIMENSION(:), POINTER, CONTIGUOUS, SAVE   :: cbs, rbs
1684      TYPE(dbcsr_work_type), POINTER                     :: wm
1685
1686!   ---------------------------------------------------------------------------
1687
1688      CALL timeset(routineN, handle)
1689
1690      ! Outline:
1691      ! There is a single work matrix.  Data blocks are sorted and copied
1692      ! into the matrix data area (which is empty).  The index is made consistent
1693
1694      nrows = matrix%nblkrows_total
1695      wm => matrix%wms(1)
1696      nblks = wm%lastblk
1697      IF (dbcsr_wm_use_mutable(wm)) &
1698         DBCSR_ABORT("Number of threads does not match number of work matrices.")
1699      !
1700      ! The following is valid because the data actually referenced
1701      ! by blocks is explicitly calculated in dbcsr_finalize.
1702!$OMP     MASTER
1703      rbs => array_data(matrix%row_blk_size)
1704      cbs => array_data(matrix%col_blk_size)
1705      !
1706      ! Initializations
1707      !
1708      ALLOCATE (new_row_p(nrows + 1))
1709      ALLOCATE (blk_d(nblks))
1710      ALLOCATE (new_blk_p_sorted(nblks))
1711      ALLOCATE (new_blk_sizes(nblks))
1712      !
1713      ! Master thread creates a row_p index for the (sorted) blocks.
1714      CALL dbcsr_sort_indices(nblks, wm%row_i, wm%col_i, wm%blk_p)
1715      CALL dbcsr_make_dbcsr_index(new_row_p, wm%row_i, nrows, nblks)
1716      !
1717      !
1718      ! The matrix data area is resized.  Blocks from the work
1719      ! matrices will be copied into it in order.
1720      !
1721      CALL dbcsr_data_ensure_size(matrix%data_area, wm%datasize, nocopy=.TRUE.)
1722!$OMP     END MASTER
1723!$OMP     BARRIER
1724      CALL dbcsr_calc_block_sizes(new_blk_sizes, &
1725                                  new_row_p, wm%col_i, rbs, cbs)
1726      CALL dbcsr_sort_data(new_blk_p_sorted, wm%blk_p, &
1727                           new_blk_sizes, dsts=matrix%data_area, &
1728                           src=wm%data_area)
1729      !
1730      ! Creates a new index array.
1731      !
1732!$OMP     BARRIER
1733!$OMP     MASTER
1734      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_row_p)
1735      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_col_i)
1736      CALL dbcsr_clearfrom_index_array(matrix, dbcsr_slot_blk_p)
1737      CALL dbcsr_addto_index_array(matrix, dbcsr_slot_row_p, &
1738                                   DATA=new_row_p(1:nrows + 1), extra=nblks*2)
1739      CALL dbcsr_addto_index_array(matrix, dbcsr_slot_col_i, &
1740                                   DATA=wm%col_i(1:nblks))
1741      CALL dbcsr_addto_index_array(matrix, dbcsr_slot_blk_p, &
1742                                   DATA=new_blk_p_sorted)
1743      matrix%nblks = nblks
1744      matrix%nze = wm%datasize
1745      matrix%index(dbcsr_slot_nblks) = matrix%nblks
1746      matrix%index(dbcsr_slot_nze) = matrix%nze
1747      CALL dbcsr_repoint_index(matrix)
1748      DEALLOCATE (new_row_p)
1749      DEALLOCATE (new_blk_sizes)
1750      DEALLOCATE (new_blk_p_sorted)
1751      DEALLOCATE (blk_d)
1752!$OMP     END MASTER
1753      IF (dbg) WRITE (*, *) routineN//" stopped"
1754      CALL timestop(handle)
1755   END SUBROUTINE dbcsr_merge_single_wm
1756
1757   SUBROUTINE merge_index(new_row_p, new_col_i, new_blk_p, &
1758      !! Builds a new index from several work matrices.
1759                          blk_d, old_row_p, old_col_i, old_blk_p, &
1760                          all_row_p, all_col_i, all_blk_p, &
1761                          all_data_offsets, nwms, nrows, max_row_count, sort_data)
1762      INTEGER, DIMENSION(*), INTENT(IN)                  :: new_row_p
1763      INTEGER, DIMENSION(*), INTENT(OUT), TARGET         :: new_col_i, new_blk_p
1764      INTEGER, DIMENSION(*), INTENT(IN), TARGET          :: blk_d
1765      INTEGER, DIMENSION(*), INTENT(IN)                  :: old_row_p, old_col_i, old_blk_p
1766      TYPE(i_array_p), DIMENSION(*), INTENT(IN)          :: all_row_p, all_col_i, all_blk_p
1767      INTEGER, DIMENSION(*), INTENT(IN)                  :: all_data_offsets
1768      INTEGER, INTENT(IN)                                :: nwms, nrows, max_row_count
1769      LOGICAL, INTENT(IN)                                :: sort_data
1770
1771      CHARACTER(len=*), PARAMETER :: routineN = 'merge_index'
1772      INTEGER                                            :: blk1, blk2, error_handle, first_row_blk, &
1773                                                            last_row_blk, row, src_blk_1, &
1774                                                            src_blk_2, t
1775      INTEGER, ALLOCATABLE, DIMENSION(:)                 :: blk_p_buff, tmp_arr
1776      INTEGER, DIMENSION(:), POINTER, CONTIGUOUS         :: blk_span, col_span, d_span
1777
1778!   ---------------------------------------------------------------------------
1779
1780      CALL timeset(routineN, error_handle)
1781      !
1782      ALLOCATE (tmp_arr(max_row_count))
1783      ALLOCATE (blk_p_buff(max_row_count))
1784      !
1785!$OMP     DO
1786      DO row = 1, nrows
1787         first_row_blk = new_row_p(row) + 1
1788         last_row_blk = new_row_p(row + 1)
1789         col_span => new_col_i(first_row_blk:last_row_blk)
1790         blk_span => new_blk_p(first_row_blk:last_row_blk)
1791         IF (sort_data) d_span => blk_d(first_row_blk:last_row_blk)
1792         src_blk_1 = old_row_p(row) + 1
1793         src_blk_2 = old_row_p(row + 1)
1794         blk1 = 1
1795         blk2 = blk1 + (src_blk_2 - src_blk_1 + 1) - 1
1796         col_span(blk1:blk2) = old_col_i(src_blk_1:src_blk_2)
1797         blk_span(blk1:blk2) = old_blk_p(src_blk_1:src_blk_2)
1798         IF (sort_data) THEN
1799            d_span(blk1:blk2) = 1
1800            DO t = 1, nwms
1801               src_blk_1 = all_row_p(t)%p(row) + 1
1802               src_blk_2 = all_row_p(t)%p(row + 1)
1803               blk1 = blk2 + 1
1804               blk2 = blk1 + (src_blk_2 - src_blk_1 + 1) - 1
1805               col_span(blk1:blk2) = all_col_i(t)%p(src_blk_1:src_blk_2)
1806               blk_span(blk1:blk2) = all_blk_p(t)%p(src_blk_1:src_blk_2)
1807               d_span(blk1:blk2) = t + 1
1808            ENDDO
1809         ELSE
1810            DO t = 1, nwms
1811               src_blk_1 = all_row_p(t)%p(row) + 1
1812               src_blk_2 = all_row_p(t)%p(row + 1)
1813               blk1 = blk2 + 1
1814               blk2 = blk1 + (src_blk_2 - src_blk_1 + 1) - 1
1815               col_span(blk1:blk2) = all_col_i(t)%p(src_blk_1:src_blk_2)
1816               blk_span(blk1:blk2) = all_blk_p(t)%p(src_blk_1:src_blk_2) &
1817                    &              + all_data_offsets(t)
1818            ENDDO
1819         ENDIF
1820         CALL sort(col_span, SIZE(col_span), tmp_arr)
1821         blk_p_buff(1:SIZE(blk_span)) = blk_span(:)
1822         blk_span(1:SIZE(blk_span)) = blk_p_buff(tmp_arr(1:SIZE(blk_span)))
1823      ENDDO
1824!$OMP     END DO
1825      !
1826      DEALLOCATE (tmp_arr)
1827      DEALLOCATE (blk_p_buff)
1828      !
1829      CALL timestop(error_handle)
1830   END SUBROUTINE merge_index
1831
1832#:include '../data/dbcsr.fypp'
1833#:for n, nametype1, base1, prec1, kind1, type1, dkind1 in inst_params_float
1834   SUBROUTINE tree_to_linear_${nametype1}$ (wm)
1835     !! Converts mutable data to linear (array) type.
1836
1837      USE dbcsr_btree, &
1838         ONLY: btree_2d_data_${nametype1}$ => btree_data_${nametype1}$p2d, &
1839               btree_destroy_${nametype1}$ => btree_delete, &
1840               btree_size_${nametype1}$ => btree_get_entries
1841      TYPE(dbcsr_work_type), INTENT(INOUT)     :: wm
1842        !! work matrix to convert
1843
1844      CHARACTER(len=*), PARAMETER :: routineN = 'tree_to_linear_${nametype1}$'
1845
1846      INTEGER                                  :: blk, blk_p, treesize, &
1847                                                  error_handler, needed_size
1848      INTEGER(KIND=int_8), ALLOCATABLE, &
1849         DIMENSION(:)                           :: keys
1850      ${type1}$, DIMENSION(:), POINTER, CONTIGUOUS :: target_data
1851      ${type1}$, DIMENSION(:, :), POINTER        :: block_2d
1852      TYPE(btree_2d_data_${nametype1}$), ALLOCATABLE, &
1853         DIMENSION(:)                           :: values
1854
1855!   ---------------------------------------------------------------------------
1856
1857      CALL timeset(routineN, error_handler)
1858      ! srt = .TRUE. ! Not needed because of the copy
1859      treesize = btree_size_${nametype1}$ (wm%mutable%m%btree_${nametype1}$)
1860      IF (wm%lastblk .NE. treesize) &
1861         DBCSR_ABORT("Mismatch in number of blocks")
1862      ALLOCATE (keys(treesize), values(treesize))
1863      CALL btree_destroy_${nametype1}$ (wm%mutable%m%btree_${nametype1}$, keys, values)
1864      CALL ensure_array_size(wm%row_i, ub=treesize)
1865      CALL ensure_array_size(wm%col_i, ub=treesize)
1866      CALL dbcsr_unpack_i8_2i4(keys, wm%row_i, &
1867                               wm%col_i)
1868      ! For now we also fill the data, sloooowly, but this should
1869      ! be avoided and the data should be copied directly from the
1870      ! source in the subroutine's main loop.
1871      CALL ensure_array_size(wm%blk_p, ub=treesize)
1872      needed_size = 0
1873      DO blk = 1, treesize
1874         block_2d => values(blk)%p
1875         needed_size = needed_size + SIZE(block_2d)
1876      ENDDO
1877      wm%datasize = needed_size
1878      CALL dbcsr_data_ensure_size(wm%data_area, &
1879                                  wm%datasize)
1880      target_data => dbcsr_get_data_p_${nametype1}$ (wm%data_area)
1881      blk_p = 1
1882      DO blk = 1, treesize
1883         block_2d => values(blk)%p
1884         IF (.NOT. values(blk)%tr) THEN
1885            wm%blk_p(blk) = blk_p
1886         ELSE
1887            wm%blk_p(blk) = -blk_p
1888         ENDIF
1889         CALL block_copy_${nametype1}$ (target_data, block_2d, &
1890                                        SIZE(block_2d), blk_p, 1)
1891         blk_p = blk_p + SIZE(block_2d)
1892         DEALLOCATE (block_2d)
1893      ENDDO
1894      DEALLOCATE (keys, values)
1895      CALL dbcsr_mutable_release(wm%mutable)
1896      CALL timestop(error_handler)
1897   END SUBROUTINE tree_to_linear_${nametype1}$
1898
1899#:endfor
1900
1901END MODULE dbcsr_work_operations
1902