1 //------------------------------------------------------------------------------
2 // GB_Matrix_wait: finish all pending computations on a single matrix
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // CALLS: GB_builder
11
12 // This function is typically called via the GB_MATRIX_WAIT(A) macro, except
13 // for GB_assign, GB_subassign, and GB_mxm.
14
15 // The matrix A has zombies and/or pending tuples placed there by
16 // GrB_setElement, GrB_*assign, or GB_mxm. Zombies must now be deleted, and
17 // pending tuples must now be assembled together and added into the matrix.
18 // The indices in A might also be jumbled; if so, they are sorted now.
19
// When the function returns, all pending tuples and zombies have been
// deleted.  This is true even if the function fails due to lack of memory
// (in that case, the matrix is cleared as well).
23
24 // If A is hypersparse, the time taken is at most O(nnz(A) + t log t), where t
25 // is the number of pending tuples in A, and nnz(A) includes both zombies and
26 // live entries. There is no O(m) or O(n) time component, if A is m-by-n.
27 // If the number of non-empty vectors of A grows too large, then A can be
28 // converted to non-hypersparse.
29
30 // If A is non-hypersparse, then O(n) is added in the worst case, to prune
31 // zombies and to update the vector pointers for A.
32
33 // If the method is successful, it does an OpenMP flush just before returning.
34
35 #include "GB_select.h"
36 #include "GB_add.h"
37 #include "GB_Pending.h"
38 #include "GB_build.h"
39 #include "GB_jappend.h"
40
// Free all workspace and intermediate matrices.  Clearing A itself is
// intentional: per the file header, on an out-of-memory failure the matrix A
// is returned cleared, with no pending work remaining.
#define GB_FREE_ALL                 \
{                                   \
    GB_FREE (&W, W_size) ;          \
    GB_phbix_free (A) ;             \
    GB_phbix_free (T) ;             \
    GB_phbix_free (S) ;             \
    GB_phbix_free (A1) ;            \
}
49
GB_PUBLIC                   // accessed by the MATLAB tests in GraphBLAS/Test only
GrB_Info GB_Matrix_wait     // finish all pending computations
(
    GrB_Matrix A,               // matrix with pending computations
    const char *name,           // name of the matrix (burble diagnostics only)
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    // W: workspace used only by the (currently disabled) shallow-copy
    // expansion code below.
    GB_void *W = NULL ; size_t W_size = 0 ;

    // T, A1, and S use statically allocated headers (on this stack frame):
    //   T:  the pending tuples assembled into a hypersparse matrix
    //   A1: a shallow hypersparse slice A (:, kA:end) of A
    //   S:  the sum S = A1+T (incremental case) or S = A+T (general case)
    struct GB_Matrix_opaque T_header, A1_header, S_header ;
    GrB_Matrix T = GB_clear_static_header (&T_header) ;
    GrB_Matrix A1 = NULL ;
    GrB_Matrix S = GB_clear_static_header (&S_header) ;
    GrB_Info info = GrB_SUCCESS ;

    ASSERT_MATRIX_OK (A, "A to wait", GB_FLIP (GB0)) ;

    if (GB_IS_FULL (A) || GB_IS_BITMAP (A))
    {
        // full and bitmap matrices never have any pending work
        ASSERT (!GB_ZOMBIES (A)) ;
        ASSERT (!GB_JUMBLED (A)) ;
        ASSERT (!GB_PENDING (A)) ;
        // ensure the matrix is written to memory
        #pragma omp flush
        return (GrB_SUCCESS) ;
    }

    // only sparse and hypersparse matrices can have pending work
    ASSERT (GB_IS_SPARSE (A) || GB_IS_HYPERSPARSE (A)) ;
    ASSERT (GB_ZOMBIES_OK (A)) ;
    ASSERT (GB_JUMBLED_OK (A)) ;
    ASSERT (GB_PENDING_OK (A)) ;

    //--------------------------------------------------------------------------
    // get the zombie and pending count, and burble if work needs to be done
    //--------------------------------------------------------------------------

    int64_t nzombies = A->nzombies ;
    int64_t npending = GB_Pending_n (A) ;
    if (nzombies > 0 || npending > 0 || A->jumbled)
    {
        GB_BURBLE_MATRIX (A, "(wait:%s " GBd " %s, " GBd " pending%s) ", name,
            nzombies, (nzombies == 1) ? "zombie" : "zombies", npending,
            A->jumbled ? ", jumbled" : "") ;
    }

    //--------------------------------------------------------------------------
    // determine the max # of threads to use
    //--------------------------------------------------------------------------

    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;

    //--------------------------------------------------------------------------
    // ensure A is not shallow
    //--------------------------------------------------------------------------

    int64_t anz_orig = GB_NNZ (A) ;     // # entries, including zombies
    int64_t asize = A->type->size ;     // size of one entry, in bytes

    #if 0
    // this code is currently unused: deep-copies any shallow A->p/h/i/x
    // components so that A can be modified in place
    if (GB_is_shallow (A))
    {
        // shallow matrices will never have any pending tuples
        ASSERT (npending == 0) ;

        if (A->p_shallow)
        {
            int64_t len = (A->plen + 1) * sizeof (int64_t) ;
            W = GB_MALLOC (len, GB_void, &W_size) ;
            if (W == NULL)
            {
                // out of memory
                return (GrB_OUT_OF_MEMORY) ;
            }
            GB_memcpy (W, A->p, len, nthreads_max) ;
            A->p = (int64_t *) W ; A->p_size = W_size ;
            A->p_shallow = false ;
            W = NULL ;
        }

        if (A->h_shallow)
        {
            int64_t len = A->nvec * sizeof (int64_t) ;
            W = GB_MALLOC (len, GB_void, &W_size) ;
            if (W == NULL)
            {
                // out of memory
                return (GrB_OUT_OF_MEMORY) ;
            }
            GB_memcpy (W, A->h, len, nthreads_max) ;
            A->h = W ; A->h_size = W_size ;
            A->h_shallow = false ;
            W = NULL ;
        }

        if (A->i_shallow)
        {
            int64_t len = anz_orig * sizeof (int64_t) ;
            W = GB_MALLOC (len, GB_void, &W_size) ;
            if (W == NULL)
            {
                // out of memory
                return (GrB_OUT_OF_MEMORY) ;
            }
            GB_memcpy (W, A->i, len, nthreads_max) ;
            A->i = (int64_t *) W ; A->i_size = W_size ;
            A->i_shallow = false ;
            W = NULL ;
        }

        if (A->x_shallow)
        {
            int64_t len = anz_orig * asize ;
            W = GB_MALLOC (len, GB_void, &W_size) ;
            if (W == NULL)
            {
                // out of memory
                return (GrB_OUT_OF_MEMORY) ;
            }
            GB_memcpy (W, A->x, len, nthreads_max) ;
            A->x = (GB_void *) W ; A->x_size = W_size ;
            A->x_shallow = false ;
            W = NULL ;
        }
    }
    #endif

    ASSERT (!GB_is_shallow (A)) ;

    //--------------------------------------------------------------------------
    // check if A only needs to be unjumbled
    //--------------------------------------------------------------------------

    if (npending == 0 && nzombies == 0)
    {
        // A is not conformed, so the sparsity structure of A is not modified.
        // That is, if A has no pending tuples and no zombies, but is just
        // jumbled, then it stays sparse or hypersparse.
        // NOTE(review): this success path returns without the "#pragma omp
        // flush" done on the other success paths — confirm whether
        // GB_unjumble flushes internally, or whether a flush is needed here.
        GB_OK (GB_unjumble (A, Context)) ;
        return (info) ;
    }

    //--------------------------------------------------------------------------
    // assemble the pending tuples into T
    //--------------------------------------------------------------------------

    int64_t tnz = 0 ;       // # entries in T after assembling duplicates
    if (npending > 0)
    {

        //----------------------------------------------------------------------
        // construct a new hypersparse matrix T with just the pending tuples
        //----------------------------------------------------------------------

        // T has the same type as A->type, which can differ from the type of
        // the pending tuples, A->Pending->type.  The Pending->op can be NULL
        // (an implicit SECOND function), or it can be any accum operator.  The
        // z=accum(x,y) operator can have any types, and it does not have to be
        // associative.

        info = GB_builder
        (
            T, // create T using a static header
            A->type, // T->type = A->type
            A->vlen, // T->vlen = A->vlen
            A->vdim, // T->vdim = A->vdim
            A->is_csc, // T->is_csc = A->is_csc
            &(A->Pending->i), // iwork_handle, becomes T->i on output
            &(A->Pending->i_size),
            &(A->Pending->j), // jwork_handle, free on output
            &(A->Pending->j_size),
            &(A->Pending->x), // Swork_handle, free on output
            &(A->Pending->x_size),
            A->Pending->sorted, // tuples may or may not be sorted
            false, // there might be duplicates; look for them
            A->Pending->nmax, // size of Pending->[ijx] arrays
            true, // is_matrix: unused
            NULL, NULL, NULL, // original I,J,S tuples, not used here
            npending, // # of tuples
            A->Pending->op, // dup operator for assembling duplicates
            A->Pending->type->code, // type of Pending->x
            Context
        ) ;

        //----------------------------------------------------------------------
        // free pending tuples
        //----------------------------------------------------------------------

        // The tuples have been converted to T, which is more compact, and
        // duplicates have been removed.  The following work needs to be done
        // even if the builder fails.

        // GB_builder frees A->Pending->j and A->Pending->x.  If successful,
        // A->Pending->i is now T->i.  Otherwise A->Pending->i is freed.  In
        // both cases, A->Pending->i is NULL.
        ASSERT (A->Pending->i == NULL) ;
        ASSERT (A->Pending->j == NULL) ;
        ASSERT (A->Pending->x == NULL) ;

        // free the list of pending tuples
        GB_Pending_free (&(A->Pending)) ;
        ASSERT (!GB_PENDING (A)) ;

        ASSERT_MATRIX_OK (A, "A after moving pending tuples to T", GB0) ;

        //----------------------------------------------------------------------
        // check the status of the builder
        //----------------------------------------------------------------------

        // Finally check the status of the builder.  The pending tuples must
        // be freed (just above), whether or not the builder is successful.
        if (info != GrB_SUCCESS)
        {
            // out of memory in GB_builder; A is cleared (see GB_FREE_ALL)
            GB_FREE_ALL ;
            return (info) ;
        }

        ASSERT_MATRIX_OK (T, "T = hypersparse matrix of pending tuples", GB0) ;
        ASSERT (GB_IS_HYPERSPARSE (T)) ;
        ASSERT (!GB_ZOMBIES (T)) ;
        ASSERT (!GB_JUMBLED (T)) ;
        ASSERT (!GB_PENDING (T)) ;

        tnz = GB_NNZ (T) ;
        ASSERT (tnz > 0) ;
    }

    //--------------------------------------------------------------------------
    // delete zombies
    //--------------------------------------------------------------------------

    // A zombie is an entry A(i,j) in the matrix that has been marked for
    // deletion, but hasn't been deleted yet.  It is marked by "negating" its
    // row index: i is replaced with GB_FLIP(i).

    // TODO: pass tnz to GB_selector, to pad the reallocated A matrix
    ASSERT_MATRIX_OK (A, "A before zombies removed", GB0) ;

    if (nzombies > 0)
    {
        // remove all zombies from A, in place
        GB_OK (GB_selector (NULL /* A in-place */, GB_NONZOMBIE_opcode, NULL,
            false, A, 0, NULL, Context)) ;
        ASSERT (A->nzombies == (anz_orig - GB_NNZ (A))) ;
        A->nzombies = 0 ;
    }

    ASSERT_MATRIX_OK (A, "A after zombies removed", GB0) ;

    // all the zombies are gone, and pending tuples are now in T
    ASSERT (!GB_ZOMBIES (A)) ;
    ASSERT (GB_JUMBLED_OK (A)) ;
    ASSERT (!GB_PENDING (A)) ;

    //--------------------------------------------------------------------------
    // unjumble the matrix
    //--------------------------------------------------------------------------

    GB_OK (GB_unjumble (A, Context)) ;

    ASSERT (!GB_ZOMBIES (A)) ;
    ASSERT (!GB_JUMBLED (A)) ;
    ASSERT (!GB_PENDING (A)) ;

    //--------------------------------------------------------------------------
    // check for pending tuples
    //--------------------------------------------------------------------------

    if (npending == 0)
    {
        // no pending tuples; A only had zombies and/or was jumbled.
        // conform A to its desired sparsity structure and return result
        info = GB_conform (A, Context) ;
        #pragma omp flush
        return (info) ;
    }

    //--------------------------------------------------------------------------
    // check for quick transplant
    //--------------------------------------------------------------------------

    int64_t anz = GB_NNZ (A) ;
    if (anz == 0)
    {
        // A has no entries so just transplant T into A, then free T and
        // conform A to its desired hypersparsity.
        info = GB_transplant_conform (A, A->type, &T, Context) ;
        #pragma omp flush
        return (info) ;
    }

    //--------------------------------------------------------------------------
    // determine the method for A = A+T
    //--------------------------------------------------------------------------

    // If anz > 0, T is hypersparse, even if A is a GrB_Vector
    ASSERT (GB_IS_HYPERSPARSE (T)) ;
    ASSERT (tnz > 0) ;
    ASSERT (T->nvec > 0) ;
    ASSERT (A->nvec > 0) ;

    // tjfirst = first vector in T; T only touches vectors tjfirst and beyond
    int64_t tjfirst = T->h [0] ;
    int64_t anz0 = 0 ;      // # entries of A in vectors before tjfirst
    int64_t kA = 0 ;        // position of tjfirst in A->h (or tjfirst itself)
    int64_t jlast ;         // last vector of A before the region T modifies

    int64_t *restrict Ap = A->p ;
    int64_t *restrict Ah = A->h ;
    int64_t *restrict Ai = A->i ;
    GB_void *restrict Ax = (GB_void *) A->x ;

    int64_t anvec = A->nvec ;

    // anz0 = nnz (A0) = nnz (A (:, 0:tjfirst-1)), the region not modified by T
    if (A->h != NULL)
    {
        // A is hypersparse: find tjfirst in A->h by binary search
        int64_t pright = anvec - 1 ;
        bool found ;
        GB_SPLIT_BINARY_SEARCH (tjfirst, A->h, kA, pright, found) ;
        // A->h [0 ... kA-1] excludes vector tjfirst.  The list
        // A->h [kA ... anvec-1] includes tjfirst.
        ASSERT (kA >= 0 && kA <= anvec) ;
        ASSERT (GB_IMPLIES (kA > 0 && kA < anvec, A->h [kA-1] < tjfirst)) ;
        ASSERT (GB_IMPLIES (found, A->h [kA] == tjfirst)) ;
        jlast = (kA > 0) ? A->h [kA-1] : (-1) ;
    }
    else
    {
        // A is sparse: vector j is at position j
        kA = tjfirst ;
        jlast = tjfirst - 1 ;
    }

    // anz1 = nnz (A1) = nnz (A (:, kA:end)), the region modified by T
    anz0 = A->p [kA] ;
    int64_t anz1 = anz - anz0 ;
    bool ignore ;

    // A + T will have anz_new entries
    int64_t anz_new = anz + tnz ; // must have at least this space

    if (2 * anz1 < anz0)
    {

        //----------------------------------------------------------------------
        // append new tuples to A (incremental method)
        //----------------------------------------------------------------------

        // A is growing incrementally.  It splits into two parts: A = [A0 A1],
        // where A0 = A (:, 0:kA-1) and A1 = A (:, kA:end).  The
        // first part (A0 with anz0 = nnz (A0) entries) is not modified.  The
        // second part (A1, with anz1 = nnz (A1) entries) overlaps with T.
        // If anz1 is zero, or small compared to anz0 (the 2*anz1 < anz0 test
        // above), then it is faster to leave A0 unmodified, and to update
        // just A1.

        // TODO: if A also had zombies, GB_selector could pad A so that
        // A->nzmax = anz + tnz.

        // make sure A has enough space for the new tuples
        if (anz_new > A->nzmax)
        {
            // double the size if not enough space
            GB_OK (GB_ix_resize (A, anz_new, Context)) ;
            // A->i and A->x may have moved; refresh the cached pointers
            Ai = A->i ;
            Ax = (GB_void *) A->x ;
        }

        //----------------------------------------------------------------------
        // T = A1 + T
        //----------------------------------------------------------------------

        if (anz1 > 0)
        {

            //------------------------------------------------------------------
            // extract A1 = A (:, kA:end) as a shallow copy
            //------------------------------------------------------------------

            // A1 = [0, A (:, kA:end)], hypersparse with same dimensions as A
            A1 = GB_clear_static_header (&A1_header) ;
            GB_OK (GB_new (&A1, true, // hyper, static header
                A->type, A->vlen, A->vdim, GB_Ap_malloc, A->is_csc,
                GxB_HYPERSPARSE, GB_ALWAYS_HYPER, anvec - kA, Context)) ;

            // the A1->i and A1->x content are shallow copies of A(:,kA:end).
            // They are not allocated pointers, but point to space inside
            // Ai and Ax.
            A1->x = (void *) (Ax + asize * anz0) ;
            A1->x_size = 0 ;    // A1->x is not a pointer to malloc'd block
            A1->i = Ai + anz0 ;
            A1->i_size = 0 ;    // A1->i is not a pointer to malloc'd block
            A1->x_shallow = true ;
            A1->i_shallow = true ;
            A1->nzmax = anz1 ;

            // fill the column A1->h and A1->p with A->h and A->p, shifted
            // so that A1's entries start at position 0
            int64_t *restrict A1p = A1->p ;
            int64_t *restrict A1h = A1->h ;
            int64_t a1nvec = 0 ;
            for (int64_t k = kA ; k < anvec ; k++)
            {
                // get A (:,k)
                int64_t pA_start = Ap [k] ;
                int64_t pA_end = Ap [k+1] ;
                if (pA_end > pA_start)
                {
                    // add this column to A1 if A (:,k) is not empty
                    int64_t j = GBH (Ah, k) ;
                    A1p [a1nvec] = pA_start - anz0 ;
                    A1h [a1nvec] = j ;
                    a1nvec++ ;
                }
            }

            // finalize A1
            A1p [a1nvec] = anz1 ;
            A1->nvec = a1nvec ;
            A1->nvec_nonempty = a1nvec ;
            A1->magic = GB_MAGIC ;

            ASSERT_MATRIX_OK (A1, "A1 slice for GB_Matrix_wait", GB0) ;

            //------------------------------------------------------------------
            // S = A1 + T, with no operator or mask
            //------------------------------------------------------------------

            GB_OK (GB_add (S, A->type, A->is_csc, NULL, 0, 0, &ignore,
                A1, T, NULL, Context)) ;

            ASSERT_MATRIX_OK (S, "S = A1+T", GB0) ;

            // free A1 and T.  A1 is shallow so A's content is untouched.
            GB_phbix_free (T) ;
            GB_phbix_free (A1) ;

            //------------------------------------------------------------------
            // replace T with S
            //------------------------------------------------------------------

            T = S ;
            S = NULL ;
            tnz = GB_NNZ (T) ;

            //------------------------------------------------------------------
            // remove A1 from the vectors of A, if A is hypersparse
            //------------------------------------------------------------------

            // truncating A->nvec to kA leaves only A0; T (= old A1 + tuples)
            // is appended back just below
            if (A->h != NULL)
            {
                A->nvec = kA ;
            }
        }

        //----------------------------------------------------------------------
        // append T to the end of A0
        //----------------------------------------------------------------------

        const int64_t *restrict Tp = T->p ;
        const int64_t *restrict Th = T->h ;
        const int64_t *restrict Ti = T->i ;
        const GB_void *restrict Tx = (GB_void *) T->x ;
        int64_t tnvec = T->nvec ;

        anz = anz0 ;
        int64_t anz_last = anz ;

        int nthreads = GB_nthreads (tnz, chunk, nthreads_max) ;

        // append the indices and values of T to the end of A
        GB_memcpy (Ai + anz , Ti, tnz * sizeof (int64_t), nthreads) ;
        GB_memcpy (Ax + anz * asize, Tx, tnz * asize , nthreads) ;

        // append the vectors of T to the end of A
        for (int64_t k = 0 ; k < tnvec ; k++)
        {
            int64_t j = Th [k] ;
            ASSERT (j >= tjfirst) ;
            anz += (Tp [k+1] - Tp [k]) ;
            GB_OK (GB_jappend (A, j, &jlast, anz, &anz_last, Context)) ;
        }

        // log the end of the last vector
        GB_jwrapup (A, jlast, anz) ;
        ASSERT (anz == anz_new) ;

        // need to recompute the # of non-empty vectors in GB_conform
        A->nvec_nonempty = -1 ;     // recomputed just below

        ASSERT_MATRIX_OK (A, "A after GB_Matrix_wait:append", GB0) ;

        GB_phbix_free (T) ;

        // conform A to its desired sparsity structure
        info = GB_conform (A, Context) ;

    }
    else
    {

        //----------------------------------------------------------------------
        // A = A+T (general method)
        //----------------------------------------------------------------------

        // The update is not incremental since most of A is changing.  Just do
        // a single parallel add: S=A+T, free T, and then transplant S back
        // into A.  The nzmax of A is tight, with no room for future
        // incremental growth.

        // FUTURE:: if GB_add could tolerate zombies in A, then the initial
        // prune of zombies can be skipped.

        GB_OK (GB_add (S, A->type, A->is_csc, NULL, 0, 0, &ignore, A, T, NULL,
            Context)) ;
        GB_phbix_free (T) ;
        ASSERT_MATRIX_OK (S, "S after GB_Matrix_wait:add", GB0) ;
        info = GB_transplant_conform (A, A->type, &S, Context) ;
    }

    //--------------------------------------------------------------------------
    // flush the matrix and return result
    //--------------------------------------------------------------------------

    #pragma omp flush
    return (info) ;
}
582
583