//------------------------------------------------------------------------------
// GB_Matrix_wait:  finish all pending computations on a single matrix
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// CALLS:     GB_builder

// This function is typically called via the GB_MATRIX_WAIT(A) macro, except
// for GB_assign, GB_subassign, and GB_mxm.

// The matrix A has zombies and/or pending tuples placed there by
// GrB_setElement, GrB_*assign, or GB_mxm.  Zombies must now be deleted, and
// pending tuples must now be assembled together and added into the matrix.
// The indices in A might also be jumbled; if so, they are sorted now.

// When the function returns, all pending tuples and zombies have been
// deleted.  This is true even if the function fails due to lack of memory
// (in that case, the matrix is cleared as well).

// If A is hypersparse, the time taken is at most O(nnz(A) + t log t), where t
// is the number of pending tuples in A, and nnz(A) includes both zombies and
// live entries.  There is no O(m) or O(n) time component if A is m-by-n.
// If the number of non-empty vectors of A grows too large, then A can be
// converted to non-hypersparse.

// If A is non-hypersparse, then O(n) is added in the worst case, to prune
// zombies and to update the vector pointers for A.

// If the method is successful, it does an OpenMP flush just before returning.
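
// For illustration, a typical path into this function (a sketch; the
// GB_MATRIX_WAIT macro checks whether A has any pending work before calling
// GB_Matrix_wait):
//
//      GrB_Matrix A ;
//      // ... GrB_setElement or GrB_assign leave pending tuples/zombies ...
//      GB_MATRIX_WAIT (A) ;    // assemble tuples, delete zombies, unjumble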

#include "GB_select.h"
#include "GB_add.h"
#include "GB_Pending.h"
#include "GB_build.h"
#include "GB_jappend.h"

#define GB_FREE_ALL                     \
{                                       \
    GB_FREE (&W, W_size) ;              \
    GB_phbix_free (A) ;                 \
    GB_phbix_free (T) ;                 \
    GB_phbix_free (S) ;                 \
    GB_phbix_free (A1) ;                \
}
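
// (Note: the GB_OK macro used below invokes GB_FREE_ALL and returns early if
// any called method fails, so W, T, S, and A1 are freed on any error path.)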

GB_PUBLIC   // accessed by the MATLAB tests in GraphBLAS/Test only
GrB_Info GB_Matrix_wait         // finish all pending computations
(
    GrB_Matrix A,               // matrix with pending computations
    const char *name,           // name of the matrix
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    GB_void *W = NULL ; size_t W_size = 0 ;
    struct GB_Matrix_opaque T_header, A1_header, S_header ;
    GrB_Matrix T  = GB_clear_static_header (&T_header) ;
    GrB_Matrix A1 = NULL ;
    GrB_Matrix S  = GB_clear_static_header (&S_header) ;
    GrB_Info info = GrB_SUCCESS ;

    ASSERT_MATRIX_OK (A, "A to wait", GB_FLIP (GB0)) ;

    if (GB_IS_FULL (A) || GB_IS_BITMAP (A))
    {
        // full and bitmap matrices never have any pending work
        ASSERT (!GB_ZOMBIES (A)) ;
        ASSERT (!GB_JUMBLED (A)) ;
        ASSERT (!GB_PENDING (A)) ;
        // ensure the matrix is written to memory
        #pragma omp flush
        return (GrB_SUCCESS) ;
    }

    // only sparse and hypersparse matrices can have pending work
    ASSERT (GB_IS_SPARSE (A) || GB_IS_HYPERSPARSE (A)) ;
    ASSERT (GB_ZOMBIES_OK (A)) ;
    ASSERT (GB_JUMBLED_OK (A)) ;
    ASSERT (GB_PENDING_OK (A)) ;

    //--------------------------------------------------------------------------
    // get the zombie and pending count, and burble if work needs to be done
    //--------------------------------------------------------------------------

    int64_t nzombies = A->nzombies ;
    int64_t npending = GB_Pending_n (A) ;
    if (nzombies > 0 || npending > 0 || A->jumbled)
    {
        GB_BURBLE_MATRIX (A, "(wait:%s " GBd " %s, " GBd " pending%s) ", name,
            nzombies, (nzombies == 1) ? "zombie" : "zombies", npending,
            A->jumbled ? ", jumbled" : "") ;
    }

    //--------------------------------------------------------------------------
    // determine the max # of threads to use
    //--------------------------------------------------------------------------

    GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;

    //--------------------------------------------------------------------------
    // ensure A is not shallow
    //--------------------------------------------------------------------------

    int64_t anz_orig = GB_NNZ (A) ;
    int64_t asize = A->type->size ;

#if 0
    // this code is currently unused
    if (GB_is_shallow (A))
    {
        // shallow matrices will never have any pending tuples
        ASSERT (npending == 0) ;

        if (A->p_shallow)
        {
            int64_t len = (A->plen + 1) * sizeof (int64_t) ;
            W = GB_MALLOC (len, GB_void, &W_size) ;
            if (W == NULL)
            {
                // out of memory
                return (GrB_OUT_OF_MEMORY) ;
            }
            GB_memcpy (W, A->p, len, nthreads_max) ;
            A->p = (int64_t *) W ; A->p_size = W_size ;
            A->p_shallow = false ;
            W = NULL ;
        }

        if (A->h_shallow)
        {
            int64_t len = A->nvec * sizeof (int64_t) ;
            W = GB_MALLOC (len, GB_void, &W_size) ;
            if (W == NULL)
            {
                // out of memory
                return (GrB_OUT_OF_MEMORY) ;
            }
            GB_memcpy (W, A->h, len, nthreads_max) ;
            A->h = W ; A->h_size = W_size ;
            A->h_shallow = false ;
            W = NULL ;
        }

        if (A->i_shallow)
        {
            int64_t len = anz_orig * sizeof (int64_t) ;
            W = GB_MALLOC (len, GB_void, &W_size) ;
            if (W == NULL)
            {
                // out of memory
                return (GrB_OUT_OF_MEMORY) ;
            }
            GB_memcpy (W, A->i, len, nthreads_max) ;
            A->i = (int64_t *) W ; A->i_size = W_size ;
            A->i_shallow = false ;
            W = NULL ;
        }

        if (A->x_shallow)
        {
            int64_t len = anz_orig * asize ;
            W = GB_MALLOC (len, GB_void, &W_size) ;
            if (W == NULL)
            {
                // out of memory
                return (GrB_OUT_OF_MEMORY) ;
            }
            GB_memcpy (W, A->x, len, nthreads_max) ;
            A->x = (GB_void *) W ; A->x_size = W_size ;
            A->x_shallow = false ;
            W = NULL ;
        }
    }
#endif

    ASSERT (!GB_is_shallow (A)) ;

    //--------------------------------------------------------------------------
    // check if A only needs to be unjumbled
    //--------------------------------------------------------------------------

    if (npending == 0 && nzombies == 0)
    {
        // A is not conformed, so the sparsity structure of A is not modified.
        // That is, if A has no pending tuples and no zombies, but is just
        // jumbled, then it stays sparse or hypersparse.
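        // (For illustration: a jumbled vector stores its row indices out of
        // order, as in Ai = [4 0 2] for a single sparse column; GB_unjumble
        // sorts each vector, so Ai becomes [0 2 4] with the corresponding
        // values in Ax permuted to match.)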
        GB_OK (GB_unjumble (A, Context)) ;
        return (info) ;
    }

    //--------------------------------------------------------------------------
    // assemble the pending tuples into T
    //--------------------------------------------------------------------------

    int64_t tnz = 0 ;
    if (npending > 0)
    {

        //----------------------------------------------------------------------
        // construct a new hypersparse matrix T with just the pending tuples
        //----------------------------------------------------------------------

        // T has the same type as A->type, which can differ from the type of
        // the pending tuples, A->Pending->type.  The Pending->op can be NULL
        // (an implicit SECOND function), or it can be any accum operator.  The
        // z=accum(x,y) operator can have any types, and it does not have to be
        // associative.
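
        // For example (a sketch of the semantics, not code in this file):
        // if the pending tuples hold two updates to the same entry, say
        // (i,j,x1) followed by (i,j,x2), and Pending->op is NULL, the
        // implicit SECOND operator keeps the most recent value:
        //
        //      T (i,j) = SECOND (x1, x2) = x2
        //
        // With Pending->op equal to GrB_PLUS_FP64, for instance, GB_builder
        // would assemble T (i,j) = x1 + x2 instead.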

        info = GB_builder
        (
            T,                      // create T using a static header
            A->type,                // T->type = A->type
            A->vlen,                // T->vlen = A->vlen
            A->vdim,                // T->vdim = A->vdim
            A->is_csc,              // T->is_csc = A->is_csc
            &(A->Pending->i),       // iwork_handle, becomes T->i on output
            &(A->Pending->i_size),
            &(A->Pending->j),       // jwork_handle, free on output
            &(A->Pending->j_size),
            &(A->Pending->x),       // Swork_handle, free on output
            &(A->Pending->x_size),
            A->Pending->sorted,     // tuples may or may not be sorted
            false,                  // there might be duplicates; look for them
            A->Pending->nmax,       // size of Pending->[ijx] arrays
            true,                   // is_matrix: unused
            NULL, NULL, NULL,       // original I,J,S tuples, not used here
            npending,               // # of tuples
            A->Pending->op,         // dup operator for assembling duplicates
            A->Pending->type->code, // type of Pending->x
            Context
        ) ;

        //----------------------------------------------------------------------
        // free pending tuples
        //----------------------------------------------------------------------

        // The tuples have been converted to T, which is more compact, and
        // duplicates have been removed.  The following work needs to be done
        // even if the builder fails.

        // GB_builder frees A->Pending->j and A->Pending->x.  If successful,
        // A->Pending->i is now T->i.  Otherwise A->Pending->i is freed.  In
        // both cases, A->Pending->i is NULL.
        ASSERT (A->Pending->i == NULL) ;
        ASSERT (A->Pending->j == NULL) ;
        ASSERT (A->Pending->x == NULL) ;

        // free the list of pending tuples
        GB_Pending_free (&(A->Pending)) ;
        ASSERT (!GB_PENDING (A)) ;

        ASSERT_MATRIX_OK (A, "A after moving pending tuples to T", GB0) ;

        //----------------------------------------------------------------------
        // check the status of the builder
        //----------------------------------------------------------------------

        // Finally check the status of the builder.  The pending tuples must
        // be freed (just above), whether or not the builder is successful.
        if (info != GrB_SUCCESS)
        {
            // out of memory in GB_builder
            GB_FREE_ALL ;
            return (info) ;
        }

        ASSERT_MATRIX_OK (T, "T = hypersparse matrix of pending tuples", GB0) ;
        ASSERT (GB_IS_HYPERSPARSE (T)) ;
        ASSERT (!GB_ZOMBIES (T)) ;
        ASSERT (!GB_JUMBLED (T)) ;
        ASSERT (!GB_PENDING (T)) ;

        tnz = GB_NNZ (T) ;
        ASSERT (tnz > 0) ;
    }

    //--------------------------------------------------------------------------
    // delete zombies
    //--------------------------------------------------------------------------

    // A zombie is an entry A(i,j) in the matrix that has been marked for
    // deletion, but hasn't been deleted yet.  It is marked by "negating" its
    // index i, replacing it with GB_FLIP (i).
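
    // For example, with the convention GB_FLIP (i) = (-(i)-2) used by
    // SuiteSparse:GraphBLAS, a zombie at row index i = 3 is stored with
    // index -5.  GB_FLIP is its own inverse (GB_FLIP (GB_FLIP (i)) == i),
    // so live indices (>= 0) and zombie indices (< 0) are easily told apart.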

    // TODO: pass tnz to GB_selector, to pad the reallocated A matrix
    ASSERT_MATRIX_OK (A, "A before zombies removed", GB0) ;

    if (nzombies > 0)
    {
        // remove all zombies from A
        GB_OK (GB_selector (NULL /* A in-place */, GB_NONZOMBIE_opcode, NULL,
            false, A, 0, NULL, Context)) ;
        ASSERT (A->nzombies == (anz_orig - GB_NNZ (A))) ;
        A->nzombies = 0 ;
    }

    ASSERT_MATRIX_OK (A, "A after zombies removed", GB0) ;

    // all the zombies are gone, and pending tuples are now in T
    ASSERT (!GB_ZOMBIES (A)) ;
    ASSERT (GB_JUMBLED_OK (A)) ;
    ASSERT (!GB_PENDING (A)) ;

    //--------------------------------------------------------------------------
    // unjumble the matrix
    //--------------------------------------------------------------------------

    GB_OK (GB_unjumble (A, Context)) ;

    ASSERT (!GB_ZOMBIES (A)) ;
    ASSERT (!GB_JUMBLED (A)) ;
    ASSERT (!GB_PENDING (A)) ;

    //--------------------------------------------------------------------------
    // check for pending tuples
    //--------------------------------------------------------------------------

    if (npending == 0)
    {
        // conform A to its desired sparsity structure and return result
        info = GB_conform (A, Context) ;
        #pragma omp flush
        return (info) ;
    }

    //--------------------------------------------------------------------------
    // check for quick transplant
    //--------------------------------------------------------------------------

    int64_t anz = GB_NNZ (A) ;
    if (anz == 0)
    {
        // A has no entries so just transplant T into A, then free T and
        // conform A to its desired hypersparsity.
        info = GB_transplant_conform (A, A->type, &T, Context) ;
        #pragma omp flush
        return (info) ;
    }

    //--------------------------------------------------------------------------
    // determine the method for A = A+T
    //--------------------------------------------------------------------------

    // If anz > 0, T is hypersparse, even if A is a GrB_Vector
    ASSERT (GB_IS_HYPERSPARSE (T)) ;
    ASSERT (tnz > 0) ;
    ASSERT (T->nvec > 0) ;
    ASSERT (A->nvec > 0) ;

    // tjfirst = first vector in T
    int64_t tjfirst = T->h [0] ;
    int64_t anz0 = 0 ;
    int64_t kA = 0 ;
    int64_t jlast ;

    int64_t *restrict Ap = A->p ;
    int64_t *restrict Ah = A->h ;
    int64_t *restrict Ai = A->i ;
    GB_void *restrict Ax = (GB_void *) A->x ;

    int64_t anvec = A->nvec ;

    // anz0 = nnz (A0) = nnz (A (:, 0:tjfirst-1)), the region not modified by T
    if (A->h != NULL)
    {
        // find tjfirst in A->h
        int64_t pright = anvec - 1 ;
        bool found ;
        GB_SPLIT_BINARY_SEARCH (tjfirst, A->h, kA, pright, found) ;
        // A->h [0 ... kA-1] excludes vector tjfirst.  The list
        // A->h [kA ... anvec-1] includes tjfirst.
        ASSERT (kA >= 0 && kA <= anvec) ;
        ASSERT (GB_IMPLIES (kA > 0 && kA < anvec, A->h [kA-1] < tjfirst)) ;
        ASSERT (GB_IMPLIES (found, A->h [kA] == tjfirst)) ;
        jlast = (kA > 0) ? A->h [kA-1] : (-1) ;
    }
    else
    {
        kA = tjfirst ;
        jlast = tjfirst - 1 ;
    }
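
    // For example (illustrative values only): if A->h = [0 2 5 9] and
    // tjfirst = 5, the split binary search finds kA = 2, so A->h [0..1] =
    // [0 2] lies entirely before tjfirst, and jlast = A->h [1] = 2.  If
    // instead tjfirst = 4, then kA = 2 as well, but with found == false.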

    // anz1 = nnz (A1) = nnz (A (:, kA:end)), the region modified by T
    anz0 = A->p [kA] ;
    int64_t anz1 = anz - anz0 ;
    bool ignore ;

    // A + T will have anz_new entries
    int64_t anz_new = anz + tnz ;       // must have at least this space

    if (2 * anz1 < anz0)
    {

        //----------------------------------------------------------------------
        // append new tuples to A
        //----------------------------------------------------------------------

        // A is growing incrementally.  It splits into two parts: A = [A0 A1],
        // where A0 = A (:, 0:kA-1) and A1 = A (:, kA:end).  The first part
        // (A0, with anz0 = nnz (A0) entries) is not modified.  The second
        // part (A1, with anz1 = nnz (A1) entries) overlaps with T.  If anz1
        // is zero, or small compared to anz0, then it is faster to leave A0
        // unmodified, and to update just A1.
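
        // For example (illustrative sizes): with anz0 = 1000 and anz1 = 10,
        // 2*10 < 1000 holds and this incremental path merges only the 10
        // entries of A1 with T.  With anz1 = 600 instead, most of A overlaps
        // T, so the single full add A = A+T (the else branch below) is used.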

        // TODO: if A also had zombies, GB_selector could pad A so that
        // A->nzmax = anz + tnz.

        // make sure A has enough space for the new tuples
        if (anz_new > A->nzmax)
        {
            // double the size if not enough space
            GB_OK (GB_ix_resize (A, anz_new, Context)) ;
            Ai = A->i ;
            Ax = (GB_void *) A->x ;
        }

        //----------------------------------------------------------------------
        // T = A1 + T
        //----------------------------------------------------------------------

        if (anz1 > 0)
        {

            //------------------------------------------------------------------
            // extract A1 = A (:, kA:end) as a shallow copy
            //------------------------------------------------------------------

            // A1 = [0, A (:, kA:end)], hypersparse with same dimensions as A
            A1 = GB_clear_static_header (&A1_header) ;
            GB_OK (GB_new (&A1, true, // hyper, static header
                A->type, A->vlen, A->vdim, GB_Ap_malloc, A->is_csc,
                GxB_HYPERSPARSE, GB_ALWAYS_HYPER, anvec - kA, Context)) ;

            // The A1->i and A1->x content is a shallow copy of A (:, kA:end):
            // these are not allocated pointers, but point to space inside
            // Ai and Ax.
            A1->x = (void *) (Ax + asize * anz0) ;
            A1->x_size = 0 ;        // A1->x is not a pointer to malloc'd block
            A1->i = Ai + anz0 ;
            A1->i_size = 0 ;        // A1->i is not a pointer to malloc'd block
            A1->x_shallow = true ;
            A1->i_shallow = true ;
            A1->nzmax = anz1 ;

            // fill A1->h and A1->p from A->h and A->p, shifted by anz0
            int64_t *restrict A1p = A1->p ;
            int64_t *restrict A1h = A1->h ;
            int64_t a1nvec = 0 ;
            for (int64_t k = kA ; k < anvec ; k++)
            {
                // get A (:,k)
                int64_t pA_start = Ap [k] ;
                int64_t pA_end = Ap [k+1] ;
                if (pA_end > pA_start)
                {
                    // add this column to A1 if A (:,k) is not empty
                    int64_t j = GBH (Ah, k) ;
                    A1p [a1nvec] = pA_start - anz0 ;
                    A1h [a1nvec] = j ;
                    a1nvec++ ;
                }
            }

            // finalize A1
            A1p [a1nvec] = anz1 ;
            A1->nvec = a1nvec ;
            A1->nvec_nonempty = a1nvec ;
            A1->magic = GB_MAGIC ;

            ASSERT_MATRIX_OK (A1, "A1 slice for GB_Matrix_wait", GB0) ;

            //------------------------------------------------------------------
            // S = A1 + T, with no operator or mask
            //------------------------------------------------------------------

            GB_OK (GB_add (S, A->type, A->is_csc, NULL, 0, 0, &ignore,
                A1, T, NULL, Context)) ;

            ASSERT_MATRIX_OK (S, "S = A1+T", GB0) ;

            // free A1 and T
            GB_phbix_free (T) ;
            GB_phbix_free (A1) ;

            //------------------------------------------------------------------
            // replace T with S
            //------------------------------------------------------------------

            T = S ;
            S = NULL ;
            tnz = GB_NNZ (T) ;

            //------------------------------------------------------------------
            // remove A1 from the vectors of A, if A is hypersparse
            //------------------------------------------------------------------

            if (A->h != NULL)
            {
                A->nvec = kA ;
            }
        }

        //----------------------------------------------------------------------
        // append T to the end of A0
        //----------------------------------------------------------------------

        const int64_t *restrict Tp = T->p ;
        const int64_t *restrict Th = T->h ;
        const int64_t *restrict Ti = T->i ;
        const GB_void *restrict Tx = (GB_void *) T->x ;
        int64_t tnvec = T->nvec ;

        anz = anz0 ;
        int64_t anz_last = anz ;

        int nthreads = GB_nthreads (tnz, chunk, nthreads_max) ;

        // append the indices and values of T to the end of A
        GB_memcpy (Ai + anz        , Ti, tnz * sizeof (int64_t), nthreads) ;
        GB_memcpy (Ax + anz * asize, Tx, tnz * asize           , nthreads) ;

        // append the vectors of T to the end of A
        for (int64_t k = 0 ; k < tnvec ; k++)
        {
            int64_t j = Th [k] ;
            ASSERT (j >= tjfirst) ;
            anz += (Tp [k+1] - Tp [k]) ;
            GB_OK (GB_jappend (A, j, &jlast, anz, &anz_last, Context)) ;
        }

        GB_jwrapup (A, jlast, anz) ;
        ASSERT (anz == anz_new) ;
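
        // (For reference: each GB_jappend call above logs one vector j of T
        // at the end of A; for a hypersparse A it appends j to A->h and the
        // cumulative entry count to A->p.  GB_jwrapup then finishes the
        // vector pointers and marks A valid; see GB_jappend.h.)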

        // need to recompute the # of non-empty vectors in GB_conform
        A->nvec_nonempty = -1 ;     // recomputed just below

        ASSERT_MATRIX_OK (A, "A after GB_Matrix_wait:append", GB0) ;

        GB_phbix_free (T) ;

        // conform A to its desired sparsity structure
        info = GB_conform (A, Context) ;

    }
    else
    {

        //----------------------------------------------------------------------
        // A = A+T
        //----------------------------------------------------------------------

        // The update is not incremental since most of A is changing.  Just do
        // a single parallel add: S=A+T, free T, and then transplant S back
        // into A.  The nzmax of A is tight, with no room for future
        // incremental growth.

        // FUTURE:: if GB_add could tolerate zombies in A, then the initial
        // prune of zombies could be skipped.

        GB_OK (GB_add (S, A->type, A->is_csc, NULL, 0, 0, &ignore, A, T, NULL,
            Context)) ;
        GB_phbix_free (T) ;
        ASSERT_MATRIX_OK (S, "S after GB_Matrix_wait:add", GB0) ;
        info = GB_transplant_conform (A, A->type, &S, Context) ;
    }

    //--------------------------------------------------------------------------
    // flush the matrix and return result
    //--------------------------------------------------------------------------

    #pragma omp flush
    return (info) ;
}