1 //------------------------------------------------------------------------------
2 // GB_add_phase0: find vectors of C to compute for C=A+B or C<M>=A+B
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // The eWise add of two matrices, C=A+B, C<M>=A+B, or C<!M>=A+B starts with
11 // this phase, which determines which vectors of C need to be computed.
12 // This phase is also used for GB_masker, and for GB_SUBASSIGN_TWO_SLICE.
13 
14 // On input, A and B are the two matrices being added, and M is the optional
// mask matrix (not complemented).  The complemented mask is handled in
// GB_mask, not here.
17 
// On output, an integer (Cnvec), a boolean (Ch_is_Mh), and up to 3 arrays are
// returned, either NULL or of size Cnvec.  Let n = A->vdim be the vector
// dimension of A, B, M and C.
21 
22 //      Ch:  the list of vectors to compute.  If not NULL, Ch [k] = j is the
23 //      kth vector in C to compute, which will become the hyperlist C->h of C.
24 //      Note that some of these vectors may turn out to be empty, because of
25 //      the mask, or because the vector j appeared in A or B, but is empty.
26 //      It is pruned at the end of GB_add_phase2.  If Ch is NULL then it is an
27 //      implicit list of size n, and Ch [k] == k for all k = 0:n-1.  In this
28 //      case, C will be a sparse matrix, not hypersparse.  Thus, the kth
29 //      vector is j = GBH (Ch, k).
30 
31 //      Ch is freed by GB_add if phase1 fails.  phase2 either frees it or
32 //      transplants it into C, if C is hypersparse.
33 
34 //      Ch_is_Mh:  true if the mask M is present, hypersparse, and not
35 //      complemented, false otherwise.  In this case Ch is a deep copy of Mh.
36 //      Only GB_add uses this option; it is not used by GB_masker or
37 //      GB_SUBASSIGN_TWO_SLICE (Ch_is_Mh is always false in this case).  This
38 //      is determined by passing in p_Ch_is_Mh as a NULL or non-NULL pointer.
39 
40 //      C_to_A:  if A is hypersparse, then C_to_A [k] = kA if the kth vector,
41 //      j = GBH (Ch, k) appears in A, as j = Ah [kA].  If j does not appear in
42 //      A, then C_to_A [k] = -1.  If A is not hypersparse, then C_to_A is
43 //      returned as NULL.
44 
45 //      C_to_B:  if B is hypersparse, then C_to_B [k] = kB if the kth vector,
46 //      j = GBH (Ch, k) appears in B, as j = Bh [kB].  If j does not appear in
47 //      B, then C_to_B [k] = -1.  If B is not hypersparse, then C_to_B is
48 //      returned as NULL.
49 
50 //      C_to_M:  if M is hypersparse, and Ch_is_Mh is false, then C_to_M [k] =
51 //      kM if the kth vector, j = GBH (Ch, k) appears in M, as j = Mh [kM].  If
52 //      j does not appear in M, then C_to_M [k] = -1.  If M is not hypersparse,
53 //      then C_to_M is returned as NULL.
54 
55 // M, A, B: any sparsity structure (hypersparse, sparse, bitmap, or full)
56 // C: not present here, but its sparsity structure is finalized, via the
57 // input/output parameter C_sparsity.
58 
59 #include "GB_add.h"
60 
// free the Work workspace (pushed via GB_WERK_PUSH in GB_add_phase0);
// safe to invoke even if Work was never pushed (GB_WERK_POP of NULL is a no-op)
#define GB_FREE_WORK                \
{                                   \
    GB_WERK_POP (Work, int64_t) ;   \
}
65 
66 //------------------------------------------------------------------------------
67 // GB_allocate_result
68 //------------------------------------------------------------------------------
69 
GB_allocate_result(int64_t Cnvec,int64_t * restrict * Ch_handle,size_t * Ch_size_handle,int64_t * restrict * C_to_M_handle,size_t * C_to_M_size_handle,int64_t * restrict * C_to_A_handle,size_t * C_to_A_size_handle,int64_t * restrict * C_to_B_handle,size_t * C_to_B_size_handle)70 static inline bool GB_allocate_result
71 (
72     int64_t Cnvec,
73     int64_t *restrict *Ch_handle,        size_t *Ch_size_handle,
74     int64_t *restrict *C_to_M_handle,    size_t *C_to_M_size_handle,
75     int64_t *restrict *C_to_A_handle,    size_t *C_to_A_size_handle,
76     int64_t *restrict *C_to_B_handle,    size_t *C_to_B_size_handle
77 )
78 {
79     bool ok = true ;
80     if (Ch_handle != NULL)
81     {
82         (*Ch_handle) = GB_MALLOC (Cnvec, int64_t, Ch_size_handle) ;
83         ok = (*Ch_handle != NULL) ;
84     }
85     if (C_to_M_handle != NULL)
86     {
87         (*C_to_M_handle) = GB_MALLOC_WERK (Cnvec, int64_t, C_to_M_size_handle) ;
88         ok = ok && (*C_to_M_handle != NULL) ;
89     }
90     if (C_to_A_handle != NULL)
91     {
92         *C_to_A_handle = GB_MALLOC_WERK (Cnvec, int64_t, C_to_A_size_handle) ;
93         ok = ok && (*C_to_A_handle != NULL) ;
94     }
95     if (C_to_B_handle != NULL)
96     {
97         *C_to_B_handle = GB_MALLOC_WERK (Cnvec, int64_t, C_to_B_size_handle) ;
98         ok = ok && (*C_to_B_handle != NULL) ;
99     }
100 
101     if (!ok)
102     {
103         // out of memory
104         if (Ch_handle != NULL)
105         {
106             GB_FREE (Ch_handle, *Ch_size_handle) ;
107         }
108         if (C_to_M_handle != NULL)
109         {
110             GB_FREE_WERK (C_to_M_handle, *C_to_M_size_handle) ;
111         }
112         if (C_to_A_handle != NULL)
113         {
114             GB_FREE_WERK (C_to_A_handle, *C_to_A_size_handle) ;
115         }
116         if (C_to_B_handle != NULL)
117         {
118             GB_FREE_WERK (C_to_B_handle, *C_to_B_size_handle) ;
119         }
120     }
121     return (ok) ;
122 }
123 
124 //------------------------------------------------------------------------------
125 // GB_add_phase0:  find the vectors of C for C<M>=A+B
126 //------------------------------------------------------------------------------
127 
GB_add_phase0(int64_t * p_Cnvec,int64_t * restrict * Ch_handle,size_t * Ch_size_handle,int64_t * restrict * C_to_M_handle,size_t * C_to_M_size_handle,int64_t * restrict * C_to_A_handle,size_t * C_to_A_size_handle,int64_t * restrict * C_to_B_handle,size_t * C_to_B_size_handle,bool * p_Ch_is_Mh,int * C_sparsity,const GrB_Matrix M,const GrB_Matrix A,const GrB_Matrix B,GB_Context Context)128 GrB_Info GB_add_phase0          // find vectors in C for C=A+B or C<M>=A+B
129 (
130     int64_t *p_Cnvec,           // # of vectors to compute in C
131     int64_t *restrict *Ch_handle,        // Ch: size Cnvec, or NULL
132     size_t *Ch_size_handle,                 // size of Ch in bytes
133     int64_t *restrict *C_to_M_handle,    // C_to_M: size Cnvec, or NULL
134     size_t *C_to_M_size_handle,             // size of C_to_M in bytes
135     int64_t *restrict *C_to_A_handle,    // C_to_A: size Cnvec, or NULL
136     size_t *C_to_A_size_handle,             // size of C_to_A in bytes
137     int64_t *restrict *C_to_B_handle,    // C_to_B: of size Cnvec, or NULL
138     size_t *C_to_B_size_handle,             // size of C_to_A in bytes
139     bool *p_Ch_is_Mh,           // if true, then Ch == Mh
140     int *C_sparsity,            // sparsity structure of C
141     const GrB_Matrix M,         // optional mask, may be NULL; not complemented
142     const GrB_Matrix A,         // first input matrix
143     const GrB_Matrix B,         // second input matrix
144     GB_Context Context
145 )
146 {
147 
148     //--------------------------------------------------------------------------
149     // check inputs
150     //--------------------------------------------------------------------------
151 
152     // M, A, and B can be jumbled for this phase, but not phase1 or phase2
153 
154     ASSERT (p_Cnvec != NULL) ;
155     ASSERT (Ch_handle != NULL) ;
156     ASSERT (C_to_A_handle != NULL) ;
157     ASSERT (C_to_B_handle != NULL) ;
158 
159     ASSERT_MATRIX_OK_OR_NULL (M, "M for add phase0", GB0) ;
160     ASSERT (!GB_ZOMBIES (M)) ;
161     ASSERT (GB_JUMBLED_OK (M)) ;        // pattern not accessed
162     ASSERT (!GB_PENDING (M)) ;
163 
164     ASSERT_MATRIX_OK (A, "A for add phase0", GB0) ;
165     ASSERT (!GB_ZOMBIES (A)) ;
166     ASSERT (GB_JUMBLED_OK (B)) ;        // pattern not accessed
167     ASSERT (!GB_PENDING (A)) ;
168 
169     ASSERT_MATRIX_OK (B, "B for add phase0", GB0) ;
170     ASSERT (!GB_ZOMBIES (B)) ;
171     ASSERT (GB_JUMBLED_OK (A)) ;        // pattern not accessed
172     ASSERT (!GB_PENDING (B)) ;
173 
174     ASSERT (A->vdim == B->vdim) ;
175     ASSERT (A->vlen == B->vlen) ;
176     ASSERT (GB_IMPLIES (M != NULL, A->vdim == M->vdim)) ;
177     ASSERT (GB_IMPLIES (M != NULL, A->vlen == M->vlen)) ;
178 
179     //--------------------------------------------------------------------------
180     // initializations
181     //--------------------------------------------------------------------------
182 
183     (*p_Cnvec) = 0 ;
184     (*Ch_handle) = NULL ;
185     if (C_to_M_handle != NULL)
186     {
187         (*C_to_M_handle) = NULL ;
188     }
189     (*C_to_A_handle) = NULL ;
190     (*C_to_B_handle) = NULL ;
191     if (p_Ch_is_Mh != NULL)
192     {
193         (*p_Ch_is_Mh) = false ;
194     }
195 
196     if ((*C_sparsity) == GxB_BITMAP || (*C_sparsity) == GxB_FULL)
197     {
198         // nothing to do in phase0 for C bitmap or full
199         (*p_Cnvec) = A->vdim ;  // not needed; to be consistent with GB_emult
200         return (GrB_SUCCESS) ;
201     }
202 
203     int64_t *restrict Ch     = NULL ; size_t Ch_size = 0 ;
204     int64_t *restrict C_to_M = NULL ; size_t C_to_M_size = 0 ;
205     int64_t *restrict C_to_A = NULL ; size_t C_to_A_size = 0 ;
206     int64_t *restrict C_to_B = NULL ; size_t C_to_B_size = 0 ;
207 
208     GB_WERK_DECLARE (Work, int64_t) ;
209     int ntasks = 0 ;
210 
211     //--------------------------------------------------------------------------
212     // determine the number of threads to use
213     //--------------------------------------------------------------------------
214 
215     GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
216     int nthreads = 1 ;      // nthreads depends on Cnvec, computed below
217 
218     //--------------------------------------------------------------------------
219     // get content of M, A, and B
220     //--------------------------------------------------------------------------
221 
222     int64_t Cnvec ;
223 
224     int64_t n = A->vdim ;
225     int64_t Anvec = A->nvec ;
226     int64_t vlen = A->vlen ;
227     const int64_t *restrict Ap = A->p ;
228     const int64_t *restrict Ah = A->h ;
229     bool A_is_hyper = (Ah != NULL) ;
230     #define GB_Ah(k) (A_is_hyper ? Ah [k] : (k))
231 
232     int64_t Bnvec = B->nvec ;
233     const int64_t *restrict Bp = B->p ;
234     const int64_t *restrict Bh = B->h ;
235     bool B_is_hyper = (Bh != NULL) ;
236 
237     int64_t Mnvec = 0 ;
238     const int64_t *restrict Mp = NULL ;
239     const int64_t *restrict Mh = NULL ;
240     bool M_is_hyper = GB_IS_HYPERSPARSE (M) ;
241     if (M != NULL)
242     {
243         Mnvec = M->nvec ;
244         Mp = M->p ;
245         Mh = M->h ;
246     }
247 
248     // For GB_add, if M is present, hypersparse, and not complemented, then C
249     // will be hypersparse, and it will have set of vectors as M (Ch == Mh).
250     // For GB_masker, Ch is never equal to Mh.
251     bool Ch_is_Mh = (p_Ch_is_Mh != NULL) && (M != NULL && M_is_hyper) ;
252 
253     //--------------------------------------------------------------------------
254     // find the set union of the non-empty vectors of A and B
255     //--------------------------------------------------------------------------
256 
257     if (Ch_is_Mh)
258     {
259 
260         //----------------------------------------------------------------------
261         // C and M are hypersparse, with the same vectors as the hypersparse M
262         //----------------------------------------------------------------------
263 
264         (*C_sparsity) = GxB_HYPERSPARSE ;
265         ASSERT (M_is_hyper) ;
266         Cnvec = Mnvec ;
267         nthreads = GB_nthreads (Cnvec, chunk, nthreads_max) ;
268 
269         if (!GB_allocate_result (Cnvec,
270             &Ch,    &Ch_size,
271             NULL,   NULL,
272             (A_is_hyper) ? (&C_to_A) : NULL, &C_to_A_size,
273             (B_is_hyper) ? (&C_to_B) : NULL, &C_to_B_size))
274         {
275             // out of memory
276             GB_FREE_WORK ;
277             return (GrB_OUT_OF_MEMORY) ;
278         }
279 
280         // copy Mh into Ch.  Ch is Mh so C_to_M is not needed.
281         GB_memcpy (Ch, Mh, Mnvec * sizeof (int64_t), nthreads) ;
282 
283         // construct the mapping from C to A and B, if they are hypersparse
284         if (A_is_hyper || B_is_hyper)
285         {
286             int64_t k ;
287             #pragma omp parallel for num_threads(nthreads) schedule(static)
288             for (k = 0 ; k < Cnvec ; k++)
289             {
290                 int64_t j = Ch [k] ;
291                 if (A_is_hyper)
292                 {
293                     // C_to_A [k] = kA if Ah [kA] == j and A(:,j) is non-empty
294                     int64_t kA = 0, pA, pA_end ;
295                     GB_lookup (true, Ah, Ap, vlen, &kA, Anvec-1, j,
296                         &pA, &pA_end) ;
297                     C_to_A [k] = (pA < pA_end) ? kA : -1 ;
298                 }
299                 if (B_is_hyper)
300                 {
301                     // C_to_B [k] = kB if Bh [kB] == j and B(:,j) is non-empty
302                     int64_t kB = 0, pB, pB_end ;
303                     GB_lookup (true, Bh, Bp, vlen, &kB, Bnvec-1, j,
304                         &pB, &pB_end) ;
305                     C_to_B [k] = (pB < pB_end) ? kB : -1 ;
306                 }
307             }
308         }
309 
310     }
311     else if (A_is_hyper && B_is_hyper)
312     {
313 
314         //----------------------------------------------------------------------
315         // A and B are hypersparse: C will be hypersparse
316         //----------------------------------------------------------------------
317 
318         // Ch is the set union of Ah and Bh.  This is handled with a parallel
319         // merge, since Ah and Bh are both sorted lists.
320 
321         (*C_sparsity) = GxB_HYPERSPARSE ;
322 
323         //----------------------------------------------------------------------
324         // create the tasks to construct Ch
325         //----------------------------------------------------------------------
326 
327         double work = GB_IMIN (Anvec + Bnvec, n) ;
328         nthreads = GB_nthreads (work, chunk, nthreads_max) ;
329 
330         ntasks = (nthreads == 1) ? 1 : (64 * nthreads) ;
331         ntasks = GB_IMIN (ntasks, work) ;
332 
333         // allocate workspace
334         GB_WERK_PUSH (Work, 3*(ntasks+1), int64_t) ;
335         if (Work == NULL)
336         {
337             // out of memory
338             GB_FREE_WORK ;
339             return (GrB_OUT_OF_MEMORY) ;
340         }
341         int64_t *restrict kA_start = Work ;
342         int64_t *restrict kB_start = Work + (ntasks+1) ;
343         int64_t *restrict kC_start = Work + (ntasks+1)*2 ;
344 
345         kA_start [0] = (Anvec == 0) ? -1 : 0 ;
346         kB_start [0] = (Bnvec == 0) ? -1 : 0 ;
347         kA_start [ntasks] = (Anvec == 0) ? -1 : Anvec ;
348         kB_start [ntasks] = (Bnvec == 0) ? -1 : Bnvec ;
349 
350         for (int taskid = 1 ; taskid < ntasks ; taskid++)
351         {
352             // create tasks: A and B are both hyper
353             double target_work = ((ntasks-taskid) * work) / ntasks ;
354             GB_slice_vector (NULL, NULL,
355                 &(kA_start [taskid]), &(kB_start [taskid]),
356                 0, 0, NULL,                 // Mi not present
357                 0, Anvec, Ah,               // Ah, explicit list
358                 0, Bnvec, Bh,               // Bh, explicit list
359                 n,                          // Ah and Bh have dimension n
360                 target_work) ;
361         }
362 
363         //----------------------------------------------------------------------
364         // count the entries in Ch for each task
365         //----------------------------------------------------------------------
366 
367         int taskid ;
368         #pragma omp parallel for num_threads(nthreads) schedule (dynamic,1)
369         for (taskid = 0 ; taskid < ntasks ; taskid++)
370         {
371             // merge Ah and Bh into Ch
372             int64_t kA = kA_start [taskid] ;
373             int64_t kB = kB_start [taskid] ;
374             int64_t kA_end = kA_start [taskid+1] ;
375             int64_t kB_end = kB_start [taskid+1] ;
376             int64_t kC = 0 ;
377             for ( ; kA < kA_end && kB < kB_end ; kC++)
378             {
379                 int64_t jA = GB_Ah (kA) ;
380                 int64_t jB = Bh [kB] ;
381                 if (jA < jB)
382                 {
383                     // jA appears in A but not B
384                     kA++ ;
385                 }
386                 else if (jB < jA)
387                 {
388                     // jB appears in B but not A
389                     kB++ ;
390                 }
391                 else
392                 {
393                     // j = jA = jB appears in both A and B
394                     kA++ ;
395                     kB++ ;
396                 }
397             }
398             kC_start [taskid] = kC + (kA_end - kA) + (kB_end - kB) ;
399         }
400 
401         //----------------------------------------------------------------------
402         // cumulative sum of entries in Ch for each task
403         //----------------------------------------------------------------------
404 
405         GB_cumsum (kC_start, ntasks, NULL, 1, NULL) ;
406         Cnvec = kC_start [ntasks] ;
407 
408         //----------------------------------------------------------------------
409         // allocate the result: Ch and the mappings C_to_[MAB]
410         //----------------------------------------------------------------------
411 
412         // C will be hypersparse, so Ch is allocated.  The mask M is ignored
413         // for computing Ch.  Ch is the set union of Ah and Bh.
414 
415         if (!GB_allocate_result (Cnvec,
416             &Ch,    &Ch_size,
417             (M_is_hyper) ? (&C_to_M) : NULL, &C_to_M_size,
418             &C_to_A, &C_to_A_size,
419             &C_to_B, &C_to_B_size))
420         {
421             // out of memory
422             GB_FREE_WORK ;
423             return (GrB_OUT_OF_MEMORY) ;
424         }
425 
426         //----------------------------------------------------------------------
427         // compute the result: Ch and the mappings C_to_[AB]
428         //----------------------------------------------------------------------
429 
430         #pragma omp parallel for num_threads(nthreads) schedule (dynamic,1)
431         for (taskid = 0 ; taskid < ntasks ; taskid++)
432         {
433             // merge Ah and Bh into Ch
434             int64_t kA = kA_start [taskid] ;
435             int64_t kB = kB_start [taskid] ;
436             int64_t kC = kC_start [taskid] ;
437             int64_t kA_end = kA_start [taskid+1] ;
438             int64_t kB_end = kB_start [taskid+1] ;
439 
440             // merge Ah and Bh into Ch
441             for ( ; kA < kA_end && kB < kB_end ; kC++)
442             {
443                 int64_t jA = GB_Ah (kA) ;
444                 int64_t jB = Bh [kB] ;
445                 if (jA < jB)
446                 {
447                     // append jA to Ch
448                     Ch     [kC] = jA ;
449                     C_to_A [kC] = kA++ ;
450                     C_to_B [kC] = -1 ;       // jA does not appear in B
451                 }
452                 else if (jB < jA)
453                 {
454                     // append jB to Ch
455                     Ch     [kC] = jB ;
456                     C_to_A [kC] = -1 ;       // jB does not appear in A
457                     C_to_B [kC] = kB++ ;
458                 }
459                 else
460                 {
461                     // j appears in both A and B; append it to Ch
462                     Ch     [kC] = jA ;
463                     C_to_A [kC] = kA++ ;
464                     C_to_B [kC] = kB++ ;
465                 }
466             }
467             if (kA < kA_end)
468             {
469                 // B is exhausted but A is not
470                 for ( ; kA < kA_end ; kA++, kC++)
471                 {
472                     // append jA to Ch
473                     int64_t jA = GB_Ah (kA) ;
474                     Ch     [kC] = jA ;
475                     C_to_A [kC] = kA ;
476                     C_to_B [kC] = -1 ;
477                 }
478             }
479             else if (kB < kB_end)
480             {
481                 // A is exhausted but B is not
482                 for ( ; kB < kB_end ; kB++, kC++)
483                 {
484                     // append jB to Ch
485                     int64_t jB = Bh [kB] ;
486                     Ch     [kC] = jB ;
487                     C_to_A [kC] = -1 ;
488                     C_to_B [kC] = kB ;
489                 }
490             }
491             ASSERT (kC == kC_start [taskid+1]) ;
492         }
493 
494         //----------------------------------------------------------------------
495         // check result via a sequential merge
496         //----------------------------------------------------------------------
497 
498         #ifdef GB_DEBUG
499         // merge Ah and Bh into Ch
500         int64_t kA = 0 ;
501         int64_t kB = 0 ;
502         int64_t kC = 0 ;
503         for ( ; kA < Anvec && kB < Bnvec ; kC++)
504         {
505             int64_t jA = GB_Ah (kA) ;
506             int64_t jB = Bh [kB] ;
507             if (jA < jB)
508             {
509                 // append jA to Ch
510                 ASSERT (Ch     [kC] == jA) ;
511                 ASSERT (C_to_A [kC] == kA) ; kA++ ;
512                 ASSERT (C_to_B [kC] == -1) ;      // jA does not appear in B
513             }
514             else if (jB < jA)
515             {
516                 // append jB to Ch
517                 ASSERT (Ch     [kC] == jB) ;
518                 ASSERT (C_to_A [kC] == -1) ;       // jB does not appear in A
519                 ASSERT (C_to_B [kC] == kB) ; kB++ ;
520             }
521             else
522             {
523                 // j appears in both A and B; append it to Ch
524                 ASSERT (Ch     [kC] == jA) ;
525                 ASSERT (C_to_A [kC] == kA) ; kA++ ;
526                 ASSERT (C_to_B [kC] == kB) ; kB++ ;
527             }
528         }
529         if (kA < Anvec)
530         {
531             // B is exhausted but A is not
532             for ( ; kA < Anvec ; kA++, kC++)
533             {
534                 // append jA to Ch
535                 int64_t jA = GB_Ah (kA) ;
536                 ASSERT (Ch     [kC] == jA) ;
537                 ASSERT (C_to_A [kC] == kA) ;
538                 ASSERT (C_to_B [kC] == -1) ;
539             }
540         }
541         else if (kB < Bnvec)
542         {
543             // A is exhausted but B is not
544             for ( ; kB < Bnvec ; kB++, kC++)
545             {
546                 // append jB to Ch
547                 int64_t jB = Bh [kB] ;
548                 ASSERT (Ch     [kC] == jB) ;
549                 ASSERT (C_to_A [kC] == -1) ;
550                 ASSERT (C_to_B [kC] == kB) ;
551             }
552         }
553         ASSERT (kC == Cnvec) ;
554         #endif
555 
556     }
557     else if (A_is_hyper && !B_is_hyper)
558     {
559 
560         //----------------------------------------------------------------------
561         // A is hypersparse, B is not hypersparse
562         //----------------------------------------------------------------------
563 
564         // C will be sparse.  Construct the C_to_A mapping.
565 
566         Cnvec = n ;
567         nthreads = GB_nthreads (Cnvec, chunk, nthreads_max) ;
568 
569         if (!GB_allocate_result (Cnvec,
570             NULL, NULL,
571             (M_is_hyper) ? (&C_to_M) : NULL, &C_to_M_size,
572             &C_to_A, &C_to_A_size,
573             NULL, NULL))
574         {
575             // out of memory
576             GB_FREE_WORK ;
577             return (GrB_OUT_OF_MEMORY) ;
578         }
579 
580         int64_t j ;
581         #pragma omp parallel for num_threads(nthreads) schedule(static)
582         for (j = 0 ; j < n ; j++)
583         {
584             C_to_A [j] = -1 ;
585         }
586 
587         // scatter Ah into C_to_A
588         int64_t kA ;
589         #pragma omp parallel for num_threads(nthreads) schedule(static)
590         for (kA = 0 ; kA < Anvec ; kA++)
591         {
592             int64_t jA = GB_Ah (kA) ;
593             C_to_A [jA] = kA ;
594         }
595 
596     }
597     else if (!A_is_hyper && B_is_hyper)
598     {
599 
600         //----------------------------------------------------------------------
601         // A is not hypersparse, B is hypersparse
602         //----------------------------------------------------------------------
603 
604         // C will be sparse.  Construct the C_to_B mapping.
605 
606         Cnvec = n ;
607         nthreads = GB_nthreads (Cnvec, chunk, nthreads_max) ;
608 
609         if (!GB_allocate_result (Cnvec,
610             NULL, NULL,
611             (M_is_hyper) ? (&C_to_M) : NULL, &C_to_M_size,
612             NULL, NULL,
613             &C_to_B, &C_to_B_size))
614         {
615             // out of memory
616             GB_FREE_WORK ;
617             return (GrB_OUT_OF_MEMORY) ;
618         }
619 
620         int64_t j ;
621         #pragma omp parallel for num_threads(nthreads) schedule(static)
622         for (j = 0 ; j < n ; j++)
623         {
624             C_to_B [j] = -1 ;
625         }
626 
627         // scatter Bh into C_to_B
628         int64_t kB ;
629         #pragma omp parallel for num_threads(nthreads) schedule(static)
630         for (kB = 0 ; kB < Bnvec ; kB++)
631         {
632             int64_t jB = Bh [kB] ;
633             C_to_B [jB] = kB ;
634         }
635 
636     }
637     else
638     {
639 
640         //----------------------------------------------------------------------
641         // A and B are both non-hypersparse
642         //----------------------------------------------------------------------
643 
644         // C will be sparse
645         Cnvec = n ;
646         nthreads = GB_nthreads (Cnvec, chunk, nthreads_max) ;
647 
648         if (!GB_allocate_result (Cnvec,
649             NULL, NULL,
650             (M_is_hyper) ? (&C_to_M) : NULL, &C_to_M_size,
651             NULL, NULL,
652             NULL, NULL))
653         {
654             // out of memory
655             GB_FREE_WORK ;
656             return (GrB_OUT_OF_MEMORY) ;
657         }
658     }
659 
660     //--------------------------------------------------------------------------
661     // construct C_to_M if needed, if M is hypersparse
662     //--------------------------------------------------------------------------
663 
664     if (C_to_M != NULL)
665     {
666         ASSERT (M_is_hyper) ;
667         if (Ch != NULL)
668         {
669             // C is hypersparse
670             int64_t k ;
671             #pragma omp parallel for num_threads(nthreads) schedule(static)
672             for (k = 0 ; k < Cnvec ; k++)
673             {
674                 int64_t j = Ch [k] ;
675                 // C_to_M [k] = kM if Mh [kM] == j and M(:,j) is non-empty
676                 int64_t kM = 0, pM, pM_end ;
677                 GB_lookup (true, Mh, Mp, vlen, &kM, Mnvec-1, j, &pM, &pM_end) ;
678                 C_to_M [k] = (pM < pM_end) ? kM : -1 ;
679             }
680         }
681         else
682         {
683             // C is sparse
684             int64_t j ;
685             #pragma omp parallel for num_threads(nthreads) schedule(static)
686             for (j = 0 ; j < n ; j++)
687             {
688                 C_to_M [j] = -1 ;
689             }
690             // scatter Mh into C_to_M
691             int64_t kM ;
692             #pragma omp parallel for num_threads(nthreads) schedule(static)
693             for (kM = 0 ; kM < Mnvec ; kM++)
694             {
695                 int64_t jM = Mh [kM] ;
696                 C_to_M [jM] = kM ;
697             }
698         }
699     }
700 
701     //--------------------------------------------------------------------------
702     // return result
703     //--------------------------------------------------------------------------
704 
705     (*p_Cnvec) = Cnvec ;
706     (*Ch_handle) = Ch ;             (*Ch_size_handle) = Ch_size ;
707     if (C_to_M_handle != NULL)
708     {
709         (*C_to_M_handle) = C_to_M ; (*C_to_M_size_handle) = C_to_M_size ;
710     }
711     (*C_to_A_handle) = C_to_A ;     (*C_to_A_size_handle) = C_to_A_size ;
712     (*C_to_B_handle) = C_to_B ;     (*C_to_B_size_handle) = C_to_B_size ;
713     if (p_Ch_is_Mh != NULL)
714     {
715         // return Ch_is_Mh to GB_add.  For GB_masker, Ch is never Mh.
716         (*p_Ch_is_Mh) = Ch_is_Mh ;
717     }
718 
719     //--------------------------------------------------------------------------
720     // The code below describes what the output contains:
721     //--------------------------------------------------------------------------
722 
723     #ifdef GB_DEBUG
724     // the mappings are only constructed when C is sparse or hypersparse
725     ASSERT ((*C_sparsity) == GxB_SPARSE || (*C_sparsity) == GxB_HYPERSPARSE) ;
726     ASSERT (A != NULL) ;        // A and B are always present
727     ASSERT (B != NULL) ;
728     int64_t jlast = -1 ;
729     for (int64_t k = 0 ; k < Cnvec ; k++)
730     {
731 
732         // C(:,j) is in the list, as the kth vector
733         int64_t j ;
734         if (Ch == NULL)
735         {
736             // C will be constructed as sparse
737             ASSERT ((*C_sparsity) == GxB_SPARSE) ;
738             j = k ;
739         }
740         else
741         {
742             // C will be constructed as hypersparse
743             ASSERT ((*C_sparsity) == GxB_HYPERSPARSE) ;
744             j = Ch [k] ;
745         }
746 
747         // vectors j in Ch are sorted, and in the range 0:n-1
748         ASSERT (j >= 0 && j < n) ;
749         ASSERT (j > jlast) ;
750         jlast = j ;
751 
752         // see if A (:,j) exists
753         if (C_to_A != NULL)
754         {
755             // A is hypersparse
756             ASSERT (A_is_hyper) ;
757             int64_t kA = C_to_A [k] ;
758             ASSERT (kA >= -1 && kA < A->nvec) ;
759             if (kA >= 0)
760             {
761                 int64_t jA = GB_Ah (kA) ;
762                 ASSERT (j == jA) ;
763             }
764         }
765         else
766         {
767             // A is not hypersparse
768             // C_to_A exists only if A is hypersparse
769             ASSERT (!A_is_hyper) ;
770         }
771 
772         // see if B (:,j) exists
773         if (C_to_B != NULL)
774         {
775             // B is hypersparse
776             ASSERT (B_is_hyper) ;
777             int64_t kB = C_to_B [k] ;
778             ASSERT (kB >= -1 && kB < B->nvec) ;
779             if (kB >= 0)
780             {
781                 int64_t jB = B->h [kB] ;
782                 ASSERT (j == jB) ;
783             }
784         }
785         else
786         {
787             // B is not hypersparse
788             // C_to_B exists only if B is hypersparse
789             ASSERT (!B_is_hyper) ;
790         }
791 
792         // see if M (:,j) exists
793         if (Ch_is_Mh)
794         {
795             // Ch is the same as Mh
796             ASSERT (M != NULL) ;
797             ASSERT (M_is_hyper) ;
798             ASSERT (Ch != NULL && M->h != NULL && Ch [k] == M->h [k]) ;
799             ASSERT (C_to_M == NULL) ;
800         }
801         else if (C_to_M != NULL)
802         {
803             // M is present and hypersparse
804             ASSERT (M != NULL) ;
805             ASSERT (M_is_hyper) ;
806             int64_t kM = C_to_M [k] ;
807             ASSERT (kM >= -1 && kM < M->nvec) ;
808             if (kM >= 0)
809             {
810                 int64_t jM = M->h [kM] ;
811                 ASSERT (j == jM) ;
812             }
813         }
814         else
815         {
816             // M is not present, or present and sparse, bitmap or full
817             ASSERT (M == NULL || !M_is_hyper) ;
818         }
819     }
820     #endif
821 
822     //--------------------------------------------------------------------------
823     // free workspace and return result
824     //--------------------------------------------------------------------------
825 
826     GB_FREE_WORK ;
827     return (GrB_SUCCESS) ;
828 }
829 
830