1 //------------------------------------------------------------------------------
2 // GB_add_phase0: find vectors of C to compute for C=A+B or C<M>=A+B
3 //------------------------------------------------------------------------------
4
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7
8 //------------------------------------------------------------------------------
9
10 // The eWise add of two matrices, C=A+B, C<M>=A+B, or C<!M>=A+B starts with
11 // this phase, which determines which vectors of C need to be computed.
12 // This phase is also used for GB_masker, and for GB_SUBASSIGN_TWO_SLICE.
13
// On input, A and B are the two matrices being added, and M is the optional
// mask matrix (not complemented).  The complemented mask is handled in
// GB_mask, not here.
17
// On output, an integer (Cnvec), a boolean (Ch_is_Mh), and up to 3 arrays are
// returned, either NULL or of size Cnvec.  Let n = A->vdim be the vector
// dimension of A, B, M and C.
21
22 // Ch: the list of vectors to compute. If not NULL, Ch [k] = j is the
23 // kth vector in C to compute, which will become the hyperlist C->h of C.
24 // Note that some of these vectors may turn out to be empty, because of
25 // the mask, or because the vector j appeared in A or B, but is empty.
26 // It is pruned at the end of GB_add_phase2. If Ch is NULL then it is an
27 // implicit list of size n, and Ch [k] == k for all k = 0:n-1. In this
28 // case, C will be a sparse matrix, not hypersparse. Thus, the kth
29 // vector is j = GBH (Ch, k).
30
31 // Ch is freed by GB_add if phase1 fails. phase2 either frees it or
32 // transplants it into C, if C is hypersparse.
33
34 // Ch_is_Mh: true if the mask M is present, hypersparse, and not
35 // complemented, false otherwise. In this case Ch is a deep copy of Mh.
36 // Only GB_add uses this option; it is not used by GB_masker or
37 // GB_SUBASSIGN_TWO_SLICE (Ch_is_Mh is always false in this case). This
38 // is determined by passing in p_Ch_is_Mh as a NULL or non-NULL pointer.
39
40 // C_to_A: if A is hypersparse, then C_to_A [k] = kA if the kth vector,
41 // j = GBH (Ch, k) appears in A, as j = Ah [kA]. If j does not appear in
42 // A, then C_to_A [k] = -1. If A is not hypersparse, then C_to_A is
43 // returned as NULL.
44
45 // C_to_B: if B is hypersparse, then C_to_B [k] = kB if the kth vector,
46 // j = GBH (Ch, k) appears in B, as j = Bh [kB]. If j does not appear in
47 // B, then C_to_B [k] = -1. If B is not hypersparse, then C_to_B is
48 // returned as NULL.
49
50 // C_to_M: if M is hypersparse, and Ch_is_Mh is false, then C_to_M [k] =
51 // kM if the kth vector, j = GBH (Ch, k) appears in M, as j = Mh [kM]. If
52 // j does not appear in M, then C_to_M [k] = -1. If M is not hypersparse,
53 // then C_to_M is returned as NULL.
54
55 // M, A, B: any sparsity structure (hypersparse, sparse, bitmap, or full)
56 // C: not present here, but its sparsity structure is finalized, via the
57 // input/output parameter C_sparsity.
58
59 #include "GB_add.h"
60
// free all workspace pushed via GB_WERK_PUSH (Work) in GB_add_phase0
#define GB_FREE_WORK                \
{                                   \
    GB_WERK_POP (Work, int64_t) ;   \
}
65
66 //------------------------------------------------------------------------------
67 // GB_allocate_result
68 //------------------------------------------------------------------------------
69
GB_allocate_result(int64_t Cnvec,int64_t * restrict * Ch_handle,size_t * Ch_size_handle,int64_t * restrict * C_to_M_handle,size_t * C_to_M_size_handle,int64_t * restrict * C_to_A_handle,size_t * C_to_A_size_handle,int64_t * restrict * C_to_B_handle,size_t * C_to_B_size_handle)70 static inline bool GB_allocate_result
71 (
72 int64_t Cnvec,
73 int64_t *restrict *Ch_handle, size_t *Ch_size_handle,
74 int64_t *restrict *C_to_M_handle, size_t *C_to_M_size_handle,
75 int64_t *restrict *C_to_A_handle, size_t *C_to_A_size_handle,
76 int64_t *restrict *C_to_B_handle, size_t *C_to_B_size_handle
77 )
78 {
79 bool ok = true ;
80 if (Ch_handle != NULL)
81 {
82 (*Ch_handle) = GB_MALLOC (Cnvec, int64_t, Ch_size_handle) ;
83 ok = (*Ch_handle != NULL) ;
84 }
85 if (C_to_M_handle != NULL)
86 {
87 (*C_to_M_handle) = GB_MALLOC_WERK (Cnvec, int64_t, C_to_M_size_handle) ;
88 ok = ok && (*C_to_M_handle != NULL) ;
89 }
90 if (C_to_A_handle != NULL)
91 {
92 *C_to_A_handle = GB_MALLOC_WERK (Cnvec, int64_t, C_to_A_size_handle) ;
93 ok = ok && (*C_to_A_handle != NULL) ;
94 }
95 if (C_to_B_handle != NULL)
96 {
97 *C_to_B_handle = GB_MALLOC_WERK (Cnvec, int64_t, C_to_B_size_handle) ;
98 ok = ok && (*C_to_B_handle != NULL) ;
99 }
100
101 if (!ok)
102 {
103 // out of memory
104 if (Ch_handle != NULL)
105 {
106 GB_FREE (Ch_handle, *Ch_size_handle) ;
107 }
108 if (C_to_M_handle != NULL)
109 {
110 GB_FREE_WERK (C_to_M_handle, *C_to_M_size_handle) ;
111 }
112 if (C_to_A_handle != NULL)
113 {
114 GB_FREE_WERK (C_to_A_handle, *C_to_A_size_handle) ;
115 }
116 if (C_to_B_handle != NULL)
117 {
118 GB_FREE_WERK (C_to_B_handle, *C_to_B_size_handle) ;
119 }
120 }
121 return (ok) ;
122 }
123
124 //------------------------------------------------------------------------------
125 // GB_add_phase0: find the vectors of C for C<M>=A+B
126 //------------------------------------------------------------------------------
127
GB_add_phase0(int64_t * p_Cnvec,int64_t * restrict * Ch_handle,size_t * Ch_size_handle,int64_t * restrict * C_to_M_handle,size_t * C_to_M_size_handle,int64_t * restrict * C_to_A_handle,size_t * C_to_A_size_handle,int64_t * restrict * C_to_B_handle,size_t * C_to_B_size_handle,bool * p_Ch_is_Mh,int * C_sparsity,const GrB_Matrix M,const GrB_Matrix A,const GrB_Matrix B,GB_Context Context)128 GrB_Info GB_add_phase0 // find vectors in C for C=A+B or C<M>=A+B
129 (
130 int64_t *p_Cnvec, // # of vectors to compute in C
131 int64_t *restrict *Ch_handle, // Ch: size Cnvec, or NULL
132 size_t *Ch_size_handle, // size of Ch in bytes
133 int64_t *restrict *C_to_M_handle, // C_to_M: size Cnvec, or NULL
134 size_t *C_to_M_size_handle, // size of C_to_M in bytes
135 int64_t *restrict *C_to_A_handle, // C_to_A: size Cnvec, or NULL
136 size_t *C_to_A_size_handle, // size of C_to_A in bytes
137 int64_t *restrict *C_to_B_handle, // C_to_B: of size Cnvec, or NULL
138 size_t *C_to_B_size_handle, // size of C_to_A in bytes
139 bool *p_Ch_is_Mh, // if true, then Ch == Mh
140 int *C_sparsity, // sparsity structure of C
141 const GrB_Matrix M, // optional mask, may be NULL; not complemented
142 const GrB_Matrix A, // first input matrix
143 const GrB_Matrix B, // second input matrix
144 GB_Context Context
145 )
146 {
147
148 //--------------------------------------------------------------------------
149 // check inputs
150 //--------------------------------------------------------------------------
151
152 // M, A, and B can be jumbled for this phase, but not phase1 or phase2
153
154 ASSERT (p_Cnvec != NULL) ;
155 ASSERT (Ch_handle != NULL) ;
156 ASSERT (C_to_A_handle != NULL) ;
157 ASSERT (C_to_B_handle != NULL) ;
158
159 ASSERT_MATRIX_OK_OR_NULL (M, "M for add phase0", GB0) ;
160 ASSERT (!GB_ZOMBIES (M)) ;
161 ASSERT (GB_JUMBLED_OK (M)) ; // pattern not accessed
162 ASSERT (!GB_PENDING (M)) ;
163
164 ASSERT_MATRIX_OK (A, "A for add phase0", GB0) ;
165 ASSERT (!GB_ZOMBIES (A)) ;
166 ASSERT (GB_JUMBLED_OK (B)) ; // pattern not accessed
167 ASSERT (!GB_PENDING (A)) ;
168
169 ASSERT_MATRIX_OK (B, "B for add phase0", GB0) ;
170 ASSERT (!GB_ZOMBIES (B)) ;
171 ASSERT (GB_JUMBLED_OK (A)) ; // pattern not accessed
172 ASSERT (!GB_PENDING (B)) ;
173
174 ASSERT (A->vdim == B->vdim) ;
175 ASSERT (A->vlen == B->vlen) ;
176 ASSERT (GB_IMPLIES (M != NULL, A->vdim == M->vdim)) ;
177 ASSERT (GB_IMPLIES (M != NULL, A->vlen == M->vlen)) ;
178
179 //--------------------------------------------------------------------------
180 // initializations
181 //--------------------------------------------------------------------------
182
183 (*p_Cnvec) = 0 ;
184 (*Ch_handle) = NULL ;
185 if (C_to_M_handle != NULL)
186 {
187 (*C_to_M_handle) = NULL ;
188 }
189 (*C_to_A_handle) = NULL ;
190 (*C_to_B_handle) = NULL ;
191 if (p_Ch_is_Mh != NULL)
192 {
193 (*p_Ch_is_Mh) = false ;
194 }
195
196 if ((*C_sparsity) == GxB_BITMAP || (*C_sparsity) == GxB_FULL)
197 {
198 // nothing to do in phase0 for C bitmap or full
199 (*p_Cnvec) = A->vdim ; // not needed; to be consistent with GB_emult
200 return (GrB_SUCCESS) ;
201 }
202
203 int64_t *restrict Ch = NULL ; size_t Ch_size = 0 ;
204 int64_t *restrict C_to_M = NULL ; size_t C_to_M_size = 0 ;
205 int64_t *restrict C_to_A = NULL ; size_t C_to_A_size = 0 ;
206 int64_t *restrict C_to_B = NULL ; size_t C_to_B_size = 0 ;
207
208 GB_WERK_DECLARE (Work, int64_t) ;
209 int ntasks = 0 ;
210
211 //--------------------------------------------------------------------------
212 // determine the number of threads to use
213 //--------------------------------------------------------------------------
214
215 GB_GET_NTHREADS_MAX (nthreads_max, chunk, Context) ;
216 int nthreads = 1 ; // nthreads depends on Cnvec, computed below
217
218 //--------------------------------------------------------------------------
219 // get content of M, A, and B
220 //--------------------------------------------------------------------------
221
222 int64_t Cnvec ;
223
224 int64_t n = A->vdim ;
225 int64_t Anvec = A->nvec ;
226 int64_t vlen = A->vlen ;
227 const int64_t *restrict Ap = A->p ;
228 const int64_t *restrict Ah = A->h ;
229 bool A_is_hyper = (Ah != NULL) ;
230 #define GB_Ah(k) (A_is_hyper ? Ah [k] : (k))
231
232 int64_t Bnvec = B->nvec ;
233 const int64_t *restrict Bp = B->p ;
234 const int64_t *restrict Bh = B->h ;
235 bool B_is_hyper = (Bh != NULL) ;
236
237 int64_t Mnvec = 0 ;
238 const int64_t *restrict Mp = NULL ;
239 const int64_t *restrict Mh = NULL ;
240 bool M_is_hyper = GB_IS_HYPERSPARSE (M) ;
241 if (M != NULL)
242 {
243 Mnvec = M->nvec ;
244 Mp = M->p ;
245 Mh = M->h ;
246 }
247
248 // For GB_add, if M is present, hypersparse, and not complemented, then C
249 // will be hypersparse, and it will have set of vectors as M (Ch == Mh).
250 // For GB_masker, Ch is never equal to Mh.
251 bool Ch_is_Mh = (p_Ch_is_Mh != NULL) && (M != NULL && M_is_hyper) ;
252
253 //--------------------------------------------------------------------------
254 // find the set union of the non-empty vectors of A and B
255 //--------------------------------------------------------------------------
256
257 if (Ch_is_Mh)
258 {
259
260 //----------------------------------------------------------------------
261 // C and M are hypersparse, with the same vectors as the hypersparse M
262 //----------------------------------------------------------------------
263
264 (*C_sparsity) = GxB_HYPERSPARSE ;
265 ASSERT (M_is_hyper) ;
266 Cnvec = Mnvec ;
267 nthreads = GB_nthreads (Cnvec, chunk, nthreads_max) ;
268
269 if (!GB_allocate_result (Cnvec,
270 &Ch, &Ch_size,
271 NULL, NULL,
272 (A_is_hyper) ? (&C_to_A) : NULL, &C_to_A_size,
273 (B_is_hyper) ? (&C_to_B) : NULL, &C_to_B_size))
274 {
275 // out of memory
276 GB_FREE_WORK ;
277 return (GrB_OUT_OF_MEMORY) ;
278 }
279
280 // copy Mh into Ch. Ch is Mh so C_to_M is not needed.
281 GB_memcpy (Ch, Mh, Mnvec * sizeof (int64_t), nthreads) ;
282
283 // construct the mapping from C to A and B, if they are hypersparse
284 if (A_is_hyper || B_is_hyper)
285 {
286 int64_t k ;
287 #pragma omp parallel for num_threads(nthreads) schedule(static)
288 for (k = 0 ; k < Cnvec ; k++)
289 {
290 int64_t j = Ch [k] ;
291 if (A_is_hyper)
292 {
293 // C_to_A [k] = kA if Ah [kA] == j and A(:,j) is non-empty
294 int64_t kA = 0, pA, pA_end ;
295 GB_lookup (true, Ah, Ap, vlen, &kA, Anvec-1, j,
296 &pA, &pA_end) ;
297 C_to_A [k] = (pA < pA_end) ? kA : -1 ;
298 }
299 if (B_is_hyper)
300 {
301 // C_to_B [k] = kB if Bh [kB] == j and B(:,j) is non-empty
302 int64_t kB = 0, pB, pB_end ;
303 GB_lookup (true, Bh, Bp, vlen, &kB, Bnvec-1, j,
304 &pB, &pB_end) ;
305 C_to_B [k] = (pB < pB_end) ? kB : -1 ;
306 }
307 }
308 }
309
310 }
311 else if (A_is_hyper && B_is_hyper)
312 {
313
314 //----------------------------------------------------------------------
315 // A and B are hypersparse: C will be hypersparse
316 //----------------------------------------------------------------------
317
318 // Ch is the set union of Ah and Bh. This is handled with a parallel
319 // merge, since Ah and Bh are both sorted lists.
320
321 (*C_sparsity) = GxB_HYPERSPARSE ;
322
323 //----------------------------------------------------------------------
324 // create the tasks to construct Ch
325 //----------------------------------------------------------------------
326
327 double work = GB_IMIN (Anvec + Bnvec, n) ;
328 nthreads = GB_nthreads (work, chunk, nthreads_max) ;
329
330 ntasks = (nthreads == 1) ? 1 : (64 * nthreads) ;
331 ntasks = GB_IMIN (ntasks, work) ;
332
333 // allocate workspace
334 GB_WERK_PUSH (Work, 3*(ntasks+1), int64_t) ;
335 if (Work == NULL)
336 {
337 // out of memory
338 GB_FREE_WORK ;
339 return (GrB_OUT_OF_MEMORY) ;
340 }
341 int64_t *restrict kA_start = Work ;
342 int64_t *restrict kB_start = Work + (ntasks+1) ;
343 int64_t *restrict kC_start = Work + (ntasks+1)*2 ;
344
345 kA_start [0] = (Anvec == 0) ? -1 : 0 ;
346 kB_start [0] = (Bnvec == 0) ? -1 : 0 ;
347 kA_start [ntasks] = (Anvec == 0) ? -1 : Anvec ;
348 kB_start [ntasks] = (Bnvec == 0) ? -1 : Bnvec ;
349
350 for (int taskid = 1 ; taskid < ntasks ; taskid++)
351 {
352 // create tasks: A and B are both hyper
353 double target_work = ((ntasks-taskid) * work) / ntasks ;
354 GB_slice_vector (NULL, NULL,
355 &(kA_start [taskid]), &(kB_start [taskid]),
356 0, 0, NULL, // Mi not present
357 0, Anvec, Ah, // Ah, explicit list
358 0, Bnvec, Bh, // Bh, explicit list
359 n, // Ah and Bh have dimension n
360 target_work) ;
361 }
362
363 //----------------------------------------------------------------------
364 // count the entries in Ch for each task
365 //----------------------------------------------------------------------
366
367 int taskid ;
368 #pragma omp parallel for num_threads(nthreads) schedule (dynamic,1)
369 for (taskid = 0 ; taskid < ntasks ; taskid++)
370 {
371 // merge Ah and Bh into Ch
372 int64_t kA = kA_start [taskid] ;
373 int64_t kB = kB_start [taskid] ;
374 int64_t kA_end = kA_start [taskid+1] ;
375 int64_t kB_end = kB_start [taskid+1] ;
376 int64_t kC = 0 ;
377 for ( ; kA < kA_end && kB < kB_end ; kC++)
378 {
379 int64_t jA = GB_Ah (kA) ;
380 int64_t jB = Bh [kB] ;
381 if (jA < jB)
382 {
383 // jA appears in A but not B
384 kA++ ;
385 }
386 else if (jB < jA)
387 {
388 // jB appears in B but not A
389 kB++ ;
390 }
391 else
392 {
393 // j = jA = jB appears in both A and B
394 kA++ ;
395 kB++ ;
396 }
397 }
398 kC_start [taskid] = kC + (kA_end - kA) + (kB_end - kB) ;
399 }
400
401 //----------------------------------------------------------------------
402 // cumulative sum of entries in Ch for each task
403 //----------------------------------------------------------------------
404
405 GB_cumsum (kC_start, ntasks, NULL, 1, NULL) ;
406 Cnvec = kC_start [ntasks] ;
407
408 //----------------------------------------------------------------------
409 // allocate the result: Ch and the mappings C_to_[MAB]
410 //----------------------------------------------------------------------
411
412 // C will be hypersparse, so Ch is allocated. The mask M is ignored
413 // for computing Ch. Ch is the set union of Ah and Bh.
414
415 if (!GB_allocate_result (Cnvec,
416 &Ch, &Ch_size,
417 (M_is_hyper) ? (&C_to_M) : NULL, &C_to_M_size,
418 &C_to_A, &C_to_A_size,
419 &C_to_B, &C_to_B_size))
420 {
421 // out of memory
422 GB_FREE_WORK ;
423 return (GrB_OUT_OF_MEMORY) ;
424 }
425
426 //----------------------------------------------------------------------
427 // compute the result: Ch and the mappings C_to_[AB]
428 //----------------------------------------------------------------------
429
430 #pragma omp parallel for num_threads(nthreads) schedule (dynamic,1)
431 for (taskid = 0 ; taskid < ntasks ; taskid++)
432 {
433 // merge Ah and Bh into Ch
434 int64_t kA = kA_start [taskid] ;
435 int64_t kB = kB_start [taskid] ;
436 int64_t kC = kC_start [taskid] ;
437 int64_t kA_end = kA_start [taskid+1] ;
438 int64_t kB_end = kB_start [taskid+1] ;
439
440 // merge Ah and Bh into Ch
441 for ( ; kA < kA_end && kB < kB_end ; kC++)
442 {
443 int64_t jA = GB_Ah (kA) ;
444 int64_t jB = Bh [kB] ;
445 if (jA < jB)
446 {
447 // append jA to Ch
448 Ch [kC] = jA ;
449 C_to_A [kC] = kA++ ;
450 C_to_B [kC] = -1 ; // jA does not appear in B
451 }
452 else if (jB < jA)
453 {
454 // append jB to Ch
455 Ch [kC] = jB ;
456 C_to_A [kC] = -1 ; // jB does not appear in A
457 C_to_B [kC] = kB++ ;
458 }
459 else
460 {
461 // j appears in both A and B; append it to Ch
462 Ch [kC] = jA ;
463 C_to_A [kC] = kA++ ;
464 C_to_B [kC] = kB++ ;
465 }
466 }
467 if (kA < kA_end)
468 {
469 // B is exhausted but A is not
470 for ( ; kA < kA_end ; kA++, kC++)
471 {
472 // append jA to Ch
473 int64_t jA = GB_Ah (kA) ;
474 Ch [kC] = jA ;
475 C_to_A [kC] = kA ;
476 C_to_B [kC] = -1 ;
477 }
478 }
479 else if (kB < kB_end)
480 {
481 // A is exhausted but B is not
482 for ( ; kB < kB_end ; kB++, kC++)
483 {
484 // append jB to Ch
485 int64_t jB = Bh [kB] ;
486 Ch [kC] = jB ;
487 C_to_A [kC] = -1 ;
488 C_to_B [kC] = kB ;
489 }
490 }
491 ASSERT (kC == kC_start [taskid+1]) ;
492 }
493
494 //----------------------------------------------------------------------
495 // check result via a sequential merge
496 //----------------------------------------------------------------------
497
498 #ifdef GB_DEBUG
499 // merge Ah and Bh into Ch
500 int64_t kA = 0 ;
501 int64_t kB = 0 ;
502 int64_t kC = 0 ;
503 for ( ; kA < Anvec && kB < Bnvec ; kC++)
504 {
505 int64_t jA = GB_Ah (kA) ;
506 int64_t jB = Bh [kB] ;
507 if (jA < jB)
508 {
509 // append jA to Ch
510 ASSERT (Ch [kC] == jA) ;
511 ASSERT (C_to_A [kC] == kA) ; kA++ ;
512 ASSERT (C_to_B [kC] == -1) ; // jA does not appear in B
513 }
514 else if (jB < jA)
515 {
516 // append jB to Ch
517 ASSERT (Ch [kC] == jB) ;
518 ASSERT (C_to_A [kC] == -1) ; // jB does not appear in A
519 ASSERT (C_to_B [kC] == kB) ; kB++ ;
520 }
521 else
522 {
523 // j appears in both A and B; append it to Ch
524 ASSERT (Ch [kC] == jA) ;
525 ASSERT (C_to_A [kC] == kA) ; kA++ ;
526 ASSERT (C_to_B [kC] == kB) ; kB++ ;
527 }
528 }
529 if (kA < Anvec)
530 {
531 // B is exhausted but A is not
532 for ( ; kA < Anvec ; kA++, kC++)
533 {
534 // append jA to Ch
535 int64_t jA = GB_Ah (kA) ;
536 ASSERT (Ch [kC] == jA) ;
537 ASSERT (C_to_A [kC] == kA) ;
538 ASSERT (C_to_B [kC] == -1) ;
539 }
540 }
541 else if (kB < Bnvec)
542 {
543 // A is exhausted but B is not
544 for ( ; kB < Bnvec ; kB++, kC++)
545 {
546 // append jB to Ch
547 int64_t jB = Bh [kB] ;
548 ASSERT (Ch [kC] == jB) ;
549 ASSERT (C_to_A [kC] == -1) ;
550 ASSERT (C_to_B [kC] == kB) ;
551 }
552 }
553 ASSERT (kC == Cnvec) ;
554 #endif
555
556 }
557 else if (A_is_hyper && !B_is_hyper)
558 {
559
560 //----------------------------------------------------------------------
561 // A is hypersparse, B is not hypersparse
562 //----------------------------------------------------------------------
563
564 // C will be sparse. Construct the C_to_A mapping.
565
566 Cnvec = n ;
567 nthreads = GB_nthreads (Cnvec, chunk, nthreads_max) ;
568
569 if (!GB_allocate_result (Cnvec,
570 NULL, NULL,
571 (M_is_hyper) ? (&C_to_M) : NULL, &C_to_M_size,
572 &C_to_A, &C_to_A_size,
573 NULL, NULL))
574 {
575 // out of memory
576 GB_FREE_WORK ;
577 return (GrB_OUT_OF_MEMORY) ;
578 }
579
580 int64_t j ;
581 #pragma omp parallel for num_threads(nthreads) schedule(static)
582 for (j = 0 ; j < n ; j++)
583 {
584 C_to_A [j] = -1 ;
585 }
586
587 // scatter Ah into C_to_A
588 int64_t kA ;
589 #pragma omp parallel for num_threads(nthreads) schedule(static)
590 for (kA = 0 ; kA < Anvec ; kA++)
591 {
592 int64_t jA = GB_Ah (kA) ;
593 C_to_A [jA] = kA ;
594 }
595
596 }
597 else if (!A_is_hyper && B_is_hyper)
598 {
599
600 //----------------------------------------------------------------------
601 // A is not hypersparse, B is hypersparse
602 //----------------------------------------------------------------------
603
604 // C will be sparse. Construct the C_to_B mapping.
605
606 Cnvec = n ;
607 nthreads = GB_nthreads (Cnvec, chunk, nthreads_max) ;
608
609 if (!GB_allocate_result (Cnvec,
610 NULL, NULL,
611 (M_is_hyper) ? (&C_to_M) : NULL, &C_to_M_size,
612 NULL, NULL,
613 &C_to_B, &C_to_B_size))
614 {
615 // out of memory
616 GB_FREE_WORK ;
617 return (GrB_OUT_OF_MEMORY) ;
618 }
619
620 int64_t j ;
621 #pragma omp parallel for num_threads(nthreads) schedule(static)
622 for (j = 0 ; j < n ; j++)
623 {
624 C_to_B [j] = -1 ;
625 }
626
627 // scatter Bh into C_to_B
628 int64_t kB ;
629 #pragma omp parallel for num_threads(nthreads) schedule(static)
630 for (kB = 0 ; kB < Bnvec ; kB++)
631 {
632 int64_t jB = Bh [kB] ;
633 C_to_B [jB] = kB ;
634 }
635
636 }
637 else
638 {
639
640 //----------------------------------------------------------------------
641 // A and B are both non-hypersparse
642 //----------------------------------------------------------------------
643
644 // C will be sparse
645 Cnvec = n ;
646 nthreads = GB_nthreads (Cnvec, chunk, nthreads_max) ;
647
648 if (!GB_allocate_result (Cnvec,
649 NULL, NULL,
650 (M_is_hyper) ? (&C_to_M) : NULL, &C_to_M_size,
651 NULL, NULL,
652 NULL, NULL))
653 {
654 // out of memory
655 GB_FREE_WORK ;
656 return (GrB_OUT_OF_MEMORY) ;
657 }
658 }
659
660 //--------------------------------------------------------------------------
661 // construct C_to_M if needed, if M is hypersparse
662 //--------------------------------------------------------------------------
663
664 if (C_to_M != NULL)
665 {
666 ASSERT (M_is_hyper) ;
667 if (Ch != NULL)
668 {
669 // C is hypersparse
670 int64_t k ;
671 #pragma omp parallel for num_threads(nthreads) schedule(static)
672 for (k = 0 ; k < Cnvec ; k++)
673 {
674 int64_t j = Ch [k] ;
675 // C_to_M [k] = kM if Mh [kM] == j and M(:,j) is non-empty
676 int64_t kM = 0, pM, pM_end ;
677 GB_lookup (true, Mh, Mp, vlen, &kM, Mnvec-1, j, &pM, &pM_end) ;
678 C_to_M [k] = (pM < pM_end) ? kM : -1 ;
679 }
680 }
681 else
682 {
683 // C is sparse
684 int64_t j ;
685 #pragma omp parallel for num_threads(nthreads) schedule(static)
686 for (j = 0 ; j < n ; j++)
687 {
688 C_to_M [j] = -1 ;
689 }
690 // scatter Mh into C_to_M
691 int64_t kM ;
692 #pragma omp parallel for num_threads(nthreads) schedule(static)
693 for (kM = 0 ; kM < Mnvec ; kM++)
694 {
695 int64_t jM = Mh [kM] ;
696 C_to_M [jM] = kM ;
697 }
698 }
699 }
700
701 //--------------------------------------------------------------------------
702 // return result
703 //--------------------------------------------------------------------------
704
705 (*p_Cnvec) = Cnvec ;
706 (*Ch_handle) = Ch ; (*Ch_size_handle) = Ch_size ;
707 if (C_to_M_handle != NULL)
708 {
709 (*C_to_M_handle) = C_to_M ; (*C_to_M_size_handle) = C_to_M_size ;
710 }
711 (*C_to_A_handle) = C_to_A ; (*C_to_A_size_handle) = C_to_A_size ;
712 (*C_to_B_handle) = C_to_B ; (*C_to_B_size_handle) = C_to_B_size ;
713 if (p_Ch_is_Mh != NULL)
714 {
715 // return Ch_is_Mh to GB_add. For GB_masker, Ch is never Mh.
716 (*p_Ch_is_Mh) = Ch_is_Mh ;
717 }
718
719 //--------------------------------------------------------------------------
720 // The code below describes what the output contains:
721 //--------------------------------------------------------------------------
722
723 #ifdef GB_DEBUG
724 // the mappings are only constructed when C is sparse or hypersparse
725 ASSERT ((*C_sparsity) == GxB_SPARSE || (*C_sparsity) == GxB_HYPERSPARSE) ;
726 ASSERT (A != NULL) ; // A and B are always present
727 ASSERT (B != NULL) ;
728 int64_t jlast = -1 ;
729 for (int64_t k = 0 ; k < Cnvec ; k++)
730 {
731
732 // C(:,j) is in the list, as the kth vector
733 int64_t j ;
734 if (Ch == NULL)
735 {
736 // C will be constructed as sparse
737 ASSERT ((*C_sparsity) == GxB_SPARSE) ;
738 j = k ;
739 }
740 else
741 {
742 // C will be constructed as hypersparse
743 ASSERT ((*C_sparsity) == GxB_HYPERSPARSE) ;
744 j = Ch [k] ;
745 }
746
747 // vectors j in Ch are sorted, and in the range 0:n-1
748 ASSERT (j >= 0 && j < n) ;
749 ASSERT (j > jlast) ;
750 jlast = j ;
751
752 // see if A (:,j) exists
753 if (C_to_A != NULL)
754 {
755 // A is hypersparse
756 ASSERT (A_is_hyper) ;
757 int64_t kA = C_to_A [k] ;
758 ASSERT (kA >= -1 && kA < A->nvec) ;
759 if (kA >= 0)
760 {
761 int64_t jA = GB_Ah (kA) ;
762 ASSERT (j == jA) ;
763 }
764 }
765 else
766 {
767 // A is not hypersparse
768 // C_to_A exists only if A is hypersparse
769 ASSERT (!A_is_hyper) ;
770 }
771
772 // see if B (:,j) exists
773 if (C_to_B != NULL)
774 {
775 // B is hypersparse
776 ASSERT (B_is_hyper) ;
777 int64_t kB = C_to_B [k] ;
778 ASSERT (kB >= -1 && kB < B->nvec) ;
779 if (kB >= 0)
780 {
781 int64_t jB = B->h [kB] ;
782 ASSERT (j == jB) ;
783 }
784 }
785 else
786 {
787 // B is not hypersparse
788 // C_to_B exists only if B is hypersparse
789 ASSERT (!B_is_hyper) ;
790 }
791
792 // see if M (:,j) exists
793 if (Ch_is_Mh)
794 {
795 // Ch is the same as Mh
796 ASSERT (M != NULL) ;
797 ASSERT (M_is_hyper) ;
798 ASSERT (Ch != NULL && M->h != NULL && Ch [k] == M->h [k]) ;
799 ASSERT (C_to_M == NULL) ;
800 }
801 else if (C_to_M != NULL)
802 {
803 // M is present and hypersparse
804 ASSERT (M != NULL) ;
805 ASSERT (M_is_hyper) ;
806 int64_t kM = C_to_M [k] ;
807 ASSERT (kM >= -1 && kM < M->nvec) ;
808 if (kM >= 0)
809 {
810 int64_t jM = M->h [kM] ;
811 ASSERT (j == jM) ;
812 }
813 }
814 else
815 {
816 // M is not present, or present and sparse, bitmap or full
817 ASSERT (M == NULL || !M_is_hyper) ;
818 }
819 }
820 #endif
821
822 //--------------------------------------------------------------------------
823 // free workspace and return result
824 //--------------------------------------------------------------------------
825
826 GB_FREE_WORK ;
827 return (GrB_SUCCESS) ;
828 }
829
830