1 //------------------------------------------------------------------------------
2 // GB_emult_01_template: C=A.*B, C<M or !M>=A.*B when C is sparse/hyper
3 //------------------------------------------------------------------------------
4 
5 // SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
6 // SPDX-License-Identifier: Apache-2.0
7 
8 //------------------------------------------------------------------------------
9 
10 // Computes C=A.*B, C<M>=A.*B, or C<!M>=A.*B when C is sparse or hypersparse:
11 
12 // phase1: does not compute C itself, but just counts the # of entries in each
13 // vector of C.  Fine tasks compute the # of entries in their slice of a
14 // single vector of C, and the results are cumsum'd.
15 
16 // phase2: computes C, using the counts computed by phase1.
17 
18 // No input matrix can be jumbled, and C is constructed as unjumbled.
19 
20 // The following cases are handled:
21 
22         //      ------------------------------------------
23         //      C       =           A       .*      B
24         //      ------------------------------------------
25         //      sparse  .           sparse          sparse  (method: 01)
26 
27         //      ------------------------------------------
28         //      C       <M>=        A       .*      B
29         //      ------------------------------------------
30         //      sparse  sparse      sparse          sparse  (method: 01)
31         //      sparse  bitmap      sparse          sparse  (method: 01)
32         //      sparse  full        sparse          sparse  (method: 01)
33         //      sparse  sparse      sparse          bitmap  (04a or 02a)
34         //      sparse  sparse      sparse          full    (04a or 02a)
35         //      sparse  sparse      bitmap          sparse  (04b or 02b)
36         //      sparse  sparse      full            sparse  (04b or 02b)
37 
38         //      ------------------------------------------
39         //      C       <!M>=       A       .*      B
40         //      ------------------------------------------
41         //      sparse  sparse      sparse          sparse  (01: M later)
42         //      sparse  bitmap      sparse          sparse  (method: 01)
43         //      sparse  full        sparse          sparse  (method: 01)
44 
45 // The 04a and 04b methods are not yet implemented.  This method handles
46 // those cases.  Methods 02a and 02b can be used as well, but only if M is
47 // applied later.  See GB_emult_sparsity for this decision.
48 
49 {
50 
51     int taskid ;
52     #pragma omp parallel for num_threads(C_nthreads) schedule(dynamic,1)
53     for (taskid = 0 ; taskid < C_ntasks ; taskid++)
54     {
55 
56         //----------------------------------------------------------------------
57         // get the task descriptor
58         //----------------------------------------------------------------------
59 
60         int64_t kfirst = TaskList [taskid].kfirst ;
61         int64_t klast  = TaskList [taskid].klast ;
62         bool fine_task = (klast == -1) ;
63         int64_t len ;
64         if (fine_task)
65         {
66             // a fine task operates on a slice of a single vector
67             klast = kfirst ;
68             len = TaskList [taskid].len ;
69         }
70         else
71         {
72             // a coarse task operates on one or more whole vectors
73             len = vlen ;
74         }
75 
76         //----------------------------------------------------------------------
77         // compute all vectors in this task
78         //----------------------------------------------------------------------
79 
80         for (int64_t k = kfirst ; k <= klast ; k++)
81         {
82 
83             //------------------------------------------------------------------
84             // get j, the kth vector of C
85             //------------------------------------------------------------------
86 
87             int64_t j = GBH (Ch, k) ;
88 
89             #if defined ( GB_PHASE_1_OF_2 )
90             int64_t cjnz = 0 ;
91             #else
92             int64_t pC, pC_end ;
93             if (fine_task)
94             {
95                 // A fine task computes a slice of C(:,j)
96                 pC     = TaskList [taskid  ].pC ;
97                 pC_end = TaskList [taskid+1].pC ;
98                 ASSERT (Cp [k] <= pC && pC <= pC_end && pC_end <= Cp [k+1]) ;
99             }
100             else
101             {
102                 // The vectors of C are never sliced for a coarse task.
103                 pC     = Cp [k] ;
104                 pC_end = Cp [k+1] ;
105             }
106             int64_t cjnz = pC_end - pC ;
107             if (cjnz == 0) continue ;
108             #endif
109 
110             //------------------------------------------------------------------
111             // get A(:,j)
112             //------------------------------------------------------------------
113 
114             int64_t pA = -1, pA_end = -1 ;
115             if (fine_task)
116             {
117                 // A fine task operates on Ai,Ax [pA...pA_end-1], which is
118                 // a subset of the vector A(:,j)
119                 pA     = TaskList [taskid].pA ;
120                 pA_end = TaskList [taskid].pA_end ;
121             }
122             else
123             {
124                 // A coarse task operates on the entire vector A (:,j)
125                 int64_t kA = (Ch == Ah) ? k :
126                             ((C_to_A == NULL) ? j : C_to_A [k]) ;
127                 if (kA >= 0)
128                 {
129                     pA     = GBP (Ap, kA, vlen) ;
130                     pA_end = GBP (Ap, kA+1, vlen) ;
131                 }
132             }
133 
134             int64_t ajnz = pA_end - pA ;        // nnz in A(:,j) for this slice
135             int64_t pA_start = pA ;
136             bool adense = (ajnz == len) ;
137 
138             // get the first and last indices in A(:,j) for this vector
139             int64_t iA_first = -1 ;
140             if (ajnz > 0)
141             {
142                 iA_first = GBI (Ai, pA, vlen) ;
143             }
144             #if defined ( GB_PHASE_1_OF_2 ) || defined ( GB_DEBUG )
145             int64_t iA_last = -1 ;
146             if (ajnz > 0)
147             {
148                 iA_last  = GBI (Ai, pA_end-1, vlen) ;
149             }
150             #endif
151 
152             //------------------------------------------------------------------
153             // get B(:,j)
154             //------------------------------------------------------------------
155 
156             int64_t pB = -1, pB_end = -1 ;
157             if (fine_task)
158             {
159                 // A fine task operates on Bi,Bx [pB...pB_end-1], which is
160                 // a subset of the vector B(:,j)
161                 pB     = TaskList [taskid].pB ;
162                 pB_end = TaskList [taskid].pB_end ;
163             }
164             else
165             {
166                 // A coarse task operates on the entire vector B (:,j)
167                 int64_t kB = (Ch == Bh) ? k :
168                             ((C_to_B == NULL) ? j : C_to_B [k]) ;
169                 if (kB >= 0)
170                 {
171                     pB     = GBP (Bp, kB, vlen) ;
172                     pB_end = GBP (Bp, kB+1, vlen) ;
173                 }
174             }
175 
176             int64_t bjnz = pB_end - pB ;        // nnz in B(:,j) for this slice
177             int64_t pB_start = pB ;
178             bool bdense = (bjnz == len) ;
179 
180             // get the first and last indices in B(:,j) for this vector
181             int64_t iB_first = -1 ;
182             if (bjnz > 0)
183             {
184                 iB_first = GBI (Bi, pB, vlen) ;
185             }
186             #if defined ( GB_PHASE_1_OF_2 ) || defined ( GB_DEBUG )
187             int64_t iB_last = -1 ;
188             if (bjnz > 0)
189             {
190                 iB_last  = GBI (Bi, pB_end-1, vlen) ;
191             }
192             #endif
193 
194             //------------------------------------------------------------------
195             // get M(:,j) if M is sparse or hypersparse
196             //------------------------------------------------------------------
197 
198             int64_t pM = -1 ;
199             int64_t pM_end = -1 ;
200             if (M_is_sparse_or_hyper)
201             {
202                 if (fine_task)
203                 {
204                     // A fine task operates on Mi,Mx [pM...pM_end-1], which is
205                     // a subset of the vector M(:,j)
206                     pM     = TaskList [taskid].pM ;
207                     pM_end = TaskList [taskid].pM_end ;
208                 }
209                 else
210                 {
211                     int64_t kM = -1 ;
212                     if (Ch == Mh)
213                     {
214                         // Ch is the same as Mh (a shallow copy), or both NULL
215                         kM = k ;
216                     }
217                     else
218                     {
219                         kM = (C_to_M == NULL) ? j : C_to_M [k] ;
220                     }
221                     if (kM >= 0)
222                     {
223                         pM     = GBP (Mp, kM, vlen) ;
224                         pM_end = GBP (Mp, kM+1, vlen) ;
225                     }
226                 }
227             }
228 
229             //------------------------------------------------------------------
230             // C(:,j)<optional mask> = A (:,j) .* B (:,j) or subvector
231             //------------------------------------------------------------------
232 
233             #if defined ( GB_PHASE_1_OF_2 )
234 
235             if (ajnz == 0 || bjnz == 0)
236             {
237 
238                 //--------------------------------------------------------------
239                 // A(:,j) and/or B(:,j) are empty
240                 //--------------------------------------------------------------
241 
242                 ;
243 
244             }
245             else if (iA_last < iB_first || iB_last < iA_first)
246             {
247 
248                 //--------------------------------------------------------------
249                 // intersection of A(:,j) and B(:,j) is empty
250                 //--------------------------------------------------------------
251 
252                 // the last entry of A(:,j) comes before the first entry
253                 // of B(:,j), or visa versa
254                 ;
255 
256             }
257             else
258 
259             #endif
260 
261             if (M == NULL)
262             {
263 
264                 //--------------------------------------------------------------
265                 // C = A.*B
266                 //--------------------------------------------------------------
267 
268                 //      ------------------------------------------
269                 //      C       =           A       .*      B
270                 //      ------------------------------------------
271                 //      sparse  .           sparse          sparse  (method: 01)
272                 //      sparse  sparse      sparse          sparse  (01 M later)
273 
274                 // both A and B are sparse/hyper
275                 ASSERT (A_is_sparse || A_is_hyper) ;
276                 ASSERT (B_is_sparse || B_is_hyper) ;
277 
278                 if (ajnz > 32 * bjnz)
279                 {
280 
281                     //----------------------------------------------------------
282                     // A(:,j) is much denser than B(:,j)
283                     //----------------------------------------------------------
284 
285                     for ( ; pB < pB_end ; pB++)
286                     {
287                         int64_t i = Bi [pB] ;
288                         // find i in A(:,j)
289                         int64_t pright = pA_end - 1 ;
290                         bool found ;
291                         GB_BINARY_SEARCH (i, Ai, pA, pright, found) ;
292                         if (found)
293                         {
294                             // C (i,j) = A (i,j) .* B (i,j)
295                             #if defined ( GB_PHASE_1_OF_2 )
296                             cjnz++ ;
297                             #else
298                             ASSERT (pC < pC_end) ;
299                             Ci [pC] = i ;
300                             GB_GETA (aij, Ax, pA) ;
301                             GB_GETB (bij, Bx, pB) ;
302                             GB_BINOP (GB_CX (pC), aij, bij, i, j) ;
303                             pC++ ;
304                             #endif
305                         }
306                     }
307                     #if defined ( GB_PHASE_2_OF_2 )
308                     ASSERT (pC == pC_end) ;
309                     #endif
310 
311                 }
312                 else if (bjnz > 32 * ajnz)
313                 {
314 
315                     //----------------------------------------------------------
316                     // B(:,j) is much denser than A(:,j)
317                     //----------------------------------------------------------
318 
319                     for ( ; pA < pA_end ; pA++)
320                     {
321                         int64_t i = Ai [pA] ;
322                         // find i in B(:,j)
323                         int64_t pright = pB_end - 1 ;
324                         bool found ;
325                         GB_BINARY_SEARCH (i, Bi, pB, pright, found) ;
326                         if (found)
327                         {
328                             // C (i,j) = A (i,j) .* B (i,j)
329                             #if defined ( GB_PHASE_1_OF_2 )
330                             cjnz++ ;
331                             #else
332                             ASSERT (pC < pC_end) ;
333                             Ci [pC] = i ;
334                             GB_GETA (aij, Ax, pA) ;
335                             GB_GETB (bij, Bx, pB) ;
336                             GB_BINOP (GB_CX (pC), aij, bij, i, j) ;
337                             pC++ ;
338                             #endif
339                         }
340                     }
341                     #if defined ( GB_PHASE_2_OF_2 )
342                     ASSERT (pC == pC_end) ;
343                     #endif
344 
345                 }
346                 else
347                 {
348 
349                     //----------------------------------------------------------
350                     // A(:,j) and B(:,j) about the sparsity
351                     //----------------------------------------------------------
352 
353                     // linear-time scan of A(:,j) and B(:,j)
354 
355                     while (pA < pA_end && pB < pB_end)
356                     {
357                         int64_t iA = Ai [pA] ;
358                         int64_t iB = Bi [pB] ;
359                         if (iA < iB)
360                         {
361                             // A(i,j) exists but not B(i,j)
362                             pA++ ;
363                         }
364                         else if (iB < iA)
365                         {
366                             // B(i,j) exists but not A(i,j)
367                             pB++ ;
368                         }
369                         else
370                         {
371                             // both A(i,j) and B(i,j) exist
372                             // C (i,j) = A (i,j) .* B (i,j)
373                             #if defined ( GB_PHASE_1_OF_2 )
374                             cjnz++ ;
375                             #else
376                             ASSERT (pC < pC_end) ;
377                             Ci [pC] = iB ;
378                             GB_GETA (aij, Ax, pA) ;
379                             GB_GETB (bij, Bx, pB) ;
380                             GB_BINOP (GB_CX (pC), aij, bij, iB, j) ;
381                             pC++ ;
382                             #endif
383                             pA++ ;
384                             pB++ ;
385                         }
386                     }
387 
388                     #if defined ( GB_PHASE_2_OF_2 )
389                     ASSERT (pC == pC_end) ;
390                     #endif
391                 }
392 
393             }
394             else if (M_is_sparse_or_hyper)
395             {
396 
397                 //--------------------------------------------------------------
398                 // C and M are sparse or hypersparse
399                 //--------------------------------------------------------------
400 
401                 //      ------------------------------------------
402                 //      C       <M>=        A       .*      B
403                 //      ------------------------------------------
404                 //      sparse  sparse      sparse          sparse  (method: 01)
405                 //      sparse  sparse      sparse          bitmap  (04a or 02a)
406                 //      sparse  sparse      sparse          full    (04a or 02a)
407                 //      sparse  sparse      bitmap          sparse  (04b or 02b)
408                 //      sparse  sparse      full            sparse  (04b or 02b)
409 
410                 // Method 04 is not yet implemented; using this method
411                 // (GB_emult_01) instead.
412 
413                 // ether A or B are sparse/hyper
414                 ASSERT (A_is_sparse || A_is_hyper || B_is_sparse || B_is_hyper);
415 
416                 for ( ; pM < pM_end ; pM++)
417                 {
418 
419                     //----------------------------------------------------------
420                     // get M(i,j) for A(i,j) .* B (i,j)
421                     //----------------------------------------------------------
422 
423                     int64_t i = GBI (Mi, pM, vlen) ;
424                     bool mij = GB_mcast (Mx, pM, msize) ;
425                     if (!mij) continue ;
426 
427                     //----------------------------------------------------------
428                     // get A(i,j)
429                     //----------------------------------------------------------
430 
431                     bool afound ;
432                     if (adense)
433                     {
434                         // A(:,j) is dense, bitmap, or full; use quick lookup
435                         pA = pA_start + i - iA_first ;
436                         afound = GBB (Ab, pA) ;
437                     }
438                     else
439                     {
440                         // A(:,j) is sparse; use binary search for A(i,j)
441                         int64_t apright = pA_end - 1 ;
442                         GB_BINARY_SEARCH (i, Ai, pA, apright, afound) ;
443                     }
444                     if (!afound) continue ;
445                     ASSERT (GBI (Ai, pA, vlen) == i) ;
446 
447                     //----------------------------------------------------------
448                     // get B(i,j)
449                     //----------------------------------------------------------
450 
451                     bool bfound ;
452                     if (bdense)
453                     {
454                         // B(:,j) is dense; use direct lookup for B(i,j)
455                         pB = pB_start + i - iB_first ;
456                         bfound = GBB (Bb, pB) ;
457                     }
458                     else
459                     {
460                         // B(:,j) is sparse; use binary search for B(i,j)
461                         int64_t bpright = pB_end - 1 ;
462                         GB_BINARY_SEARCH (i, Bi, pB, bpright, bfound) ;
463                     }
464                     if (!bfound) continue ;
465                     ASSERT (GBI (Bi, pB, vlen) == i) ;
466 
467                     //----------------------------------------------------------
468                     // C(i,j) = A(i,j) .* B(i,j)
469                     //----------------------------------------------------------
470 
471                     // C (i,j) = A (i,j) .* B (i,j)
472                     #if defined ( GB_PHASE_1_OF_2 )
473                     cjnz++ ;
474                     #else
475                     Ci [pC] = i ;
476                     GB_GETA (aij, Ax, pA) ;
477                     GB_GETB (bij, Bx, pB) ;
478                     GB_BINOP (GB_CX (pC), aij, bij, i, j) ;
479                     pC++ ;
480                     #endif
481                 }
482 
483                 #if defined ( GB_PHASE_2_OF_2 )
484                 ASSERT (pC == pC_end) ;
485                 #endif
486 
487             }
488             else
489             {
490 
491                 //--------------------------------------------------------------
492                 // M is bitmap or full, for either C<M>=A.*B or C<!M>=A.*B
493                 //--------------------------------------------------------------
494 
495                 //      ------------------------------------------
496                 //      C       <M>=        A       .*      B
497                 //      ------------------------------------------
498                 //      sparse  bitmap      sparse          sparse  (method: 01)
499                 //      sparse  full        sparse          sparse  (method: 01)
500 
501                 //      ------------------------------------------
502                 //      C       <!M>=       A       .*      B
503                 //      ------------------------------------------
504                 //      sparse  bitmap      sparse          sparse  (method: 01)
505                 //      sparse  full        sparse          sparse  (method: 01)
506 
507                 // GB_GET_MIJ: get M(i,j) where M is bitmap or full
508                 #undef  GB_GET_MIJ
509                 #define GB_GET_MIJ(i)                                     \
510                     int64_t pM = pM_start + i ;                           \
511                     bool mij = GBB (Mb, pM) && GB_mcast (Mx, pM, msize) ; \
512                     if (Mask_comp) mij = !mij ;
513 
514                 // both A and B are sparse/hyper
515                 ASSERT (A_is_sparse || A_is_hyper) ;
516                 ASSERT (B_is_sparse || B_is_hyper) ;
517 
518                 int64_t pM_start = j * vlen ;
519 
520                 if (ajnz > 32 * bjnz)
521                 {
522 
523                     //----------------------------------------------------------
524                     // A(:,j) much denser than B(:,j), M bitmap/full
525                     //----------------------------------------------------------
526 
527                     for ( ; pB < pB_end ; pB++)
528                     {
529                         int64_t i = Bi [pB] ;
530                         GB_GET_MIJ (i) ;
531                         if (mij)
532                         {
533                             // find i in A(:,j)
534                             int64_t pright = pA_end - 1 ;
535                             bool found ;
536                             GB_BINARY_SEARCH (i, Ai, pA, pright, found) ;
537                             if (found)
538                             {
539                                 // C (i,j) = A (i,j) .* B (i,j)
540                                 #if defined ( GB_PHASE_1_OF_2 )
541                                 cjnz++ ;
542                                 #else
543                                 ASSERT (pC < pC_end) ;
544                                 Ci [pC] = i ;
545                                 GB_GETA (aij, Ax, pA) ;
546                                 GB_GETB (bij, Bx, pB) ;
547                                 GB_BINOP (GB_CX (pC), aij, bij, i, j) ;
548                                 pC++ ;
549                                 #endif
550                             }
551                         }
552                     }
553 
554                     #if defined ( GB_PHASE_2_OF_2 )
555                     ASSERT (pC == pC_end) ;
556                     #endif
557 
558                 }
559                 else if (bjnz > 32 * ajnz)
560                 {
561 
562                     //----------------------------------------------------------
563                     // B(:,j) much denser than A(:,j), M bitmap/full
564                     //----------------------------------------------------------
565 
566                     for ( ; pA < pA_end ; pA++)
567                     {
568                         int64_t i = Ai [pA] ;
569                         GB_GET_MIJ (i) ;
570                         if (mij)
571                         {
572 
573                             // find i in B(:,j)
574                             int64_t pright = pB_end - 1 ;
575                             bool found ;
576                             GB_BINARY_SEARCH (i, Bi, pB, pright, found) ;
577                             if (found)
578                             {
579                                 // C (i,j) = A (i,j) .* B (i,j)
580                                 #if defined ( GB_PHASE_1_OF_2 )
581                                 cjnz++ ;
582                                 #else
583                                 ASSERT (pC < pC_end) ;
584                                 Ci [pC] = i ;
585                                 GB_GETA (aij, Ax, pA) ;
586                                 GB_GETB (bij, Bx, pB) ;
587                                 GB_BINOP (GB_CX (pC), aij, bij, i, j) ;
588                                 pC++ ;
589                                 #endif
590                             }
591                         }
592                     }
593 
594                     #if defined ( GB_PHASE_2_OF_2 )
595                     ASSERT (pC == pC_end) ;
596                     #endif
597 
598                 }
599                 else
600                 {
601 
602                     //----------------------------------------------------------
603                     // A(:,j) and B(:,j) about the same, M bitmap/full
604                     //----------------------------------------------------------
605 
606                     // linear-time scan of A(:,j) and B(:,j)
607 
608                     while (pA < pA_end && pB < pB_end)
609                     {
610                         int64_t iA = Ai [pA] ;
611                         int64_t iB = Bi [pB] ;
612                         if (iA < iB)
613                         {
614                             // A(i,j) exists but not B(i,j)
615                             pA++ ;
616                         }
617                         else if (iB < iA)
618                         {
619                             // B(i,j) exists but not A(i,j)
620                             pB++ ;
621                         }
622                         else
623                         {
624                             // both A(i,j) and B(i,j) exist
625                             int64_t i = iA ;
626                             GB_GET_MIJ (i) ;
627                             if (mij)
628                             {
629                                 // C (i,j) = A (i,j) .* B (i,j)
630                                 #if defined ( GB_PHASE_1_OF_2 )
631                                 cjnz++ ;
632                                 #else
633                                 ASSERT (pC < pC_end) ;
634                                 Ci [pC] = i ;
635                                 GB_GETA (aij, Ax, pA) ;
636                                 GB_GETB (bij, Bx, pB) ;
637                                 GB_BINOP (GB_CX (pC), aij, bij, iB, j) ;
638                                 pC++ ;
639                                 #endif
640                             }
641                             pA++ ;
642                             pB++ ;
643                         }
644                     }
645 
646                     #if defined ( GB_PHASE_2_OF_2 )
647                     ASSERT (pC == pC_end) ;
648                     #endif
649                 }
650             }
651 
652             //------------------------------------------------------------------
653             // final count of nnz (C (:,j))
654             //------------------------------------------------------------------
655 
656             #if defined ( GB_PHASE_1_OF_2 )
657             if (fine_task)
658             {
659                 TaskList [taskid].pC = cjnz ;
660             }
661             else
662             {
663                 Cp [k] = cjnz ;
664             }
665             #endif
666         }
667     }
668 }
669 
670