//------------------------------------------------------------------------------
// GB_subassign_emult_slice: slice the entries and vectors for GB_subassign_08n
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// Constructs a set of tasks to compute C for GB_subassign_08n, based on
// slicing two input matrices (A and M).  Fine tasks must also find their
// location in their vector C(:,jC).

// This method is used only by GB_subassign_08n.  New zombies cannot be
// created, since no entries are deleted.  Old zombies can be brought back to
// life, however.

        //  =====================       ==============
        //  M   cmp rpl acc A   S       method: action
        //  =====================       ==============
        //  M   -   -   +   A   -       08n:  C(I,J)<M> += A, no S

// C, M, A: not bitmap.  C can be full.

// If C is bitmap, then GB_bitmap_assign_M_accum is used instead.
// If M or A are bitmap, but C is sparse or hyper, then Method 08s is used
// instead (which handles both M and A as bitmap).  As a result, this method
// does not need to consider the bitmap case for C, M, or A.

#include "GB_subassign_methods.h"
#include "GB_emult.h"
// Npending is set to NULL by the GB_EMPTY_TASKLIST macro, but unused here.
#include "GB_unused.h"

GB_subassign_emult_slice(GB_task_struct ** p_TaskList,size_t * p_TaskList_size,int * p_ntasks,int * p_nthreads,int64_t * p_Znvec,const int64_t * restrict * Zh_handle,int64_t * restrict * Z_to_A_handle,size_t * Z_to_A_size_handle,int64_t * restrict * Z_to_M_handle,size_t * Z_to_M_size_handle,const GrB_Matrix C,const GrB_Index * I,const int64_t nI,const int Ikind,const int64_t Icolon[3],const GrB_Index * J,const int64_t nJ,const int Jkind,const int64_t Jcolon[3],const GrB_Matrix A,const GrB_Matrix M,GB_Context Context)35 GrB_Info GB_subassign_emult_slice
36 (
37     // output:
38     GB_task_struct **p_TaskList,    // array of structs, of size max_ntasks
39     size_t *p_TaskList_size,        // size of TaskList
40     int *p_ntasks,                  // # of tasks constructed
41     int *p_nthreads,                // # of threads to use
42     int64_t *p_Znvec,               // # of vectors to compute in Z
43     const int64_t *restrict *Zh_handle,  // Zh_shallow is A->h, M->h, or NULL
44     int64_t *restrict *Z_to_A_handle,    // Z_to_A: size Znvec, or NULL
45     size_t *Z_to_A_size_handle,
46     int64_t *restrict *Z_to_M_handle,    // Z_to_M: size Znvec, or NULL
47     size_t *Z_to_M_size_handle,
48     // input:
49     const GrB_Matrix C,             // output matrix C
50     const GrB_Index *I,
51     const int64_t nI,
52     const int Ikind,
53     const int64_t Icolon [3],
54     const GrB_Index *J,
55     const int64_t nJ,
56     const int Jkind,
57     const int64_t Jcolon [3],
58     const GrB_Matrix A,             // matrix to slice
59     const GrB_Matrix M,             // matrix to slice
60     GB_Context Context
61 )
62 {
63 
64     //--------------------------------------------------------------------------
65     // check inputs
66     //--------------------------------------------------------------------------
67 
68     ASSERT (!GB_IS_BITMAP (C)) ;
69     ASSERT (!GB_IS_BITMAP (M)) ;    // Method 08n is not used for M bitmap
70     ASSERT (!GB_IS_BITMAP (A)) ;    // Method 08n is not used for A bitmap
71 
72     GB_EMPTY_TASKLIST
73     ASSERT (p_TaskList != NULL) ;
74     ASSERT (p_ntasks != NULL) ;
75     ASSERT (p_nthreads != NULL) ;
76     ASSERT_MATRIX_OK (C, "C for emult_slice", GB0) ;
77     ASSERT_MATRIX_OK (M, "M for emult_slice", GB0) ;
78     ASSERT_MATRIX_OK (A, "A for emult_slice", GB0) ;
79 
80     ASSERT (!GB_JUMBLED (C)) ;
81     ASSERT (!GB_JUMBLED (M)) ;
82     ASSERT (!GB_JUMBLED (A)) ;
83 
84     ASSERT (p_Znvec != NULL) ;
85     ASSERT (Zh_handle != NULL) ;
86     ASSERT (Z_to_A_handle != NULL) ;
87     ASSERT (Z_to_M_handle != NULL) ;
88 
89     (*p_TaskList  ) = NULL ;
90     (*p_TaskList_size) = 0 ;
91     (*p_ntasks    ) = 0 ;
92     (*p_nthreads  ) = 1 ;
93 
94     (*p_Znvec      ) = 0 ;
95     (*Zh_handle    ) = NULL ;
96     (*Z_to_A_handle) = NULL ;
97     (*Z_to_M_handle) = NULL ;
98 
99     //--------------------------------------------------------------------------
100     // get inputs
101     //--------------------------------------------------------------------------
102 
103     int64_t *restrict Ci = C->i ;
104     int64_t nzombies = C->nzombies ;
105     const int64_t Cnvec = C->nvec ;
106     const int64_t Cvlen = C->vlen ;
107     const int64_t *restrict Ch = C->h ;
108     const int64_t *restrict Cp = C->p ;
109     const bool C_is_hyper = (Ch != NULL) ;
110 
111     const int64_t *restrict Mp = M->p ;
112     const int64_t *restrict Mh = M->h ;
113     const int64_t *restrict Mi = M->i ;
114     const int64_t Mvlen = M->vlen ;
115 
116     const int64_t *restrict Ap = A->p ;
117     const int64_t *restrict Ah = A->h ;
118     const int64_t *restrict Ai = A->i ;
119     const int64_t Avlen = A->vlen ;
120 
121     //--------------------------------------------------------------------------
122     // construct fine/coarse tasks for eWise multiply of A.*M
123     //--------------------------------------------------------------------------
124 
125     // Compare with the first part of GB_emult for A.*B.  Note that M in this
126     // function takes the place of B in GB_emult.
127 
128     int64_t Znvec ;
129     const int64_t *restrict Zh_shallow = NULL ;
130     int Z_sparsity = GxB_SPARSE ;
131     GB_OK (GB_emult_01_phase0 (&Znvec, &Zh_shallow, &Zh_size, NULL, NULL,
132         &Z_to_A, &Z_to_A_size, &Z_to_M, &Z_to_M_size, &Z_sparsity, NULL, A, M,
133         Context)) ;
134 
135     // Z is still sparse or hypersparse, not bitmap or full
136     ASSERT (Z_sparsity == GxB_SPARSE || Z_sparsity == GxB_HYPERSPARSE) ;
137 
138     GB_OK (GB_ewise_slice (
139         &TaskList, &TaskList_size, &ntasks, &nthreads,
140         Znvec, Zh_shallow, NULL, Z_to_A, Z_to_M, false,
141         NULL, A, M, Context)) ;
142 
143     //--------------------------------------------------------------------------
144     // slice C(:,jC) for each fine task
145     //--------------------------------------------------------------------------
146 
147     // Each fine task that operates on C(:,jC) must be limited to just its
148     // portion of C(:,jC).  Otherwise, one task could bring a zombie to life,
149     // at the same time another is attempting to do a binary search on that
150     // entry.  This is safe as long as a 64-bit integer read/write is always
151     // atomic, but there is no gaurantee that this is true for all
152     // architectures.  Note that GB_subassign_08n cannot create new zombies.
153 
154     // This work could be done in parallel, but each task does at most 2 binary
155     // searches.  The total work for all the binary searches will likely be
156     // small.  So do the work with a single thread.
157 
158     for (taskid = 0 ; taskid < ntasks ; taskid++)
159     {
160 
161         //----------------------------------------------------------------------
162         // get the task descriptor
163         //----------------------------------------------------------------------
164 
165         GB_GET_TASK_DESCRIPTOR ;
166 
167         //----------------------------------------------------------------------
168         // do the binary search for this fine task
169         //----------------------------------------------------------------------
170 
171         if (fine_task)
172         {
173 
174             //------------------------------------------------------------------
175             // get A(:,j) and M(:,j)
176             //------------------------------------------------------------------
177 
178             int64_t k = kfirst ;
179             int64_t j = GBH (Zh_shallow, k) ;
180             GB_GET_EVEC (pA, pA_end, pA, pA_end, Ap, Ah, j, k, Z_to_A, Avlen) ;
181             GB_GET_EVEC (pM, pM_end, pB, pB_end, Mp, Mh, j, k, Z_to_M, Mvlen) ;
182 
183             //------------------------------------------------------------------
184             // quick checks for empty intersection of A(:,j) and M(:,j)
185             //------------------------------------------------------------------
186 
187             int64_t ajnz = pA_end - pA ;
188             int64_t mjnz = pM_end - pM ;
189             if (ajnz == 0 || mjnz == 0) continue ;
190             int64_t iA_first = GBI (Ai, pA, Avlen) ;
191             int64_t iA_last  = GBI (Ai, pA_end-1, Avlen) ;
192             int64_t iM_first = GBI (Mi, pM, Mvlen) ;
193             int64_t iM_last  = GBI (Mi, pM_end-1, Mvlen) ;
194             if (iA_last < iM_first || iM_last < iA_first) continue ;
195 
196             //------------------------------------------------------------------
197             // get jC, the corresponding vector of C
198             //------------------------------------------------------------------
199 
200             int64_t GB_LOOKUP_jC ;
201             bool cjdense = (pC_end - pC_start == Cvlen) ;
202 
203             //------------------------------------------------------------------
204             // slice C(:,jC) for this fine task
205             //------------------------------------------------------------------
206 
207             if (cjdense)
208             {
209                 // do not slice C(:,jC) if it is dense
210                 TaskList [taskid].pC     = pC_start ;
211                 TaskList [taskid].pC_end = pC_end ;
212             }
213             else
214             {
215                 // find where this task starts and ends in C(:,jC)
216                 int64_t iA_start = GB_IMIN (iA_first, iM_first) ;
217                 int64_t iC1 = GB_ijlist (I, iA_start, Ikind, Icolon) ;
218                 int64_t iA_end = GB_IMAX (iA_last, iM_last) ;
219                 int64_t iC2 = GB_ijlist (I, iA_end, Ikind, Icolon) ;
220 
221                 // If I is an explicit list, it must be already sorted
222                 // in ascending order, and thus iC1 <= iC2.  If I is
223                 // GB_ALL or GB_STRIDE with inc >= 0, then iC1 < iC2.
224                 // But if inc < 0, then iC1 > iC2.  iC_start and iC_end
225                 // are used for a binary search bracket, so iC_start <=
226                 // iC_end must hold.
227                 int64_t iC_start = GB_IMIN (iC1, iC2) ;
228                 int64_t iC_end   = GB_IMAX (iC1, iC2) ;
229 
230                 // this task works on Ci,Cx [pC:pC_end-1]
231                 int64_t pleft = pC_start ;
232                 int64_t pright = pC_end - 1 ;
233                 bool found, is_zombie ;
234                 GB_SPLIT_BINARY_SEARCH_ZOMBIE (iC_start, Ci, pleft, pright,
235                     found, nzombies, is_zombie) ;
236                 TaskList [taskid].pC = pleft ;
237 
238                 pleft = pC_start ;
239                 pright = pC_end - 1 ;
240                 GB_SPLIT_BINARY_SEARCH_ZOMBIE (iC_end, Ci, pleft, pright,
241                     found, nzombies, is_zombie) ;
242                 TaskList [taskid].pC_end = (found) ? (pleft+1) : pleft ;
243             }
244 
245             ASSERT (TaskList [taskid].pC <= TaskList [taskid].pC_end) ;
246         }
247     }
248 
249     //--------------------------------------------------------------------------
250     // return result
251     //--------------------------------------------------------------------------
252 
253     (*p_TaskList  ) = TaskList ;
254     (*p_TaskList_size) = TaskList_size ;
255     (*p_ntasks    ) = ntasks ;
256     (*p_nthreads  ) = nthreads ;
257 
258     (*p_Znvec      ) = Znvec ;
259     (*Zh_handle    ) = Zh_shallow ;
260     (*Z_to_A_handle) = Z_to_A ; (*Z_to_A_size_handle) = Z_to_A_size ;
261     (*Z_to_M_handle) = Z_to_M ; (*Z_to_M_size_handle) = Z_to_M_size ;
262 
263     return (GrB_SUCCESS) ;
264 }
265 
266