//------------------------------------------------------------------------------
// GB_subassign_emult_slice: slice the entries and vectors for GB_subassign_08n
//------------------------------------------------------------------------------

// SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//------------------------------------------------------------------------------

// Constructs a set of tasks to compute C for GB_subassign_08n, based on
// slicing two input matrices (A and M).  Fine tasks must also find their
// location in their vector C(:,jC).

// This method is used only by GB_subassign_08n.  New zombies cannot be
// created, since no entries are deleted.  Old zombies can be brought back to
// life, however.

//      =====================       ==============
//      M   cmp rpl acc A   S       method: action
//      =====================       ==============
//      M   -   -   +   A   -       08n:  C(I,J)<M> += A, no S

// C, M, A: not bitmap.  C can be full.

// If C is bitmap, then GB_bitmap_assign_M_accum is used instead.
// If M or A are bitmap, but C is sparse or hyper, then Method 08s is used
// instead (which handles both M and A as bitmap).  As a result, this method
// does not need to consider the bitmap case for C, M, or A.

#include "GB_subassign_methods.h"
#include "GB_emult.h"
// Npending is set to NULL by the GB_EMPTY_TASKLIST macro, but unused here.
#include "GB_unused.h"
34
GrB_Info GB_subassign_emult_slice
(
    // output:
    GB_task_struct **p_TaskList,    // array of structs, of size max_ntasks
    size_t *p_TaskList_size,        // size of TaskList
    int *p_ntasks,                  // # of tasks constructed
    int *p_nthreads,                // # of threads to use
    int64_t *p_Znvec,               // # of vectors to compute in Z
    const int64_t *restrict *Zh_handle, // Zh_shallow is A->h, M->h, or NULL
    int64_t *restrict *Z_to_A_handle,   // Z_to_A: size Znvec, or NULL
    size_t *Z_to_A_size_handle,         // allocated size of Z_to_A
    int64_t *restrict *Z_to_M_handle,   // Z_to_M: size Znvec, or NULL
    size_t *Z_to_M_size_handle,         // allocated size of Z_to_M
    // input:
    const GrB_Matrix C,             // output matrix C
    const GrB_Index *I,             // row index list for C(I,J)
    const int64_t nI,               // length of I
    const int Ikind,                // kind of I (list, all, range, or stride)
    const int64_t Icolon [3],       // begin:inc:end when I is a colon notation
    const GrB_Index *J,             // column index list for C(I,J)
    const int64_t nJ,               // length of J
    const int Jkind,                // kind of J
    const int64_t Jcolon [3],       // begin:inc:end when J is a colon notation
    const GrB_Matrix A,             // matrix to slice
    const GrB_Matrix M,             // matrix to slice
    GB_Context Context
)
{

    //--------------------------------------------------------------------------
    // check inputs
    //--------------------------------------------------------------------------

    ASSERT (!GB_IS_BITMAP (C)) ;
    ASSERT (!GB_IS_BITMAP (M)) ;    // Method 08n is not used for M bitmap
    ASSERT (!GB_IS_BITMAP (A)) ;    // Method 08n is not used for A bitmap

    // GB_EMPTY_TASKLIST declares the local workspace used below (info,
    // TaskList, TaskList_size, ntasks, nthreads, taskid, Zh_size, Z_to_A,
    // Z_to_A_size, Z_to_M, Z_to_M_size, ...) and the GB_FREE_ALL error
    // handling used by GB_OK.  Npending is also declared (as NULL) but is
    // unused here; see GB_unused.h.
    GB_EMPTY_TASKLIST
    ASSERT (p_TaskList != NULL) ;
    ASSERT (p_ntasks != NULL) ;
    ASSERT (p_nthreads != NULL) ;
    ASSERT_MATRIX_OK (C, "C for emult_slice", GB0) ;
    ASSERT_MATRIX_OK (M, "M for emult_slice", GB0) ;
    ASSERT_MATRIX_OK (A, "A for emult_slice", GB0) ;

    ASSERT (!GB_JUMBLED (C)) ;
    ASSERT (!GB_JUMBLED (M)) ;
    ASSERT (!GB_JUMBLED (A)) ;

    ASSERT (p_Znvec != NULL) ;
    ASSERT (Zh_handle != NULL) ;
    ASSERT (Z_to_A_handle != NULL) ;
    ASSERT (Z_to_M_handle != NULL) ;

    // initialize all outputs so they are well-defined on any error return
    (*p_TaskList  ) = NULL ;
    (*p_TaskList_size) = 0 ;
    (*p_ntasks    ) = 0 ;
    (*p_nthreads  ) = 1 ;

    (*p_Znvec      ) = 0 ;
    (*Zh_handle    ) = NULL ;
    (*Z_to_A_handle) = NULL ;
    (*Z_to_M_handle) = NULL ;

    //--------------------------------------------------------------------------
    // get inputs
    //--------------------------------------------------------------------------

    int64_t *restrict Ci = C->i ;
    int64_t nzombies = C->nzombies ;
    const int64_t Cnvec = C->nvec ;
    const int64_t Cvlen = C->vlen ;
    const int64_t *restrict Ch = C->h ;
    const int64_t *restrict Cp = C->p ;
    const bool C_is_hyper = (Ch != NULL) ;

    const int64_t *restrict Mp = M->p ;
    const int64_t *restrict Mh = M->h ;
    const int64_t *restrict Mi = M->i ;
    const int64_t Mvlen = M->vlen ;

    const int64_t *restrict Ap = A->p ;
    const int64_t *restrict Ah = A->h ;
    const int64_t *restrict Ai = A->i ;
    const int64_t Avlen = A->vlen ;

    //--------------------------------------------------------------------------
    // construct fine/coarse tasks for eWise multiply of A.*M
    //--------------------------------------------------------------------------

    // Compare with the first part of GB_emult for A.*B.  Note that M in this
    // function takes the place of B in GB_emult.

    // phase0 computes the vectors of Z = A.*M: the (shallow) hyperlist
    // Zh_shallow, and the Z_to_A / Z_to_M maps from vectors of Z to the
    // corresponding vectors of A and M (or NULL if not needed).
    int64_t Znvec ;
    const int64_t *restrict Zh_shallow = NULL ;
    int Z_sparsity = GxB_SPARSE ;
    GB_OK (GB_emult_01_phase0 (&Znvec, &Zh_shallow, &Zh_size, NULL, NULL,
        &Z_to_A, &Z_to_A_size, &Z_to_M, &Z_to_M_size, &Z_sparsity, NULL, A, M,
        Context)) ;

    // Z is still sparse or hypersparse, not bitmap or full
    ASSERT (Z_sparsity == GxB_SPARSE || Z_sparsity == GxB_HYPERSPARSE) ;

    // slice the work into coarse tasks (whole vectors) and fine tasks
    // (portions of a single vector)
    GB_OK (GB_ewise_slice (
        &TaskList, &TaskList_size, &ntasks, &nthreads,
        Znvec, Zh_shallow, NULL, Z_to_A, Z_to_M, false,
        NULL, A, M, Context)) ;

    //--------------------------------------------------------------------------
    // slice C(:,jC) for each fine task
    //--------------------------------------------------------------------------

    // Each fine task that operates on C(:,jC) must be limited to just its
    // portion of C(:,jC).  Otherwise, one task could bring a zombie to life,
    // at the same time another is attempting to do a binary search on that
    // entry.  This is safe as long as a 64-bit integer read/write is always
    // atomic, but there is no guarantee that this is true for all
    // architectures.  Note that GB_subassign_08n cannot create new zombies.

    // This work could be done in parallel, but each task does at most 2 binary
    // searches.  The total work for all the binary searches will likely be
    // small.  So do the work with a single thread.

    for (taskid = 0 ; taskid < ntasks ; taskid++)
    {

        //----------------------------------------------------------------------
        // get the task descriptor
        //----------------------------------------------------------------------

        // defines kfirst/klast and fine_task for TaskList [taskid]
        GB_GET_TASK_DESCRIPTOR ;

        //----------------------------------------------------------------------
        // do the binary search for this fine task
        //----------------------------------------------------------------------

        if (fine_task)
        {

            //------------------------------------------------------------------
            // get A(:,j) and M(:,j)
            //------------------------------------------------------------------

            // a fine task operates on a single vector j of Z
            int64_t k = kfirst ;
            int64_t j = GBH (Zh_shallow, k) ;
            GB_GET_EVEC (pA, pA_end, pA, pA_end, Ap, Ah, j, k, Z_to_A, Avlen) ;
            GB_GET_EVEC (pM, pM_end, pB, pB_end, Mp, Mh, j, k, Z_to_M, Mvlen) ;

            //------------------------------------------------------------------
            // quick checks for empty intersection of A(:,j) and M(:,j)
            //------------------------------------------------------------------

            // if either vector is empty, or their index ranges do not
            // overlap, this fine task has nothing to do in C(:,jC)
            int64_t ajnz = pA_end - pA ;
            int64_t mjnz = pM_end - pM ;
            if (ajnz == 0 || mjnz == 0) continue ;
            int64_t iA_first = GBI (Ai, pA, Avlen) ;
            int64_t iA_last  = GBI (Ai, pA_end-1, Avlen) ;
            int64_t iM_first = GBI (Mi, pM, Mvlen) ;
            int64_t iM_last  = GBI (Mi, pM_end-1, Mvlen) ;
            if (iA_last < iM_first || iM_last < iA_first) continue ;

            //------------------------------------------------------------------
            // get jC, the corresponding vector of C
            //------------------------------------------------------------------

            // defines jC = J [j] (or via Jkind/Jcolon) and finds
            // pC_start:pC_end-1, the range of C(:,jC) in Ci,Cx
            int64_t GB_LOOKUP_jC ;
            bool cjdense = (pC_end - pC_start == Cvlen) ;

            //------------------------------------------------------------------
            // slice C(:,jC) for this fine task
            //------------------------------------------------------------------

            if (cjdense)
            {
                // do not slice C(:,jC) if it is dense; the entry C(iC,jC) can
                // be located by direct lookup, with no binary search needed
                TaskList [taskid].pC = pC_start ;
                TaskList [taskid].pC_end = pC_end ;
            }
            else
            {
                // find where this task starts and ends in C(:,jC): map the
                // first and last indices of this task's portion of A(:,j) and
                // M(:,j) into indices of C via the I list
                int64_t iA_start = GB_IMIN (iA_first, iM_first) ;
                int64_t iC1 = GB_ijlist (I, iA_start, Ikind, Icolon) ;
                int64_t iA_end = GB_IMAX (iA_last, iM_last) ;
                int64_t iC2 = GB_ijlist (I, iA_end, Ikind, Icolon) ;

                // If I is an explicit list, it must be already sorted
                // in ascending order, and thus iC1 <= iC2.  If I is
                // GB_ALL or GB_STRIDE with inc >= 0, then iC1 < iC2.
                // But if inc < 0, then iC1 > iC2.  iC_start and iC_end
                // are used for a binary search bracket, so iC_start <=
                // iC_end must hold.
                int64_t iC_start = GB_IMIN (iC1, iC2) ;
                int64_t iC_end   = GB_IMAX (iC1, iC2) ;

                // this task works on Ci,Cx [pC:pC_end-1]: find the position
                // of iC_start (zombie-aware, since Ci may contain zombies)
                int64_t pleft = pC_start ;
                int64_t pright = pC_end - 1 ;
                bool found, is_zombie ;
                GB_SPLIT_BINARY_SEARCH_ZOMBIE (iC_start, Ci, pleft, pright,
                    found, nzombies, is_zombie) ;
                TaskList [taskid].pC = pleft ;

                // find the position just past iC_end; if iC_end itself is
                // present it is included in this task's range
                pleft = pC_start ;
                pright = pC_end - 1 ;
                GB_SPLIT_BINARY_SEARCH_ZOMBIE (iC_end, Ci, pleft, pright,
                    found, nzombies, is_zombie) ;
                TaskList [taskid].pC_end = (found) ? (pleft+1) : pleft ;
            }

            ASSERT (TaskList [taskid].pC <= TaskList [taskid].pC_end) ;
        }
    }

    //--------------------------------------------------------------------------
    // return result
    //--------------------------------------------------------------------------

    // ownership of TaskList, Z_to_A, and Z_to_M transfers to the caller;
    // Zh_shallow remains shallow (A->h, M->h, or NULL) and is not owned
    (*p_TaskList  ) = TaskList ;
    (*p_TaskList_size) = TaskList_size ;
    (*p_ntasks    ) = ntasks ;
    (*p_nthreads  ) = nthreads ;

    (*p_Znvec      ) = Znvec ;
    (*Zh_handle    ) = Zh_shallow ;
    (*Z_to_A_handle) = Z_to_A ; (*Z_to_A_size_handle) = Z_to_A_size ;
    (*Z_to_M_handle) = Z_to_M ; (*Z_to_M_size_handle) = Z_to_M_size ;

    return (GrB_SUCCESS) ;
}
265
266