1 /* ========================================================================== */
2 /* === ssmult_template.c ==================================================== */
3 /* ========================================================================== */
4 
5 /* C = A*B, where A and B are sparse.  The column pointers for C (the Cp array)
6  * have already been computed.  entries are dropped.  This code fragment is
7  * #include'd into ssmult.c four times, with all four combinations of ACOMPLEX
8  * (defined or not) and BCOMPLEX (defined or not).
9  *
10  * By default, C is returned with sorted column indices, and with explicit
11  * zero entries dropped.  If C is complex with an all-zero imaginary part, then
12  * the imaginary part is freed and C becomes real.  Thus, C is a pure MATLAB
13  * sparse matrix.
14  *
15  * If UNSORTED is defined (-DUNSORTED), then the nonzero pattern of C is
16  * returned with unsorted column indices.  This is much faster than returning a
17  * pure MATLAB sparse matrix, but the result must eventually be sorted prior to
18  * returning to MATLAB.
19  *
20  * If the compiler bug discussed below does not affect you, then uncomment the
21  * following line, or compile with code with -DNO_GCC_BUG.
22 
23 #define NO_GCC_BUG
24 
25  * The gcc bug occurs when cij underflows to zero:
26  *
27  *      cij = aik * bkj ;
28  *      if (cij == 0)
29  *      {
30  *          drop this entry
31  *      }
32  *
33  * If cij underflows, cij is zero but the above test is incorrectly FALSE with
34  * gcc -O, using gcc version 4.1.0 on an Intel Pentium.  The bug does not appear
35  * on an AMD Opteron with the same compiler.  The solution is to store cij to
36  * memory first, and then to read it back in and test it, which is slower.
37  */
38 
39 /* -------------------------------------------------------------------------- */
40 /* MULT: multiply (or multiply and accumulate, depending on op) */
41 /* -------------------------------------------------------------------------- */
42 
43 /* op can be "=" or "+=" */
44 
45 #ifdef ACOMPLEX
46 #ifdef BCOMPLEX
47 #define MULT(x,z,op) \
48     azik = (ac ? (-Az [pa]) : (Az [pa])) ; \
49     x op Ax [pa] * bkj - azik * bzkj ; \
50     z op azik * bkj + Ax [pa] * bzkj ;
51 #else
52 #define MULT(x,z,op) \
53     azik = (ac ? (-Az [pa]) : (Az [pa])) ; \
54     x op Ax [pa] * bkj ; \
55     z op azik * bkj ;
56 #endif
57 #else
58 #ifdef BCOMPLEX
59 #define MULT(x,z,op) \
60     x op Ax [pa] * bkj ; \
61     z op Ax [pa] * bzkj ;
62 #else
63 #define MULT(x,z,op) \
64     x op Ax [pa] * bkj ;
65 #endif
66 #endif
67 
68 /* -------------------------------------------------------------------------- */
69 /* ASSIGN_BKJ: copy B(k,j) into a local scalar */
70 /* -------------------------------------------------------------------------- */
71 
72 #ifdef BCOMPLEX
73 #define ASSIGN_BKJ \
74     bkj = Bx [pb] ; \
75     bzkj = (bc ? (-Bz [pb]) : (Bz [pb])) ;
76 #else
77 #define ASSIGN_BKJ \
78     bkj = Bx [pb] ;
79 #endif
80 
81 /* -------------------------------------------------------------------------- */
82 /* DROP_CHECK: check if an entry must be dropped */
83 /* -------------------------------------------------------------------------- */
84 
85 #if defined (ACOMPLEX) || defined (BCOMPLEX)
86 #define DROP_CHECK(x,z) \
87     if (x == 0 && z == 0) drop = 1 ; \
88     if (z != 0) zallzero = 0 ;
89 #else
90 #define DROP_CHECK(x,z) if (x == 0) drop = 1 ;
91 #endif
92 
93 /* -------------------------------------------------------------------------- */
94 /* sparse matrix multiply template */
95 /* -------------------------------------------------------------------------- */
96 
97 {
98 
99 #ifdef ACOMPLEX
100     double azik ;
101 #endif
102 
103     /* ---------------------------------------------------------------------- */
104     /* initialize drop tests */
105     /* ---------------------------------------------------------------------- */
106 
107     drop = 0 ;                  /* true if any entry in C is zero */
108     zallzero = 1 ;              /* true if Cz is all zero */
109 
110     /* ---------------------------------------------------------------------- */
111     /* quick check if A is diagonal, or a permutation matrix */
112     /* ---------------------------------------------------------------------- */
113 
114     if (Anrow == Ancol && Ap [Ancol] == Ancol)
115     {
116         /* A is square, with n == nnz (A); check the pattern */
117         A_is_permutation = 1 ;
118         A_is_diagonal = 1 ;
119         for (j = 0 ; j < Ancol ; j++)
120         {
121             if (Ap [j] != j)
122             {
123                 /* A has a column with no entries, or more than 1 entry */
124                 A_is_permutation = 0 ;
125                 A_is_diagonal = 0 ;
126                 break ;
127             }
128         }
129         mark-- ;                        /* Flag [0..n-1] != mark is now true */
130         for (j = 0 ; j < Ancol && (A_is_permutation || A_is_diagonal) ; j++)
131         {
132             /* A has one entry in each column, so j == Ap [j] */
133             i = Ai [j] ;
134             if (i != j)
135             {
136                 /* A is not diagonal, but might still be a permutation */
137                 A_is_diagonal = 0 ;
138             }
139             if (Flag [i] == mark)
140             {
141                 /* row i appears twice; A is neither permutation nor diagonal */
142                 A_is_permutation = 0 ;
143                 A_is_diagonal = 0 ;
144             }
145             /* mark row i, so we know if we see it again */
146             Flag [i] = mark ;
147         }
148     }
149     else
150     {
151         /* A is not square, or nnz (A) is not equal to n */
152         A_is_permutation = 0 ;
153         A_is_diagonal = 0 ;
154     }
155 
156     /* ---------------------------------------------------------------------- */
157     /* allocate workspace */
158     /* ---------------------------------------------------------------------- */
159 
160 #ifndef UNSORTED
161     W = NULL ;
162     if (!A_is_diagonal)
163     {
164 #if defined (ACOMPLEX) || defined (BCOMPLEX)
165         W = mxMalloc (Anrow * 2 * sizeof (double)) ;
166         Wz = W + Anrow ;
167 #else
168         W = mxMalloc (Anrow * sizeof (double)) ;
169 #endif
170     }
171 #endif
172 
173     /* ---------------------------------------------------------------------- */
174     /* compute C one column at a time */
175     /* ---------------------------------------------------------------------- */
176 
177     if (A_is_diagonal)
178     {
179 
180         /* ------------------------------------------------------------------ */
181         /* C = A*B where A is diagonal */
182         /* ------------------------------------------------------------------ */
183 
184         pb = 0 ;
185         for (j = 0 ; j < Bncol ; j++)
186         {
187             pcstart = pb ;
188             pbend = Bp [j+1] ;  /* column B is in Bi,Bx,Bz [pb ... pbend+1] */
189             for ( ; pb < pbend ; pb++)
190             {
191                 k = Bi [pb] ;                   /* nonzero entry B(k,j) */
192                 ASSIGN_BKJ ;
193                 Ci [pb] = k ;
194                 pa = k ;
195                 MULT (Cx [pb], Cz [pb], =) ;    /* C(k,j) = A(k,k)*B(k,j) */
196 #ifdef NO_GCC_BUG
197                 DROP_CHECK (Cx [pb], Cz [pb]) ; /* check if C(k,j) == 0 */
198 #endif
199             }
200 
201 #ifndef NO_GCC_BUG
202             for (pc = pcstart ; pc < pbend ; pc++)
203             {
204                 DROP_CHECK (Cx [pc], Cz [pc]) ;   /* check if C(k,j) == 0 */
205             }
206 #endif
207         }
208 
209     }
210     else
211     {
212 
213         /* ------------------------------------------------------------------ */
214         /* C = A*B, general case, or A permutation */
215         /* ------------------------------------------------------------------ */
216 
217         pb = 0 ;
218         cnz = 0 ;
219         for (j = 0 ; j < Bncol ; j++)
220         {
221 
222             /* -------------------------------------------------------------- */
223             /* compute jth column of C: C(:,j) = A * B(:,j) */
224             /* -------------------------------------------------------------- */
225 
226             pbend = Bp [j+1] ;  /* column B is in Bi,Bx,Bz [pb ... pbend+1] */
227             pcstart = cnz ;     /* start of column j in C */
228             blen = pbend - pb ; /* number of entries in B */
229             needs_sorting = 0 ; /* true if column j needs sorting */
230 
231             if (blen == 0)
232             {
233 
234                 /* ---------------------------------------------------------- */
235                 /* nothing to do, B(:,j) and C(:,j) are empty */
236                 /* ---------------------------------------------------------- */
237 
238                 continue ;
239 
240             }
241             else if (blen == 1)
242             {
243 
244                 /* ---------------------------------------------------------- */
245                 /* B(:,j) contains only one nonzero */
246                 /* ---------------------------------------------------------- */
247 
248                 /* since there is only one entry in B, just scale column A(:,k):
249                  * C(:,j) = A(:,k) * B(k,j)
250                  * C is sorted only if A is sorted on input */
251 
252                 k = Bi [pb] ;                   /* nonzero entry B(k,j) */
253                 ASSIGN_BKJ ;
254                 paend = Ap [k+1] ;
255                 for (pa = Ap [k] ; pa < paend ; pa++, cnz++)
256                 {
257                     Ci [cnz] = Ai [pa] ;            /* nonzero entry A(i,k) */
258                     MULT (Cx [cnz], Cz [cnz], =) ;  /* C(i,j) = A(i,k)*B(k,j) */
259 #ifdef NO_GCC_BUG
260                     DROP_CHECK (Cx [cnz], Cz [cnz]) ;   /* check C(i,j) == 0 */
261 #endif
262                 }
263                 pb++ ;
264 
265 #ifndef NO_GCC_BUG
266                 for (pc = pcstart ; pc < cnz ; pc++)
267                 {
268                     DROP_CHECK (Cx [pc], Cz [pc]) ;   /* check if C(i,j) == 0 */
269                 }
270 #endif
271 
272             }
273             else
274             {
275 
276                 /* ---------------------------------------------------------- */
277                 /* B(:,j) has two or more entries */
278                 /* ---------------------------------------------------------- */
279 
280                 if (A_is_permutation)
281                 {
282 
283                     /* ------------------------------------------------------ */
284                     /* A is a permutation matrix */
285                     /* ------------------------------------------------------ */
286 
287                     needs_sorting = 1 ;
288                     for ( ; pb < pbend ; pb++)
289                     {
290                         k = Bi [pb] ;           /* nonzero entry B(k,j) */
291                         ASSIGN_BKJ ;
292                         i = Ai [k] ;            /* nonzero entry A(i,k) */
293                         Ci [pb] = i ;
294                         pa = k ;
295                         /* C(i,j) = A(i,k)*B(k,j) */
296 #ifndef UNSORTED
297                         MULT (W [i], Wz [i], =) ;
298 #else
299                         MULT (Cx [pb], Cz [pb], =) ;
300 #endif
301                     }
302                     cnz = pbend ;
303 
304                 }
305                 else
306                 {
307 
308                     /* ------------------------------------------------------ */
309                     /* general case */
310                     /* ------------------------------------------------------ */
311 
312                     /* first entry in jth column of B is simpler */
313                     /* C(:,j) = A (:,k) * B (k,j) */
314                     k = Bi [pb] ;                   /* nonzero entry B(k,j) */
315                     ASSIGN_BKJ ;
316                     paend = Ap [k+1] ;
317                     for (pa = Ap [k] ; pa < paend ; pa++)
318                     {
319                         i = Ai [pa] ;               /* nonzero entry A(i,k) */
320                         Flag [i] = cnz ;
321                         Ci [cnz] = i ;              /* new entry C(i,j) */
322                         /* C(i,j) = A(i,k)*B(k,j) */
323 #ifndef UNSORTED
324                         MULT (W [i], Wz [i], =) ;
325 #else
326                         MULT (Cx [cnz], Cz [cnz], =) ;
327 #endif
328                         cnz++ ;
329                     }
330                     pb++ ;
331                     for ( ; pb < pbend ; pb++)
332                     {
333                         k = Bi [pb] ;               /* nonzero entry B(k,j) */
334                         ASSIGN_BKJ ;
335                         /* C(:,j) += A (:,k) * B (k,j) */
336                         paend = Ap [k+1] ;
337                         for (pa = Ap [k] ; pa < paend ; pa++)
338                         {
339                             i = Ai [pa] ;           /* nonzero entry A(i,k) */
340                             pc = Flag [i] ;
341                             if (pc < pcstart)
342                             {
343                                 pc = cnz++ ;
344                                 Flag [i] = pc ;
345                                 Ci [pc] = i ;           /* new entry C(i,j) */
346                                 /* C(i,j) = A(i,k)*B(k,j) */
347 #ifndef UNSORTED
348                                 MULT (W [i], Wz [i], =) ;
349                                 needs_sorting = 1 ;
350 #else
351                                 MULT (Cx [pc], Cz [pc], =) ;
352 #endif
353                             }
354                             else
355                             {
356                                 /* C(i,j) += A(i,k)*B(k,j) */
357 #ifndef UNSORTED
358                                 MULT (W [i], Wz [i], +=) ;
359 #else
360                                 MULT (Cx [pc], Cz [pc], +=) ;
361 #endif
362                             }
363                         }
364                     }
365                 }
366 
367                 /* ---------------------------------------------------------- */
368                 /* sort the pattern of C(:,j) and gather the values of C(:,j) */
369                 /* ---------------------------------------------------------- */
370 
371 #ifndef UNSORTED
372                 /* Sort the row indices in C(:,j).  Use Cx as Int workspace.
373                  * This assumes sizeof (Int) < sizeof (double). If blen <= 1,
374                  * or if subsequent entries in B(:,j) appended entries onto C,
375                  * there is no need to sort C(:,j), assuming A is sorted. */
376                 if (needs_sorting)
377                 {
378                     ssmergesort (Ci + pcstart, (Int *) (Cx + pcstart),
379                         cnz - pcstart) ;
380                 }
381                 for (pc = pcstart ; pc < cnz ; pc++)
382                 {
383 #if defined (ACOMPLEX) || defined (BCOMPLEX)
384                     i = Ci [pc] ;
385                     cij = W [i] ;                   /* get C(i,j) from W */
386                     czij = Wz [i] ;
387                     Cx [pc] = cij ;                 /* copy C(i,j) into C */
388                     Cz [pc] = czij ;
389 #else
390                     cij = W [Ci [pc]] ;             /* get C(i,j) from W */
391                     Cx [pc] = cij ;                 /* copy C(i,j) into C */
392 #endif
393                     DROP_CHECK (cij, czij) ;        /* check if C(i,j) == 0 */
394                 }
395 #else
396                 /* no need to sort, but we do need to check for drop */
397                 for (pc = pcstart ; pc < cnz ; pc++)
398                 {
399                     DROP_CHECK (Cx [pc], Cz [pc]) ; /* check if C(i,j) == 0 */
400                 }
401 #endif
402             }
403         }
404     }
405 
406     /* ---------------------------------------------------------------------- */
407     /* free workspace */
408     /* ---------------------------------------------------------------------- */
409 
410 #ifndef UNSORTED
411     mxFree (W) ;
412 #endif
413 
414 
415 }
416 
417 #undef ACOMPLEX
418 #undef BCOMPLEX
419 #undef MULT
420 #undef ASSIGN_BKJ
421 #undef DROP_CHECK
422