1function codegen_axb_method (addop, multop, add, addfunc, mult, ztype, ...
2    xytype, identity, terminal, omp_atomic, omp_microsoft_atomic)
3%CODEGEN_AXB_METHOD create a function to compute C=A*B over a semiring
4%
5% codegen_axb_method (addop, multop, add, addfunc, mult, ztype, xytype, identity, terminal, omp_atomic, omp_microsoft_atomic)
6
7% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved.
8% SPDX-License-Identifier: Apache-2.0
9
10if (isempty (mult))
11    return
12end
13
14f = fopen ('control.m4', 'w') ;
15
16is_first  = false ;
17is_second = false ;
18is_pair   = false ;
19is_positional = false ;
20switch (multop)
21    case { 'firsti', 'firsti1', 'firstj', 'firstj1', 'secondj', 'secondj1' }
22        is_positional = true ;
23    case { 'first' }
24        is_first = true ;
25    case { 'second' }
26        is_second = true ;
27    case { 'pair' }
28        is_pair = true ;
29end
30
31is_any = isequal (addop, 'any') ;
32is_max = isequal (addop, 'max') ;
33is_min = isequal (addop, 'min') ;
34is_eq  = isequal (addop, 'eq') ;
35is_any_pair = is_any && isequal (multop, 'pair') ;
36ztype_is_real = ~contains (ztype, 'FC') ;
37is_any_complex = is_any && ~ztype_is_real ;
38is_plus_pair_real = isequal (addop, 'plus') && isequal (multop, 'pair') ...
39    && ztype_is_real ;
40
41t_is_simple = isequal (multop, 'pair') || contains (multop, 'first') || contains (multop, 'second') ;
42t_is_nonnan = isequal (multop (1:2), 'is') || (multop (1) == 'l') ;
43
44switch (ztype)
45    case { 'bool' }
46        ztype_is_float = false ;
47        ztype_ignore_overflow = false ;
48        nbits = 8 ;
49        bits = '0x1L' ;
50    case { 'int8_t', 'uint8_t' }
51        ztype_is_float = false ;
52        ztype_ignore_overflow = false ;
53        nbits = 8 ;
54        bits = '0xffL' ;
55        xbits = '0xFF' ;
56    case { 'int16_t', 'uint16_t' }
57        ztype_is_float = false ;
58        ztype_ignore_overflow = false ;
59        nbits = 16 ;
60        bits = '0xffffL' ;
61        xbits = '0xFFFF' ;
62    case { 'int32_t', 'uint32_t' }
63        ztype_is_float = false ;
64        ztype_ignore_overflow = false ;
65        nbits = 32 ;
66        bits = '0xffffffffL' ;
67        xbits = '0xFFFFFFFF' ;
68    case { 'int64_t', 'uint64_t' }
69        ztype_is_float = false ;
70        ztype_ignore_overflow = true ;
71        nbits = 64 ;
72        bits = '0' ;
73        xbits = '0xFFFFFFFFFFFFFFFFL' ;
74    case { 'float' }
75        ztype_is_float = true ;
76        ztype_ignore_overflow = true ;
77        nbits = 32 ;
78        bits = '0' ;
79    case { 'double', 'GxB_FC32_t' }
80        ztype_is_float = true ;
81        ztype_ignore_overflow = true ;
82        nbits = 64 ;
83        bits = '0' ;
84    case { 'GxB_FC64_t' }
85        ztype_is_float = true ;
86        ztype_ignore_overflow = true ;
87        nbits = 128 ;
88        bits = '0' ;
89    otherwise
90        error ('unknown type') ;
91end
92
93% bits: special cases for the PAIR multiplier
94fprintf (f, 'define(`GB_ctype_bits'', `%s'')\n', bits) ;
95
96% nbits: # of bits in the type, needed for the atomic compare-exchange:
97fprintf (f, 'define(`GB_atomic_compare_exchange'', `GB_ATOMIC_COMPARE_EXCHANGE_%d'')\n', nbits) ;
98
99if (is_pair)
100    % these semirings are renamed to any_pair, and not thus created
101    if (isequal (addop, 'land') || isequal (addop, 'eq'   ) || ...
102        isequal (addop, 'lor' ) || isequal (addop, 'max'  ) || ...
103        isequal (addop, 'min' ) || isequal (addop, 'times'))
104        return
105    end
106end
107
108[fname, unsigned, bits] = codegen_type (xytype) ;
109[zname, ~, ~] = codegen_type (ztype) ;
110
111name = sprintf ('%s_%s_%s', addop, multop, fname) ;
112
113% function names
114fprintf (f, 'define(`_Adot2B'', `_Adot2B__%s'')\n', name) ;
115fprintf (f, 'define(`_Adot3B'', `_Adot3B__%s'')\n', name) ;
116fprintf (f, 'define(`_Adot4B'', `_Adot4B__%s'')\n', name) ;
117fprintf (f, 'define(`_Asaxpy3B'', `_Asaxpy3B__%s'')\n', name) ;
118fprintf (f, 'define(`_Asaxpy3B_M'', `_Asaxpy3B_M__%s'')\n', name) ;
119fprintf (f, 'define(`_Asaxpy3B_noM'', `_Asaxpy3B_noM__%s'')\n', name) ;
120fprintf (f, 'define(`_Asaxpy3B_notM'', `_Asaxpy3B_notM__%s'')\n', name) ;
121fprintf (f, 'define(`_AsaxbitB'', `_AsaxbitB__%s'')\n', name) ;
122fprintf (f, 'define(`GB_AxB_defs'', `GB_AxB_defs__%s'')\n', name) ;
123fprintf (f, 'define(`GB_AxB_defs_include'', `#include "GB_AxB_defs__%s.h"'')\n', name) ;
124
125% type of C, A, and B
126fprintf (f, 'define(`GB_ctype'', `%s'')\n', ztype) ;
127fprintf (f, 'define(`GB_atype'', `%s'')\n', xytype) ;
128fprintf (f, 'define(`GB_btype'', `%s'')\n', xytype) ;
129
130% flag if ztype can ignore overflow in some computations
131fprintf (f, 'define(`GB_ctype_ignore_overflow'', `%d'')\n', ztype_ignore_overflow) ;
132
133% simple typecast from 1 (or 2) real scalars to any other type
134switch (ztype)
135    case { 'GxB_FC32_t' }
136        fprintf (f, 'define(`GB_ctype_cast'', `GxB_CMPLXF (((float) $1), ((float) $2))'')\n') ;
137    case { 'GxB_FC64_t' }
138        fprintf (f, 'define(`GB_ctype_cast'', `GxB_CMPLX (((double) $1), ((double) $2))'')\n') ;
139    otherwise
140        fprintf (f, 'define(`GB_ctype_cast'', `((GB_ctype) $1)'')\n') ;
141end
142
143% simple typecast from 1 (or 2) real scalars to any other type
144switch (xytype)
145    case { 'GxB_FC32_t' }
146        fprintf (f, 'define(`GB_atype_cast'', `GxB_CMPLXF (((float) $1), ((float) $2))'')\n') ;
147    case { 'GxB_FC64_t' }
148        fprintf (f, 'define(`GB_atype_cast'', `GxB_CMPLX (((double) $1), ((double) $2))'')\n') ;
149    otherwise
150        fprintf (f, 'define(`GB_atype_cast'', `((GB_atype) $1)'')\n') ;
151end
152
153% identity and terminal values for the monoid
154fprintf (f, 'define(`GB_identity'', `%s'')\n', identity) ;
155
156if (is_any_pair)
157    fprintf (f, 'define(`GB_is_any_pair_semiring'', `1'')\n') ;
158else
159    fprintf (f, 'define(`GB_is_any_pair_semiring'', `0'')\n') ;
160end
161
162if (is_plus_pair_real)
163    fprintf (f, 'define(`GB_is_plus_pair_real_semiring'', `1'')\n') ;
164else
165    fprintf (f, 'define(`GB_is_plus_pair_real_semiring'', `0'')\n') ;
166end
167
168if (is_pair)
169    fprintf (f, 'define(`GB_is_pair_multiplier'', `1'')\n') ;
170else
171    fprintf (f, 'define(`GB_is_pair_multiplier'', `0'')\n') ;
172end
173
174if (is_eq)
175    fprintf (f, 'define(`GB_is_eq_monoid'', `1'')\n') ;
176else
177    fprintf (f, 'define(`GB_is_eq_monoid'', `0'')\n') ;
178end
179
180% for the conventional semirings in MATLAB, which get extra optimization
181% if (isequal (addop, 'plus') && isequal (multop, 'times') && ztype_is_float)
182%     fprintf (f, 'define(`GB_is_performance_critical_semiring'', `1'')\n') ;
183% else
184%     fprintf (f, 'define(`GB_is_performance_critical_semiring'', `0'')\n') ;
185% end
186
187
188if (is_any)
189    % the ANY monoid terminates on the first entry seen
190    fprintf (f, 'define(`GB_is_any_monoid'', `1'')\n') ;
191    fprintf (f, 'define(`GB_terminal'', `break ;'')\n') ;
192    fprintf (f, 'define(`GB_dot_simd_vectorize'', `;'')\n') ;
193elseif (~isempty (terminal))
194    % terminal monoids terminate when cij equals the terminal value
195    fprintf (f, 'define(`GB_is_any_monoid'', `0'')\n') ;
196    fprintf (f, 'define(`GB_terminal'', `if (cij == %s) { break ; }'')\n', ...
197        terminal) ;
198    fprintf (f, 'define(`GB_dot_simd_vectorize'', `;'')\n') ;
199else
200    % non-terminal monoids
201    fprintf (f, 'define(`GB_is_any_monoid'', `0'')\n') ;
202    fprintf (f, 'define(`GB_terminal'', `;'')\n') ;
203    op = '' ;
204    if (ztype_is_real)
205        switch (addop)
206            case { 'plus' }
207                op = '+' ;
208            case { 'times' }
209                op = '*' ;
210            case { 'lor' }
211                op = '||' ;
212            case { 'land' }
213                op = '&&' ;
214            case { 'lxor' }
215                op = '^' ;
216            case { 'bor' }
217                op = '|' ;
218            case { 'band' }
219                op = '&' ;
220            case { 'bxor' }
221                op = '^' ;
222            otherwise
223                op = '' ;
224        end
225    end
226    if (isempty (op))
227        fprintf (f, 'define(`GB_dot_simd_vectorize'', `;'')\n') ;
228    else
229        pragma = sprintf ('GB_PRAGMA_SIMD_REDUCTION (%s,$1)', op) ;
230        fprintf (f, 'define(`GB_dot_simd_vectorize'', `%s'')\n', pragma) ;
231    end
232end
233
234if (ztype_is_real)
235    if (omp_atomic || is_any)
236        % on x86: all built-in real monoids are atomic.
237        % The ANY monoid is atomic on any architecture.
238        % MIN, MAX, EQ, XNOR are implemented with atomic compare/exchange.
239        fprintf (f, 'define(`GB_has_atomic'', `1'')\n') ;
240    else
241        %% % no built-in OpenMP atomic pragma for this monoid.
242        %% % Do not use atomic compare/exchange unless on the x86.
243        %% fprintf (f, 'define(`GB_has_atomic'', `GB_X86_64'')\n') ;
244        fprintf (f, 'define(`GB_has_atomic'', `1'')\n') ;
245    end
246else
247    % complex monoids are not atomic, except for 'plus'
248    if (isequal (addop, 'plus'))
249        fprintf (f, 'define(`GB_has_atomic'', `1'')\n') ;
250    else
251        fprintf (f, 'define(`GB_has_atomic'', `0'')\n') ;
252    end
253end
254
255% firsti multiply operator
256if (contains (multop, 'firsti'))
257    fprintf (f, 'define(`GB_is_firsti_multiplier'', `1'')\n') ;
258else
259    fprintf (f, 'define(`GB_is_firsti_multiplier'', `0'')\n') ;
260end
261
262% firstj multiply operator
263if (contains (multop, 'firstj'))
264    fprintf (f, 'define(`GB_is_firstj_multiplier'', `1'')\n') ;
265else
266    fprintf (f, 'define(`GB_is_firstj_multiplier'', `0'')\n') ;
267end
268
269% secondj multiply operator
270if (contains (multop, 'secondj'))
271    fprintf (f, 'define(`GB_is_secondj_multiplier'', `1'')\n') ;
272else
273    fprintf (f, 'define(`GB_is_secondj_multiplier'', `0'')\n') ;
274end
275
276% plus_fc32 monoid:
277if (isequal (addop, 'plus') && isequal (ztype, 'GxB_FC32_t'))
278    fprintf (f, 'define(`GB_is_plus_fc32_monoid'', `1'')\n') ;
279else
280    fprintf (f, 'define(`GB_is_plus_fc32_monoid'', `0'')\n') ;
281end
282
283% plus_fc64 monoid:
284if (isequal (addop, 'plus') && isequal (ztype, 'GxB_FC64_t'))
285    fprintf (f, 'define(`GB_is_plus_fc64_monoid'', `1'')\n') ;
286else
287    fprintf (f, 'define(`GB_is_plus_fc64_monoid'', `0'')\n') ;
288end
289
290% any_fc32 monoid:
291if (isequal (addop, 'any') && isequal (ztype, 'GxB_FC32_t'))
292    fprintf (f, 'define(`GB_is_any_fc32_monoid'', `1'')\n') ;
293else
294    fprintf (f, 'define(`GB_is_any_fc32_monoid'', `0'')\n') ;
295end
296
297% any_fc64 monoid:
298if (isequal (addop, 'any') && isequal (ztype, 'GxB_FC64_t'))
299    fprintf (f, 'define(`GB_is_any_fc64_monoid'', `1'')\n') ;
300else
301    fprintf (f, 'define(`GB_is_any_fc64_monoid'', `0'')\n') ;
302end
303
304% min monoids:
305if (is_min)
306    if (contains (ztype, 'int'))
307        % min monoid for signed or unsigned integers
308        fprintf (f, 'define(`GB_is_imin_monoid'', `1'')\n') ;
309        fprintf (f, 'define(`GB_is_fmin_monoid'', `0'')\n') ;
310    else
311        % min monoid for float or double
312        fprintf (f, 'define(`GB_is_imin_monoid'', `0'')\n') ;
313        fprintf (f, 'define(`GB_is_fmin_monoid'', `1'')\n') ;
314    end
315else
316    % not a min monoid
317    fprintf (f, 'define(`GB_is_imin_monoid'', `0'')\n') ;
318    fprintf (f, 'define(`GB_is_fmin_monoid'', `0'')\n') ;
319end
320
321% max monoids:
322if (is_max)
323    if (contains (ztype, 'int'))
324        % max monoid for signed or unsigned integers
325        fprintf (f, 'define(`GB_is_imax_monoid'', `1'')\n') ;
326        fprintf (f, 'define(`GB_is_fmax_monoid'', `0'')\n') ;
327    else
328        % max monoid for float or double
329        fprintf (f, 'define(`GB_is_imax_monoid'', `0'')\n') ;
330        fprintf (f, 'define(`GB_is_fmax_monoid'', `1'')\n') ;
331    end
332else
333    % not a max monoid
334    fprintf (f, 'define(`GB_is_imax_monoid'', `0'')\n') ;
335    fprintf (f, 'define(`GB_is_fmax_monoid'', `0'')\n') ;
336end
337
338% only PLUS, TIMES, LOR, LAND, and LXOR can be done with OpenMP atomics
339% in gcc and icc.  However, only PLUS and TIMES work with OpenMP atomics
340% in Microsoft Visual Studio; the LOR, LAND, and LXOR atomics don't compile.
341fprintf (f, 'define(`GB_has_omp_atomic'', `%d'')\n', omp_atomic) ;
342fprintf (f, 'define(`GB_microsoft_has_omp_atomic'', `%d'')\n', omp_microsoft_atomic) ;
343
344% to get an entry from A
345if (is_second || is_pair || is_positional)
346    % value of A is ignored for the SECOND and PAIR operators
347    fprintf (f, 'define(`GB_a_is_pattern'', `1'')\n') ;
348    fprintf (f, 'define(`GB_geta'', `;'')\n') ;
349else
350    fprintf (f, 'define(`GB_a_is_pattern'', `0'')\n') ;
351    fprintf (f, 'define(`GB_geta'', `%s $1 = $2 [$3]'')\n', xytype) ;
352end
353
354% to get an entry from B
355if (is_first || is_pair || is_positional)
356    % value of B is ignored for the FIRST and PAIR operators
357    fprintf (f, 'define(`GB_b_is_pattern'', `1'')\n') ;
358    fprintf (f, 'define(`GB_getb'', `;'')\n') ;
359    fprintf (f, 'define(`GB_loadb'', `;'')\n') ;
360else
361    fprintf (f, 'define(`GB_b_is_pattern'', `0'')\n') ;
362    fprintf (f, 'define(`GB_getb'', `%s $1 = $2 [$3]'')\n', xytype) ;
363    fprintf (f, 'define(`GB_loadb'', `$1 [$2] = $3 [$4]'')\n', xytype) ;
364end
365
366% type-specific IDIV
367if (~isempty (strfind (mult, 'IDIV')))
368    if (unsigned)
369        mult = strrep (mult, 'IDIV', 'IDIV_UNSIGNED') ;
370    else
371        mult = strrep (mult, 'IDIV', 'IDIV_SIGNED') ;
372    end
373    mult = strrep (mult, ')', sprintf (', %d)', bits)) ;
374end
375
376% create the multiply operator (assignment)
377mult2 = strrep (mult,  'xarg', '`$2''') ;
378mult2 = strrep (mult2, 'yarg', '`$3''') ;
379fprintf (f, 'define(`GB_multiply'', `$1 = %s'')\n', mult2) ;
380
381% create the add update, of the form w += t
382if (is_min)
383    if (contains (ztype, 'int'))
384        % min monoid for signed or unsigned integers
385        add2 = 'if ($1 > $2) { $1 = $2 ; }' ;
386    else
387        % min monoid for float or double, with omitnan property
388        if (t_is_nonnan)
389            add2 = 'if (!islessequal ($1, $2)) { $1 = $2 ; }' ;
390        else
391            add2 = 'if (!isnan ($2) && !islessequal ($1, $2)) { $1 = $2 ; }' ;
392        end
393    end
394elseif (is_max)
395    if (contains (ztype, 'int'))
396        % max monoid for signed or unsigned integers
397        add2 = 'if ($1 < $2) { $1 = $2 ; }' ;
398    else
399        % max monoid for float or double, with omitnan property
400        if (t_is_nonnan)
401            add2 = 'if (!isgreaterequal ($1, $2)) { $1 = $2 ; }' ;
402        else
403            add2 = 'if (!isnan ($2) && !isgreaterequal ($1, $2)) { $1 = $2 ; }';
404        end
405    end
406else
407    % use the add function as given
408    add2 = strrep (add,  'w', '`$1''') ;
409    add2 = strrep (add2, 't', '`$2''') ;
410end
411fprintf (f, 'define(`GB_add_update'', `%s'')\n', add2) ;
412
413% create the add function, of the form w + t
414add2 = strrep (addfunc,  'w', '`$1''') ;
415add2 = strrep (add2,     't', '`$2''') ;
416fprintf (f, 'define(`GB_add_function'', `%s'')\n', add2) ;
417
418% create the multiply-add operator
419is_imin_or_imax = (isequal (addop, 'min') || isequal (addop, 'max')) && contains (ztype, 'int') ;
420if (~is_imin_or_imax && ...
421    (isequal (ztype, 'float') || isequal (ztype, 'double') || ...
422     isequal (ztype, 'bool') || is_first || is_second || is_pair || is_positional))
423    % float and double do not get promoted.
424    % bool is OK since promotion of the result (0 or 1) to int is safe.
425    % first and second are OK since no promotion occurs.
426    % is* operators are OK too.
427    multadd = strrep (add, 't',  mult) ;
428    multadd = strrep (multadd, 'w', '`$1''') ;
429    multadd = strrep (multadd, 'xarg', '`$2''') ;
430    multadd = strrep (multadd, 'yarg', '`$3''') ;
431    fprintf (f, 'define(`GB_multiply_add'', `%s'')\n', multadd) ;
432    need_mult_typecast = false ;
433else
434    % use explicit typecasting to avoid ANSI C integer promotion.
435    add2 = strrep (add,  'w', '`$1''') ;
436    add2 = strrep (add2, 't', 'x_op_y') ;
437    fprintf (f, 'define(`GB_multiply_add'', `%s x_op_y = %s ; %s'')\n', ...
438        ztype, mult2, add2) ;
439    need_mult_typecast = true ;
440end
441
442% create the bitmap multiply-add statement:
443% The bitmap_multadd (cb,cx,exists,ax,bx) macro computes does the following.
444% The value of cx has been initialized to the identity value of the monoid, so
445% cx += ax*bx can always be used (except for the ANY monoid).
446%
447%   if (exists)
448%       if (cb == 0)
449%           cx = ax * bx
450%           cb = 1
451%       else
452%           cx += ax * bx
453
454mult2 = strrep (mult,  'xarg', 'ax') ;
455mult2 = strrep (mult2, 'yarg', 'bx') ;
456xinit = ';' ;
457xload = ';' ;
458idbyte = '' ;
459if (need_mult_typecast)
460    % the result of the multiplier must be explicitly typcasted
461    mult2 = sprintf ('((%s) (%s))', ztype, mult2) ;
462else
463    mult2 = sprintf ('(%s)', mult2) ;
464end
465
466switch (addop)
467
468    % any monoid
469    case { 'any' }
470        if (isequal (multop, 'pair'))
471            s = ' ' ;
472        else
473            s = sprintf ('if (exists && !cb) { cx = %s ; }', mult2) ;
474        end
475
476    % boolean monoids (except eq / lxnor)
477    case { 'lor' }
478        % TODO: should this be: cx ||= exists && %s ?
479        s = sprintf ('cx |= exists & %s', mult2) ;
480        idbyte = '0' ;
481    case { 'land' }
482        % TODO: should this be: cx &&= !exists || %s ?
483        s = sprintf ('cx &= ~exists | %s', mult2) ;
484        idbyte = '1' ;
485    case { 'lxor' }
486        if (isequal (multop, 'pair'))
487            s = sprintf ('cx ^= exists') ;
488        else
489            % TODO: should this be: cx ^= exists && %s ?
490            s = sprintf ('cx ^= exists & %s', mult2) ;
491        end
492        idbyte = '0' ;
493
494    % min/max monoids:
495    case { 'min' }
496        if (contains (ztype, 'int'))
497            % min for signed or unsigned integers
498            if (t_is_simple)
499                s = sprintf ('if (exists && cx > %s) { cx = %s ; }', ...
500                    mult2, mult2) ;
501            else
502                s = sprintf (...
503                    '%s t = %s ; if (exists && cx > t) { cx = t ; }', ...
504                    ztype, mult2) ;
505            end
506        else
507            % min for float or double, with omitnan property
508            if (t_is_simple)
509                s = sprintf ( ...
510                'if (exists && !isnan (%s) && !islessequal (cx, %s)) { cx = %s ; }', ...
511                mult2, mult2, mult2) ;
512            elseif (t_is_nonnan)
513                s = sprintf ('%s t = %s ; if (exists && !islessequal (cx, t)) { cx = t ; } ', ...
514                ztype, mult2) ;
515            else
516                s = sprintf ('%s t = %s ; if (exists && !isnan (t) && !islessequal (cx, t)) { cx = t ; }', ...
517                ztype, mult2) ;
518            end
519        end
520        if (contains (ztype, 'uint'))
521            idbyte = '0xFF' ;
522        end
523    case { 'max' }
524        if (contains (ztype, 'int'))
525            % max for signed or unsigned integers
526            if (t_is_simple)
527                s = sprintf ('if (exists && cx < %s) { cx = %s ; }', ...
528                mult2, mult2) ;
529            else
530                s = sprintf ('%s t = %s ; if (exists && cx < t) { cx = t ; }', ...
531                ztype, mult2) ;
532            end
533        else
534            % max for float or double, with omitnan property
535            if (t_is_simple)
536                s = sprintf (...
537                'if (exists && !isnan (%s) && !isgreaterequal (cx, %s)) { cx = %s ; }', ...
538                mult2, mult2, mult2) ;
539            elseif (t_is_nonnan)
540                s = sprintf (...
541                '%s t = %s ; if (exists && !isgreaterequal (cx, t)) { cx = t ; }', ...
542                ztype, mult2) ;
543            else
544                s = sprintf (...
545                '%s t = %s ; if (exists && !isnan (t) && !isgreaterequal (cx, t)) { cx = t ; }', ...
546                ztype, mult2) ;
547            end
548        end
549        if (contains (ztype, 'uint'))
550            idbyte = '0' ;
551        end
552
553    % plus monoid: special cases for some multipliers
554    case { 'plus' }
555        idbyte = '0' ;
556        if (ztype_is_real)
557            if (isequal (multop, 'times'))
558                % X = {0,bx}
559                xinit = sprintf ('%s X [2] = {0,0}', ztype) ;
560                xload = 'X [1] = bx' ;
561                if (need_mult_typecast)
562                    s = sprintf ('cx += (%s) (ax * X [exists])', ztype) ;
563                else
564                    s = 'cx += ax * X [exists]' ;
565                end
566            elseif (isequal (multop, 'pair'))
567                s = 'cx += exists' ;
568            else
569                % X = {0,1}
570                xinit = sprintf ('%s X [2] = {0,1}', ztype) ;
571                if (need_mult_typecast)
572                    s = sprintf ('cx += (%s) (%s * X [exists])', ztype, mult2) ;
573                else
574                    s = sprintf ('cx += %s * X [exists]', mult2) ;
575                end
576            end
577        else
578            % plus monoids for complex types
579            s = '' ;
580        end
581
582    % bitwise monoids (except bxnor)
583    case { 'bor' }
584        % X = {all zeros, all ones}
585        xinit = sprintf ('%s X [2] = {0,%s}', ztype, xbits) ;
586        s = sprintf ('cx |= X [exists] & %s', mult2) ;
587        idbyte = '0' ;
588    case { 'band' }
589        % X = {all ones, all zeros}
590        xinit = sprintf ('%s X [2] = {%s,0}', ztype, xbits) ;
591        s = sprintf ('cx &= X [exists] | %s', mult2) ;
592        idbyte = '0xFF' ;
593    case { 'bxor' }
594        % X = {all zeros, all ones}
595        xinit = sprintf ('%s X [2] = {0,%s}', ztype, xbits) ;
596        s = sprintf ('cx ^= X [exists] & %s', mult2) ;
597        idbyte = '0' ;
598
599    % these monoids do not have a concise bitmap multiply-add
600    case { 'eq' }
601        s = '' ;
602        idbyte = '1' ;      % eq monoid: identity byte for memset
603    case { 'times' }
604        s = '' ;
605        idbyte = '' ;
606    case {'bxnor' }
607        s = '' ;
608        idbyte = '0xFF' ;   % bxnor monoid: identity byte for memset
609end
610
611if (isempty (idbyte))
612    fprintf (f, 'define(`GB_has_identity_byte'', `0'')\n') ;
613    fprintf (f, 'define(`GB_identity_byte'', `(none)'')\n') ;
614else
615    fprintf (f, 'define(`GB_has_identity_byte'', `1'')\n') ;
616    fprintf (f, 'define(`GB_identity_byte'', `%s'')\n', idbyte) ;
617end
618
619% disable the bitmap multadd when using div or rdiv and any floating-point
620% type, to avoid divide-by-zero when operating on entries not in the bitmap.
621if (contains (multop, 'div') && ztype_is_float)
622    s = '' ;
623end
624
625if (isempty (s))
626    fprintf (f, 'define(`GB_has_bitmap_multadd'', `0'')\n') ;
627    fprintf (f, 'define(`GB_bitmap_multadd'', `(none)'')\n') ;
628else
629    if (length (s) > 1)
630        s = [s ' ; cb |= exists'] ;
631    else
632        s = ['cb |= exists'] ;
633    end
634    s = strrep (s, 'cb', '$1') ;
635    s = strrep (s, 'cx', '$2') ;
636    s = strrep (s, 'exists', '$3') ;
637    s = strrep (s, 'ax', '$4') ;
638    s = strrep (s, 'bx', '$5') ;
639    fprintf (f, 'define(`GB_has_bitmap_multadd'', `1'')\n') ;
640    fprintf (f, 'define(`GB_bitmap_multadd'', `%s'')\n', s) ;
641end
642xload = strrep (xload, 'bx', '$1') ;
643fprintf (f, 'define(`GB_xload'', `%s'')\n', xload) ;
644fprintf (f, 'define(`GB_xinit'', `%s'')\n', xinit) ;
645% fprintf ('(%5s %-8s %10s): { %s } { %s } { %s }\n', addop, multop, ztype, xinit, xload, s) ;
646
647% create the disable flag
648disable  = sprintf ('GxB_NO_%s', upper (addop)) ;
649if (~isequal (addop, multop))
650    disable = [disable (sprintf (' || GxB_NO_%s', upper (multop)))] ;
651end
652disable = [disable (sprintf (' || GxB_NO_%s', upper (fname)))] ;
653disable = [disable (sprintf (' || GxB_NO_%s_%s', upper (addop), upper (zname)))] ;
654if (~ (isequal (addop, multop) && isequal (zname, fname)))
655    disable = [disable (sprintf (' || GxB_NO_%s_%s', upper (multop), upper (fname)))] ;
656end
657disable = [disable (sprintf (' || GxB_NO_%s_%s_%s', ...
658    upper (addop), upper (multop), upper (fname))) ] ;
659fprintf (f, 'define(`GB_disable'', `(%s)'')\n', disable) ;
660fclose (f) ;
661
662nprune = 57 ;
663
664% construct the *.c and *.h files for the semiring
665fmt = 'cat control.m4 Generator/%s.%s | m4 | tail -n +%d > Generated/%s__%s.%s' ;
666base = { 'GB_Adot2B', 'GB_Adot3B', 'GB_Adot4B', 'GB_AsaxbitB', ...
667    'GB_Asaxpy3B', 'GB_Asaxpy3B_noM', 'GB_Asaxpy3B_M', 'GB_Asaxpy3B_notM',  ...
668    'GB_AxB_defs' } ;
669suffix = { 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'h' } ;
670
671for k = 1:length (base)
672    cmd = sprintf (fmt, base {k}, suffix {k}, nprune, base {k}, name, suffix {k}) ;
673    system (cmd) ;
674end
675
676fprintf ('.') ;
677
678% append to the *.h file
679cmd = sprintf (...
680'cat control.m4 Generator/GB_AxB.h | m4 | tail -n +%d >> Generated/GB_AxB__include.h', nprune) ;
681system (cmd) ;
682
683delete ('control.m4') ;
684
685