1function codegen_axb_method (addop, multop, add, addfunc, mult, ztype, ... 2 xytype, identity, terminal, omp_atomic, omp_microsoft_atomic) 3%CODEGEN_AXB_METHOD create a function to compute C=A*B over a semiring 4% 5% codegen_axb_method (addop, multop, add, addfunc, mult, ztype, xytype, identity, terminal, omp_atomic, omp_microsoft_atomic) 6 7% SuiteSparse:GraphBLAS, Timothy A. Davis, (c) 2017-2021, All Rights Reserved. 8% SPDX-License-Identifier: Apache-2.0 9 10if (isempty (mult)) 11 return 12end 13 14f = fopen ('control.m4', 'w') ; 15 16is_first = false ; 17is_second = false ; 18is_pair = false ; 19is_positional = false ; 20switch (multop) 21 case { 'firsti', 'firsti1', 'firstj', 'firstj1', 'secondj', 'secondj1' } 22 is_positional = true ; 23 case { 'first' } 24 is_first = true ; 25 case { 'second' } 26 is_second = true ; 27 case { 'pair' } 28 is_pair = true ; 29end 30 31is_any = isequal (addop, 'any') ; 32is_max = isequal (addop, 'max') ; 33is_min = isequal (addop, 'min') ; 34is_eq = isequal (addop, 'eq') ; 35is_any_pair = is_any && isequal (multop, 'pair') ; 36ztype_is_real = ~contains (ztype, 'FC') ; 37is_any_complex = is_any && ~ztype_is_real ; 38is_plus_pair_real = isequal (addop, 'plus') && isequal (multop, 'pair') ... 39 && ztype_is_real ; 40 41t_is_simple = isequal (multop, 'pair') || contains (multop, 'first') || contains (multop, 'second') ; 42t_is_nonnan = isequal (multop (1:2), 'is') || (multop (1) == 'l') ; 43 44switch (ztype) 45 case { 'bool' } 46 ztype_is_float = false ; 47 ztype_ignore_overflow = false ; 48 nbits = 8 ; 49 bits = '0x1L' ; 50 case { 'int8_t', 'uint8_t' } 51 ztype_is_float = false ; 52 ztype_ignore_overflow = false ; 53 nbits = 8 ; 54 bits = '0xffL' ; 55 xbits = '0xFF' ; 56 case { 'int16_t', 'uint16_t' } 57 ztype_is_float = false ; 58 ztype_ignore_overflow = false ; 59 nbits = 16 ; 60 bits = '0xffffL' ; 61 xbits = '0xFFFF' ; 62 case { 'int32_t', 'uint32_t' } 63 ztype_is_float = false ; 64 ztype_ignore_overflow = false ; 65 nbits = 32 ; 66 bits = '0xffffffffL' ; 67 xbits = '0xFFFFFFFF' ; 68 case { 'int64_t', 'uint64_t' } 69 ztype_is_float = false ; 70 ztype_ignore_overflow = true ; 71 nbits = 64 ; 72 bits = '0' ; 73 xbits = '0xFFFFFFFFFFFFFFFFL' ; 74 case { 'float' } 75 ztype_is_float = true ; 76 ztype_ignore_overflow = true ; 77 nbits = 32 ; 78 bits = '0' ; 79 case { 'double', 'GxB_FC32_t' } 80 ztype_is_float = true ; 81 ztype_ignore_overflow = true ; 82 nbits = 64 ; 83 bits = '0' ; 84 case { 'GxB_FC64_t' } 85 ztype_is_float = true ; 86 ztype_ignore_overflow = true ; 87 nbits = 128 ; 88 bits = '0' ; 89 otherwise 90 error ('unknown type') ; 91end 92 93% bits: special cases for the PAIR multiplier 94fprintf (f, 'define(`GB_ctype_bits'', `%s'')\n', bits) ; 95 96% nbits: # of bits in the type, needed for the atomic compare-exchange: 97fprintf (f, 'define(`GB_atomic_compare_exchange'', `GB_ATOMIC_COMPARE_EXCHANGE_%d'')\n', nbits) ; 98 99if (is_pair) 100 % these semirings are renamed to any_pair, and not thus created 101 if (isequal (addop, 'land') || isequal (addop, 'eq' ) || ... 102 isequal (addop, 'lor' ) || isequal (addop, 'max' ) || ... 103 isequal (addop, 'min' ) || isequal (addop, 'times')) 104 return 105 end 106end 107 108[fname, unsigned, bits] = codegen_type (xytype) ; 109[zname, ~, ~] = codegen_type (ztype) ; 110 111name = sprintf ('%s_%s_%s', addop, multop, fname) ; 112 113% function names 114fprintf (f, 'define(`_Adot2B'', `_Adot2B__%s'')\n', name) ; 115fprintf (f, 'define(`_Adot3B'', `_Adot3B__%s'')\n', name) ; 116fprintf (f, 'define(`_Adot4B'', `_Adot4B__%s'')\n', name) ; 117fprintf (f, 'define(`_Asaxpy3B'', `_Asaxpy3B__%s'')\n', name) ; 118fprintf (f, 'define(`_Asaxpy3B_M'', `_Asaxpy3B_M__%s'')\n', name) ; 119fprintf (f, 'define(`_Asaxpy3B_noM'', `_Asaxpy3B_noM__%s'')\n', name) ; 120fprintf (f, 'define(`_Asaxpy3B_notM'', `_Asaxpy3B_notM__%s'')\n', name) ; 121fprintf (f, 'define(`_AsaxbitB'', `_AsaxbitB__%s'')\n', name) ; 122fprintf (f, 'define(`GB_AxB_defs'', `GB_AxB_defs__%s'')\n', name) ; 123fprintf (f, 'define(`GB_AxB_defs_include'', `#include "GB_AxB_defs__%s.h"'')\n', name) ; 124 125% type of C, A, and B 126fprintf (f, 'define(`GB_ctype'', `%s'')\n', ztype) ; 127fprintf (f, 'define(`GB_atype'', `%s'')\n', xytype) ; 128fprintf (f, 'define(`GB_btype'', `%s'')\n', xytype) ; 129 130% flag if ztype can ignore overflow in some computations 131fprintf (f, 'define(`GB_ctype_ignore_overflow'', `%d'')\n', ztype_ignore_overflow) ; 132 133% simple typecast from 1 (or 2) real scalars to any other type 134switch (ztype) 135 case { 'GxB_FC32_t' } 136 fprintf (f, 'define(`GB_ctype_cast'', `GxB_CMPLXF (((float) $1), ((float) $2))'')\n') ; 137 case { 'GxB_FC64_t' } 138 fprintf (f, 'define(`GB_ctype_cast'', `GxB_CMPLX (((double) $1), ((double) $2))'')\n') ; 139 otherwise 140 fprintf (f, 'define(`GB_ctype_cast'', `((GB_ctype) $1)'')\n') ; 141end 142 143% simple typecast from 1 (or 2) real scalars to any other type 144switch (xytype) 145 case { 'GxB_FC32_t' } 146 fprintf (f, 'define(`GB_atype_cast'', `GxB_CMPLXF (((float) $1), ((float) $2))'')\n') ; 147 case { 'GxB_FC64_t' } 148 fprintf (f, 'define(`GB_atype_cast'', `GxB_CMPLX (((double) $1), ((double) $2))'')\n') ; 149 otherwise 150 fprintf (f, 'define(`GB_atype_cast'', `((GB_atype) $1)'')\n') ; 151end 152 153% identity and terminal values for the monoid 154fprintf (f, 'define(`GB_identity'', `%s'')\n', identity) ; 155 156if (is_any_pair) 157 fprintf (f, 'define(`GB_is_any_pair_semiring'', `1'')\n') ; 158else 159 fprintf (f, 'define(`GB_is_any_pair_semiring'', `0'')\n') ; 160end 161 162if (is_plus_pair_real) 163 fprintf (f, 'define(`GB_is_plus_pair_real_semiring'', `1'')\n') ; 164else 165 fprintf (f, 'define(`GB_is_plus_pair_real_semiring'', `0'')\n') ; 166end 167 168if (is_pair) 169 fprintf (f, 'define(`GB_is_pair_multiplier'', `1'')\n') ; 170else 171 fprintf (f, 'define(`GB_is_pair_multiplier'', `0'')\n') ; 172end 173 174if (is_eq) 175 fprintf (f, 'define(`GB_is_eq_monoid'', `1'')\n') ; 176else 177 fprintf (f, 'define(`GB_is_eq_monoid'', `0'')\n') ; 178end 179 180% for the conventional semirings in MATLAB, which get extra optimization 181% if (isequal (addop, 'plus') && isequal (multop, 'times') && ztype_is_float) 182% fprintf (f, 'define(`GB_is_performance_critical_semiring'', `1'')\n') ; 183% else 184% fprintf (f, 'define(`GB_is_performance_critical_semiring'', `0'')\n') ; 185% end 186 187 188if (is_any) 189 % the ANY monoid terminates on the first entry seen 190 fprintf (f, 'define(`GB_is_any_monoid'', `1'')\n') ; 191 fprintf (f, 'define(`GB_terminal'', `break ;'')\n') ; 192 fprintf (f, 'define(`GB_dot_simd_vectorize'', `;'')\n') ; 193elseif (~isempty (terminal)) 194 % terminal monoids terminate when cij equals the terminal value 195 fprintf (f, 'define(`GB_is_any_monoid'', `0'')\n') ; 196 fprintf (f, 'define(`GB_terminal'', `if (cij == %s) { break ; }'')\n', ... 197 terminal) ; 198 fprintf (f, 'define(`GB_dot_simd_vectorize'', `;'')\n') ; 199else 200 % non-terminal monoids 201 fprintf (f, 'define(`GB_is_any_monoid'', `0'')\n') ; 202 fprintf (f, 'define(`GB_terminal'', `;'')\n') ; 203 op = '' ; 204 if (ztype_is_real) 205 switch (addop) 206 case { 'plus' } 207 op = '+' ; 208 case { 'times' } 209 op = '*' ; 210 case { 'lor' } 211 op = '||' ; 212 case { 'land' } 213 op = '&&' ; 214 case { 'lxor' } 215 op = '^' ; 216 case { 'bor' } 217 op = '|' ; 218 case { 'band' } 219 op = '&' ; 220 case { 'bxor' } 221 op = '^' ; 222 otherwise 223 op = '' ; 224 end 225 end 226 if (isempty (op)) 227 fprintf (f, 'define(`GB_dot_simd_vectorize'', `;'')\n') ; 228 else 229 pragma = sprintf ('GB_PRAGMA_SIMD_REDUCTION (%s,$1)', op) ; 230 fprintf (f, 'define(`GB_dot_simd_vectorize'', `%s'')\n', pragma) ; 231 end 232end 233 234if (ztype_is_real) 235 if (omp_atomic || is_any) 236 % on x86: all built-in real monoids are atomic. 237 % The ANY monoid is atomic on any architecture. 238 % MIN, MAX, EQ, XNOR are implemented with atomic compare/exchange. 239 fprintf (f, 'define(`GB_has_atomic'', `1'')\n') ; 240 else 241 %% % no built-in OpenMP atomic pragma for this monoid. 242 %% % Do not use atomic compare/exchange unless on the x86. 243 %% fprintf (f, 'define(`GB_has_atomic'', `GB_X86_64'')\n') ; 244 fprintf (f, 'define(`GB_has_atomic'', `1'')\n') ; 245 end 246else 247 % complex monoids are not atomic, except for 'plus' 248 if (isequal (addop, 'plus')) 249 fprintf (f, 'define(`GB_has_atomic'', `1'')\n') ; 250 else 251 fprintf (f, 'define(`GB_has_atomic'', `0'')\n') ; 252 end 253end 254 255% firsti multiply operator 256if (contains (multop, 'firsti')) 257 fprintf (f, 'define(`GB_is_firsti_multiplier'', `1'')\n') ; 258else 259 fprintf (f, 'define(`GB_is_firsti_multiplier'', `0'')\n') ; 260end 261 262% firstj multiply operator 263if (contains (multop, 'firstj')) 264 fprintf (f, 'define(`GB_is_firstj_multiplier'', `1'')\n') ; 265else 266 fprintf (f, 'define(`GB_is_firstj_multiplier'', `0'')\n') ; 267end 268 269% secondj multiply operator 270if (contains (multop, 'secondj')) 271 fprintf (f, 'define(`GB_is_secondj_multiplier'', `1'')\n') ; 272else 273 fprintf (f, 'define(`GB_is_secondj_multiplier'', `0'')\n') ; 274end 275 276% plus_fc32 monoid: 277if (isequal (addop, 'plus') && isequal (ztype, 'GxB_FC32_t')) 278 fprintf (f, 'define(`GB_is_plus_fc32_monoid'', `1'')\n') ; 279else 280 fprintf (f, 'define(`GB_is_plus_fc32_monoid'', `0'')\n') ; 281end 282 283% plus_fc64 monoid: 284if (isequal (addop, 'plus') && isequal (ztype, 'GxB_FC64_t')) 285 fprintf (f, 'define(`GB_is_plus_fc64_monoid'', `1'')\n') ; 286else 287 fprintf (f, 'define(`GB_is_plus_fc64_monoid'', `0'')\n') ; 288end 289 290% any_fc32 monoid: 291if (isequal (addop, 'any') && isequal (ztype, 'GxB_FC32_t')) 292 fprintf (f, 'define(`GB_is_any_fc32_monoid'', `1'')\n') ; 293else 294 fprintf (f, 'define(`GB_is_any_fc32_monoid'', `0'')\n') ; 295end 296 297% any_fc64 monoid: 298if (isequal (addop, 'any') && isequal (ztype, 'GxB_FC64_t')) 299 fprintf (f, 'define(`GB_is_any_fc64_monoid'', `1'')\n') ; 300else 301 fprintf (f, 'define(`GB_is_any_fc64_monoid'', `0'')\n') ; 302end 303 304% min monoids: 305if (is_min) 306 if (contains (ztype, 'int')) 307 % min monoid for signed or unsigned integers 308 fprintf (f, 'define(`GB_is_imin_monoid'', `1'')\n') ; 309 fprintf (f, 'define(`GB_is_fmin_monoid'', `0'')\n') ; 310 else 311 % min monoid for float or double 312 fprintf (f, 'define(`GB_is_imin_monoid'', `0'')\n') ; 313 fprintf (f, 'define(`GB_is_fmin_monoid'', `1'')\n') ; 314 end 315else 316 % not a min monoid 317 fprintf (f, 'define(`GB_is_imin_monoid'', `0'')\n') ; 318 fprintf (f, 'define(`GB_is_fmin_monoid'', `0'')\n') ; 319end 320 321% max monoids: 322if (is_max) 323 if (contains (ztype, 'int')) 324 % max monoid for signed or unsigned integers 325 fprintf (f, 'define(`GB_is_imax_monoid'', `1'')\n') ; 326 fprintf (f, 'define(`GB_is_fmax_monoid'', `0'')\n') ; 327 else 328 % max monoid for float or double 329 fprintf (f, 'define(`GB_is_imax_monoid'', `0'')\n') ; 330 fprintf (f, 'define(`GB_is_fmax_monoid'', `1'')\n') ; 331 end 332else 333 % not a max monoid 334 fprintf (f, 'define(`GB_is_imax_monoid'', `0'')\n') ; 335 fprintf (f, 'define(`GB_is_fmax_monoid'', `0'')\n') ; 336end 337 338% only PLUS, TIMES, LOR, LAND, and LXOR can be done with OpenMP atomics 339% in gcc and icc. However, only PLUS and TIMES work with OpenMP atomics 340% in Microsoft Visual Studio; the LOR, LAND, and LXOR atomics don't compile. 341fprintf (f, 'define(`GB_has_omp_atomic'', `%d'')\n', omp_atomic) ; 342fprintf (f, 'define(`GB_microsoft_has_omp_atomic'', `%d'')\n', omp_microsoft_atomic) ; 343 344% to get an entry from A 345if (is_second || is_pair || is_positional) 346 % value of A is ignored for the SECOND and PAIR operators 347 fprintf (f, 'define(`GB_a_is_pattern'', `1'')\n') ; 348 fprintf (f, 'define(`GB_geta'', `;'')\n') ; 349else 350 fprintf (f, 'define(`GB_a_is_pattern'', `0'')\n') ; 351 fprintf (f, 'define(`GB_geta'', `%s $1 = $2 [$3]'')\n', xytype) ; 352end 353 354% to get an entry from B 355if (is_first || is_pair || is_positional) 356 % value of B is ignored for the FIRST and PAIR operators 357 fprintf (f, 'define(`GB_b_is_pattern'', `1'')\n') ; 358 fprintf (f, 'define(`GB_getb'', `;'')\n') ; 359 fprintf (f, 'define(`GB_loadb'', `;'')\n') ; 360else 361 fprintf (f, 'define(`GB_b_is_pattern'', `0'')\n') ; 362 fprintf (f, 'define(`GB_getb'', `%s $1 = $2 [$3]'')\n', xytype) ; 363 fprintf (f, 'define(`GB_loadb'', `$1 [$2] = $3 [$4]'')\n', xytype) ; 364end 365 366% type-specific IDIV 367if (~isempty (strfind (mult, 'IDIV'))) 368 if (unsigned) 369 mult = strrep (mult, 'IDIV', 'IDIV_UNSIGNED') ; 370 else 371 mult = strrep (mult, 'IDIV', 'IDIV_SIGNED') ; 372 end 373 mult = strrep (mult, ')', sprintf (', %d)', bits)) ; 374end 375 376% create the multiply operator (assignment) 377mult2 = strrep (mult, 'xarg', '`$2''') ; 378mult2 = strrep (mult2, 'yarg', '`$3''') ; 379fprintf (f, 'define(`GB_multiply'', `$1 = %s'')\n', mult2) ; 380 381% create the add update, of the form w += t 382if (is_min) 383 if (contains (ztype, 'int')) 384 % min monoid for signed or unsigned integers 385 add2 = 'if ($1 > $2) { $1 = $2 ; }' ; 386 else 387 % min monoid for float or double, with omitnan property 388 if (t_is_nonnan) 389 add2 = 'if (!islessequal ($1, $2)) { $1 = $2 ; }' ; 390 else 391 add2 = 'if (!isnan ($2) && !islessequal ($1, $2)) { $1 = $2 ; }' ; 392 end 393 end 394elseif (is_max) 395 if (contains (ztype, 'int')) 396 % max monoid for signed or unsigned integers 397 add2 = 'if ($1 < $2) { $1 = $2 ; }' ; 398 else 399 % max monoid for float or double, with omitnan property 400 if (t_is_nonnan) 401 add2 = 'if (!isgreaterequal ($1, $2)) { $1 = $2 ; }' ; 402 else 403 add2 = 'if (!isnan ($2) && !isgreaterequal ($1, $2)) { $1 = $2 ; }'; 404 end 405 end 406else 407 % use the add function as given 408 add2 = strrep (add, 'w', '`$1''') ; 409 add2 = strrep (add2, 't', '`$2''') ; 410end 411fprintf (f, 'define(`GB_add_update'', `%s'')\n', add2) ; 412 413% create the add function, of the form w + t 414add2 = strrep (addfunc, 'w', '`$1''') ; 415add2 = strrep (add2, 't', '`$2''') ; 416fprintf (f, 'define(`GB_add_function'', `%s'')\n', add2) ; 417 418% create the multiply-add operator 419is_imin_or_imax = (isequal (addop, 'min') || isequal (addop, 'max')) && contains (ztype, 'int') ; 420if (~is_imin_or_imax && ... 421 (isequal (ztype, 'float') || isequal (ztype, 'double') || ... 422 isequal (ztype, 'bool') || is_first || is_second || is_pair || is_positional)) 423 % float and double do not get promoted. 424 % bool is OK since promotion of the result (0 or 1) to int is safe. 425 % first and second are OK since no promotion occurs. 426 % is* operators are OK too. 427 multadd = strrep (add, 't', mult) ; 428 multadd = strrep (multadd, 'w', '`$1''') ; 429 multadd = strrep (multadd, 'xarg', '`$2''') ; 430 multadd = strrep (multadd, 'yarg', '`$3''') ; 431 fprintf (f, 'define(`GB_multiply_add'', `%s'')\n', multadd) ; 432 need_mult_typecast = false ; 433else 434 % use explicit typecasting to avoid ANSI C integer promotion. 435 add2 = strrep (add, 'w', '`$1''') ; 436 add2 = strrep (add2, 't', 'x_op_y') ; 437 fprintf (f, 'define(`GB_multiply_add'', `%s x_op_y = %s ; %s'')\n', ... 438 ztype, mult2, add2) ; 439 need_mult_typecast = true ; 440end 441 442% create the bitmap multiply-add statement: 443% The bitmap_multadd (cb,cx,exists,ax,bx) macro computes does the following. 444% The value of cx has been initialized to the identity value of the monoid, so 445% cx += ax*bx can always be used (except for the ANY monoid). 446% 447% if (exists) 448% if (cb == 0) 449% cx = ax * bx 450% cb = 1 451% else 452% cx += ax * bx 453 454mult2 = strrep (mult, 'xarg', 'ax') ; 455mult2 = strrep (mult2, 'yarg', 'bx') ; 456xinit = ';' ; 457xload = ';' ; 458idbyte = '' ; 459if (need_mult_typecast) 460 % the result of the multiplier must be explicitly typcasted 461 mult2 = sprintf ('((%s) (%s))', ztype, mult2) ; 462else 463 mult2 = sprintf ('(%s)', mult2) ; 464end 465 466switch (addop) 467 468 % any monoid 469 case { 'any' } 470 if (isequal (multop, 'pair')) 471 s = ' ' ; 472 else 473 s = sprintf ('if (exists && !cb) { cx = %s ; }', mult2) ; 474 end 475 476 % boolean monoids (except eq / lxnor) 477 case { 'lor' } 478 % TODO: should this be: cx ||= exists && %s ? 479 s = sprintf ('cx |= exists & %s', mult2) ; 480 idbyte = '0' ; 481 case { 'land' } 482 % TODO: should this be: cx &&= !exists || %s ? 483 s = sprintf ('cx &= ~exists | %s', mult2) ; 484 idbyte = '1' ; 485 case { 'lxor' } 486 if (isequal (multop, 'pair')) 487 s = sprintf ('cx ^= exists') ; 488 else 489 % TODO: should this be: cx ^= exists && %s ? 490 s = sprintf ('cx ^= exists & %s', mult2) ; 491 end 492 idbyte = '0' ; 493 494 % min/max monoids: 495 case { 'min' } 496 if (contains (ztype, 'int')) 497 % min for signed or unsigned integers 498 if (t_is_simple) 499 s = sprintf ('if (exists && cx > %s) { cx = %s ; }', ... 500 mult2, mult2) ; 501 else 502 s = sprintf (... 503 '%s t = %s ; if (exists && cx > t) { cx = t ; }', ... 504 ztype, mult2) ; 505 end 506 else 507 % min for float or double, with omitnan property 508 if (t_is_simple) 509 s = sprintf ( ... 510 'if (exists && !isnan (%s) && !islessequal (cx, %s)) { cx = %s ; }', ... 511 mult2, mult2, mult2) ; 512 elseif (t_is_nonnan) 513 s = sprintf ('%s t = %s ; if (exists && !islessequal (cx, t)) { cx = t ; } ', ... 514 ztype, mult2) ; 515 else 516 s = sprintf ('%s t = %s ; if (exists && !isnan (t) && !islessequal (cx, t)) { cx = t ; }', ... 517 ztype, mult2) ; 518 end 519 end 520 if (contains (ztype, 'uint')) 521 idbyte = '0xFF' ; 522 end 523 case { 'max' } 524 if (contains (ztype, 'int')) 525 % max for signed or unsigned integers 526 if (t_is_simple) 527 s = sprintf ('if (exists && cx < %s) { cx = %s ; }', ... 528 mult2, mult2) ; 529 else 530 s = sprintf ('%s t = %s ; if (exists && cx < t) { cx = t ; }', ... 531 ztype, mult2) ; 532 end 533 else 534 % max for float or double, with omitnan property 535 if (t_is_simple) 536 s = sprintf (... 537 'if (exists && !isnan (%s) && !isgreaterequal (cx, %s)) { cx = %s ; }', ... 538 mult2, mult2, mult2) ; 539 elseif (t_is_nonnan) 540 s = sprintf (... 541 '%s t = %s ; if (exists && !isgreaterequal (cx, t)) { cx = t ; }', ... 542 ztype, mult2) ; 543 else 544 s = sprintf (... 545 '%s t = %s ; if (exists && !isnan (t) && !isgreaterequal (cx, t)) { cx = t ; }', ... 546 ztype, mult2) ; 547 end 548 end 549 if (contains (ztype, 'uint')) 550 idbyte = '0' ; 551 end 552 553 % plus monoid: special cases for some multipliers 554 case { 'plus' } 555 idbyte = '0' ; 556 if (ztype_is_real) 557 if (isequal (multop, 'times')) 558 % X = {0,bx} 559 xinit = sprintf ('%s X [2] = {0,0}', ztype) ; 560 xload = 'X [1] = bx' ; 561 if (need_mult_typecast) 562 s = sprintf ('cx += (%s) (ax * X [exists])', ztype) ; 563 else 564 s = 'cx += ax * X [exists]' ; 565 end 566 elseif (isequal (multop, 'pair')) 567 s = 'cx += exists' ; 568 else 569 % X = {0,1} 570 xinit = sprintf ('%s X [2] = {0,1}', ztype) ; 571 if (need_mult_typecast) 572 s = sprintf ('cx += (%s) (%s * X [exists])', ztype, mult2) ; 573 else 574 s = sprintf ('cx += %s * X [exists]', mult2) ; 575 end 576 end 577 else 578 % plus monoids for complex types 579 s = '' ; 580 end 581 582 % bitwise monoids (except bxnor) 583 case { 'bor' } 584 % X = {all zeros, all ones} 585 xinit = sprintf ('%s X [2] = {0,%s}', ztype, xbits) ; 586 s = sprintf ('cx |= X [exists] & %s', mult2) ; 587 idbyte = '0' ; 588 case { 'band' } 589 % X = {all ones, all zeros} 590 xinit = sprintf ('%s X [2] = {%s,0}', ztype, xbits) ; 591 s = sprintf ('cx &= X [exists] | %s', mult2) ; 592 idbyte = '0xFF' ; 593 case { 'bxor' } 594 % X = {all zeros, all ones} 595 xinit = sprintf ('%s X [2] = {0,%s}', ztype, xbits) ; 596 s = sprintf ('cx ^= X [exists] & %s', mult2) ; 597 idbyte = '0' ; 598 599 % these monoids do not have a concise bitmap multiply-add 600 case { 'eq' } 601 s = '' ; 602 idbyte = '1' ; % eq monoid: identity byte for memset 603 case { 'times' } 604 s = '' ; 605 idbyte = '' ; 606 case {'bxnor' } 607 s = '' ; 608 idbyte = '0xFF' ; % bxnor monoid: identity byte for memset 609end 610 611if (isempty (idbyte)) 612 fprintf (f, 'define(`GB_has_identity_byte'', `0'')\n') ; 613 fprintf (f, 'define(`GB_identity_byte'', `(none)'')\n') ; 614else 615 fprintf (f, 'define(`GB_has_identity_byte'', `1'')\n') ; 616 fprintf (f, 'define(`GB_identity_byte'', `%s'')\n', idbyte) ; 617end 618 619% disable the bitmap multadd when using div or rdiv and any floating-point 620% type, to avoid divide-by-zero when operating on entries not in the bitmap. 621if (contains (multop, 'div') && ztype_is_float) 622 s = '' ; 623end 624 625if (isempty (s)) 626 fprintf (f, 'define(`GB_has_bitmap_multadd'', `0'')\n') ; 627 fprintf (f, 'define(`GB_bitmap_multadd'', `(none)'')\n') ; 628else 629 if (length (s) > 1) 630 s = [s ' ; cb |= exists'] ; 631 else 632 s = ['cb |= exists'] ; 633 end 634 s = strrep (s, 'cb', '$1') ; 635 s = strrep (s, 'cx', '$2') ; 636 s = strrep (s, 'exists', '$3') ; 637 s = strrep (s, 'ax', '$4') ; 638 s = strrep (s, 'bx', '$5') ; 639 fprintf (f, 'define(`GB_has_bitmap_multadd'', `1'')\n') ; 640 fprintf (f, 'define(`GB_bitmap_multadd'', `%s'')\n', s) ; 641end 642xload = strrep (xload, 'bx', '$1') ; 643fprintf (f, 'define(`GB_xload'', `%s'')\n', xload) ; 644fprintf (f, 'define(`GB_xinit'', `%s'')\n', xinit) ; 645% fprintf ('(%5s %-8s %10s): { %s } { %s } { %s }\n', addop, multop, ztype, xinit, xload, s) ; 646 647% create the disable flag 648disable = sprintf ('GxB_NO_%s', upper (addop)) ; 649if (~isequal (addop, multop)) 650 disable = [disable (sprintf (' || GxB_NO_%s', upper (multop)))] ; 651end 652disable = [disable (sprintf (' || GxB_NO_%s', upper (fname)))] ; 653disable = [disable (sprintf (' || GxB_NO_%s_%s', upper (addop), upper (zname)))] ; 654if (~ (isequal (addop, multop) && isequal (zname, fname))) 655 disable = [disable (sprintf (' || GxB_NO_%s_%s', upper (multop), upper (fname)))] ; 656end 657disable = [disable (sprintf (' || GxB_NO_%s_%s_%s', ... 658 upper (addop), upper (multop), upper (fname))) ] ; 659fprintf (f, 'define(`GB_disable'', `(%s)'')\n', disable) ; 660fclose (f) ; 661 662nprune = 57 ; 663 664% construct the *.c and *.h files for the semiring 665fmt = 'cat control.m4 Generator/%s.%s | m4 | tail -n +%d > Generated/%s__%s.%s' ; 666base = { 'GB_Adot2B', 'GB_Adot3B', 'GB_Adot4B', 'GB_AsaxbitB', ... 667 'GB_Asaxpy3B', 'GB_Asaxpy3B_noM', 'GB_Asaxpy3B_M', 'GB_Asaxpy3B_notM', ... 668 'GB_AxB_defs' } ; 669suffix = { 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'h' } ; 670 671for k = 1:length (base) 672 cmd = sprintf (fmt, base {k}, suffix {k}, nprune, base {k}, name, suffix {k}) ; 673 system (cmd) ; 674end 675 676fprintf ('.') ; 677 678% append to the *.h file 679cmd = sprintf (... 680'cat control.m4 Generator/GB_AxB.h | m4 | tail -n +%d >> Generated/GB_AxB__include.h', nprune) ; 681system (cmd) ; 682 683delete ('control.m4') ; 684 685