1/*========================== begin_copyright_notice ============================ 2 3Copyright (C) 2020-2021 Intel Corporation 4 5SPDX-License-Identifier: MIT 6 7============================= end_copyright_notice ===========================*/ 8 9#include "../imf.h" 10#pragma OPENCL FP_CONTRACT OFF 11typedef struct 12{ 13 unsigned int Log_tbl_H[32]; 14 unsigned int Log_tbl_L[32]; 15 unsigned int Exp_tbl_H[16]; 16 unsigned int C15; 17 unsigned int One; 18 unsigned int poly_coeff5; 19 unsigned int poly_coeff4; 20 unsigned int poly_coeff3; 21 unsigned int poly_coeff2; 22 unsigned int poly_coeff1; 23 unsigned int poly_c1h; 24 unsigned int Shifter; 25 unsigned int EMask; 26 unsigned int poly_e_coeff3; 27 unsigned int poly_e_coeff2; 28 unsigned int poly_e_coeff1; 29 30 unsigned int Zero; 31 unsigned int LIndexMask; 32 unsigned int EIndexMask; 33 unsigned int dAbsMask; 34 unsigned int dDomainRange; 35 unsigned int ExpMask; 36} __internal_spowr_la_data_avx512_t; 37static __constant __internal_spowr_la_data_avx512_t __internal_spowr_la_data_avx512 = { 38 { 39 0x00000000u, 0xbd35d000u, 0xbdb32000u, 0xbe046400u, 0xbe2e0000u, 0xbe567c00u, 0xbe7de000u, 0xbe922000u, 0xbea4d400u, 0xbeb71200u, 0xbec8de00u, 40 0xbeda4000u, 0xbeeb3a00u, 0xbefbd400u, 0xbf060800u, 0xbf0dfa00u, 0x3ed48000u, 0x3ec54400u, 0x3eb65800u, 0x3ea7b800u, 0x3e996000u, 0x3e8b4e00u, 41 0x3e7b0000u, 0x3e5fe400u, 0x3e454400u, 0x3e2b2000u, 0x3e116c00u, 0x3df05800u, 0x3dbeb000u, 0x3d8dd800u, 0x3d3ba000u, 0x3cba2000u} 42 , { 43 0x00000000u, 0xb6d3758fu, 0x3510536fu, 0x369dcc96u, 0xb651cfdfu, 0x3687492cu, 0xb635c813u, 0xb5f561c1u, 0x35f6865du, 0x36f19318u, 0x35aedc1du, 44 0x36a0463cu, 0xb69f0197u, 0xb5ad1961u, 0xb6201ac7u, 0x36ee16a3u, 0xb5d1cfdfu, 0x36c055feu, 0x3676865du, 0xb589a627u, 0xb48e4789u, 0x33a6c7e3u, 45 0xb69d5be7u, 0xb642c000u, 0x364055feu, 0xb68f5801u, 0x36b70aadu, 0x35d74798u, 0x3492d9f7u, 0x364a9801u, 0xb6566c4du, 0xb48bcf06u} 46 , { 47 0x3f800000u, 0x3f85aac3u, 0x3f8b95c2u, 0x3f91c3d3u, 0x3f9837f0u, 0x3f9ef532u, 0x3fa5fed7u, 0x3fad583fu, 0x3fb504f3u, 0x3fbd08a4u, 0x3fc5672au, 48 0x3fce248cu, 0x3fd744fdu, 0x3fe0ccdfu, 0x3feac0c7u, 0x3ff5257du} 49 50 , 0x3fc00000u, 0x3f800000u, 0x3E93C705u, 0xBEB8B3EDu, 0x3EF6384Fu, 0xBF38AA3Bu, 0x32A570CCu, 0x3FB8AA3Bu, 0x494007f0u, 0xbfffffffu, 0x3d6854cbu, 51 0x3e75f16cu, 0x3f317222u, 0x00000000u, 0x0000007cu, 0x0000003cu, 0x7fffffffu, 0x42fb0000u, 0x7f800000u 52}; 53 54typedef struct 55{ 56 57 unsigned int hsw_sMinNorm; 58 unsigned int hsw_sMaxNorm; 59 unsigned int hsw_RndBit; 60 unsigned int hsw_s_RndMask; 61 unsigned int hsw_sOne; 62 63 unsigned int hsw_sTh[16]; 64 unsigned int hsw_sTl[16]; 65 66 unsigned int hsw_sc7; 67 unsigned int hsw_sc6; 68 unsigned int hsw_sc5; 69 unsigned int hsw_sc4; 70 unsigned int hsw_sc1h; 71 unsigned int hsw_sc2; 72 unsigned int hsw_sc1; 73 unsigned int hsw_sc2h; 74 unsigned int hsw_sc3; 75 76 unsigned int hsw_sAbsMask; 77 unsigned int hsw_sDomainRange; 78 unsigned int hsw_sShifter; 79 80 unsigned int hsw_sTe[16]; 81 82 unsigned int hsw_sce3; 83 unsigned int hsw_sce2; 84 unsigned int hsw_sEMask; 85 unsigned int hsw_sce1; 86 87} __internal_spowr_la_data_avx2_t; 88static __constant __internal_spowr_la_data_avx2_t __internal_spowr_la_data_avx2 = { 89 90 0x00800000u, 91 0x7e400000u, 92 0x00080000u, 93 0xfff00000u, 94 0x3f800000u, 95 96 { 97 0x42fe0000u, 0x42fda900u, 0x42fd5b2cu, 0x42fd14c4u, 0x42fcd480u, 0x42fc9960u, 0x42fc62a2u, 0x42fc2facu, 98 0x42fe0000u, 0x42fda900u, 0x42fd5b2cu, 0x42fd14c4u, 0x42fcd480u, 0x42fc9960u, 0x42fc62a2u, 0x42fc2facu, 99 }, 100 { 101 0x00000000u, 0xb651cfdfu, 0x35f6865cu, 0x37307f34u, 0xb5d1cfdfu, 0xb48e4789u, 0x364055feu, 0x3492d9f8u, 102 0x00000000u, 0xb651cfdfu, 0x35f6865cu, 0x37307f34u, 0xb5d1cfdfu, 0xb48e4789u, 0x364055feu, 0x3492d9f8u, 103 }, 104 105 0x3e547edeu, 106 0xbe777cc7u, 107 0x3e93badcu, 108 0xbeb8a9eau, 109 0x3fb8aa3bu, 110 0xb239c255u, 111 0x32a56f15u, 112 0xbf38aa3bu, 113 0x3ef6384fu, 114 115 0x7fffffffu, 116 0x42fb0000u, 117 0x49c003f8u, 118 119 { 120 0x3f800000u, 0x3f8b95c2u, 0x3f9837f0u, 0x3fa5fed7u, 0x3fb504f3u, 0x3fc5672au, 0x3fd744fdu, 0x3feac0c7u, 121 0x3f800000u, 0x3f8b95c2u, 0x3f9837f0u, 0x3fa5fed7u, 0x3fb504f3u, 0x3fc5672au, 0x3fd744fdu, 0x3feac0c7u, 122 }, 123 124 0x3d636078u, 125 0x3e7607C9u, 126 0x7f800000u, 127 0x3f317218u 128}; 129 130typedef struct 131{ 132 unsigned int sHiMask; 133 unsigned int sRSValue; 134 unsigned int NMINNORM; 135 unsigned int NMAXVAL; 136 unsigned int INF; 137 138 unsigned int LFR_TBL[65][3]; 139 unsigned int LFR_I_CHK_WORK_SUB; 140 unsigned int LFR_I_CHK_WORK_CMP; 141 unsigned int S_MANT_MASK; 142 unsigned int S_ONE; 143 unsigned int LFR_I_INDEX_MASK; 144 unsigned int LFR_I_INDEX_ADD; 145 unsigned int S_HI10BITS_MASK; 146 unsigned int LFR_S_P4; 147 unsigned int LFR_S_P3; 148 unsigned int LFR_S_P2; 149 unsigned int I_BIAS; 150 unsigned int LFR_I_NZ_ADD; 151 unsigned int LFR_I_NZ_CMP; 152 unsigned int S_LOG2_HI; 153 unsigned int S_LOG2_LO; 154 155 unsigned int _sInvLn2; 156 unsigned int _sShifter; 157 unsigned int _sLn2hi; 158 unsigned int _sLn2lo; 159 unsigned int _sPC0; 160 unsigned int _sPC1; 161 unsigned int _sPC2; 162 unsigned int _sPC3; 163 unsigned int _sPC4; 164 unsigned int _sPC5; 165 unsigned int _iBias; 166 unsigned int _iAbsMask; 167 unsigned int _iDomainRange; 168 unsigned int _s2N_2; 169 unsigned int _sHuge; 170 unsigned int _sTiny; 171} __internal_spowr_la_data_t; 172static __constant __internal_spowr_la_data_t __internal_spowr_la_data = { 173 0xFFFFF000u, 174 0x45800800u, 175 0x80800000u, 176 0xfeffffffu, 177 0x7f800000u, 178 179 { 180 {0x00000000u, 0x00000000u, 0x3F800000u}, 181 {0x3C810000u, 0x35ACB127u, 0x3F7C0000u}, 182 {0x3D020000u, 0x372EC4F4u, 0x3F780000u}, 183 {0x3D33C000u, 0x38129E5Bu, 0x3F750000u}, 184 {0x3D774000u, 0x378C7002u, 0x3F710000u}, 185 {0x3D9DE000u, 0x37FAD3E9u, 0x3F6D0000u}, 186 {0x3DB80000u, 0x37530AEBu, 0x3F6A0000u}, 187 {0x3DD26000u, 0x381D902Cu, 0x3F670000u}, 188 {0x3DED2000u, 0x3849D8E1u, 0x3F640000u}, 189 {0x3E08B000u, 0x38474114u, 0x3F600000u}, 190 {0x3E168000u, 0x38308643u, 0x3F5D0000u}, 191 {0x3E248000u, 0x381EC19Au, 0x3F5A0000u}, 192 {0x3E2DF000u, 0x382035AAu, 0x3F580000u}, 193 {0x3E3C4000u, 0x3846C2A2u, 0x3F550000u}, 194 {0x3E4AD000u, 0x3735B9EEu, 0x3F520000u}, 195 {0x3E598000u, 0x386C2BAEu, 0x3F4F0000u}, 196 {0x3E637000u, 0x387DE378u, 0x3F4D0000u}, 197 {0x3E729000u, 0x38077FF4u, 0x3F4A0000u}, 198 {0x3E7CC000u, 0x380E365Au, 0x3F480000u}, 199 {0x3E862000u, 0x37359D86u, 0x3F450000u}, 200 {0x3E8B5800u, 0x37B9975Au, 0x3F430000u}, 201 {0x3E90A000u, 0x378ADA1Du, 0x3F410000u}, 202 {0x3E95F000u, 0x38707CDDu, 0x3F3F0000u}, 203 {0x3E9E1000u, 0x37A4EE66u, 0x3F3C0000u}, 204 {0x3EA38800u, 0x380DC272u, 0x3F3A0000u}, 205 {0x3EA91000u, 0x382E0739u, 0x3F380000u}, 206 {0x3EAEA800u, 0x383DDF59u, 0x3F360000u}, 207 {0x3EB45000u, 0x38483E9Cu, 0x3F340000u}, 208 {0x3EBA0800u, 0x385876C6u, 0x3F320000u}, 209 {0x3EBFD000u, 0x387A3BD8u, 0x3F300000u}, 210 {0x3EC5B000u, 0x3766A22Du, 0x3F2E0000u}, 211 {0x3ECB9800u, 0x38234313u, 0x3F2C0000u}, 212 {0x3ECE9800u, 0x372FA858u, 0x3F2B0000u}, 213 {0x3ED49800u, 0x386D3C8Bu, 0x3F290000u}, 214 {0x3EDAB000u, 0x387A0446u, 0x3F270000u}, 215 {0x3EE0E000u, 0x37C0D27Fu, 0x3F250000u}, 216 {0x3EE3F800u, 0x3879C745u, 0x3F240000u}, 217 {0x3EEA4800u, 0x3511B7BFu, 0x3F220000u}, 218 {0x3EED7000u, 0x37EA9099u, 0x3F210000u}, 219 {0x3EF3D800u, 0x378587BBu, 0x3F1F0000u}, 220 {0x3EF71000u, 0x382156B4u, 0x3F1E0000u}, 221 {0x3EFD9800u, 0x37B15EF5u, 0x3F1C0000u}, 222 {0x3F007000u, 0x3835CB64u, 0x3F1B0000u}, 223 {0x3F03C400u, 0x37FC14CFu, 0x3F190000u}, 224 {0x3F057000u, 0x386DC5A1u, 0x3F180000u}, 225 {0x3F08D400u, 0x3870478Fu, 0x3F160000u}, 226 {0x3F0A8C00u, 0x3807EDE5u, 0x3F150000u}, 227 {0x3F0C4400u, 0x385C81E4u, 0x3F140000u}, 228 {0x3F0FC000u, 0x3846BF8Cu, 0x3F120000u}, 229 {0x3F118400u, 0x37C362FBu, 0x3F110000u}, 230 {0x3F134800u, 0x3844226Au, 0x3F100000u}, 231 {0x3F16DC00u, 0x386AAF62u, 0x3F0E0000u}, 232 {0x3F18AC00u, 0x38348868u, 0x3F0D0000u}, 233 {0x3F1A8000u, 0x37A2B676u, 0x3F0C0000u}, 234 {0x3F1C5400u, 0x38442E33u, 0x3F0B0000u}, 235 {0x3F1E3000u, 0x3680C36Cu, 0x3F0A0000u}, 236 {0x3F200C00u, 0x3761092Eu, 0x3F090000u}, 237 {0x3F23D000u, 0x37293F45u, 0x3F070000u}, 238 {0x3F25B400u, 0x387ADF2Eu, 0x3F060000u}, 239 {0x3F27A000u, 0x383506B5u, 0x3F050000u}, 240 {0x3F299000u, 0x37B89A1Au, 0x3F040000u}, 241 {0x3F2B8000u, 0x38744D77u, 0x3F030000u}, 242 {0x3F2D7800u, 0x3800B86Du, 0x3F020000u}, 243 {0x3F2F7400u, 0x35AA8906u, 0x3F010000u}, 244 {0x3F317000u, 0x3805FDF4u, 0x3F000000u}, 245 }, 246 0x80800000u, 247 0xFF000000u, 248 0x007fffffu, 249 0x3f800000u, 250 0x007F0000u, 251 0x00010000u, 252 0x7fffff00u, 253 0xBE800000u, 254 0x3EAAAAABu, 255 0xBF000000u, 256 0x0000007fu, 257 0x407e0000u, 258 0x7ffc0000u, 259 0x3F317000u, 260 0x3805FDF4u, 261 262 0x3FB8AA3Bu, 263 0x4b400000u, 264 0x3F317200u, 265 0x35BFBE8Eu, 266 0x3F800000u, 267 0x3F7FFFFEu, 268 0x3EFFFF34u, 269 0x3E2AACACu, 270 0x3D2B8392u, 271 0x3C07D9FEu, 272 0x0000007fu, 273 0x7fffffffu, 274 0x42819f00u, 275 0x33800000u, 276 0x7f7fffffu, 277 0x00800000u 278}; 279 280static __constant _iml_v2_sp_union_t __spowr_la_CoutTab[380] = { 281 282 0x3F800000, 283 0x3F640000, 284 0x3F4C0000, 285 0x3F3A0000, 286 0x3F2A0000, 287 0x3F1E0000, 288 0x3F120000, 289 0x3F080000, 290 0x3F000000, 291 292 0x00000000, 293 0x00000000, 294 0x3E2B1E00, 295 0x36614FFD, 296 0x3EA7B700, 297 0x36DD9676, 298 0x3EEBF300, 299 0x3640ABC3, 300 0xBED19B00, 301 0xB6B053FB, 302 0xBE9B8900, 303 0xB599D49A, 304 0xBE426000, 305 0xB6AF40BC, 306 0xBDB31C00, 307 0xB6EDF592, 308 0x00000000, 309 0x00000000, 310 311 0x3F800000, 312 0x3F780000, 313 0x3F700000, 314 0x3F8A0000, 315 0x3F880000, 316 0x3F860000, 317 0x3F840000, 318 0x3F820000, 319 0x3F800000, 320 321 0x00000000, 322 0x00000000, 323 0x3D3B9800, 324 0x3694C9D9, 325 0x3DBEB000, 326 0x3492D9F7, 327 0xBDDE4000, 328 0xB684815B, 329 0xBDB31C00, 330 0xB6EDF592, 331 0xBD875800, 332 0xB6627E8A, 333 0xBD35D000, 334 0xB6D3758F, 335 0xBCB73000, 336 0xB6CB42E1, 337 0x00000000, 338 0x00000000, 339 340 0x3FB88000, 341 0x3FB7C000, 342 0x3FB70000, 343 0x3FB64000, 344 0x3FB5C000, 345 0x3FBC8000, 346 0x3FBC4000, 347 0x3FBBC000, 348 0x3FBB8000, 349 0x3FBB0000, 350 0x3FBAC000, 351 0x3FBA4000, 352 0x3FBA0000, 353 0x3FB98000, 354 0x3FB94000, 355 0x3FB8C000, 356 0x3FB88000, 357 358 0x00000000, 359 0x00000000, 360 0x3BC08000, 361 0x3601B0EA, 362 0x3C40E000, 363 0x36A82CE1, 364 0x3C910000, 365 0x35F27427, 366 0x3CB17000, 367 0x36BBF0CC, 368 0xBCFD7000, 369 0xB6DA84F4, 370 0xBCEDC000, 371 0xB6E53CD7, 372 0xBCCE5000, 373 0xB6FA51D3, 374 0xBCBEA000, 375 0xB4074B50, 376 0xBC9F1000, 377 0xB52D128E, 378 0xBC8F4000, 379 0xB5655E44, 380 0xBC5F2000, 381 0xB59903D9, 382 0xBC3F6000, 383 0xB5A1551A, 384 0xBBFF8000, 385 0xB5979427, 386 0xBBBFC000, 387 0xB5839E88, 388 0xBB000000, 389 0xB4E32477, 390 0x00000000, 391 0x00000000, 392 393 0x3F800000, 394 0x00000000, 395 0x3F80B1EE, 396 0xB3B02666, 397 0x3F8164D2, 398 0xB1C43FD0, 399 0x3F8218B0, 400 0xB3BC8C04, 401 0x3F82CD86, 402 0x3398AC2C, 403 0x3F83835A, 404 0xB3B11049, 405 0x3F843A28, 406 0x33C3ACDE, 407 0x3F84F1F6, 408 0x332C6F38, 409 0x3F85AAC4, 410 0xB39833B8, 411 0x3F866492, 412 0xB3A46DC0, 413 0x3F871F62, 414 0xB352C2E6, 415 0x3F87DB36, 416 0xB3800967, 417 0x3F88980E, 418 0x338092DB, 419 0x3F8955EE, 420 0x30D86398, 421 0x3F8A14D6, 422 0xB38AB691, 423 0x3F8AD4C6, 424 0x330A58E5, 425 0x3F8B95C2, 426 0xB260ABA1, 427 0x3F8C57CA, 428 0xB2EE6E43, 429 0x3F8D1AE0, 430 0xB3A481A4, 431 0x3F8DDF04, 432 0x32808B9A, 433 0x3F8EA43A, 434 0xB3697465, 435 0x3F8F6A82, 436 0xB3E81937, 437 0x3F9031DC, 438 0x330628CD, 439 0x3F90FA4C, 440 0x338BEEE5, 441 0x3F91C3D4, 442 0xB38C54EE, 443 0x3F928E72, 444 0x337B2A64, 445 0x3F935A2C, 446 0xB3D0EC19, 447 0x3F942700, 448 0xB3F054E4, 449 0x3F94F4F0, 450 0xB32E0212, 451 0x3F95C3FE, 452 0x3386D6CC, 453 0x3F96942E, 454 0xB3C8DFE8, 455 0x3F97657E, 456 0xB3B60E85, 457 0x3F9837F0, 458 0x33231B71, 459 0x3F990B88, 460 0xB26CC9F4, 461 0x3F99E046, 462 0xB359BE90, 463 0x3F9AB62A, 464 0x33FC9500, 465 0x3F9B8D3A, 466 0xB30C5563, 467 0x3F9C6574, 468 0xB397D13D, 469 0x3F9D3EDA, 470 0xB331A601, 471 0x3F9E196E, 472 0x3244EA39, 473 0x3F9EF532, 474 0x33412342, 475 0x3F9FD228, 476 0x32959003, 477 0x3FA0B052, 478 0xB3F0468F, 479 0x3FA18FAE, 480 0x33CA8545, 481 0x3FA27044, 482 0xB3FCF3B7, 483 0x3FA35210, 484 0xB39717FD, 485 0x3FA43516, 486 0xB323EC33, 487 0x3FA51958, 488 0xB37282C2, 489 0x3FA5FED6, 490 0x33A9B151, 491 0x3FA6E594, 492 0x33CFEEE8, 493 0x3FA7CD94, 494 0xB3162D36, 495 0x3FA8B6D6, 496 0xB3E984CE, 497 0x3FA9A15A, 498 0x33B4EA7C, 499 0x3FAA8D26, 500 0x3325D921, 501 0x3FAB7A3A, 502 0xB314AD82, 503 0x3FAC6896, 504 0x33A4BE40, 505 0x3FAD583E, 506 0x33EA42A1, 507 0x3FAE4934, 508 0x3325946B, 509 0x3FAF3B78, 510 0x33AD690A, 511 0x3FB02F0E, 512 0xB2D1247F, 513 0x3FB123F6, 514 0xB37C5AA8, 515 0x3FB21A32, 516 0xB33333CE, 517 0x3FB311C4, 518 0x32154889, 519 0x3FB40AAE, 520 0x33A2654C, 521 0x3FB504F4, 522 0xB3CC0622, 523 0x3FB60094, 524 0xB32F4254, 525 0x3FB6FD92, 526 0xB266B974, 527 0x3FB7FBF0, 528 0xB2D5CD70, 529 0x3FB8FBB0, 530 0xB3B89D04, 531 0x3FB9FCD2, 532 0x330A5817, 533 0x3FBAFF5A, 534 0x33B2133E, 535 0x3FBC034A, 536 0x337DE5D4, 537 0x3FBD08A4, 538 0xB3414FE8, 539 0x3FBE0F68, 540 0x31986099, 541 0x3FBF179A, 542 0xB3130B1A, 543 0x3FC0213A, 544 0x33A1F0D1, 545 0x3FC12C4C, 546 0x33CA6671, 547 0x3FC238D2, 548 0x32C478F6, 549 0x3FC346CC, 550 0x33DA2497, 551 0x3FC4563E, 552 0x33CC5335, 553 0x3FC5672A, 554 0x320AA837, 555 0x3FC67990, 556 0x33B5AA24, 557 0x3FC78D74, 558 0x33C8ABBA, 559 0x3FC8A2D8, 560 0x33391FFC, 561 0x3FC9B9BE, 562 0xB37323A2, 563 0x3FCAD226, 564 0x333C8521, 565 0x3FCBEC14, 566 0x33FEF272, 567 0x3FCD078C, 568 0xB3735F84, 569 0x3FCE248C, 570 0x3228FC24, 571 0x3FCF4318, 572 0x33CF1919, 573 0x3FD06334, 574 0xB2944353, 575 0x3FD184E0, 576 0xB39DAE96, 577 0x3FD2A81E, 578 0xB35C1DAA, 579 0x3FD3CCF0, 580 0x3399859B, 581 0x3FD4F35A, 582 0x33ABCFEE, 583 0x3FD61B5E, 584 0xB0303219, 585 0x3FD744FC, 586 0x33CAD69D, 587 0x3FD8703A, 588 0xB3B3924D, 589 0x3FD99D16, 590 0xB2F61D41, 591 0x3FDACB94, 592 0x335E5594, 593 0x3FDBFBB8, 594 0xB3504A1C, 595 0x3FDD2D82, 596 0xB375EF9B, 597 0x3FDE60F4, 598 0x33825E0F, 599 0x3FDF9612, 600 0x33DEB8F0, 601 0x3FE0CCDE, 602 0x33EC2A95, 603 0x3FE2055A, 604 0x33FFFE84, 605 0x3FE33F8A, 606 0xB38D4176, 607 0x3FE47B6C, 608 0x33A0373E, 609 0x3FE5B906, 610 0x33E77C83, 611 0x3FE6F85A, 612 0x33AAEE20, 613 0x3FE8396A, 614 0x33207898, 615 0x3FE97C38, 616 0x3300D89F, 617 0x3FEAC0C6, 618 0x33E7DD24, 619 0x3FEC0718, 620 0x33B64C1D, 621 0x3FED4F30, 622 0x3276CCA1, 623 0x3FEE9910, 624 0xB34FE4BA, 625 0x3FEFE4BA, 626 0xB348464A, 627 0x3FF13230, 628 0x33A7AD09, 629 0x3FF28178, 630 0xB3C3A600, 631 0x3FF3D290, 632 0xB2871670, 633 0x3FF5257E, 634 0xB3EADB79, 635 0x3FF67A42, 636 0xB3938CC0, 637 0x3FF7D0E0, 638 0xB38CF52F, 639 0x3FF9295A, 640 0xB3094457, 641 0x3FFA83B2, 642 0x33DB722A, 643 0x3FFBDFEE, 644 0xB3931A0F, 645 0x3FFD3E0C, 646 0x31CF486C, 647 0x3FFE9E12, 648 0xB3A38470, 649 650 0x3A6A6369, 651 0xBEB1C35D, 652 0x3E246F69, 653 0xBDAB1EA1, 654 655 0x3F317218, 656 0x3E75FDF0, 657 0x3D635847, 658 659 0x7F000000, 660 0x00800000, 661 0x00000000, 662 0x3F800000, 663 0xBF800000, 664 665 0x47C00000, 666 667 0x3FB88000, 668 669 0x45800800, 670 671 0x5F800000, 672 0x1F800000, 673 0x00000000, 674 0x80000000, 675}; 676 677static int __spowr_la_TestIntFunc (float a) 678{ 679 int x = (*(int *) &a) & 0x7fffffff; 680 int e; 681 682 if ((x < 0x3f800000) || (x >= 0x7f800000)) 683 { 684 return 0; 685 } 686 if (x >= 0x4B800000) 687 { 688 return 2; 689 } 690 691 e = ((x & 0x7f800000) - 0x3f800000) >> 23; 692 x = x << e; 693 if ((x << 9) != 0) 694 { 695 return 0; 696 } 697 if ((x << 8) == 0x80000000) 698 { 699 return 1; 700 } 701 702 return 2; 703} 704 705__attribute__((always_inline)) 706inline int __internal_spowr_la_cout (float *a, float *b, float *r) 707{ 708 int nRet = 0; 709 710 float flVTmp1, flVTmp2, flVPHH, flVPHL; 711 float flAX, flSignRes, flX1, flRcp1, flL1Hi, flL1Lo, flX2, flRcp2, flL2Hi, flL2Lo, 712 flX3, flRcp3C, flL3Hi, flL3Lo, flK, flT, flD, flR1, flCQ, flRcpC, flX1Hi, flX1Lo, 713 flRcpCHi, flRcpCLo, flTmp1, flE, flT_CQHi, flCQLo, flR, flLogPart3, flLog2Poly, 714 flHH, flHL, flHLL, flYHi, flYLo, flTmp2, flTmp3, flPH, flPL, flPLL, flZ, 715 flExp2Poly, flExp2PolyT, flResLo, flResHi, flRes, flTwoPowN, flAY, flAi, flBi; 716 float flT_lo_1, flT_lo_2, flT_lo_3; 717 718 int i, iEXB, iEYB, iSignX, iSignY, iYHi, iYLo, iYIsFinite, iEY, iYIsInt, iXIsFinite, 719 iDenoExpAdd, iXHi, k, i1, i2, i3, iELogAX, iN, j, iERes, iSign, iIsSigZeroX, iIsSigZeroY, iYMantissa, iEX; 720 721 flAi = *a; 722 flBi = *b; 723 724 iEXB = ((((_iml_v2_sp_union_t *) & flAi)->hex[0] >> 23) & 0xFF); 725 iEYB = ((((_iml_v2_sp_union_t *) & flBi)->hex[0] >> 23) & 0xFF); 726 727 iEX = iEXB - 0x7F; 728 iEY = iEYB - 0x7F; 729 730 iSignX = (((_iml_v2_sp_union_t *) & flAi)->hex[0] >> 31); 731 iSignY = (((_iml_v2_sp_union_t *) & flBi)->hex[0] >> 31); 732 733 iIsSigZeroX = ((((_iml_v2_sp_union_t *) & flAi)->hex[0] & 0x007FFFFF) == 0); 734 iIsSigZeroY = ((((_iml_v2_sp_union_t *) & flBi)->hex[0] & 0x007FFFFF) == 0); 735 736 iYIsFinite = (((((_iml_v2_sp_union_t *) & flBi)->hex[0] >> 23) & 0xFF) != 0xFF); 737 738 { 739 int iXisZero = ((iEXB == 0) && (iIsSigZeroX)); 740 int iYisZero = ((iEYB == 0) && (iIsSigZeroY)); 741 int iXisNAN = (!((((((_iml_v2_sp_union_t *) & flAi)->hex[0] >> 23) & 0xFF) != 0xFF))) && (!(iIsSigZeroX)); 742 int iYisNAN = (!((((((_iml_v2_sp_union_t *) & flBi)->hex[0] >> 23) & 0xFF) != 0xFF))) && (!(iIsSigZeroY)); 743 int iXisINF = (!((((((_iml_v2_sp_union_t *) & flAi)->hex[0] >> 23) & 0xFF) != 0xFF))) && ((iIsSigZeroX)); 744 int iYisINF = (!((((((_iml_v2_sp_union_t *) & flBi)->hex[0] >> 23) & 0xFF) != 0xFF))) && ((iIsSigZeroY)); 745 746 if (iXisNAN) 747 { 748 flVTmp1 = ((__constant float *) __spowr_la_CoutTab)[370]; 749 flVTmp1 = (flVTmp1 / flVTmp1); 750 *r = flVTmp1; 751 return nRet; 752 753 } 754 755 if ((iXisINF) && (!iSignX) && (iYisZero)) 756 { 757 flVTmp1 = ((__constant float *) __spowr_la_CoutTab)[370]; 758 flVTmp1 = flVTmp1 / flVTmp1; 759 *r = flVTmp1; 760 return nRet; 761 762 } 763 764 if (iXisZero) 765 { 766 767 if (iYisZero) 768 { 769 flVTmp1 = ((__constant float *) __spowr_la_CoutTab)[370]; 770 flVTmp1 = flVTmp1 / flVTmp1; 771 *r = flVTmp1; 772 return nRet; 773 774 } 775 776 if (iYisINF && (!iSignY)) 777 { 778 flVTmp1 = ((__constant float *) __spowr_la_CoutTab)[370]; 779 *r = flVTmp1; 780 return nRet; 781 782 } 783 784 if (iYisINF && iSignY) 785 { 786 flVTmp1 = ((__constant float *) __spowr_la_CoutTab)[370]; 787 flVTmp1 = 1.0f / flVTmp1; 788 *r = flVTmp1; 789 return nRet; 790 791 } 792 793 if (((((((_iml_v2_sp_union_t *) & flBi)->hex[0] >> 23) & 0xFF) != 0xFF)) && iSignY) 794 { 795 flVTmp1 = ((__constant float *) __spowr_la_CoutTab)[370]; 796 flVTmp1 = 1.0f / flVTmp1; 797 *r = flVTmp1; 798 return nRet; 799 800 } 801 802 if (((((((_iml_v2_sp_union_t *) & flBi)->hex[0] >> 23) & 0xFF) != 0xFF)) && (!iSignY)) 803 { 804 *r = ((__constant float *) __spowr_la_CoutTab)[370]; 805 return nRet; 806 807 } 808 } 809 810 if (flAi == ((__constant float *) __spowr_la_CoutTab)[371]) 811 { 812 813 if (((((((_iml_v2_sp_union_t *) & flBi)->hex[0] >> 23) & 0xFF) != 0xFF))) 814 { 815 *r = ((__constant float *) __spowr_la_CoutTab)[371]; 816 return nRet; 817 818 } 819 820 if (iYisNAN || iYisINF) 821 { 822 flVTmp1 = ((__constant float *) __spowr_la_CoutTab)[370]; 823 flVTmp1 = flVTmp1 / flVTmp1; 824 *r = flVTmp1; 825 return nRet; 826 827 } 828 } 829 830 if (iSignX) 831 { 832 flVTmp1 = ((__constant float *) __spowr_la_CoutTab)[370]; 833 flVTmp1 = flVTmp1 / flVTmp1; 834 *r = flVTmp1; 835 nRet = 1; 836 return nRet; 837 838 } 839 } 840 841 iYMantissa = (((_iml_v2_sp_union_t *) & flBi)->hex[0] & 0x007FFFFF); 842 843 iYIsInt = __spowr_la_TestIntFunc (flBi); 844 845 if (!((iSignX == 0) && (iEXB == 0x7F) && iIsSigZeroX) && !((iEYB == 0) && iIsSigZeroY)) 846 { 847 ; 848 849 iXIsFinite = (((((_iml_v2_sp_union_t *) & flAi)->hex[0] >> 23) & 0xFF) != 0xFF); 850 851 if ((iXIsFinite || iIsSigZeroX) && (iYIsFinite || iIsSigZeroY)) 852 { 853 ; 854 855 if (flAi != ((__constant float *) __spowr_la_CoutTab)[370]) 856 { 857 ; 858 859 if (!((flAi == ((__constant float *) __spowr_la_CoutTab)[372]) && (iYIsInt || !iYIsFinite))) 860 { 861 ; 862 863 if (iXIsFinite && iYIsFinite) 864 { 865 ; 866 867 if ((flAi > ((__constant float *) __spowr_la_CoutTab)[370]) || iYIsInt) 868 { 869 ; 870 871 flSignRes = ((__constant float *) __spowr_la_CoutTab)[371 + (iSignX & iYIsInt)]; 872 iDenoExpAdd = 0; 873 flAX = flAi; 874 (((_iml_v2_sp_union_t *) & flAX)->hex[0] = 875 (((_iml_v2_sp_union_t *) & flAX)->hex[0] & 0x7FFFFFFF) | ((_iml_uint32_t) (0) << 31)); 876 877 if (iEXB == 0) 878 { 879 880 flAX = flAX * ((__constant float *) __spowr_la_CoutTab)[376]; 881 iDenoExpAdd = iDenoExpAdd - 64; 882 } 883 884 flX1 = flAX; 885 (((_iml_v2_sp_union_t *) & flX1)->hex[0] = 886 (((_iml_v2_sp_union_t *) & flX1)->hex[0] & 0x807FFFFF) | (((_iml_uint32_t) (0x7F) & 0xFF) << 23)); 887 888 iXHi = ((((_iml_v2_sp_union_t *) & flAX)->hex[0] >> 23) & 0xFF); 889 iXHi = iXHi << 23; 890 iXHi = iXHi | (((_iml_v2_sp_union_t *) & flAX)->hex[0] & 0x007FFFFF); 891 892 k = iXHi - 0x3F380000; 893 k = k >> 23; 894 k = k + iDenoExpAdd; 895 896 i1 = (((_iml_v2_sp_union_t *) & flX1)->hex[0] & 0x007FFFFF); 897 i1 = i1 & 0x780000; 898 i1 = i1 + 0x80000; 899 i1 = i1 >> 20; 900 901 flRcp1 = ((__constant float *) __spowr_la_CoutTab)[0 + i1]; 902 903 flL1Hi = ((__constant float *) __spowr_la_CoutTab)[9 + 2 * (i1) + 0]; 904 flL1Lo = ((__constant float *) __spowr_la_CoutTab)[9 + 2 * (i1) + 1]; 905 906 flX2 = flX1 * flRcp1; 907 908 i2 = (((_iml_v2_sp_union_t *) & flX2)->hex[0] & 0x007FFFFF); 909 i2 = i2 & 0x1E0000; 910 i2 = i2 + 0x20000; 911 i2 = i2 >> 18; 912 913 flRcp2 = ((__constant float *) __spowr_la_CoutTab)[27 + i2]; 914 915 flL2Hi = ((__constant float *) __spowr_la_CoutTab)[36 + 2 * (i2) + 0]; 916 flL2Lo = ((__constant float *) __spowr_la_CoutTab)[36 + 2 * (i2) + 1]; 917 918 flX3 = (flX2 * flRcp2); 919 920 i3 = (((_iml_v2_sp_union_t *) & flX3)->hex[0] & 0x007FFFFF); 921 i3 = i3 & 0x7C000; 922 i3 = i3 + 0x4000; 923 i3 = i3 >> 15; 924 925 flRcp3C = ((__constant float *) __spowr_la_CoutTab)[54 + i3]; 926 927 flL3Hi = ((__constant float *) __spowr_la_CoutTab)[71 + 2 * (i3) + 0]; 928 flL3Lo = ((__constant float *) __spowr_la_CoutTab)[71 + 2 * (i3) + 1]; 929 930 flK = (float) k; 931 flVTmp1 = ((flK) + (flL1Hi)); 932 flTmp1 = ((flK) - flVTmp1); 933 flVTmp2 = (flTmp1 + (flL1Hi)); 934 flT = flVTmp1; 935 flT_lo_1 = flVTmp2; 936 937 flVTmp1 = ((flT) + (flL2Hi)); 938 flTmp1 = ((flT) - flVTmp1); 939 flVTmp2 = (flTmp1 + (flL2Hi)); 940 flT = flVTmp1; 941 flT_lo_2 = flVTmp2; 942 943 flVTmp1 = ((flT) + (flL3Hi)); 944 flTmp1 = ((flT) - flVTmp1); 945 flVTmp2 = (flTmp1 + (flL3Hi)); 946 flT = flVTmp1; 947 flT_lo_3 = flVTmp2; 948 949 flD = (flT_lo_1 + flT_lo_2); 950 flD = (flD + flT_lo_3); 951 flD = (flD + flL1Lo); 952 flD = (flD + flL2Lo); 953 flD = (flD + flL3Lo); 954 955 flR1 = (flX3 * flRcp3C); 956 flCQ = (flR1 - ((__constant float *) __spowr_la_CoutTab)[374]); 957 958 flRcpC = (flRcp1 * flRcp2); 959 flRcpC = (flRcpC * flRcp3C); 960 961 flVTmp1 = ((flX1) * (((__constant float *) __spowr_la_CoutTab)[375])); 962 flVTmp2 = (flVTmp1 - (flX1)); 963 flVTmp1 = (flVTmp1 - flVTmp2); 964 flVTmp2 = ((flX1) - flVTmp1); 965 flX1Hi = flVTmp1; 966 flX1Lo = flVTmp2; 967 968 flVTmp1 = ((flRcpC) * (((__constant float *) __spowr_la_CoutTab)[375])); 969 flVTmp2 = (flVTmp1 - (flRcpC)); 970 flVTmp1 = (flVTmp1 - flVTmp2); 971 flVTmp2 = ((flRcpC) - flVTmp1); 972 flRcpCHi = flVTmp1; 973 flRcpCLo = flVTmp2; 974 975 flTmp1 = (flX1Hi * flRcpCHi); 976 flE = (flTmp1 - flR1); 977 flTmp1 = (flX1Lo * flRcpCHi); 978 flE = (flE + flTmp1); 979 flTmp1 = (flX1Hi * flRcpCLo); 980 flE = (flE + flTmp1); 981 flTmp1 = (flX1Lo * flRcpCLo); 982 flE = (flE + flTmp1); 983 984 flVTmp1 = ((flT) + (flCQ)); 985 flTmp1 = ((flT) - flVTmp1); 986 flVTmp2 = (flTmp1 + (flCQ)); 987 flT_CQHi = flVTmp1; 988 flCQLo = flVTmp2; 989 990 iELogAX = ((((_iml_v2_sp_union_t *) & flT_CQHi)->hex[0] >> 23) & 0xFF); 991 992 if (iELogAX + iEYB < 11 + 2 * 0x7F) 993 { 994 ; 995 996 if (iELogAX + iEYB > -62 + 2 * 0x7F) 997 { 998 ; 999 1000 flR = (flCQ + flE); 1001 1002 flLog2Poly = 1003 ((((((__constant float *) __spowr_la_CoutTab)[364]) * flR + 1004 ((__constant float *) __spowr_la_CoutTab)[363]) * flR + 1005 ((__constant float *) __spowr_la_CoutTab)[362]) * flR + 1006 ((__constant float *) __spowr_la_CoutTab)[361]) * flR; 1007 1008 flLogPart3 = (flCQLo + flE); 1009 flLogPart3 = (flD + flLogPart3); 1010 1011 flVTmp1 = ((flT_CQHi) + (flLog2Poly)); 1012 flTmp1 = ((flT_CQHi) - flVTmp1); 1013 flVTmp2 = (flTmp1 + (flLog2Poly)); 1014 flHH = flVTmp1; 1015 flHL = flVTmp2; 1016 1017 flVTmp1 = ((flHH) + (flLogPart3)); 1018 flTmp1 = ((flHH) - flVTmp1); 1019 flVTmp2 = (flTmp1 + (flLogPart3)); 1020 flHH = flVTmp1; 1021 flHLL = flVTmp2; 1022 1023 flHLL = (flHLL + flHL); 1024 1025 flVTmp1 = ((flHH) * (((__constant float *) __spowr_la_CoutTab)[375])); 1026 flVTmp2 = (flVTmp1 - (flHH)); 1027 flVTmp1 = (flVTmp1 - flVTmp2); 1028 flVTmp2 = ((flHH) - flVTmp1); 1029 flHH = flVTmp1; 1030 flHL = flVTmp2; 1031 1032 flVTmp1 = ((flBi) * (((__constant float *) __spowr_la_CoutTab)[375])); 1033 flVTmp2 = (flVTmp1 - (flBi)); 1034 flVTmp1 = (flVTmp1 - flVTmp2); 1035 flVTmp2 = ((flBi) - flVTmp1); 1036 flYHi = flVTmp1; 1037 flYLo = flVTmp2; 1038 1039 flTmp1 = ((flYHi) * (flHH)); 1040 flTmp2 = ((flYLo) * (flHL)); 1041 flTmp2 = (flTmp2 + (flYHi) * (flHL)); 1042 flTmp3 = (flTmp2 + (flYLo) * (flHH)); 1043 flPH = flTmp1; 1044 flPL = flTmp3; 1045 1046 flPLL = (flBi * flHLL); 1047 1048 flVTmp1 = (flPH + ((__constant float *) __spowr_la_CoutTab)[373]); 1049 flVPHH = (flVTmp1 - ((__constant float *) __spowr_la_CoutTab)[373]); 1050 iN = (((_iml_v2_sp_union_t *) & flVTmp1)->hex[0] & 0x007FFFFF); 1051 j = iN & 0x7F; 1052 1053 iN = iN << 10; 1054 iN = iN >> (7 + 10); 1055 flVPHL = (flPH - flVPHH); 1056 1057 flZ = (flPLL + flPL); 1058 flZ = (flZ + flVPHL); 1059 1060 flExp2Poly = 1061 (((((__constant float *) __spowr_la_CoutTab)[367]) * flZ + 1062 ((__constant float *) __spowr_la_CoutTab)[366]) * flZ + 1063 ((__constant float *) __spowr_la_CoutTab)[365]) * flZ; 1064 1065 flExp2PolyT = (flExp2Poly * ((__constant float *) __spowr_la_CoutTab)[105 + 2 * (j) + 0]); 1066 flResLo = (flExp2PolyT + ((__constant float *) __spowr_la_CoutTab)[105 + 2 * (j) + 1]); 1067 flResHi = ((__constant float *) __spowr_la_CoutTab)[105 + 2 * (j) + 0]; 1068 1069 flRes = (flResHi + flResLo); 1070 iERes = ((((_iml_v2_sp_union_t *) & flRes)->hex[0] >> 23) & 0xFF); 1071 iERes = (iERes - 0x7F); 1072 iERes = (iERes + iN); 1073 1074 if (iERes < 128) 1075 { 1076 ; 1077 if (iERes >= -126) 1078 { 1079 ; 1080 1081 (((_iml_v2_sp_union_t *) & flRes)->hex[0] = 1082 (((_iml_v2_sp_union_t *) & flRes)->hex[0] & 0x807FFFFF) | (((_iml_uint32_t) (iERes + 0x7F) & 0xFF) << 23)); 1083 1084 flRes = (flRes * flSignRes); 1085 *r = flRes; 1086 } 1087 else 1088 { 1089 1090 if (iERes >= -126 - 10) 1091 { 1092 ; 1093 1094 flVTmp1 = ((flResHi) + (flResLo)); 1095 flTmp1 = ((flResHi) - flVTmp1); 1096 flVTmp2 = (flTmp1 + (flResLo)); 1097 flResHi = flVTmp1; 1098 flResLo = flVTmp2; 1099 1100 flVTmp1 = ((flResHi) * (((__constant float *) __spowr_la_CoutTab)[375])); 1101 flVTmp2 = (flVTmp1 - (flResHi)); 1102 flVTmp1 = (flVTmp1 - flVTmp2); 1103 flVTmp2 = ((flResHi) - flVTmp1); 1104 flResHi = flVTmp1; 1105 flTmp2 = flVTmp2; 1106 1107 flResLo = (flResLo + flTmp2); 1108 1109 flSignRes *= ((__constant float *) __spowr_la_CoutTab)[377]; 1110 iN = (iN + 64); 1111 1112 flTwoPowN = ((__constant float *) __spowr_la_CoutTab)[371]; 1113 (((_iml_v2_sp_union_t *) & flTwoPowN)->hex[0] = 1114 (((_iml_v2_sp_union_t *) & flTwoPowN)-> 1115 hex[0] & 0x807FFFFF) | (((_iml_uint32_t) (iN + 0x7F) & 0xFF) << 23)); 1116 1117 flResHi = (flResHi * flTwoPowN); 1118 flResHi = (flResHi * flSignRes); 1119 1120 flResLo = (flResLo * flTwoPowN); 1121 flVTmp1 = (flResLo * flSignRes); 1122 1123 flRes = (flResHi + flVTmp1); 1124 1125 flVTmp1 = ((__constant float *) __spowr_la_CoutTab)[369]; 1126 flVTmp1 = (flVTmp1 * flVTmp1); 1127 flRes = (flRes + flVTmp1); 1128 1129 *r = flRes; 1130 } 1131 else 1132 { 1133 ; 1134 1135 if (iERes >= -149 - 10) 1136 { 1137 1138 flSignRes *= ((__constant float *) __spowr_la_CoutTab)[377]; 1139 iN = iN + 64; 1140 1141 flTwoPowN = ((__constant float *) __spowr_la_CoutTab)[371]; 1142 (((_iml_v2_sp_union_t *) & flTwoPowN)->hex[0] = 1143 (((_iml_v2_sp_union_t *) & flTwoPowN)-> 1144 hex[0] & 0x807FFFFF) | (((_iml_uint32_t) (iN + 0x7F) & 0xFF) << 23)); 1145 1146 flRes = (flRes * flTwoPowN); 1147 flRes = (flRes * flSignRes); 1148 1149 flVTmp1 = ((__constant float *) __spowr_la_CoutTab)[369]; 1150 flVTmp1 *= flVTmp1; 1151 flRes = (flRes + flVTmp1); 1152 1153 *r = flRes; 1154 nRet = 4; 1155 } 1156 1157 else 1158 { 1159 ; 1160 1161 flVTmp1 = ((__constant float *) __spowr_la_CoutTab)[369]; 1162 flVTmp1 *= flVTmp1; 1163 flRes = (flVTmp1 * flSignRes); 1164 *r = flRes; 1165 nRet = 4; 1166 } 1167 } 1168 } 1169 } 1170 1171 else 1172 { 1173 ; 1174 1175 flVTmp1 = ((__constant float *) __spowr_la_CoutTab)[368]; 1176 flVTmp1 = (flVTmp1 * flVTmp1); 1177 flRes = (flVTmp1 * flSignRes); 1178 *r = flRes; 1179 nRet = 3; 1180 } 1181 } 1182 else 1183 { 1184 ; 1185 1186 flVTmp1 = ((__constant float *) __spowr_la_CoutTab)[371]; 1187 flVTmp1 = (flVTmp1 + ((__constant float *) __spowr_la_CoutTab)[369]); 1188 *r = (flVTmp1 * flSignRes); 1189 } 1190 } 1191 else 1192 { 1193 ; 1194 1195 iSign = iSignY ^ (((_iml_v2_sp_union_t *) & flT_CQHi)->hex[0] >> 31); 1196 1197 flTmp1 = ((__constant float *) __spowr_la_CoutTab)[368 + (iSign)]; 1198 1199 flTmp1 = (flTmp1 * flTmp1); 1200 1201 flTmp1 = (flTmp1 * flSignRes); 1202 *r = flTmp1; 1203 nRet = (iSign ? 4 : 3); 1204 } 1205 } 1206 else 1207 { 1208 ; 1209 1210 flVTmp1 = ((__constant float *) __spowr_la_CoutTab)[370]; 1211 flVTmp1 = (flVTmp1 / flVTmp1); 1212 *r = flVTmp1; 1213 nRet = 1; 1214 } 1215 } 1216 else 1217 { 1218 ; 1219 1220 if (iEXB < 0x7F) 1221 { 1222 ; 1223 1224 if (iSignY) 1225 { 1226 ; 1227 1228 *r = (flBi * flBi); 1229 } 1230 else 1231 { 1232 ; 1233 1234 *r = ((__constant float *) __spowr_la_CoutTab)[370]; 1235 } 1236 } 1237 else 1238 { 1239 ; 1240 1241 if (iSignY) 1242 { 1243 ; 1244 1245 flRes = ((__constant float *) __spowr_la_CoutTab)[378 + (iYIsInt & iSignX)]; 1246 *r = flRes; 1247 } 1248 else 1249 { 1250 ; 1251 1252 flTmp1 = (flAi * flAi); 1253 flTmp1 = (flTmp1 * flBi); 1254 *r = flTmp1 * ((__constant float *) __spowr_la_CoutTab)[371 + (iYIsInt & iSignX)]; 1255 } 1256 } 1257 } 1258 } 1259 else 1260 { 1261 ; 1262 1263 *r = ((__constant float *) __spowr_la_CoutTab)[371 + (iYIsInt & 1)]; 1264 } 1265 } 1266 else 1267 { 1268 ; 1269 1270 flTmp1 = flAi * flAi; 1271 1272 if (iSignY) 1273 { 1274 ; 1275 1276 *r = ((__constant float *) __spowr_la_CoutTab)[371 + (iYIsInt & iSignX)] / flTmp1; 1277 nRet = 1; 1278 1279 } 1280 else 1281 { 1282 ; 1283 1284 *r = ((__constant float *) __spowr_la_CoutTab)[371 + (iYIsInt & iSignX)] * flTmp1; 1285 } 1286 } 1287 } 1288 else 1289 { 1290 ; 1291 1292 *r = *a + *b; 1293 } 1294 } 1295 1296 else 1297 { 1298 ; 1299 1300 flVTmp1 = flAi + flBi; 1301 iSign = (((_iml_v2_sp_union_t *) & flVTmp1)->hex[0] >> 31); 1302 flVTmp2 = ((__constant float *) __spowr_la_CoutTab)[371]; 1303 (((_iml_v2_sp_union_t *) & flVTmp2)->hex[0] = (((_iml_v2_sp_union_t *) & flVTmp2)->hex[0] & 0x7FFFFFFF) | ((_iml_uint32_t) (iSign) << 31)); 1304 1305 *r = flVTmp2 * flVTmp2; 1306 } 1307 1308 return nRet; 1309} 1310 1311float __ocl_svml_powrf (float a, float b) 1312{ 1313 1314 float va1; 1315 float va2; 1316 float vr1; 1317 unsigned int vm; 1318 1319 float r; 1320 1321 va1 = a; 1322 va2 = b; 1323 1324 { 1325 1326 float sHiMask; 1327 float sRSValue; 1328 float sZ[2]; 1329 float sZ0[2]; 1330 float sL[2]; 1331 float sY[2]; 1332 unsigned int _NMINNORM; 1333 unsigned int _NMAXVAL; 1334 unsigned int _INF; 1335 unsigned int iSpecX; 1336 unsigned int iSpecY; 1337 unsigned int LFR_iY; 1338 unsigned int iRangeMask; 1339 1340 unsigned int LFR_iX; 1341 unsigned int LFR_iXBadSub; 1342 unsigned int LFR_iXBad; 1343 float LFR_sXMant; 1344 float LFR_sM; 1345 unsigned int LFR_iInd; 1346 float LFR_sLnRcprYHi; 1347 float LFR_sLnRcprYLo; 1348 float LFR_sRcprY; 1349 float LFR_sYHi; 1350 float LFR_sYLo; 1351 float LFR_sRHi; 1352 float LFR_sTRHi; 1353 float LFR_sRLo; 1354 float LFR_sR; 1355 float LFR_sP; 1356 float LFR_sR2; 1357 unsigned int LFR_iN; 1358 float LFR_sN; 1359 float LFR_sNLog2Hi; 1360 float LFR_sNLog2Lo; 1361 float LFR_sWLo; 1362 float LFR_sResHi; 1363 float LFR_sResLo; 1364 unsigned int LFR_I_CHK_WORK_SUB; 1365 unsigned int LFR_I_CHK_WORK_CMP; 1366 float S_MANT_MASK; 1367 float S_ONE; 1368 unsigned int LFR_I_INDEX_MASK; 1369 unsigned int LFR_I_INDEX_ADD; 1370 float S_HI10BITS_MASK; 1371 float LFR_S_P4; 1372 float LFR_S_P3; 1373 float LFR_S_P2; 1374 unsigned int I_BIAS; 1375 float S_LOG2_HI; 1376 float S_LOG2_LO; 1377 float sR2; 1378 float sRHL[2]; 1379 1380 unsigned int iHiDelta; 1381 unsigned int iLoRange; 1382 unsigned int iBrkValue; 1383 unsigned int iOffExpoMask; 1384 float sOne; 1385 float sLn2Hi; 1386 float sLn2Lo; 1387 float sPoly[7]; 1388 unsigned int iX; 1389 unsigned int iXTest; 1390 float sN; 1391 unsigned int iN; 1392 float sR; 1393 unsigned int iR; 1394 float sP; 1395 float sM; 1396 float s2N; 1397 unsigned int iAbsZ; 1398 unsigned int iRes; 1399 unsigned int iP; 1400 unsigned int iM; 1401 float sInvLn2; 1402 float sShifter; 1403 float sLn2hi; 1404 float sLn2lo; 1405 unsigned int iBias; 1406 unsigned int iAbsMask; 1407 unsigned int iDomainRange; 1408 float sPC[6]; 1409 float stmp; 1410 1411 LFR_iX = as_uint (va1); 1412 LFR_iY = as_uint (va2); 1413 1414 _NMINNORM = (__internal_spowr_la_data.NMINNORM); 1415 _NMAXVAL = (__internal_spowr_la_data.NMAXVAL); 1416 _INF = (__internal_spowr_la_data.INF); 1417 iAbsMask = (__internal_spowr_la_data._iAbsMask); 1418 1419 iSpecX = (LFR_iX - _NMINNORM); 1420 iSpecX = ((unsigned int) (-(signed int) ((signed int) iSpecX >= (signed int) _NMAXVAL))); 1421 iSpecY = (LFR_iY & iAbsMask); 1422 iSpecY = ((unsigned int) (-(signed int) ((signed int) iSpecY >= (signed int) _INF))); 1423 iRangeMask = (iSpecX | iSpecY); 1424 1425 S_MANT_MASK = as_float (__internal_spowr_la_data.S_MANT_MASK); 1426 1427 LFR_sXMant = as_float ((as_uint (va1) & as_uint (S_MANT_MASK))); 1428 S_ONE = as_float (__internal_spowr_la_data.S_ONE); 1429 LFR_sM = as_float ((as_uint (LFR_sXMant) | as_uint (S_ONE))); 1430 LFR_iN = ((unsigned int) (LFR_iX) >> (23)); 1431 I_BIAS = (__internal_spowr_la_data.I_BIAS); 1432 LFR_iN = (LFR_iN - I_BIAS); 1433 LFR_sN = ((float) ((int) (LFR_iN))); 1434 1435 LFR_I_INDEX_MASK = (__internal_spowr_la_data.LFR_I_INDEX_MASK); 1436 LFR_iInd = (LFR_iX & LFR_I_INDEX_MASK); 1437 1438 LFR_I_INDEX_ADD = (__internal_spowr_la_data.LFR_I_INDEX_ADD); 1439 1440 LFR_iInd = (LFR_iInd + LFR_I_INDEX_ADD); 1441 1442 LFR_iInd = ((unsigned int) (LFR_iInd) >> (17)); 1443 LFR_sLnRcprYHi = as_float (((__constant unsigned int *) (__internal_spowr_la_data.LFR_TBL))[(((0 + LFR_iInd) * (3 * 4)) >> (2)) + 0]); 1444 LFR_sLnRcprYLo = as_float (((__constant unsigned int *) (__internal_spowr_la_data.LFR_TBL))[(((0 + LFR_iInd) * (3 * 4)) >> (2)) + 1]); 1445 LFR_sRcprY = as_float (((__constant unsigned int *) (__internal_spowr_la_data.LFR_TBL))[(((0 + LFR_iInd) * (3 * 4)) >> (2)) + 2]); 1446 1447 S_HI10BITS_MASK = as_float (__internal_spowr_la_data.S_HI10BITS_MASK); 1448 1449 LFR_sYHi = as_float ((as_uint (LFR_sM) & as_uint (S_HI10BITS_MASK))); 1450 LFR_sYLo = (LFR_sM - LFR_sYHi); 1451 1452 LFR_sRHi = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (LFR_sYHi, LFR_sRcprY, -(S_ONE)); 1453 LFR_sRLo = (LFR_sYLo * LFR_sRcprY); 1454 LFR_sR = (LFR_sRHi + LFR_sRLo); 1455 1456 LFR_S_P4 = as_float (__internal_spowr_la_data.LFR_S_P4); 1457 LFR_S_P3 = as_float (__internal_spowr_la_data.LFR_S_P3); 1458 LFR_sP = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (LFR_S_P4, LFR_sR, LFR_S_P3); 1459 1460 LFR_S_P2 = as_float (__internal_spowr_la_data.LFR_S_P2); 1461 LFR_sP = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (LFR_sP, LFR_sR, LFR_S_P2); 1462 1463 LFR_sR2 = (LFR_sR * LFR_sR); 1464 LFR_sP = (LFR_sP * LFR_sR2); 1465 1466 S_LOG2_HI = as_float (__internal_spowr_la_data.S_LOG2_HI); 1467 1468 LFR_sNLog2Hi = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (LFR_sN, S_LOG2_HI, LFR_sLnRcprYHi); 1469 1470 S_LOG2_LO = as_float (__internal_spowr_la_data.S_LOG2_LO); 1471 LFR_sNLog2Lo = (LFR_sN * S_LOG2_LO); 1472 1473 LFR_sResHi = (LFR_sNLog2Hi + LFR_sRHi); 1474 1475 stmp = (LFR_sResHi - LFR_sNLog2Hi); 1476 LFR_sRHi = (LFR_sRHi - stmp); 1477 1478 LFR_sWLo = (LFR_sNLog2Lo + LFR_sLnRcprYLo); 1479 1480 LFR_sResLo = (LFR_sP + LFR_sWLo); 1481 1482 LFR_sResLo = (LFR_sResLo + LFR_sRHi); 1483 1484 sL[0] = (LFR_sResHi + LFR_sRLo); 1485 stmp = (sL[0] - LFR_sResHi); 1486 LFR_sRLo = (LFR_sRLo - stmp); 1487 1488 sL[1] = (LFR_sResLo + LFR_sRLo); 1489 1490 sRSValue = as_float (__internal_spowr_la_data.sRSValue); 1491 sHiMask = as_float (__internal_spowr_la_data.sHiMask); 1492 1493 { 1494 float V1; 1495 float V2;; 1496 V1 = (sL[0] + sL[1]); 1497 V2 = (V1 * sRSValue); 1498 V1 = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (V1, 1.0f, V2); 1499 V2 = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (V1, 1.0f, -(V2)); 1500 V1 = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (sL[0], 1.0f, -(V2)); 1501 V1 = (sL[1] + V1);; 1502 sL[0] = V2; 1503 sL[1] = V1; 1504 }; 1505 1506 { 1507 float V1; 1508 float V2;; 1509 V1 = (va2 * sRSValue); 1510 V2 = (V1 - va2); 1511 V1 = (V1 - V2); 1512 V2 = (va2 - V1);; 1513 sY[0] = V1; 1514 sY[1] = V2; 1515 }; 1516 { 1517 float V1; 1518 float V2;; 1519 V1 = (sL[0] * sY[0]); 1520 V2 = (sL[1] * sY[1]); 1521 V2 = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (sL[0], sY[1], V2); 1522 V2 = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (sL[1], sY[0], V2);; 1523 sZ0[0] = V1; 1524 sZ0[1] = V2; 1525 }; 1526 1527 sZ[0] = (sZ0[0] + sZ0[1]); 1528 sZ0[0] = (sZ0[0] - sZ[0]); 1529 sZ[1] = (sZ0[1] + sZ0[0]); 1530 1531 sInvLn2 = as_float (__internal_spowr_la_data._sInvLn2); 1532 sShifter = as_float (__internal_spowr_la_data._sShifter); 1533 1534 sM = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (sZ[0], sInvLn2, sShifter); 1535 sN = (sM - sShifter); 1536 1537 iAbsZ = as_uint (sZ[0]); 1538 1539 iAbsZ = (iAbsZ & iAbsMask); 1540 iDomainRange = (__internal_spowr_la_data._iDomainRange); 1541 iAbsZ = ((unsigned int) (-(signed int) ((signed int) iAbsZ > (signed int) iDomainRange))); 1542 iRangeMask = (iRangeMask | iAbsZ); 1543 vm = 0; 1544 vm = iRangeMask; 1545 1546 iM = as_uint (sM); 1547 1548 iM = ((unsigned int) (iM) << (23)); 1549 1550 sLn2hi = as_float (__internal_spowr_la_data._sLn2hi); 1551 sR = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (-(sN), sLn2hi, sZ[0]); 1552 sLn2lo = as_float (__internal_spowr_la_data._sLn2lo); 1553 sR = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (-(sN), sLn2lo, sR); 1554 sR = (sR + sZ[1]); 1555 sPC[4] = as_float (__internal_spowr_la_data._sPC4); 1556 sPC[5] = as_float (__internal_spowr_la_data._sPC5); 1557 1558 sP = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (sPC[5], sR, sPC[4]); 1559 sPC[3] = as_float (__internal_spowr_la_data._sPC3); 1560 sP = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (sP, sR, sPC[3]); 1561 sPC[2] = as_float (__internal_spowr_la_data._sPC2); 1562 sP = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (sP, sR, sPC[2]); 1563 sPC[1] = as_float (__internal_spowr_la_data._sPC1); 1564 sP = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (sP, sR, sPC[1]); 1565 sPC[0] = as_float (__internal_spowr_la_data._sPC0); 1566 sP = SPIRV_OCL_BUILTIN(fma, _f32_f32_f32, ) (sP, sR, sPC[0]); 1567 iP = as_uint (sP); 1568 1569 iRes = (iM + iP); 1570 vr1 = as_float (iRes); 1571 } 1572 1573 if ((vm) != 0) 1574 { 1575 float _vapi_arg1[1]; 1576 float _vapi_arg2[1]; 1577 float _vapi_res1[1]; 1578 ((float *) _vapi_arg1)[0] = va1; 1579 ((float *) _vapi_arg2)[0] = va2; 1580 ((float *) _vapi_res1)[0] = vr1; 1581 __internal_spowr_la_cout (_vapi_arg1, _vapi_arg2, _vapi_res1); 1582 vr1 = ((float *) _vapi_res1)[0]; 1583 }; 1584 r = vr1;; 1585 1586 return r; 1587 1588} 1589