1 /* Producing binary form of HSA BRIG from our internal representation. 2 Copyright (C) 2013-2018 Free Software Foundation, Inc. 3 Contributed by Martin Jambor <mjambor@suse.cz> and 4 Martin Liska <mliska@suse.cz>. 5 6 This file is part of GCC. 7 8 GCC is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3, or (at your option) 11 any later version. 12 13 GCC is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with GCC; see the file COPYING3. If not see 20 <http://www.gnu.org/licenses/>. */ 21 22 #include "config.h" 23 #include "system.h" 24 #include "coretypes.h" 25 #include "tm.h" 26 #include "target.h" 27 #include "memmodel.h" 28 #include "tm_p.h" 29 #include "is-a.h" 30 #include "vec.h" 31 #include "hash-table.h" 32 #include "hash-map.h" 33 #include "tree.h" 34 #include "tree-iterator.h" 35 #include "stor-layout.h" 36 #include "output.h" 37 #include "basic-block.h" 38 #include "cfg.h" 39 #include "function.h" 40 #include "fold-const.h" 41 #include "stringpool.h" 42 #include "gimple-pretty-print.h" 43 #include "diagnostic-core.h" 44 #include "cgraph.h" 45 #include "dumpfile.h" 46 #include "print-tree.h" 47 #include "symbol-summary.h" 48 #include "hsa-common.h" 49 #include "gomp-constants.h" 50 51 /* Convert VAL to little endian form, if necessary. 
*/ 52 53 static uint16_t 54 lendian16 (uint16_t val) 55 { 56 #if GCC_VERSION >= 4008 57 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 58 return val; 59 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 60 return __builtin_bswap16 (val); 61 #else /* __ORDER_PDP_ENDIAN__ */ 62 return val; 63 #endif 64 #else 65 // provide a safe slower default, with shifts and masking 66 #ifndef WORDS_BIGENDIAN 67 return val; 68 #else 69 return (val >> 8) | (val << 8); 70 #endif 71 #endif 72 } 73 74 /* Convert VAL to little endian form, if necessary. */ 75 76 static uint32_t 77 lendian32 (uint32_t val) 78 { 79 #if GCC_VERSION >= 4006 80 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 81 return val; 82 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 83 return __builtin_bswap32 (val); 84 #else /* __ORDER_PDP_ENDIAN__ */ 85 return (val >> 16) | (val << 16); 86 #endif 87 #else 88 // provide a safe slower default, with shifts and masking 89 #ifndef WORDS_BIGENDIAN 90 return val; 91 #else 92 val = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8); 93 return (val >> 16) | (val << 16); 94 #endif 95 #endif 96 } 97 98 /* Convert VAL to little endian form, if necessary. 
*/ 99 100 static uint64_t 101 lendian64 (uint64_t val) 102 { 103 #if GCC_VERSION >= 4006 104 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 105 return val; 106 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 107 return __builtin_bswap64 (val); 108 #else /* __ORDER_PDP_ENDIAN__ */ 109 return (((val & 0xffffll) << 48) 110 | ((val & 0xffff0000ll) << 16) 111 | ((val & 0xffff00000000ll) >> 16) 112 | ((val & 0xffff000000000000ll) >> 48)); 113 #endif 114 #else 115 // provide a safe slower default, with shifts and masking 116 #ifndef WORDS_BIGENDIAN 117 return val; 118 #else 119 val = (((val & 0xff00ff00ff00ff00ll) >> 8) 120 | ((val & 0x00ff00ff00ff00ffll) << 8)); 121 val = ((( val & 0xffff0000ffff0000ll) >> 16) 122 | (( val & 0x0000ffff0000ffffll) << 16)); 123 return (val >> 32) | (val << 32); 124 #endif 125 #endif 126 } 127 128 #define BRIG_ELF_SECTION_NAME ".brig" 129 #define BRIG_LABEL_STRING "hsa_brig" 130 #define BRIG_SECTION_DATA_NAME "hsa_data" 131 #define BRIG_SECTION_CODE_NAME "hsa_code" 132 #define BRIG_SECTION_OPERAND_NAME "hsa_operand" 133 134 #define BRIG_CHUNK_MAX_SIZE (64 * 1024) 135 136 /* Required HSA section alignment. */ 137 138 #define HSA_SECTION_ALIGNMENT 16 139 140 /* Chunks of BRIG binary data. */ 141 142 struct hsa_brig_data_chunk 143 { 144 /* Size of the data already stored into a chunk. */ 145 unsigned size; 146 147 /* Pointer to the data. */ 148 char *data; 149 }; 150 151 /* Structure representing a BRIG section, holding and writing its data. */ 152 153 class hsa_brig_section 154 { 155 public: 156 /* Section name that will be output to the BRIG. */ 157 const char *section_name; 158 /* Size in bytes of all data stored in the section. */ 159 unsigned total_size; 160 /* The size of the header of the section including padding. */ 161 unsigned header_byte_count; 162 /* The size of the header of the section without any padding. 
*/ 163 unsigned header_byte_delta; 164 165 void init (const char *name); 166 void release (); 167 void output (); 168 unsigned add (const void *data, unsigned len, void **output = NULL); 169 void round_size_up (int factor); 170 void *get_ptr_by_offset (unsigned int offset); 171 172 private: 173 void allocate_new_chunk (); 174 175 /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */ 176 vec <struct hsa_brig_data_chunk> chunks; 177 178 /* More convenient access to the last chunk from the vector above. */ 179 struct hsa_brig_data_chunk *cur_chunk; 180 }; 181 182 static struct hsa_brig_section brig_data, brig_code, brig_operand; 183 static uint32_t brig_insn_count; 184 static bool brig_initialized = false; 185 186 /* Mapping between emitted HSA functions and their offset in code segment. */ 187 static hash_map<tree, BrigCodeOffset32_t> *function_offsets; 188 189 /* Hash map of emitted function declarations. */ 190 static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations; 191 192 /* Hash table of emitted internal function declaration offsets. */ 193 hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls; 194 195 /* List of sbr instructions. */ 196 static vec <hsa_insn_sbr *> *switch_instructions; 197 198 struct function_linkage_pair 199 { 200 function_linkage_pair (tree decl, unsigned int off) 201 : function_decl (decl), offset (off) {} 202 203 /* Declaration of called function. */ 204 tree function_decl; 205 206 /* Offset in operand section. */ 207 unsigned int offset; 208 }; 209 210 /* Vector of function calls where we need to resolve function offsets. */ 211 static auto_vec <function_linkage_pair> function_call_linkage; 212 213 /* Add a new chunk, allocate data for it and initialize it. 
*/ 214 215 void 216 hsa_brig_section::allocate_new_chunk () 217 { 218 struct hsa_brig_data_chunk new_chunk; 219 220 new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE); 221 new_chunk.size = 0; 222 cur_chunk = chunks.safe_push (new_chunk); 223 } 224 225 /* Initialize the brig section. */ 226 227 void 228 hsa_brig_section::init (const char *name) 229 { 230 section_name = name; 231 /* While the following computation is basically wrong, because the intent 232 certainly wasn't to have the first character of name and padding, which 233 are a part of sizeof (BrigSectionHeader), included in the first addend, 234 this is what the disassembler expects. */ 235 total_size = sizeof (BrigSectionHeader) + strlen (section_name); 236 chunks.create (1); 237 allocate_new_chunk (); 238 header_byte_delta = total_size; 239 round_size_up (4); 240 header_byte_count = total_size; 241 } 242 243 /* Free all data in the section. */ 244 245 void 246 hsa_brig_section::release () 247 { 248 for (unsigned i = 0; i < chunks.length (); i++) 249 free (chunks[i].data); 250 chunks.release (); 251 cur_chunk = NULL; 252 } 253 254 /* Write the section to the output file to a section with the name given at 255 initialization. Switches the output section and does not restore it. */ 256 257 void 258 hsa_brig_section::output () 259 { 260 struct BrigSectionHeader section_header; 261 char padding[8]; 262 263 section_header.byteCount = lendian64 (total_size); 264 section_header.headerByteCount = lendian32 (header_byte_count); 265 section_header.nameLength = lendian32 (strlen (section_name)); 266 assemble_string ((const char *) §ion_header, 16); 267 assemble_string (section_name, (section_header.nameLength)); 268 memset (&padding, 0, sizeof (padding)); 269 /* This is also a consequence of the wrong header size computation described 270 in a comment in hsa_brig_section::init. 
*/ 271 assemble_string (padding, 8); 272 for (unsigned i = 0; i < chunks.length (); i++) 273 assemble_string (chunks[i].data, chunks[i].size); 274 } 275 276 /* Add to the stream LEN bytes of opaque binary DATA. Return the offset at 277 which it was stored. If OUTPUT is not NULL, store into it the pointer to 278 the place where DATA was actually stored. */ 279 280 unsigned 281 hsa_brig_section::add (const void *data, unsigned len, void **output) 282 { 283 unsigned offset = total_size; 284 285 gcc_assert (len <= BRIG_CHUNK_MAX_SIZE); 286 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len)) 287 allocate_new_chunk (); 288 289 char *dst = cur_chunk->data + cur_chunk->size; 290 memcpy (dst, data, len); 291 if (output) 292 *output = dst; 293 cur_chunk->size += len; 294 total_size += len; 295 296 return offset; 297 } 298 299 /* Add padding to section so that its size is divisible by FACTOR. */ 300 301 void 302 hsa_brig_section::round_size_up (int factor) 303 { 304 unsigned padding, res = total_size % factor; 305 306 if (res == 0) 307 return; 308 309 padding = factor - res; 310 total_size += padding; 311 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding)) 312 { 313 padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size; 314 cur_chunk->size = BRIG_CHUNK_MAX_SIZE; 315 allocate_new_chunk (); 316 } 317 318 cur_chunk->size += padding; 319 } 320 321 /* Return pointer to data by global OFFSET in the section. */ 322 323 void * 324 hsa_brig_section::get_ptr_by_offset (unsigned int offset) 325 { 326 gcc_assert (offset < total_size); 327 offset -= header_byte_delta; 328 329 unsigned i; 330 for (i = 0; offset >= chunks[i].size; i++) 331 offset -= chunks[i].size; 332 333 return chunks[i].data + offset; 334 } 335 336 /* BRIG string data hashing. */ 337 338 struct brig_string_slot 339 { 340 const char *s; 341 char prefix; 342 int len; 343 uint32_t offset; 344 }; 345 346 /* Hash table helpers. 
*/ 347 348 struct brig_string_slot_hasher : pointer_hash <brig_string_slot> 349 { 350 static inline hashval_t hash (const value_type); 351 static inline bool equal (const value_type, const compare_type); 352 static inline void remove (value_type); 353 }; 354 355 /* Returns a hash code for DS. Adapted from libiberty's htab_hash_string 356 to support strings that may not end in '\0'. */ 357 358 inline hashval_t 359 brig_string_slot_hasher::hash (const value_type ds) 360 { 361 hashval_t r = ds->len; 362 int i; 363 364 for (i = 0; i < ds->len; i++) 365 r = r * 67 + (unsigned) ds->s[i] - 113; 366 r = r * 67 + (unsigned) ds->prefix - 113; 367 return r; 368 } 369 370 /* Returns nonzero if DS1 and DS2 are equal. */ 371 372 inline bool 373 brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2) 374 { 375 if (ds1->len == ds2->len) 376 return ds1->prefix == ds2->prefix 377 && memcmp (ds1->s, ds2->s, ds1->len) == 0; 378 379 return 0; 380 } 381 382 /* Deallocate memory for DS upon its removal. */ 383 384 inline void 385 brig_string_slot_hasher::remove (value_type ds) 386 { 387 free (const_cast<char *> (ds->s)); 388 free (ds); 389 } 390 391 /* Hash for strings we output in order not to duplicate them needlessly. */ 392 393 static hash_table<brig_string_slot_hasher> *brig_string_htab; 394 395 /* Emit a null terminated string STR to the data section and return its 396 offset in it. If PREFIX is non-zero, output it just before STR too. 397 Sanitize the string if SANITIZE option is set to true. */ 398 399 static unsigned 400 brig_emit_string (const char *str, char prefix = 0, bool sanitize = true) 401 { 402 unsigned slen = strlen (str); 403 unsigned offset, len = slen + (prefix ? 
1 : 0); 404 uint32_t hdr_len = lendian32 (len); 405 brig_string_slot s_slot; 406 brig_string_slot **slot; 407 char *str2; 408 409 str2 = xstrdup (str); 410 411 if (sanitize) 412 hsa_sanitize_name (str2); 413 s_slot.s = str2; 414 s_slot.len = slen; 415 s_slot.prefix = prefix; 416 s_slot.offset = 0; 417 418 slot = brig_string_htab->find_slot (&s_slot, INSERT); 419 if (*slot == NULL) 420 { 421 brig_string_slot *new_slot = XCNEW (brig_string_slot); 422 423 /* In theory we should fill in BrigData but that would mean copying 424 the string to a buffer for no reason, so we just emulate it. */ 425 offset = brig_data.add (&hdr_len, sizeof (hdr_len)); 426 if (prefix) 427 brig_data.add (&prefix, 1); 428 429 brig_data.add (str2, slen); 430 brig_data.round_size_up (4); 431 432 /* TODO: could use the string we just copied into 433 brig_string->cur_chunk */ 434 new_slot->s = str2; 435 new_slot->len = slen; 436 new_slot->prefix = prefix; 437 new_slot->offset = offset; 438 *slot = new_slot; 439 } 440 else 441 { 442 offset = (*slot)->offset; 443 free (str2); 444 } 445 446 return offset; 447 } 448 449 /* Linked list of queued operands. */ 450 451 static struct operand_queue 452 { 453 /* First from the chain of queued operands. */ 454 hsa_op_base *first_op, *last_op; 455 456 /* The offset at which the next operand will be enqueued. */ 457 unsigned projected_size; 458 459 } op_queue; 460 461 /* Unless already initialized, initialize infrastructure to produce BRIG. 
*/

static void
brig_init (void)
{
  /* The instruction count is reset on every call, even when the sections
     have been set up before.  */
  brig_insn_count = 0;

  if (brig_initialized)
    return;

  brig_string_htab = new hash_table<brig_string_slot_hasher> (37);
  brig_data.init (BRIG_SECTION_DATA_NAME);
  brig_code.init (BRIG_SECTION_CODE_NAME);
  brig_operand.init (BRIG_SECTION_OPERAND_NAME);
  brig_initialized = true;

  /* The module directive is the first thing added to the code section.  */
  struct BrigDirectiveModule moddir;
  memset (&moddir, 0, sizeof (moddir));
  moddir.base.byteCount = lendian16 (sizeof (moddir));

  /* Offset of the module name in the data section, in host byte order.  */
  unsigned name_offset;
  if (main_input_filename && *main_input_filename != '\0')
    {
      /* Derive the module name from the file name of the main input,
         without directories and cut at the first dot.  */
      const char *part = strrchr (main_input_filename, '/');
      if (!part)
        part = main_input_filename;
      else
        part++;
      char *modname = concat ("&__hsa_module_", part, NULL);
      char *extension = strchr (modname, '.');
      if (extension)
        *extension = '\0';

      /* As in LTO mode, we have to emit a different module names.  */
      if (flag_ltrans)
        {
          part = strrchr (asm_file_name, '/');
          if (!part)
            part = asm_file_name;
          else
            part++;
          char *modname2;
          modname2 = xasprintf ("%s_%s", modname, part);
          free (modname);
          modname = modname2;
        }

      hsa_sanitize_name (modname);
      name_offset = brig_emit_string (modname);
      free (modname);
    }
  else
    name_offset = brig_emit_string ("__hsa_module_unnamed", '&');

  /* Like every other multi-byte field, the name offset has to be stored in
     little-endian form.  */
  moddir.name = lendian32 (name_offset);
  moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE);
  moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR);
  moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR);
  moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
  if (hsa_machine_large_p ())
    moddir.machineModel = BRIG_MACHINE_LARGE;
  else
    moddir.machineModel = BRIG_MACHINE_SMALL;
  moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
  brig_code.add (&moddir, sizeof (moddir));
}

/* Free all BRIG data.
*/ 527 528 static void 529 brig_release_data (void) 530 { 531 delete brig_string_htab; 532 brig_data.release (); 533 brig_code.release (); 534 brig_operand.release (); 535 536 brig_initialized = 0; 537 } 538 539 /* Enqueue operation OP. Return the offset at which it will be stored. */ 540 541 static unsigned int 542 enqueue_op (hsa_op_base *op) 543 { 544 unsigned ret; 545 546 if (op->m_brig_op_offset) 547 return op->m_brig_op_offset; 548 549 ret = op_queue.projected_size; 550 op->m_brig_op_offset = op_queue.projected_size; 551 552 if (!op_queue.first_op) 553 op_queue.first_op = op; 554 else 555 op_queue.last_op->m_next = op; 556 op_queue.last_op = op; 557 558 if (is_a <hsa_op_immed *> (op)) 559 op_queue.projected_size += sizeof (struct BrigOperandConstantBytes); 560 else if (is_a <hsa_op_reg *> (op)) 561 op_queue.projected_size += sizeof (struct BrigOperandRegister); 562 else if (is_a <hsa_op_address *> (op)) 563 op_queue.projected_size += sizeof (struct BrigOperandAddress); 564 else if (is_a <hsa_op_code_ref *> (op)) 565 op_queue.projected_size += sizeof (struct BrigOperandCodeRef); 566 else if (is_a <hsa_op_code_list *> (op)) 567 op_queue.projected_size += sizeof (struct BrigOperandCodeList); 568 else if (is_a <hsa_op_operand_list *> (op)) 569 op_queue.projected_size += sizeof (struct BrigOperandOperandList); 570 else 571 gcc_unreachable (); 572 return ret; 573 } 574 575 static void emit_immediate_operand (hsa_op_immed *imm); 576 577 /* Emit directive describing a symbol if it has not been emitted already. 578 Return the offset of the directive. 
*/ 579 580 static unsigned 581 emit_directive_variable (struct hsa_symbol *symbol) 582 { 583 struct BrigDirectiveVariable dirvar; 584 unsigned name_offset; 585 static unsigned res_name_offset; 586 587 if (symbol->m_directive_offset) 588 return symbol->m_directive_offset; 589 590 memset (&dirvar, 0, sizeof (dirvar)); 591 dirvar.base.byteCount = lendian16 (sizeof (dirvar)); 592 dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE); 593 dirvar.allocation = symbol->m_allocation; 594 595 char prefix = symbol->m_global_scope_p ? '&' : '%'; 596 597 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL) 598 { 599 if (res_name_offset == 0) 600 res_name_offset = brig_emit_string (symbol->m_name, '%'); 601 name_offset = res_name_offset; 602 } 603 else if (symbol->m_name) 604 name_offset = brig_emit_string (symbol->m_name, prefix); 605 else 606 { 607 char buf[64]; 608 snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment), 609 symbol->m_name_number); 610 name_offset = brig_emit_string (buf, prefix); 611 } 612 613 dirvar.name = lendian32 (name_offset); 614 615 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == CONST_DECL) 616 { 617 hsa_op_immed *tmp = new hsa_op_immed (DECL_INITIAL (symbol->m_decl)); 618 dirvar.init = lendian32 (enqueue_op (tmp)); 619 } 620 else 621 dirvar.init = 0; 622 dirvar.type = lendian16 (symbol->m_type); 623 dirvar.segment = symbol->m_segment; 624 dirvar.align = symbol->m_align; 625 dirvar.linkage = symbol->m_linkage; 626 dirvar.dim.lo = symbol->m_dim; 627 dirvar.dim.hi = symbol->m_dim >> 32; 628 629 /* Global variables are just declared and linked via HSA runtime. 
*/ 630 if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM) 631 dirvar.modifier |= BRIG_VARIABLE_DEFINITION; 632 dirvar.reserved = 0; 633 634 if (symbol->m_cst_value) 635 { 636 dirvar.modifier |= BRIG_VARIABLE_CONST; 637 dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value)); 638 } 639 640 symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar)); 641 return symbol->m_directive_offset; 642 } 643 644 /* Emit directives describing either a function declaration or definition F and 645 return the produced BrigDirectiveExecutable structure. The function does 646 not take into account any instructions when calculating nextModuleEntry 647 field of the produced BrigDirectiveExecutable structure so when emitting 648 actual definitions, this field needs to be updated after all of the function 649 is actually added to the code section. */ 650 651 static BrigDirectiveExecutable * 652 emit_function_directives (hsa_function_representation *f, bool is_declaration) 653 { 654 struct BrigDirectiveExecutable fndir; 655 unsigned name_offset, inarg_off, scoped_off, next_toplev_off; 656 int count = 0; 657 void *ptr_to_fndir; 658 hsa_symbol *sym; 659 660 if (!f->m_declaration_p) 661 for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++) 662 { 663 gcc_assert (!sym->m_emitted_to_brig); 664 sym->m_emitted_to_brig = true; 665 emit_directive_variable (sym); 666 brig_insn_count++; 667 } 668 669 name_offset = brig_emit_string (f->m_name, '&'); 670 inarg_off = brig_code.total_size + sizeof (fndir) 671 + (f->m_output_arg ? 
sizeof (struct BrigDirectiveVariable) : 0); 672 scoped_off = inarg_off 673 + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable); 674 675 if (!f->m_declaration_p) 676 { 677 count += f->m_spill_symbols.length (); 678 count += f->m_private_variables.length (); 679 } 680 681 next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable); 682 683 memset (&fndir, 0, sizeof (fndir)); 684 fndir.base.byteCount = lendian16 (sizeof (fndir)); 685 fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL 686 : BRIG_KIND_DIRECTIVE_FUNCTION); 687 fndir.name = lendian32 (name_offset); 688 fndir.inArgCount = lendian16 (f->m_input_args.length ()); 689 fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0); 690 fndir.firstInArg = lendian32 (inarg_off); 691 fndir.firstCodeBlockEntry = lendian32 (scoped_off); 692 fndir.nextModuleEntry = lendian32 (next_toplev_off); 693 fndir.linkage = f->get_linkage (); 694 if (!f->m_declaration_p) 695 fndir.modifier |= BRIG_EXECUTABLE_DEFINITION; 696 memset (&fndir.reserved, 0, sizeof (fndir.reserved)); 697 698 /* Once we put a definition of function_offsets, we should not overwrite 699 it with a declaration of the function. */ 700 if (f->m_internal_fn == NULL) 701 { 702 if (!function_offsets->get (f->m_decl) || !is_declaration) 703 function_offsets->put (f->m_decl, brig_code.total_size); 704 } 705 else 706 { 707 /* Internal function. 
*/ 708 hsa_internal_fn **slot 709 = hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT); 710 hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn); 711 int_fn->m_offset = brig_code.total_size; 712 *slot = int_fn; 713 } 714 715 brig_code.add (&fndir, sizeof (fndir), &ptr_to_fndir); 716 717 if (f->m_output_arg) 718 emit_directive_variable (f->m_output_arg); 719 for (unsigned i = 0; i < f->m_input_args.length (); i++) 720 emit_directive_variable (f->m_input_args[i]); 721 722 if (!f->m_declaration_p) 723 { 724 for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++) 725 { 726 emit_directive_variable (sym); 727 brig_insn_count++; 728 } 729 for (unsigned i = 0; i < f->m_private_variables.length (); i++) 730 { 731 emit_directive_variable (f->m_private_variables[i]); 732 brig_insn_count++; 733 } 734 } 735 736 return (BrigDirectiveExecutable *) ptr_to_fndir; 737 } 738 739 /* Emit a label directive for the given HBB. We assume it is about to start on 740 the current offset in the code section. */ 741 742 static void 743 emit_bb_label_directive (hsa_bb *hbb) 744 { 745 struct BrigDirectiveLabel lbldir; 746 747 lbldir.base.byteCount = lendian16 (sizeof (lbldir)); 748 lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL); 749 char buf[32]; 750 snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl), 751 hbb->m_index); 752 lbldir.name = lendian32 (brig_emit_string (buf, '@')); 753 754 hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir, 755 sizeof (lbldir)); 756 brig_insn_count++; 757 } 758 759 /* Map a normal HSAIL type to the type of the equivalent BRIG operand 760 holding such, for constants and registers. 
*/

static BrigType16_t
regtype_for_type (BrigType16_t t)
{
  switch (t)
    {
    /* The one-bit type maps to itself.  */
    case BRIG_TYPE_B1:
      return BRIG_TYPE_B1;

    /* Scalar types that map to the 32-bit operand type...  */
    case BRIG_TYPE_B8:
    case BRIG_TYPE_B16:
    case BRIG_TYPE_B32:
    case BRIG_TYPE_U8:
    case BRIG_TYPE_U16:
    case BRIG_TYPE_U32:
    case BRIG_TYPE_S8:
    case BRIG_TYPE_S16:
    case BRIG_TYPE_S32:
    case BRIG_TYPE_F16:
    case BRIG_TYPE_F32:
    /* ...and packed types that do so too.  */
    case BRIG_TYPE_U8X4:
    case BRIG_TYPE_U16X2:
    case BRIG_TYPE_S8X4:
    case BRIG_TYPE_S16X2:
    case BRIG_TYPE_F16X2:
      return BRIG_TYPE_B32;

    /* Scalar and packed types that map to the 64-bit operand type.  */
    case BRIG_TYPE_B64:
    case BRIG_TYPE_U64:
    case BRIG_TYPE_S64:
    case BRIG_TYPE_F64:
    case BRIG_TYPE_U8X8:
    case BRIG_TYPE_U16X4:
    case BRIG_TYPE_U32X2:
    case BRIG_TYPE_S8X8:
    case BRIG_TYPE_S16X4:
    case BRIG_TYPE_S32X2:
    case BRIG_TYPE_F16X4:
    case BRIG_TYPE_F32X2:
      return BRIG_TYPE_B64;

    /* Types that map to the 128-bit operand type.  */
    case BRIG_TYPE_B128:
    case BRIG_TYPE_U8X16:
    case BRIG_TYPE_U16X8:
    case BRIG_TYPE_U32X4:
    case BRIG_TYPE_U64X2:
    case BRIG_TYPE_S8X16:
    case BRIG_TYPE_S16X8:
    case BRIG_TYPE_S32X4:
    case BRIG_TYPE_S64X2:
    case BRIG_TYPE_F16X8:
    case BRIG_TYPE_F32X4:
    case BRIG_TYPE_F64X2:
      return BRIG_TYPE_B128;

    default:
      gcc_unreachable ();
    }
}

/* Return the length of the BRIG type TYPE that is going to be streamed out as
   an immediate constant (so it must not be B1).
*/

unsigned
hsa_get_imm_brig_type_len (BrigType16_t type)
{
  /* For packed types the packing alone determines the length.  */
  const BrigType16_t packing = type & BRIG_TYPE_PACK_MASK;
  switch (packing)
    {
    case BRIG_TYPE_PACK_32:
      return 4;
    case BRIG_TYPE_PACK_64:
      return 8;
    case BRIG_TYPE_PACK_128:
      return 16;
    case BRIG_TYPE_PACK_NONE:
      /* Not packed, the base type decides below.  */
      break;
    default:
      gcc_unreachable ();
    }

  const BrigType16_t base = type & BRIG_TYPE_BASE_MASK;
  switch (base)
    {
    case BRIG_TYPE_B8:
    case BRIG_TYPE_U8:
    case BRIG_TYPE_S8:
      return 1;

    case BRIG_TYPE_B16:
    case BRIG_TYPE_U16:
    case BRIG_TYPE_S16:
    case BRIG_TYPE_F16:
      return 2;

    case BRIG_TYPE_B32:
    case BRIG_TYPE_U32:
    case BRIG_TYPE_S32:
    case BRIG_TYPE_F32:
      return 4;

    case BRIG_TYPE_B64:
    case BRIG_TYPE_U64:
    case BRIG_TYPE_S64:
    case BRIG_TYPE_F64:
      return 8;

    case BRIG_TYPE_B128:
      return 16;

    default:
      gcc_unreachable ();
    }
}

/* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
   If NEED_LEN is not equal to zero, shrink or extend the value
   to NEED_LEN bytes.  Return how many bytes were written.
*/ 875 876 static int 877 emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len) 878 { 879 union hsa_bytes bytes; 880 881 memset (&bytes, 0, sizeof (bytes)); 882 tree type = TREE_TYPE (value); 883 gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE); 884 885 unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT; 886 if (INTEGRAL_TYPE_P (type) 887 || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST)) 888 switch (data_len) 889 { 890 case 1: 891 bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value); 892 break; 893 case 2: 894 bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value); 895 break; 896 case 4: 897 bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value); 898 break; 899 case 8: 900 bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value); 901 break; 902 default: 903 gcc_unreachable (); 904 } 905 else if (SCALAR_FLOAT_TYPE_P (type)) 906 { 907 if (data_len == 2) 908 { 909 sorry ("Support for HSA does not implement immediate 16 bit FPU " 910 "operands"); 911 return 2; 912 } 913 unsigned int_len = GET_MODE_SIZE (SCALAR_FLOAT_TYPE_MODE (type)); 914 /* There are always 32 bits in each long, no matter the size of 915 the hosts long. */ 916 long tmp[6]; 917 918 real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type)); 919 920 if (int_len == 4) 921 bytes.b32 = (uint32_t) tmp[0]; 922 else 923 { 924 bytes.b64 = (uint64_t)(uint32_t) tmp[1]; 925 bytes.b64 <<= 32; 926 bytes.b64 |= (uint32_t) tmp[0]; 927 } 928 } 929 else 930 gcc_unreachable (); 931 932 int len; 933 if (need_len == 0) 934 len = data_len; 935 else 936 len = need_len; 937 938 memcpy (data, &bytes, len); 939 return len; 940 } 941 942 char * 943 hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size) 944 { 945 char *brig_repr; 946 *brig_repr_size = hsa_get_imm_brig_type_len (m_type); 947 948 if (m_tree_value != NULL_TREE) 949 { 950 /* Update brig_repr_size for special tree values. 
*/ 951 if (TREE_CODE (m_tree_value) == STRING_CST) 952 *brig_repr_size = TREE_STRING_LENGTH (m_tree_value); 953 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR) 954 *brig_repr_size 955 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value))); 956 957 unsigned total_len = *brig_repr_size; 958 959 /* As we can have a constructor with fewer elements, fill the memory 960 with zeros. */ 961 brig_repr = XCNEWVEC (char, total_len); 962 char *p = brig_repr; 963 964 if (TREE_CODE (m_tree_value) == VECTOR_CST) 965 { 966 /* Variable-length vectors aren't supported. */ 967 int i, num = VECTOR_CST_NELTS (m_tree_value).to_constant (); 968 for (i = 0; i < num; i++) 969 { 970 tree v = VECTOR_CST_ELT (m_tree_value, i); 971 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0); 972 total_len -= actual; 973 p += actual; 974 } 975 /* Vectors should have the exact size. */ 976 gcc_assert (total_len == 0); 977 } 978 else if (TREE_CODE (m_tree_value) == STRING_CST) 979 memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value), 980 TREE_STRING_LENGTH (m_tree_value)); 981 else if (TREE_CODE (m_tree_value) == COMPLEX_CST) 982 { 983 gcc_assert (total_len % 2 == 0); 984 unsigned actual; 985 actual 986 = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p, 987 total_len / 2); 988 989 gcc_assert (actual == total_len / 2); 990 p += actual; 991 992 actual 993 = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p, 994 total_len / 2); 995 gcc_assert (actual == total_len / 2); 996 } 997 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR) 998 { 999 unsigned len = CONSTRUCTOR_NELTS (m_tree_value); 1000 for (unsigned i = 0; i < len; i++) 1001 { 1002 tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value; 1003 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0); 1004 total_len -= actual; 1005 p += actual; 1006 } 1007 } 1008 else 1009 emit_immediate_scalar_to_buffer (m_tree_value, p, total_len); 1010 } 1011 else 1012 { 1013 hsa_bytes bytes; 1014 1015 switch 
(*brig_repr_size) 1016 { 1017 case 1: 1018 bytes.b8 = (uint8_t) m_int_value; 1019 break; 1020 case 2: 1021 bytes.b16 = (uint16_t) m_int_value; 1022 break; 1023 case 4: 1024 bytes.b32 = (uint32_t) m_int_value; 1025 break; 1026 case 8: 1027 bytes.b64 = (uint64_t) m_int_value; 1028 break; 1029 default: 1030 gcc_unreachable (); 1031 } 1032 1033 brig_repr = XNEWVEC (char, *brig_repr_size); 1034 memcpy (brig_repr, &bytes, *brig_repr_size); 1035 } 1036 1037 return brig_repr; 1038 } 1039 1040 /* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might 1041 have been massaged to comply with various HSA/BRIG type requirements, so the 1042 only important aspect of that is the length (because HSAIL might expect 1043 smaller constants or become bit-data). The data should be represented 1044 according to what is in the tree representation. */ 1045 1046 static void 1047 emit_immediate_operand (hsa_op_immed *imm) 1048 { 1049 unsigned brig_repr_size; 1050 char *brig_repr = imm->emit_to_buffer (&brig_repr_size); 1051 struct BrigOperandConstantBytes out; 1052 1053 memset (&out, 0, sizeof (out)); 1054 out.base.byteCount = lendian16 (sizeof (out)); 1055 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES); 1056 uint32_t byteCount = lendian32 (brig_repr_size); 1057 out.type = lendian16 (imm->m_type); 1058 out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); 1059 brig_operand.add (&out, sizeof (out)); 1060 brig_data.add (brig_repr, brig_repr_size); 1061 brig_data.round_size_up (4); 1062 1063 free (brig_repr); 1064 } 1065 1066 /* Emit a register BRIG operand REG. 
*/ 1067 1068 static void 1069 emit_register_operand (hsa_op_reg *reg) 1070 { 1071 struct BrigOperandRegister out; 1072 1073 out.base.byteCount = lendian16 (sizeof (out)); 1074 out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER); 1075 out.regNum = lendian32 (reg->m_hard_num); 1076 1077 switch (regtype_for_type (reg->m_type)) 1078 { 1079 case BRIG_TYPE_B32: 1080 out.regKind = BRIG_REGISTER_KIND_SINGLE; 1081 break; 1082 case BRIG_TYPE_B64: 1083 out.regKind = BRIG_REGISTER_KIND_DOUBLE; 1084 break; 1085 case BRIG_TYPE_B128: 1086 out.regKind = BRIG_REGISTER_KIND_QUAD; 1087 break; 1088 case BRIG_TYPE_B1: 1089 out.regKind = BRIG_REGISTER_KIND_CONTROL; 1090 break; 1091 default: 1092 gcc_unreachable (); 1093 } 1094 1095 brig_operand.add (&out, sizeof (out)); 1096 } 1097 1098 /* Emit an address BRIG operand ADDR. */ 1099 1100 static void 1101 emit_address_operand (hsa_op_address *addr) 1102 { 1103 struct BrigOperandAddress out; 1104 1105 out.base.byteCount = lendian16 (sizeof (out)); 1106 out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS); 1107 out.symbol = addr->m_symbol 1108 ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0; 1109 out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0; 1110 1111 if (sizeof (addr->m_imm_offset) == 8) 1112 { 1113 out.offset.lo = lendian32 (addr->m_imm_offset); 1114 out.offset.hi = lendian32 (addr->m_imm_offset >> 32); 1115 } 1116 else 1117 { 1118 gcc_assert (sizeof (addr->m_imm_offset) == 4); 1119 out.offset.lo = lendian32 (addr->m_imm_offset); 1120 out.offset.hi = 0; 1121 } 1122 1123 brig_operand.add (&out, sizeof (out)); 1124 } 1125 1126 /* Emit a code reference operand REF. 
*/ 1127 1128 static void 1129 emit_code_ref_operand (hsa_op_code_ref *ref) 1130 { 1131 struct BrigOperandCodeRef out; 1132 1133 out.base.byteCount = lendian16 (sizeof (out)); 1134 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF); 1135 out.ref = lendian32 (ref->m_directive_offset); 1136 brig_operand.add (&out, sizeof (out)); 1137 } 1138 1139 /* Emit a code list operand CODE_LIST. */ 1140 1141 static void 1142 emit_code_list_operand (hsa_op_code_list *code_list) 1143 { 1144 struct BrigOperandCodeList out; 1145 unsigned args = code_list->m_offsets.length (); 1146 1147 for (unsigned i = 0; i < args; i++) 1148 gcc_assert (code_list->m_offsets[i]); 1149 1150 out.base.byteCount = lendian16 (sizeof (out)); 1151 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST); 1152 1153 uint32_t byteCount = lendian32 (4 * args); 1154 1155 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); 1156 brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t)); 1157 brig_data.round_size_up (4); 1158 brig_operand.add (&out, sizeof (out)); 1159 } 1160 1161 /* Emit an operand list operand OPERAND_LIST. */ 1162 1163 static void 1164 emit_operand_list_operand (hsa_op_operand_list *operand_list) 1165 { 1166 struct BrigOperandOperandList out; 1167 unsigned args = operand_list->m_offsets.length (); 1168 1169 for (unsigned i = 0; i < args; i++) 1170 gcc_assert (operand_list->m_offsets[i]); 1171 1172 out.base.byteCount = lendian16 (sizeof (out)); 1173 out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST); 1174 1175 uint32_t byteCount = lendian32 (4 * args); 1176 1177 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); 1178 brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t)); 1179 brig_data.round_size_up (4); 1180 brig_operand.add (&out, sizeof (out)); 1181 } 1182 1183 /* Emit all operands queued for writing. 
*/ 1184 1185 static void 1186 emit_queued_operands (void) 1187 { 1188 for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next) 1189 { 1190 gcc_assert (op->m_brig_op_offset == brig_operand.total_size); 1191 if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op)) 1192 emit_immediate_operand (imm); 1193 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op)) 1194 emit_register_operand (reg); 1195 else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op)) 1196 emit_address_operand (addr); 1197 else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op)) 1198 emit_code_ref_operand (ref); 1199 else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op)) 1200 emit_code_list_operand (code_list); 1201 else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op)) 1202 emit_operand_list_operand (l); 1203 else 1204 gcc_unreachable (); 1205 } 1206 } 1207 1208 /* Emit directives describing the function that is used for 1209 a function declaration. */ 1210 1211 static BrigDirectiveExecutable * 1212 emit_function_declaration (tree decl) 1213 { 1214 hsa_function_representation *f = hsa_generate_function_declaration (decl); 1215 1216 BrigDirectiveExecutable *e = emit_function_directives (f, true); 1217 emit_queued_operands (); 1218 1219 delete f; 1220 1221 return e; 1222 } 1223 1224 /* Emit directives describing the function that is used for 1225 an internal function declaration. */ 1226 1227 static BrigDirectiveExecutable * 1228 emit_internal_fn_decl (hsa_internal_fn *fn) 1229 { 1230 hsa_function_representation *f = hsa_generate_internal_fn_decl (fn); 1231 1232 BrigDirectiveExecutable *e = emit_function_directives (f, true); 1233 emit_queued_operands (); 1234 1235 delete f; 1236 1237 return e; 1238 } 1239 1240 /* Enqueue all operands of INSN and return offset to BRIG data section 1241 to list of operand offsets. 
*/ 1242 1243 static unsigned 1244 emit_insn_operands (hsa_insn_basic *insn) 1245 { 1246 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS> 1247 operand_offsets; 1248 1249 unsigned l = insn->operand_count (); 1250 1251 /* We have N operands so use 4 * N for the byte_count. */ 1252 uint32_t byte_count = lendian32 (4 * l); 1253 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count)); 1254 if (l > 0) 1255 { 1256 operand_offsets.safe_grow (l); 1257 for (unsigned i = 0; i < l; i++) 1258 operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i))); 1259 1260 brig_data.add (operand_offsets.address (), 1261 l * sizeof (BrigOperandOffset32_t)); 1262 } 1263 brig_data.round_size_up (4); 1264 return offset; 1265 } 1266 1267 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset 1268 to BRIG data section to list of operand offsets. */ 1269 1270 static unsigned 1271 emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL, 1272 hsa_op_base *op2 = NULL) 1273 { 1274 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS> 1275 operand_offsets; 1276 1277 gcc_checking_assert (op0 != NULL); 1278 operand_offsets.safe_push (enqueue_op (op0)); 1279 1280 if (op1 != NULL) 1281 { 1282 operand_offsets.safe_push (enqueue_op (op1)); 1283 if (op2 != NULL) 1284 operand_offsets.safe_push (enqueue_op (op2)); 1285 } 1286 1287 unsigned l = operand_offsets.length (); 1288 1289 /* We have N operands so use 4 * N for the byte_count. */ 1290 uint32_t byte_count = lendian32 (4 * l); 1291 1292 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count)); 1293 brig_data.add (operand_offsets.address (), 1294 l * sizeof (BrigOperandOffset32_t)); 1295 1296 brig_data.round_size_up (4); 1297 1298 return offset; 1299 } 1300 1301 /* Emit an HSA memory instruction and all necessary directives, schedule 1302 necessary operands for writing. 
*/ 1303 1304 static void 1305 emit_memory_insn (hsa_insn_mem *mem) 1306 { 1307 struct BrigInstMem repr; 1308 gcc_checking_assert (mem->operand_count () == 2); 1309 1310 hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1)); 1311 1312 /* This is necessary because of the erroneous typedef of 1313 BrigMemoryModifier8_t which introduces padding which may then contain 1314 random stuff (which we do not want so that we can test things don't 1315 change). */ 1316 memset (&repr, 0, sizeof (repr)); 1317 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1318 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM); 1319 repr.base.opcode = lendian16 (mem->m_opcode); 1320 repr.base.type = lendian16 (mem->m_type); 1321 repr.base.operands = lendian32 (emit_insn_operands (mem)); 1322 1323 if (addr->m_symbol) 1324 repr.segment = addr->m_symbol->m_segment; 1325 else 1326 repr.segment = BRIG_SEGMENT_FLAT; 1327 repr.modifier = 0; 1328 repr.equivClass = mem->m_equiv_class; 1329 repr.align = mem->m_align; 1330 if (mem->m_opcode == BRIG_OPCODE_LD) 1331 repr.width = BRIG_WIDTH_1; 1332 else 1333 repr.width = BRIG_WIDTH_NONE; 1334 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1335 brig_code.add (&repr, sizeof (repr)); 1336 brig_insn_count++; 1337 } 1338 1339 /* Emit an HSA signal memory instruction and all necessary directives, schedule 1340 necessary operands for writing. */ 1341 1342 static void 1343 emit_signal_insn (hsa_insn_signal *mem) 1344 { 1345 struct BrigInstSignal repr; 1346 1347 memset (&repr, 0, sizeof (repr)); 1348 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1349 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL); 1350 repr.base.opcode = lendian16 (mem->m_opcode); 1351 repr.base.type = lendian16 (mem->m_type); 1352 repr.base.operands = lendian32 (emit_insn_operands (mem)); 1353 1354 repr.memoryOrder = mem->m_memory_order; 1355 repr.signalOperation = mem->m_signalop; 1356 repr.signalType = hsa_machine_large_p () ? 
BRIG_TYPE_SIG64 : BRIG_TYPE_SIG32; 1357 1358 brig_code.add (&repr, sizeof (repr)); 1359 brig_insn_count++; 1360 } 1361 1362 /* Emit an HSA atomic memory instruction and all necessary directives, schedule 1363 necessary operands for writing. */ 1364 1365 static void 1366 emit_atomic_insn (hsa_insn_atomic *mem) 1367 { 1368 struct BrigInstAtomic repr; 1369 1370 /* Either operand[0] or operand[1] must be an address operand. */ 1371 hsa_op_address *addr = NULL; 1372 if (is_a <hsa_op_address *> (mem->get_op (0))) 1373 addr = as_a <hsa_op_address *> (mem->get_op (0)); 1374 else 1375 addr = as_a <hsa_op_address *> (mem->get_op (1)); 1376 1377 memset (&repr, 0, sizeof (repr)); 1378 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1379 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC); 1380 repr.base.opcode = lendian16 (mem->m_opcode); 1381 repr.base.type = lendian16 (mem->m_type); 1382 repr.base.operands = lendian32 (emit_insn_operands (mem)); 1383 1384 if (addr->m_symbol) 1385 repr.segment = addr->m_symbol->m_segment; 1386 else 1387 repr.segment = BRIG_SEGMENT_FLAT; 1388 repr.memoryOrder = mem->m_memoryorder; 1389 repr.memoryScope = mem->m_memoryscope; 1390 repr.atomicOperation = mem->m_atomicop; 1391 1392 brig_code.add (&repr, sizeof (repr)); 1393 brig_insn_count++; 1394 } 1395 1396 /* Emit an HSA LDA instruction and all necessary directives, schedule 1397 necessary operands for writing. 
*/ 1398 1399 static void 1400 emit_addr_insn (hsa_insn_basic *insn) 1401 { 1402 struct BrigInstAddr repr; 1403 1404 hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1)); 1405 1406 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1407 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR); 1408 repr.base.opcode = lendian16 (insn->m_opcode); 1409 repr.base.type = lendian16 (insn->m_type); 1410 repr.base.operands = lendian32 (emit_insn_operands (insn)); 1411 1412 if (addr->m_symbol) 1413 repr.segment = addr->m_symbol->m_segment; 1414 else 1415 repr.segment = BRIG_SEGMENT_FLAT; 1416 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1417 1418 brig_code.add (&repr, sizeof (repr)); 1419 brig_insn_count++; 1420 } 1421 1422 /* Emit an HSA segment conversion instruction and all necessary directives, 1423 schedule necessary operands for writing. */ 1424 1425 static void 1426 emit_segment_insn (hsa_insn_seg *seg) 1427 { 1428 struct BrigInstSegCvt repr; 1429 1430 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1431 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT); 1432 repr.base.opcode = lendian16 (seg->m_opcode); 1433 repr.base.type = lendian16 (seg->m_type); 1434 repr.base.operands = lendian32 (emit_insn_operands (seg)); 1435 repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type); 1436 repr.segment = seg->m_segment; 1437 repr.modifier = 0; 1438 1439 brig_code.add (&repr, sizeof (repr)); 1440 1441 brig_insn_count++; 1442 } 1443 1444 /* Emit an HSA alloca instruction and all necessary directives, 1445 schedule necessary operands for writing. 
*/ 1446 1447 static void 1448 emit_alloca_insn (hsa_insn_alloca *alloca) 1449 { 1450 struct BrigInstMem repr; 1451 gcc_checking_assert (alloca->operand_count () == 2); 1452 1453 memset (&repr, 0, sizeof (repr)); 1454 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1455 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM); 1456 repr.base.opcode = lendian16 (alloca->m_opcode); 1457 repr.base.type = lendian16 (alloca->m_type); 1458 repr.base.operands = lendian32 (emit_insn_operands (alloca)); 1459 repr.segment = BRIG_SEGMENT_PRIVATE; 1460 repr.modifier = 0; 1461 repr.equivClass = 0; 1462 repr.align = alloca->m_align; 1463 repr.width = BRIG_WIDTH_NONE; 1464 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1465 brig_code.add (&repr, sizeof (repr)); 1466 brig_insn_count++; 1467 } 1468 1469 /* Emit an HSA comparison instruction and all necessary directives, 1470 schedule necessary operands for writing. */ 1471 1472 static void 1473 emit_cmp_insn (hsa_insn_cmp *cmp) 1474 { 1475 struct BrigInstCmp repr; 1476 1477 memset (&repr, 0, sizeof (repr)); 1478 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1479 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP); 1480 repr.base.opcode = lendian16 (cmp->m_opcode); 1481 repr.base.type = lendian16 (cmp->m_type); 1482 repr.base.operands = lendian32 (emit_insn_operands (cmp)); 1483 1484 if (is_a <hsa_op_reg *> (cmp->get_op (1))) 1485 repr.sourceType 1486 = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type); 1487 else 1488 repr.sourceType 1489 = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type); 1490 repr.modifier = 0; 1491 repr.compare = cmp->m_compare; 1492 repr.pack = 0; 1493 1494 brig_code.add (&repr, sizeof (repr)); 1495 brig_insn_count++; 1496 } 1497 1498 /* Emit an HSA generic branching/sycnronization instruction. 
*/ 1499 1500 static void 1501 emit_generic_branch_insn (hsa_insn_br *br) 1502 { 1503 struct BrigInstBr repr; 1504 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1505 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); 1506 repr.base.opcode = lendian16 (br->m_opcode); 1507 repr.width = br->m_width; 1508 repr.base.type = lendian16 (br->m_type); 1509 repr.base.operands = lendian32 (emit_insn_operands (br)); 1510 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1511 1512 brig_code.add (&repr, sizeof (repr)); 1513 brig_insn_count++; 1514 } 1515 1516 /* Emit an HSA conditional branching instruction and all necessary directives, 1517 schedule necessary operands for writing. */ 1518 1519 static void 1520 emit_cond_branch_insn (hsa_insn_cbr *br) 1521 { 1522 struct BrigInstBr repr; 1523 1524 basic_block target = NULL; 1525 edge_iterator ei; 1526 edge e; 1527 1528 /* At the moment we only handle direct conditional jumps. */ 1529 gcc_assert (br->m_opcode == BRIG_OPCODE_CBR); 1530 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1531 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); 1532 repr.base.opcode = lendian16 (br->m_opcode); 1533 repr.width = br->m_width; 1534 /* For Conditional jumps the type is always B1. */ 1535 repr.base.type = lendian16 (BRIG_TYPE_B1); 1536 1537 FOR_EACH_EDGE (e, ei, br->m_bb->succs) 1538 if (e->flags & EDGE_TRUE_VALUE) 1539 { 1540 target = e->dest; 1541 break; 1542 } 1543 gcc_assert (target); 1544 1545 repr.base.operands 1546 = lendian32 (emit_operands (br->get_op (0), 1547 &hsa_bb_for_bb (target)->m_label_ref)); 1548 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1549 1550 brig_code.add (&repr, sizeof (repr)); 1551 brig_insn_count++; 1552 } 1553 1554 /* Emit an HSA unconditional jump branching instruction that points to 1555 a label REFERENCE. 
*/ 1556 1557 static void 1558 emit_unconditional_jump (hsa_op_code_ref *reference) 1559 { 1560 struct BrigInstBr repr; 1561 1562 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1563 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); 1564 repr.base.opcode = lendian16 (BRIG_OPCODE_BR); 1565 repr.base.type = lendian16 (BRIG_TYPE_NONE); 1566 /* Direct branches to labels must be width(all). */ 1567 repr.width = BRIG_WIDTH_ALL; 1568 1569 repr.base.operands = lendian32 (emit_operands (reference)); 1570 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1571 brig_code.add (&repr, sizeof (repr)); 1572 brig_insn_count++; 1573 } 1574 1575 /* Emit an HSA switch jump instruction that uses a jump table to 1576 jump to a destination label. */ 1577 1578 static void 1579 emit_switch_insn (hsa_insn_sbr *sbr) 1580 { 1581 struct BrigInstBr repr; 1582 1583 gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR); 1584 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1585 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); 1586 repr.base.opcode = lendian16 (sbr->m_opcode); 1587 repr.width = BRIG_WIDTH_1; 1588 /* For Conditional jumps the type is always B1. */ 1589 hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0)); 1590 repr.base.type = lendian16 (index->m_type); 1591 repr.base.operands 1592 = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list)); 1593 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1594 1595 brig_code.add (&repr, sizeof (repr)); 1596 brig_insn_count++; 1597 } 1598 1599 /* Emit a HSA convert instruction and all necessary directives, schedule 1600 necessary operands for writing. 
*/ 1601 1602 static void 1603 emit_cvt_insn (hsa_insn_cvt *insn) 1604 { 1605 struct BrigInstCvt repr; 1606 BrigType16_t srctype; 1607 1608 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1609 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT); 1610 repr.base.opcode = lendian16 (insn->m_opcode); 1611 repr.base.type = lendian16 (insn->m_type); 1612 repr.base.operands = lendian32 (emit_insn_operands (insn)); 1613 1614 if (is_a <hsa_op_reg *> (insn->get_op (1))) 1615 srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type; 1616 else 1617 srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type; 1618 repr.sourceType = lendian16 (srctype); 1619 repr.modifier = 0; 1620 /* float to smaller float requires a rounding setting (we default 1621 to 'near'. */ 1622 if (hsa_type_float_p (insn->m_type) 1623 && (!hsa_type_float_p (srctype) 1624 || ((insn->m_type & BRIG_TYPE_BASE_MASK) 1625 < (srctype & BRIG_TYPE_BASE_MASK)))) 1626 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN; 1627 else if (hsa_type_integer_p (insn->m_type) && 1628 hsa_type_float_p (srctype)) 1629 repr.round = BRIG_ROUND_INTEGER_ZERO; 1630 else 1631 repr.round = BRIG_ROUND_NONE; 1632 brig_code.add (&repr, sizeof (repr)); 1633 brig_insn_count++; 1634 } 1635 1636 /* Emit call instruction INSN, where this instruction must be closed 1637 within a call block instruction. */ 1638 1639 static void 1640 emit_call_insn (hsa_insn_call *call) 1641 { 1642 struct BrigInstBr repr; 1643 1644 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1645 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); 1646 repr.base.opcode = lendian16 (BRIG_OPCODE_CALL); 1647 repr.base.type = lendian16 (BRIG_TYPE_NONE); 1648 1649 repr.base.operands 1650 = lendian32 (emit_operands (call->m_result_code_list, &call->m_func, 1651 call->m_args_code_list)); 1652 1653 /* Internal functions have not set m_called_function. 
*/ 1654 if (call->m_called_function) 1655 { 1656 function_linkage_pair pair (call->m_called_function, 1657 call->m_func.m_brig_op_offset); 1658 function_call_linkage.safe_push (pair); 1659 } 1660 else 1661 { 1662 hsa_internal_fn *slot 1663 = hsa_emitted_internal_decls->find (call->m_called_internal_fn); 1664 gcc_assert (slot); 1665 gcc_assert (slot->m_offset > 0); 1666 call->m_func.m_directive_offset = slot->m_offset; 1667 } 1668 1669 repr.width = BRIG_WIDTH_ALL; 1670 memset (&repr.reserved, 0, sizeof (repr.reserved)); 1671 1672 brig_code.add (&repr, sizeof (repr)); 1673 brig_insn_count++; 1674 } 1675 1676 /* Emit argument block directive. */ 1677 1678 static void 1679 emit_arg_block_insn (hsa_insn_arg_block *insn) 1680 { 1681 switch (insn->m_kind) 1682 { 1683 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START: 1684 { 1685 struct BrigDirectiveArgBlock repr; 1686 repr.base.byteCount = lendian16 (sizeof (repr)); 1687 repr.base.kind = lendian16 (insn->m_kind); 1688 brig_code.add (&repr, sizeof (repr)); 1689 1690 for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++) 1691 { 1692 insn->m_call_insn->m_args_code_list->m_offsets[i] 1693 = lendian32 (emit_directive_variable 1694 (insn->m_call_insn->m_input_args[i])); 1695 brig_insn_count++; 1696 } 1697 1698 if (insn->m_call_insn->m_output_arg) 1699 { 1700 insn->m_call_insn->m_result_code_list->m_offsets[0] 1701 = lendian32 (emit_directive_variable 1702 (insn->m_call_insn->m_output_arg)); 1703 brig_insn_count++; 1704 } 1705 1706 break; 1707 } 1708 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END: 1709 { 1710 struct BrigDirectiveArgBlock repr; 1711 repr.base.byteCount = lendian16 (sizeof (repr)); 1712 repr.base.kind = lendian16 (insn->m_kind); 1713 brig_code.add (&repr, sizeof (repr)); 1714 break; 1715 } 1716 default: 1717 gcc_unreachable (); 1718 } 1719 1720 brig_insn_count++; 1721 } 1722 1723 /* Emit comment directive. 
*/ 1724 1725 static void 1726 emit_comment_insn (hsa_insn_comment *insn) 1727 { 1728 struct BrigDirectiveComment repr; 1729 memset (&repr, 0, sizeof (repr)); 1730 1731 repr.base.byteCount = lendian16 (sizeof (repr)); 1732 repr.base.kind = lendian16 (insn->m_opcode); 1733 repr.name = brig_emit_string (insn->m_comment, '\0', false); 1734 brig_code.add (&repr, sizeof (repr)); 1735 } 1736 1737 /* Emit queue instruction INSN. */ 1738 1739 static void 1740 emit_queue_insn (hsa_insn_queue *insn) 1741 { 1742 BrigInstQueue repr; 1743 memset (&repr, 0, sizeof (repr)); 1744 1745 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1746 repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE); 1747 repr.base.opcode = lendian16 (insn->m_opcode); 1748 repr.base.type = lendian16 (insn->m_type); 1749 repr.segment = insn->m_segment; 1750 repr.memoryOrder = insn->m_memory_order; 1751 repr.base.operands = lendian32 (emit_insn_operands (insn)); 1752 brig_data.round_size_up (4); 1753 brig_code.add (&repr, sizeof (repr)); 1754 1755 brig_insn_count++; 1756 } 1757 1758 /* Emit source type instruction INSN. */ 1759 1760 static void 1761 emit_srctype_insn (hsa_insn_srctype *insn) 1762 { 1763 /* We assume that BrigInstMod has a BrigInstBasic prefix. */ 1764 struct BrigInstSourceType repr; 1765 unsigned operand_count = insn->operand_count (); 1766 gcc_checking_assert (operand_count >= 2); 1767 1768 memset (&repr, 0, sizeof (repr)); 1769 repr.sourceType = lendian16 (insn->m_source_type); 1770 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1771 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE); 1772 repr.base.opcode = lendian16 (insn->m_opcode); 1773 repr.base.type = lendian16 (insn->m_type); 1774 1775 repr.base.operands = lendian32 (emit_insn_operands (insn)); 1776 brig_code.add (&repr, sizeof (struct BrigInstSourceType)); 1777 brig_insn_count++; 1778 } 1779 1780 /* Emit packed instruction INSN. 
*/ 1781 1782 static void 1783 emit_packed_insn (hsa_insn_packed *insn) 1784 { 1785 /* We assume that BrigInstMod has a BrigInstBasic prefix. */ 1786 struct BrigInstSourceType repr; 1787 unsigned operand_count = insn->operand_count (); 1788 gcc_checking_assert (operand_count >= 2); 1789 1790 memset (&repr, 0, sizeof (repr)); 1791 repr.sourceType = lendian16 (insn->m_source_type); 1792 repr.base.base.byteCount = lendian16 (sizeof (repr)); 1793 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE); 1794 repr.base.opcode = lendian16 (insn->m_opcode); 1795 repr.base.type = lendian16 (insn->m_type); 1796 1797 if (insn->m_opcode == BRIG_OPCODE_COMBINE) 1798 { 1799 /* Create operand list for packed type. */ 1800 for (unsigned i = 1; i < operand_count; i++) 1801 { 1802 gcc_checking_assert (insn->get_op (i)); 1803 insn->m_operand_list->m_offsets[i - 1] 1804 = lendian32 (enqueue_op (insn->get_op (i))); 1805 } 1806 1807 repr.base.operands = lendian32 (emit_operands (insn->get_op (0), 1808 insn->m_operand_list)); 1809 } 1810 else if (insn->m_opcode == BRIG_OPCODE_EXPAND) 1811 { 1812 /* Create operand list for packed type. */ 1813 for (unsigned i = 0; i < operand_count - 1; i++) 1814 { 1815 gcc_checking_assert (insn->get_op (i)); 1816 insn->m_operand_list->m_offsets[i] 1817 = lendian32 (enqueue_op (insn->get_op (i))); 1818 } 1819 1820 unsigned ops = emit_operands (insn->m_operand_list, 1821 insn->get_op (insn->operand_count () - 1)); 1822 repr.base.operands = lendian32 (ops); 1823 } 1824 1825 1826 brig_code.add (&repr, sizeof (struct BrigInstSourceType)); 1827 brig_insn_count++; 1828 } 1829 1830 /* Emit a basic HSA instruction and all necessary directives, schedule 1831 necessary operands for writing. */ 1832 1833 static void 1834 emit_basic_insn (hsa_insn_basic *insn) 1835 { 1836 /* We assume that BrigInstMod has a BrigInstBasic prefix. 
*/ 1837 struct BrigInstMod repr; 1838 BrigType16_t type; 1839 1840 memset (&repr, 0, sizeof (repr)); 1841 repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic)); 1842 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC); 1843 repr.base.opcode = lendian16 (insn->m_opcode); 1844 switch (insn->m_opcode) 1845 { 1846 /* And the bit-logical operations need bit types and whine about 1847 arithmetic types :-/ */ 1848 case BRIG_OPCODE_AND: 1849 case BRIG_OPCODE_OR: 1850 case BRIG_OPCODE_XOR: 1851 case BRIG_OPCODE_NOT: 1852 type = regtype_for_type (insn->m_type); 1853 break; 1854 default: 1855 type = insn->m_type; 1856 break; 1857 } 1858 repr.base.type = lendian16 (type); 1859 repr.base.operands = lendian32 (emit_insn_operands (insn)); 1860 1861 if (hsa_type_packed_p (type)) 1862 { 1863 if (hsa_type_float_p (type) 1864 && !hsa_opcode_floating_bit_insn_p (insn->m_opcode)) 1865 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN; 1866 else 1867 repr.round = 0; 1868 /* We assume that destination and sources agree in packing layout. */ 1869 if (insn->num_used_ops () >= 2) 1870 repr.pack = BRIG_PACK_PP; 1871 else 1872 repr.pack = BRIG_PACK_P; 1873 repr.reserved = 0; 1874 repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod)); 1875 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD); 1876 brig_code.add (&repr, sizeof (struct BrigInstMod)); 1877 } 1878 else 1879 brig_code.add (&repr, sizeof (struct BrigInstBasic)); 1880 brig_insn_count++; 1881 } 1882 1883 /* Emit an HSA instruction and all necessary directives, schedule necessary 1884 operands for writing. 
*/ 1885 1886 static void 1887 emit_insn (hsa_insn_basic *insn) 1888 { 1889 gcc_assert (!is_a <hsa_insn_phi *> (insn)); 1890 1891 insn->m_brig_offset = brig_code.total_size; 1892 1893 if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn)) 1894 emit_signal_insn (signal); 1895 else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn)) 1896 emit_atomic_insn (atom); 1897 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn)) 1898 emit_memory_insn (mem); 1899 else if (insn->m_opcode == BRIG_OPCODE_LDA) 1900 emit_addr_insn (insn); 1901 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn)) 1902 emit_segment_insn (seg); 1903 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn)) 1904 emit_cmp_insn (cmp); 1905 else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn)) 1906 emit_cond_branch_insn (br); 1907 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn)) 1908 { 1909 if (switch_instructions == NULL) 1910 switch_instructions = new vec <hsa_insn_sbr *> (); 1911 1912 switch_instructions->safe_push (sbr); 1913 emit_switch_insn (sbr); 1914 } 1915 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn)) 1916 emit_generic_branch_insn (br); 1917 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn)) 1918 emit_arg_block_insn (block); 1919 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn)) 1920 emit_call_insn (call); 1921 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn)) 1922 emit_comment_insn (comment); 1923 else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn)) 1924 emit_queue_insn (queue); 1925 else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn)) 1926 emit_srctype_insn (srctype); 1927 else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn)) 1928 emit_packed_insn (packed); 1929 else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn)) 1930 emit_cvt_insn (cvt); 1931 else if (hsa_insn_alloca *alloca = 
dyn_cast <hsa_insn_alloca *> (insn)) 1932 emit_alloca_insn (alloca); 1933 else 1934 emit_basic_insn (insn); 1935 } 1936 1937 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL, 1938 or we are about to finish emitting code, if it is NULL. If the fall through 1939 edge from BB does not lead to NEXT_BB, emit an unconditional jump. */ 1940 1941 static void 1942 perhaps_emit_branch (basic_block bb, basic_block next_bb) 1943 { 1944 basic_block t_bb = NULL, ff = NULL; 1945 1946 edge_iterator ei; 1947 edge e; 1948 1949 /* If the last instruction of BB is a switch, ignore emission of all 1950 edges. */ 1951 if (hsa_bb_for_bb (bb)->m_last_insn 1952 && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn)) 1953 return; 1954 1955 FOR_EACH_EDGE (e, ei, bb->succs) 1956 if (e->flags & EDGE_TRUE_VALUE) 1957 { 1958 gcc_assert (!t_bb); 1959 t_bb = e->dest; 1960 } 1961 else 1962 { 1963 gcc_assert (!ff); 1964 ff = e->dest; 1965 } 1966 1967 if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun)) 1968 return; 1969 1970 emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref); 1971 } 1972 1973 /* Emit the a function with name NAME to the various brig sections. */ 1974 1975 void 1976 hsa_brig_emit_function (void) 1977 { 1978 basic_block bb, prev_bb; 1979 hsa_insn_basic *insn; 1980 BrigDirectiveExecutable *ptr_to_fndir; 1981 1982 brig_init (); 1983 1984 brig_insn_count = 0; 1985 memset (&op_queue, 0, sizeof (op_queue)); 1986 op_queue.projected_size = brig_operand.total_size; 1987 1988 if (!function_offsets) 1989 function_offsets = new hash_map<tree, BrigCodeOffset32_t> (); 1990 1991 if (!emitted_declarations) 1992 emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> (); 1993 1994 for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++) 1995 { 1996 tree called = hsa_cfun->m_called_functions[i]; 1997 1998 /* If the function has no definition, emit a declaration. 
*/ 1999 if (!emitted_declarations->get (called)) 2000 { 2001 BrigDirectiveExecutable *e = emit_function_declaration (called); 2002 emitted_declarations->put (called, e); 2003 } 2004 } 2005 2006 for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++) 2007 { 2008 hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i]; 2009 emit_internal_fn_decl (called); 2010 } 2011 2012 ptr_to_fndir = emit_function_directives (hsa_cfun, false); 2013 for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn; 2014 insn; 2015 insn = insn->m_next) 2016 emit_insn (insn); 2017 prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun); 2018 FOR_EACH_BB_FN (bb, cfun) 2019 { 2020 perhaps_emit_branch (prev_bb, bb); 2021 emit_bb_label_directive (hsa_bb_for_bb (bb)); 2022 for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next) 2023 emit_insn (insn); 2024 prev_bb = bb; 2025 } 2026 perhaps_emit_branch (prev_bb, NULL); 2027 ptr_to_fndir->nextModuleEntry = lendian32 (brig_code.total_size); 2028 2029 /* Fill up label references for all sbr instructions. */ 2030 if (switch_instructions) 2031 { 2032 for (unsigned i = 0; i < switch_instructions->length (); i++) 2033 { 2034 hsa_insn_sbr *sbr = (*switch_instructions)[i]; 2035 for (unsigned j = 0; j < sbr->m_jump_table.length (); j++) 2036 { 2037 hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]); 2038 sbr->m_label_code_list->m_offsets[j] 2039 = hbb->m_label_ref.m_directive_offset; 2040 } 2041 } 2042 2043 switch_instructions->release (); 2044 delete switch_instructions; 2045 switch_instructions = NULL; 2046 } 2047 2048 if (dump_file) 2049 { 2050 fprintf (dump_file, "------- After BRIG emission: -------\n"); 2051 dump_hsa_cfun (dump_file); 2052 } 2053 2054 emit_queued_operands (); 2055 } 2056 2057 /* Emit all OMP symbols related to OMP. 
*/ 2058 2059 void 2060 hsa_brig_emit_omp_symbols (void) 2061 { 2062 brig_init (); 2063 emit_directive_variable (hsa_num_threads); 2064 } 2065 2066 /* Create and return __hsa_global_variables symbol that contains 2067 all informations consumed by libgomp to link global variables 2068 with their string names used by an HSA kernel. */ 2069 2070 static tree 2071 hsa_output_global_variables () 2072 { 2073 unsigned l = hsa_global_variable_symbols->elements (); 2074 2075 tree variable_info_type = make_node (RECORD_TYPE); 2076 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2077 get_identifier ("name"), ptr_type_node); 2078 DECL_CHAIN (id_f1) = NULL_TREE; 2079 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2080 get_identifier ("omp_data_size"), 2081 ptr_type_node); 2082 DECL_CHAIN (id_f2) = id_f1; 2083 finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2, 2084 NULL_TREE); 2085 2086 tree int_num_of_global_vars; 2087 int_num_of_global_vars = build_int_cst (uint32_type_node, l); 2088 tree global_vars_num_index_type = build_index_type (int_num_of_global_vars); 2089 tree global_vars_array_type = build_array_type (variable_info_type, 2090 global_vars_num_index_type); 2091 TYPE_ARTIFICIAL (global_vars_array_type) = 1; 2092 2093 vec<constructor_elt, va_gc> *global_vars_vec = NULL; 2094 2095 for (hash_table <hsa_noop_symbol_hasher>::iterator it 2096 = hsa_global_variable_symbols->begin (); 2097 it != hsa_global_variable_symbols->end (); ++it) 2098 { 2099 unsigned len = strlen ((*it)->m_name); 2100 char *copy = XNEWVEC (char, len + 2); 2101 copy[0] = '&'; 2102 memcpy (copy + 1, (*it)->m_name, len); 2103 copy[len + 1] = '\0'; 2104 len++; 2105 hsa_sanitize_name (copy); 2106 2107 tree var_name = build_string (len, copy); 2108 TREE_TYPE (var_name) 2109 = build_array_type (char_type_node, build_index_type (size_int (len))); 2110 free (copy); 2111 2112 vec<constructor_elt, va_gc> *variable_info_vec = NULL; 2113 CONSTRUCTOR_APPEND_ELT (variable_info_vec, 
NULL_TREE, 2114 build1 (ADDR_EXPR, 2115 build_pointer_type (TREE_TYPE (var_name)), 2116 var_name)); 2117 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE, 2118 build_fold_addr_expr ((*it)->m_decl)); 2119 2120 tree variable_info_ctor = build_constructor (variable_info_type, 2121 variable_info_vec); 2122 2123 CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE, 2124 variable_info_ctor); 2125 } 2126 2127 tree global_vars_ctor = build_constructor (global_vars_array_type, 2128 global_vars_vec); 2129 2130 char tmp_name[64]; 2131 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1); 2132 tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL, 2133 get_identifier (tmp_name), 2134 global_vars_array_type); 2135 TREE_STATIC (global_vars_table) = 1; 2136 TREE_READONLY (global_vars_table) = 1; 2137 TREE_PUBLIC (global_vars_table) = 0; 2138 DECL_ARTIFICIAL (global_vars_table) = 1; 2139 DECL_IGNORED_P (global_vars_table) = 1; 2140 DECL_EXTERNAL (global_vars_table) = 0; 2141 TREE_CONSTANT (global_vars_table) = 1; 2142 DECL_INITIAL (global_vars_table) = global_vars_ctor; 2143 varpool_node::finalize_decl (global_vars_table); 2144 2145 return global_vars_table; 2146 } 2147 2148 /* Create __hsa_host_functions and __hsa_kernels that contain 2149 all informations consumed by libgomp to register all kernels 2150 in the BRIG binary. 
*/

static void
hsa_output_kernels (tree *host_func_table, tree *kernels)
{
  unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();

  /* First table: one pointer per kernel mapping, holding the address of the
     host function that belongs to the mapped declaration.  */
  tree host_index_type
    = build_index_type (build_int_cst (uint32_type_node, kernel_count));
  tree host_table_type = build_array_type (ptr_type_node, host_index_type);
  TYPE_ARTIFICIAL (host_table_type) = 1;

  vec<constructor_elt, va_gc> *host_table_elts = NULL;
  for (unsigned k = 0; k < kernel_count; ++k)
    {
      tree mapped_decl = hsa_get_decl_kernel_mapping_decl (k);
      tree host_addr
        = build_fold_addr_expr (hsa_get_host_function (mapped_decl));
      CONSTRUCTOR_APPEND_ELT (host_table_elts, NULL_TREE, host_addr);
    }
  tree host_table_ctor = build_constructor (host_table_type,
                                            host_table_elts);

  /* The label buffer is reused for every internal label generated in this
     function.  */
  char label[64];
  ASM_GENERATE_INTERNAL_LABEL (label, "__hsa_host_functions", 1);
  tree host_table_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                     get_identifier (label),
                                     host_table_type);
  TREE_STATIC (host_table_decl) = 1;
  TREE_READONLY (host_table_decl) = 1;
  TREE_PUBLIC (host_table_decl) = 0;
  DECL_ARTIFICIAL (host_table_decl) = 1;
  DECL_IGNORED_P (host_table_decl) = 1;
  DECL_EXTERNAL (host_table_decl) = 0;
  TREE_CONSTANT (host_table_decl) = 1;
  DECL_INITIAL (host_table_decl) = host_table_ctor;
  varpool_node::finalize_decl (host_table_decl);
  *host_func_table = host_table_decl;

  /* Second table: an array of __hsa_kernel_info records.  The fields are
     chained last-to-first and handed to finish_builtin_struct starting from
     the last one.  */

  tree info_type = make_node (RECORD_TYPE);
  tree f_name = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                            get_identifier ("name"), ptr_type_node);
  DECL_CHAIN (f_name) = NULL_TREE;
  tree f_omp_size = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                                get_identifier ("omp_data_size"),
                                unsigned_type_node);
  DECL_CHAIN (f_omp_size) = f_name;
  tree f_gridified = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                                 get_identifier ("gridified_kernel_p"),
                                 boolean_type_node);
  DECL_CHAIN (f_gridified) = f_omp_size;
  tree f_dep_count = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                                 get_identifier ("kernel_dependencies_count"),
                                 unsigned_type_node);
  DECL_CHAIN (f_dep_count) = f_gridified;
  tree f_deps = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                            get_identifier ("kernel_dependencies"),
                            build_pointer_type (build_pointer_type
                                                (char_type_node)));
  DECL_CHAIN (f_deps) = f_dep_count;
  finish_builtin_struct (info_type, "__hsa_kernel_info", f_deps, NULL_TREE);

  tree info_index_type
    = build_index_type (build_int_cstu (uint32_type_node, kernel_count));
  tree info_table_type = build_array_type (info_type, info_index_type);
  TYPE_ARTIFICIAL (info_table_type) = 1;

  vec<constructor_elt, va_gc> *info_table_elts = NULL;

  for (unsigned k = 0; k < kernel_count; ++k)
    {
      tree kernel_decl = hsa_get_decl_kernel_mapping_decl (k);
      char *kernel_name = hsa_get_decl_kernel_mapping_name (k);

      /* The emitted name is the mapping name prefixed with '&'; the buffer
         additionally holds the terminating NUL.  */
      unsigned prefixed_len = strlen (kernel_name) + 1;
      char *buf = XNEWVEC (char, prefixed_len + 1);
      buf[0] = '&';
      memcpy (buf + 1, kernel_name, prefixed_len - 1);
      buf[prefixed_len] = '\0';

      tree name_cst = build_string (prefixed_len, buf);
      TREE_TYPE (name_cst)
        = build_array_type (char_type_node,
                            build_index_type (size_int (prefixed_len)));
      free (buf);

      unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (k);
      tree omp_size_cst = build_int_cstu (unsigned_type_node, omp_size);
      bool gridified = hsa_get_decl_kernel_mapping_gridified (k);
      tree gridified_cst = build_int_cstu (boolean_type_node, gridified);

      /* Collect the addresses of the names of all kernels this one depends
         on, if any dependencies have been recorded for it.  */
      unsigned dep_count = 0;
      tree dep_array_type = NULL_TREE;
      vec<constructor_elt, va_gc> *dep_elts = NULL;
      vec<const char *> **dep_slot = NULL;
      if (hsa_decl_kernel_dependencies)
        dep_slot = hsa_decl_kernel_dependencies->get (kernel_decl);
      if (dep_slot)
        {
          vec <const char *> *dep_names = *dep_slot;
          dep_count = dep_names->length ();

          dep_array_type
            = build_array_type (build_pointer_type (char_type_node),
                                build_index_type (size_int (dep_count)));
          TYPE_ARTIFICIAL (dep_array_type) = 1;

          for (unsigned j = 0; j < dep_count; j++)
            {
              const char *dep_name = (*dep_names)[j];
              unsigned dep_len = strlen (dep_name);
              tree dep_cst = build_string (dep_len, dep_name);
              TREE_TYPE (dep_cst)
                = build_array_type (char_type_node,
                                    build_index_type (size_int (dep_len)));

              tree dep_addr
                = build1 (ADDR_EXPR,
                          build_pointer_type (TREE_TYPE (dep_cst)),
                          dep_cst);
              CONSTRUCTOR_APPEND_ELT (dep_elts, NULL_TREE, dep_addr);
            }
        }

      tree dep_count_cst = build_int_cstu (unsigned_type_node, dep_count);

      /* Fill in the record in field order: name, omp_data_size,
         gridified_kernel_p, kernel_dependencies_count and finally
         kernel_dependencies.  */
      vec<constructor_elt, va_gc> *info_elts = NULL;
      tree name_addr = build1 (ADDR_EXPR,
                               build_pointer_type (TREE_TYPE (name_cst)),
                               name_cst);
      CONSTRUCTOR_APPEND_ELT (info_elts, NULL_TREE, name_addr);
      CONSTRUCTOR_APPEND_ELT (info_elts, NULL_TREE, omp_size_cst);
      CONSTRUCTOR_APPEND_ELT (info_elts, NULL_TREE, gridified_cst);
      CONSTRUCTOR_APPEND_ELT (info_elts, NULL_TREE, dep_count_cst);

      if (dep_count == 0)
        /* No dependencies: the last field is a null pointer.  */
        CONSTRUCTOR_APPEND_ELT (info_elts, NULL_TREE, null_pointer_node);
      else
        {
          /* Emit a separate static array with the dependency names; its
             label is numbered with the index of the kernel mapping.  */
          ASM_GENERATE_INTERNAL_LABEL (label, "__hsa_dependencies_list", k);
          gcc_checking_assert (dep_array_type);
          tree deps_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                       get_identifier (label),
                                       dep_array_type);

          TREE_STATIC (deps_decl) = 1;
          TREE_READONLY (deps_decl) = 1;
          TREE_PUBLIC (deps_decl) = 0;
          DECL_ARTIFICIAL (deps_decl) = 1;
          DECL_IGNORED_P (deps_decl) = 1;
          DECL_EXTERNAL (deps_decl) = 0;
          TREE_CONSTANT (deps_decl) = 1;
          DECL_INITIAL (deps_decl) = build_constructor (dep_array_type,
                                                        dep_elts);
          varpool_node::finalize_decl (deps_decl);

          tree deps_addr
            = build1 (ADDR_EXPR,
                      build_pointer_type (TREE_TYPE (deps_decl)),
                      deps_decl);
          CONSTRUCTOR_APPEND_ELT (info_elts, NULL_TREE, deps_addr);
        }

      CONSTRUCTOR_APPEND_ELT (info_table_elts, NULL_TREE,
                              build_constructor (info_type, info_elts));
    }

  ASM_GENERATE_INTERNAL_LABEL (label, "__hsa_kernels", 1);
  tree info_table_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                     get_identifier (label),
                                     info_table_type);

  TREE_STATIC (info_table_decl) = 1;
  TREE_READONLY (info_table_decl) = 1;
  TREE_PUBLIC (info_table_decl) = 0;
  DECL_ARTIFICIAL (info_table_decl) = 1;
  DECL_IGNORED_P (info_table_decl) = 1;
  DECL_EXTERNAL (info_table_decl) = 0;
  TREE_CONSTANT (info_table_decl) = 1;
  DECL_INITIAL (info_table_decl) = build_constructor (info_table_type,
                                                      info_table_elts);
  varpool_node::finalize_decl (info_table_decl);
  *kernels = info_table_decl;
}

/* Create a static constructor that will register our brig stuff with
   libgomp.
*/ 2348 2349 static void 2350 hsa_output_libgomp_mapping (tree brig_decl) 2351 { 2352 unsigned kernel_count = hsa_get_number_decl_kernel_mappings (); 2353 unsigned global_variable_count = hsa_global_variable_symbols->elements (); 2354 2355 tree kernels; 2356 tree host_func_table; 2357 2358 hsa_output_kernels (&host_func_table, &kernels); 2359 tree global_vars = hsa_output_global_variables (); 2360 2361 tree hsa_image_desc_type = make_node (RECORD_TYPE); 2362 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2363 get_identifier ("brig_module"), ptr_type_node); 2364 DECL_CHAIN (id_f1) = NULL_TREE; 2365 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2366 get_identifier ("kernel_count"), 2367 unsigned_type_node); 2368 2369 DECL_CHAIN (id_f2) = id_f1; 2370 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2371 get_identifier ("hsa_kernel_infos"), 2372 ptr_type_node); 2373 DECL_CHAIN (id_f3) = id_f2; 2374 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2375 get_identifier ("global_variable_count"), 2376 unsigned_type_node); 2377 DECL_CHAIN (id_f4) = id_f3; 2378 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL, 2379 get_identifier ("hsa_global_variable_infos"), 2380 ptr_type_node); 2381 DECL_CHAIN (id_f5) = id_f4; 2382 finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5, 2383 NULL_TREE); 2384 TYPE_ARTIFICIAL (hsa_image_desc_type) = 1; 2385 2386 vec<constructor_elt, va_gc> *img_desc_vec = NULL; 2387 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, 2388 build_fold_addr_expr (brig_decl)); 2389 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, 2390 build_int_cstu (unsigned_type_node, kernel_count)); 2391 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, 2392 build1 (ADDR_EXPR, 2393 build_pointer_type (TREE_TYPE (kernels)), 2394 kernels)); 2395 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, 2396 build_int_cstu (unsigned_type_node, 2397 global_variable_count)); 2398 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, 2399 build1 
(ADDR_EXPR, 2400 build_pointer_type (TREE_TYPE (global_vars)), 2401 global_vars)); 2402 2403 tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec); 2404 2405 char tmp_name[64]; 2406 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1); 2407 tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL, 2408 get_identifier (tmp_name), 2409 hsa_image_desc_type); 2410 TREE_STATIC (hsa_img_descriptor) = 1; 2411 TREE_READONLY (hsa_img_descriptor) = 1; 2412 TREE_PUBLIC (hsa_img_descriptor) = 0; 2413 DECL_ARTIFICIAL (hsa_img_descriptor) = 1; 2414 DECL_IGNORED_P (hsa_img_descriptor) = 1; 2415 DECL_EXTERNAL (hsa_img_descriptor) = 0; 2416 TREE_CONSTANT (hsa_img_descriptor) = 1; 2417 DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor; 2418 varpool_node::finalize_decl (hsa_img_descriptor); 2419 2420 /* Construct the "host_table" libgomp expects. */ 2421 tree index_type = build_index_type (build_int_cst (integer_type_node, 4)); 2422 tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type); 2423 TYPE_ARTIFICIAL (libgomp_host_table_type) = 1; 2424 vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL; 2425 tree host_func_table_addr = build_fold_addr_expr (host_func_table); 2426 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, 2427 host_func_table_addr); 2428 offset_int func_table_size 2429 = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count; 2430 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, 2431 fold_build2 (POINTER_PLUS_EXPR, 2432 TREE_TYPE (host_func_table_addr), 2433 host_func_table_addr, 2434 build_int_cst (size_type_node, 2435 func_table_size.to_uhwi 2436 ()))); 2437 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node); 2438 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node); 2439 tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type, 2440 libgomp_host_table_vec); 2441 ASM_GENERATE_INTERNAL_LABEL (tmp_name, 
"__hsa_libgomp_host_table", 1); 2442 tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL, 2443 get_identifier (tmp_name), 2444 libgomp_host_table_type); 2445 2446 TREE_STATIC (hsa_libgomp_host_table) = 1; 2447 TREE_READONLY (hsa_libgomp_host_table) = 1; 2448 TREE_PUBLIC (hsa_libgomp_host_table) = 0; 2449 DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1; 2450 DECL_IGNORED_P (hsa_libgomp_host_table) = 1; 2451 DECL_EXTERNAL (hsa_libgomp_host_table) = 0; 2452 TREE_CONSTANT (hsa_libgomp_host_table) = 1; 2453 DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor; 2454 varpool_node::finalize_decl (hsa_libgomp_host_table); 2455 2456 /* Generate an initializer with a call to the registration routine. */ 2457 2458 tree offload_register 2459 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER); 2460 gcc_checking_assert (offload_register); 2461 2462 tree *hsa_ctor_stmts = hsa_get_ctor_statements (); 2463 append_to_statement_list 2464 (build_call_expr (offload_register, 4, 2465 build_int_cstu (unsigned_type_node, 2466 GOMP_VERSION_PACK (GOMP_VERSION, 2467 GOMP_VERSION_HSA)), 2468 build_fold_addr_expr (hsa_libgomp_host_table), 2469 build_int_cst (integer_type_node, GOMP_DEVICE_HSA), 2470 build_fold_addr_expr (hsa_img_descriptor)), 2471 hsa_ctor_stmts); 2472 2473 cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY); 2474 2475 tree offload_unregister 2476 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER); 2477 gcc_checking_assert (offload_unregister); 2478 2479 tree *hsa_dtor_stmts = hsa_get_dtor_statements (); 2480 append_to_statement_list 2481 (build_call_expr (offload_unregister, 4, 2482 build_int_cstu (unsigned_type_node, 2483 GOMP_VERSION_PACK (GOMP_VERSION, 2484 GOMP_VERSION_HSA)), 2485 build_fold_addr_expr (hsa_libgomp_host_table), 2486 build_int_cst (integer_type_node, GOMP_DEVICE_HSA), 2487 build_fold_addr_expr (hsa_img_descriptor)), 2488 hsa_dtor_stmts); 2489 cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, 
DEFAULT_INIT_PRIORITY); 2490 } 2491 2492 /* Emit the brig module we have compiled to a section in the final assembly and 2493 also create a compile unit static constructor that will register the brig 2494 module with libgomp. */ 2495 2496 void 2497 hsa_output_brig (void) 2498 { 2499 section *saved_section; 2500 2501 if (!brig_initialized) 2502 return; 2503 2504 for (unsigned i = 0; i < function_call_linkage.length (); i++) 2505 { 2506 function_linkage_pair p = function_call_linkage[i]; 2507 2508 BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl); 2509 gcc_assert (*func_offset); 2510 BrigOperandCodeRef *code_ref 2511 = (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset)); 2512 gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF); 2513 code_ref->ref = lendian32 (*func_offset); 2514 } 2515 2516 /* Iterate all function declarations and if we meet a function that should 2517 have module linkage and we are unable to emit HSAIL for the function, 2518 then change the linkage to program linkage. Doing so, we will emit 2519 a valid BRIG image. 
*/ 2520 if (hsa_failed_functions != NULL && emitted_declarations != NULL) 2521 for (hash_map <tree, BrigDirectiveExecutable *>::iterator it 2522 = emitted_declarations->begin (); 2523 it != emitted_declarations->end (); 2524 ++it) 2525 { 2526 if (hsa_failed_functions->contains ((*it).first)) 2527 (*it).second->linkage = BRIG_LINKAGE_PROGRAM; 2528 } 2529 2530 saved_section = in_section; 2531 2532 switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL)); 2533 char tmp_name[64]; 2534 ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1); 2535 ASM_OUTPUT_LABEL (asm_out_file, tmp_name); 2536 tree brig_id = get_identifier (tmp_name); 2537 tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id, 2538 char_type_node); 2539 SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id); 2540 TREE_ADDRESSABLE (brig_decl) = 1; 2541 TREE_READONLY (brig_decl) = 1; 2542 DECL_ARTIFICIAL (brig_decl) = 1; 2543 DECL_IGNORED_P (brig_decl) = 1; 2544 TREE_STATIC (brig_decl) = 1; 2545 TREE_PUBLIC (brig_decl) = 0; 2546 TREE_USED (brig_decl) = 1; 2547 DECL_INITIAL (brig_decl) = brig_decl; 2548 TREE_ASM_WRITTEN (brig_decl) = 1; 2549 2550 BrigModuleHeader module_header; 2551 memcpy (&module_header.identification, "HSA BRIG", 2552 sizeof (module_header.identification)); 2553 module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR); 2554 module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR); 2555 uint64_t section_index[3]; 2556 2557 int data_padding, code_padding, operand_padding; 2558 data_padding = HSA_SECTION_ALIGNMENT 2559 - brig_data.total_size % HSA_SECTION_ALIGNMENT; 2560 code_padding = HSA_SECTION_ALIGNMENT 2561 - brig_code.total_size % HSA_SECTION_ALIGNMENT; 2562 operand_padding = HSA_SECTION_ALIGNMENT 2563 - brig_operand.total_size % HSA_SECTION_ALIGNMENT; 2564 2565 uint64_t module_size = sizeof (module_header) 2566 + sizeof (section_index) 2567 + brig_data.total_size 2568 + data_padding 2569 + brig_code.total_size 2570 + code_padding 2571 + 
brig_operand.total_size 2572 + operand_padding; 2573 gcc_assert ((module_size % 16) == 0); 2574 module_header.byteCount = lendian64 (module_size); 2575 memset (&module_header.hash, 0, sizeof (module_header.hash)); 2576 module_header.reserved = 0; 2577 module_header.sectionCount = lendian32 (3); 2578 module_header.sectionIndex = lendian64 (sizeof (module_header)); 2579 assemble_string ((const char *) &module_header, sizeof (module_header)); 2580 uint64_t off = sizeof (module_header) + sizeof (section_index); 2581 section_index[0] = lendian64 (off); 2582 off += brig_data.total_size + data_padding; 2583 section_index[1] = lendian64 (off); 2584 off += brig_code.total_size + code_padding; 2585 section_index[2] = lendian64 (off); 2586 assemble_string ((const char *) §ion_index, sizeof (section_index)); 2587 2588 char padding[HSA_SECTION_ALIGNMENT]; 2589 memset (padding, 0, sizeof (padding)); 2590 2591 brig_data.output (); 2592 assemble_string (padding, data_padding); 2593 brig_code.output (); 2594 assemble_string (padding, code_padding); 2595 brig_operand.output (); 2596 assemble_string (padding, operand_padding); 2597 2598 if (saved_section) 2599 switch_to_section (saved_section); 2600 2601 hsa_output_libgomp_mapping (brig_decl); 2602 2603 hsa_free_decl_kernel_mapping (); 2604 brig_release_data (); 2605 hsa_deinit_compilation_unit_data (); 2606 2607 delete emitted_declarations; 2608 emitted_declarations = NULL; 2609 delete function_offsets; 2610 function_offsets = NULL; 2611 } 2612