Lines Matching refs:inst

39    is_byte_raw_mov(const fs_inst *inst)  in is_byte_raw_mov()  argument
41 return type_sz(inst->dst.type) == 1 && in is_byte_raw_mov()
42 inst->opcode == BRW_OPCODE_MOV && in is_byte_raw_mov()
43 inst->src[0].type == inst->dst.type && in is_byte_raw_mov()
44 !inst->saturate && in is_byte_raw_mov()
45 !inst->src[0].negate && in is_byte_raw_mov()
46 !inst->src[0].abs; in is_byte_raw_mov()
54 required_dst_byte_stride(const fs_inst *inst) in required_dst_byte_stride() argument
56 if (inst->dst.is_accumulator()) { in required_dst_byte_stride()
69 return inst->dst.stride * type_sz(inst->dst.type); in required_dst_byte_stride()
70 } else if (type_sz(inst->dst.type) < get_exec_type_size(inst) && in required_dst_byte_stride()
71 !is_byte_raw_mov(inst)) { in required_dst_byte_stride()
72 return get_exec_type_size(inst); in required_dst_byte_stride()
78 unsigned max_stride = inst->dst.stride * type_sz(inst->dst.type); in required_dst_byte_stride()
79 unsigned min_size = type_sz(inst->dst.type); in required_dst_byte_stride()
80 unsigned max_size = type_sz(inst->dst.type); in required_dst_byte_stride()
82 for (unsigned i = 0; i < inst->sources; i++) { in required_dst_byte_stride()
83 if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) { in required_dst_byte_stride()
84 const unsigned size = type_sz(inst->src[i].type); in required_dst_byte_stride()
85 max_stride = MAX2(max_stride, inst->src[i].stride * size); in required_dst_byte_stride()
110 required_dst_byte_offset(const fs_inst *inst) in required_dst_byte_offset() argument
112 for (unsigned i = 0; i < inst->sources; i++) { in required_dst_byte_offset()
113 if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) in required_dst_byte_offset()
114 if (reg_offset(inst->src[i]) % REG_SIZE != in required_dst_byte_offset()
115 reg_offset(inst->dst) % REG_SIZE) in required_dst_byte_offset()
119 return reg_offset(inst->dst) % REG_SIZE; in required_dst_byte_offset()
127 required_exec_type(const intel_device_info *devinfo, const fs_inst *inst) in required_exec_type() argument
129 const brw_reg_type t = get_exec_type(inst); in required_exec_type()
133 switch (inst->opcode) { in required_exec_type()
152 else if (has_dst_aligned_region_restriction(devinfo, inst)) in required_exec_type()
164 if (has_dst_aligned_region_restriction(devinfo, inst)) in required_exec_type()
190 devinfo->verx10 >= 125) && type_sz(inst->src[0].type) > 4) || in required_exec_type()
192 brw_reg_type_is_floating_point(inst->src[0].type))) in required_exec_type()
207 has_invalid_src_region(const intel_device_info *devinfo, const fs_inst *inst, in has_invalid_src_region() argument
210 if (is_unordered(inst) || inst->is_control_source(i)) in has_invalid_src_region()
224 inst->opcode == BRW_OPCODE_MAD && in has_invalid_src_region()
225 inst->src[i].type == BRW_REGISTER_TYPE_HF && in has_invalid_src_region()
226 reg_offset(inst->src[i]) % REG_SIZE > 0 && in has_invalid_src_region()
227 inst->src[i].stride != 0) { in has_invalid_src_region()
231 const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type); in has_invalid_src_region()
232 const unsigned src_byte_stride = inst->src[i].stride * in has_invalid_src_region()
233 type_sz(inst->src[i].type); in has_invalid_src_region()
234 const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE; in has_invalid_src_region()
235 const unsigned src_byte_offset = reg_offset(inst->src[i]) % REG_SIZE; in has_invalid_src_region()
237 return has_dst_aligned_region_restriction(devinfo, inst) && in has_invalid_src_region()
238 !is_uniform(inst->src[i]) && in has_invalid_src_region()
249 const fs_inst *inst) in has_invalid_dst_region() argument
251 if (is_unordered(inst)) { in has_invalid_dst_region()
254 const brw_reg_type exec_type = get_exec_type(inst); in has_invalid_dst_region()
255 const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE; in has_invalid_dst_region()
256 const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type); in has_invalid_dst_region()
257 const bool is_narrowing_conversion = !is_byte_raw_mov(inst) && in has_invalid_dst_region()
258 type_sz(inst->dst.type) < type_sz(exec_type); in has_invalid_dst_region()
260 return (has_dst_aligned_region_restriction(devinfo, inst) && in has_invalid_dst_region()
261 (required_dst_byte_stride(inst) != dst_byte_stride || in has_invalid_dst_region()
262 required_dst_byte_offset(inst) != dst_byte_offset)) || in has_invalid_dst_region()
264 required_dst_byte_stride(inst) != dst_byte_stride); in has_invalid_dst_region()
275 has_invalid_exec_type(const intel_device_info *devinfo, const fs_inst *inst) in has_invalid_exec_type() argument
277 if (required_exec_type(devinfo, inst) != get_exec_type(inst)) { in has_invalid_exec_type()
278 switch (inst->opcode) { in has_invalid_exec_type()
303 const fs_inst *inst, unsigned i) in has_invalid_src_modifiers() argument
305 return (!inst->can_do_source_mods(devinfo) && in has_invalid_src_modifiers()
306 (inst->src[i].negate || inst->src[i].abs)) || in has_invalid_src_modifiers()
307 ((has_invalid_exec_type(devinfo, inst) & (1u << i)) && in has_invalid_src_modifiers()
308 (inst->src[i].negate || inst->src[i].abs || in has_invalid_src_modifiers()
309 inst->src[i].type != get_exec_type(inst))); in has_invalid_src_modifiers()
317 has_invalid_conversion(const intel_device_info *devinfo, const fs_inst *inst) in has_invalid_conversion() argument
319 switch (inst->opcode) { in has_invalid_conversion()
323 return inst->dst.type != get_exec_type(inst); in has_invalid_conversion()
329 return has_invalid_exec_type(devinfo, inst) && in has_invalid_conversion()
330 inst->dst.type != get_exec_type(inst); in has_invalid_conversion()
338 has_invalid_dst_modifiers(const intel_device_info *devinfo, const fs_inst *inst) in has_invalid_dst_modifiers() argument
340 return (has_invalid_exec_type(devinfo, inst) && in has_invalid_dst_modifiers()
341 (inst->saturate || inst->conditional_mod)) || in has_invalid_dst_modifiers()
342 has_invalid_conversion(devinfo, inst); in has_invalid_dst_modifiers()
351 has_inconsistent_cmod(const fs_inst *inst) in has_inconsistent_cmod() argument
353 return inst->opcode == BRW_OPCODE_SEL || in has_inconsistent_cmod()
354 inst->opcode == BRW_OPCODE_CSEL || in has_inconsistent_cmod()
355 inst->opcode == BRW_OPCODE_IF || in has_inconsistent_cmod()
356 inst->opcode == BRW_OPCODE_WHILE; in has_inconsistent_cmod()
360 lower_instruction(fs_visitor *v, bblock_t *block, fs_inst *inst);
371 lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i) in lower_src_modifiers() argument
373 assert(inst->components_read(i) == 1); in lower_src_modifiers()
375 inst->opcode != BRW_OPCODE_MUL || in lower_src_modifiers()
376 brw_reg_type_is_floating_point(get_exec_type(inst)) || in lower_src_modifiers()
377 MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4 || in lower_src_modifiers()
378 type_sz(inst->src[i].type) == get_exec_type_size(inst)); in lower_src_modifiers()
380 const fs_builder ibld(v, block, inst); in lower_src_modifiers()
381 const fs_reg tmp = ibld.vgrf(get_exec_type(inst)); in lower_src_modifiers()
383 lower_instruction(v, block, ibld.MOV(tmp, inst->src[i])); in lower_src_modifiers()
384 inst->src[i] = tmp; in lower_src_modifiers()
399 lower_dst_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst) in lower_dst_modifiers() argument
401 const fs_builder ibld(v, block, inst); in lower_dst_modifiers()
402 const brw_reg_type type = get_exec_type(inst); in lower_dst_modifiers()
410 type_sz(inst->dst.type) * inst->dst.stride <= type_sz(type) ? 1 : in lower_dst_modifiers()
411 type_sz(inst->dst.type) * inst->dst.stride / type_sz(type); in lower_dst_modifiers()
417 fs_inst *mov = ibld.at(block, inst->next).MOV(inst->dst, tmp); in lower_dst_modifiers()
418 mov->saturate = inst->saturate; in lower_dst_modifiers()
419 if (!has_inconsistent_cmod(inst)) in lower_dst_modifiers()
420 mov->conditional_mod = inst->conditional_mod; in lower_dst_modifiers()
421 if (inst->opcode != BRW_OPCODE_SEL) { in lower_dst_modifiers()
422 mov->predicate = inst->predicate; in lower_dst_modifiers()
423 mov->predicate_inverse = inst->predicate_inverse; in lower_dst_modifiers()
425 mov->flag_subreg = inst->flag_subreg; in lower_dst_modifiers()
431 assert(inst->size_written == inst->dst.component_size(inst->exec_size)); in lower_dst_modifiers()
432 inst->dst = tmp; in lower_dst_modifiers()
433 inst->size_written = inst->dst.component_size(inst->exec_size); in lower_dst_modifiers()
434 inst->saturate = false; in lower_dst_modifiers()
435 if (!has_inconsistent_cmod(inst)) in lower_dst_modifiers()
436 inst->conditional_mod = BRW_CONDITIONAL_NONE; in lower_dst_modifiers()
438 assert(!inst->flags_written(v->devinfo) || !mov->predicate); in lower_dst_modifiers()
448 lower_src_region(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i) in lower_src_region() argument
450 assert(inst->components_read(i) == 1); in lower_src_region()
451 const fs_builder ibld(v, block, inst); in lower_src_region()
452 const unsigned stride = type_sz(inst->dst.type) * inst->dst.stride / in lower_src_region()
453 type_sz(inst->src[i].type); in lower_src_region()
455 fs_reg tmp = ibld.vgrf(inst->src[i].type, stride); in lower_src_region()
465 fs_reg raw_src = inst->src[i]; in lower_src_region()
476 lower_src.negate = inst->src[i].negate; in lower_src_region()
477 lower_src.abs = inst->src[i].abs; in lower_src_region()
478 inst->src[i] = lower_src; in lower_src_region()
490 lower_dst_region(fs_visitor *v, bblock_t *block, fs_inst *inst) in lower_dst_region() argument
497 assert(inst->opcode != BRW_OPCODE_MUL || !inst->dst.is_accumulator() || in lower_dst_region()
498 brw_reg_type_is_floating_point(inst->dst.type)); in lower_dst_region()
500 const fs_builder ibld(v, block, inst); in lower_dst_region()
501 const unsigned stride = required_dst_byte_stride(inst) / in lower_dst_region()
502 type_sz(inst->dst.type); in lower_dst_region()
504 fs_reg tmp = ibld.vgrf(inst->dst.type, stride); in lower_dst_region()
515 if (inst->predicate && inst->opcode != BRW_OPCODE_SEL) { in lower_dst_region()
524 subscript(inst->dst, raw_type, j)); in lower_dst_region()
528 ibld.at(block, inst->next).MOV(subscript(inst->dst, raw_type, j), in lower_dst_region()
534 assert(inst->size_written == inst->dst.component_size(inst->exec_size)); in lower_dst_region()
535 inst->dst = tmp; in lower_dst_region()
536 inst->size_written = inst->dst.component_size(inst->exec_size); in lower_dst_region()
548 lower_exec_type(fs_visitor *v, bblock_t *block, fs_inst *inst) in lower_exec_type() argument
550 assert(inst->dst.type == get_exec_type(inst)); in lower_exec_type()
551 const unsigned mask = has_invalid_exec_type(v->devinfo, inst); in lower_exec_type()
552 const brw_reg_type raw_type = required_exec_type(v->devinfo, inst); in lower_exec_type()
553 const unsigned n = get_exec_type_size(inst) / type_sz(raw_type); in lower_exec_type()
554 const fs_builder ibld(v, block, inst); in lower_exec_type()
556 fs_reg tmp = ibld.vgrf(inst->dst.type, inst->dst.stride); in lower_exec_type()
558 tmp = horiz_stride(tmp, inst->dst.stride); in lower_exec_type()
561 fs_inst sub_inst = *inst; in lower_exec_type()
563 for (unsigned i = 0; i < inst->sources; i++) { in lower_exec_type()
565 assert(inst->src[i].type == inst->dst.type); in lower_exec_type()
566 sub_inst.src[i] = subscript(inst->src[i], raw_type, j); in lower_exec_type()
576 fs_inst *mov = ibld.MOV(subscript(inst->dst, raw_type, j), in lower_exec_type()
578 if (inst->opcode != BRW_OPCODE_SEL) { in lower_exec_type()
579 mov->predicate = inst->predicate; in lower_exec_type()
580 mov->predicate_inverse = inst->predicate_inverse; in lower_exec_type()
585 inst->remove(block); in lower_exec_type()
595 lower_instruction(fs_visitor *v, bblock_t *block, fs_inst *inst) in lower_instruction() argument
600 if (has_invalid_dst_modifiers(devinfo, inst)) in lower_instruction()
601 progress |= lower_dst_modifiers(v, block, inst); in lower_instruction()
603 if (has_invalid_dst_region(devinfo, inst)) in lower_instruction()
604 progress |= lower_dst_region(v, block, inst); in lower_instruction()
606 for (unsigned i = 0; i < inst->sources; i++) { in lower_instruction()
607 if (has_invalid_src_modifiers(devinfo, inst, i)) in lower_instruction()
608 progress |= lower_src_modifiers(v, block, inst, i); in lower_instruction()
610 if (has_invalid_src_region(devinfo, inst, i)) in lower_instruction()
611 progress |= lower_src_region(v, block, inst, i); in lower_instruction()
614 if (has_invalid_exec_type(devinfo, inst)) in lower_instruction()
615 progress |= lower_exec_type(v, block, inst); in lower_instruction()
626 foreach_block_and_inst_safe(block, fs_inst, inst, cfg) in lower_regioning()
627 progress |= lower_instruction(this, block, inst); in lower_regioning()