Lines Matching refs:inst

39    is_byte_raw_mov(const fs_inst *inst)  in is_byte_raw_mov()  argument
41 return type_sz(inst->dst.type) == 1 && in is_byte_raw_mov()
42 inst->opcode == BRW_OPCODE_MOV && in is_byte_raw_mov()
43 inst->src[0].type == inst->dst.type && in is_byte_raw_mov()
44 !inst->saturate && in is_byte_raw_mov()
45 !inst->src[0].negate && in is_byte_raw_mov()
46 !inst->src[0].abs; in is_byte_raw_mov()
54 required_dst_byte_stride(const fs_inst *inst) in required_dst_byte_stride() argument
56 if (inst->dst.is_accumulator()) { in required_dst_byte_stride()
69 return inst->dst.stride * type_sz(inst->dst.type); in required_dst_byte_stride()
70 } else if (type_sz(inst->dst.type) < get_exec_type_size(inst) && in required_dst_byte_stride()
71 !is_byte_raw_mov(inst)) { in required_dst_byte_stride()
72 return get_exec_type_size(inst); in required_dst_byte_stride()
78 unsigned max_stride = inst->dst.stride * type_sz(inst->dst.type); in required_dst_byte_stride()
79 unsigned min_size = type_sz(inst->dst.type); in required_dst_byte_stride()
80 unsigned max_size = type_sz(inst->dst.type); in required_dst_byte_stride()
82 for (unsigned i = 0; i < inst->sources; i++) { in required_dst_byte_stride()
83 if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) { in required_dst_byte_stride()
84 const unsigned size = type_sz(inst->src[i].type); in required_dst_byte_stride()
85 max_stride = MAX2(max_stride, inst->src[i].stride * size); in required_dst_byte_stride()
110 required_dst_byte_offset(const fs_inst *inst) in required_dst_byte_offset() argument
112 for (unsigned i = 0; i < inst->sources; i++) { in required_dst_byte_offset()
113 if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) in required_dst_byte_offset()
114 if (reg_offset(inst->src[i]) % REG_SIZE != in required_dst_byte_offset()
115 reg_offset(inst->dst) % REG_SIZE) in required_dst_byte_offset()
119 return reg_offset(inst->dst) % REG_SIZE; in required_dst_byte_offset()
127 has_invalid_src_region(const intel_device_info *devinfo, const fs_inst *inst, in has_invalid_src_region() argument
130 if (is_unordered(inst) || inst->is_control_source(i)) in has_invalid_src_region()
144 inst->opcode == BRW_OPCODE_MAD && in has_invalid_src_region()
145 inst->src[i].type == BRW_REGISTER_TYPE_HF && in has_invalid_src_region()
146 reg_offset(inst->src[i]) % REG_SIZE > 0 && in has_invalid_src_region()
147 inst->src[i].stride != 0) { in has_invalid_src_region()
151 const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type); in has_invalid_src_region()
152 const unsigned src_byte_stride = inst->src[i].stride * in has_invalid_src_region()
153 type_sz(inst->src[i].type); in has_invalid_src_region()
154 const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE; in has_invalid_src_region()
155 const unsigned src_byte_offset = reg_offset(inst->src[i]) % REG_SIZE; in has_invalid_src_region()
157 return has_dst_aligned_region_restriction(devinfo, inst) && in has_invalid_src_region()
158 !is_uniform(inst->src[i]) && in has_invalid_src_region()
169 const fs_inst *inst) in has_invalid_dst_region() argument
171 if (is_unordered(inst)) { in has_invalid_dst_region()
174 const brw_reg_type exec_type = get_exec_type(inst); in has_invalid_dst_region()
175 const unsigned dst_byte_offset = reg_offset(inst->dst) % REG_SIZE; in has_invalid_dst_region()
176 const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type); in has_invalid_dst_region()
177 const bool is_narrowing_conversion = !is_byte_raw_mov(inst) && in has_invalid_dst_region()
178 type_sz(inst->dst.type) < type_sz(exec_type); in has_invalid_dst_region()
180 return (has_dst_aligned_region_restriction(devinfo, inst) && in has_invalid_dst_region()
181 (required_dst_byte_stride(inst) != dst_byte_stride || in has_invalid_dst_region()
182 required_dst_byte_offset(inst) != dst_byte_offset)) || in has_invalid_dst_region()
184 required_dst_byte_stride(inst) != dst_byte_stride); in has_invalid_dst_region()
195 has_invalid_exec_type(const intel_device_info *devinfo, const fs_inst *inst) in has_invalid_exec_type() argument
197 switch (inst->opcode) { in has_invalid_exec_type()
200 return has_dst_aligned_region_restriction(devinfo, inst) ? in has_invalid_exec_type()
207 devinfo->verx10 >= 125) && type_sz(inst->src[0].type) > 4) || in has_invalid_exec_type()
209 brw_reg_type_is_floating_point(inst->src[0].type)) ? in has_invalid_exec_type()
223 const fs_inst *inst, unsigned i) in has_invalid_src_modifiers() argument
225 return (!inst->can_do_source_mods(devinfo) && in has_invalid_src_modifiers()
226 (inst->src[i].negate || inst->src[i].abs)) || in has_invalid_src_modifiers()
227 ((has_invalid_exec_type(devinfo, inst) & (1u << i)) && in has_invalid_src_modifiers()
228 (inst->src[i].negate || inst->src[i].abs || in has_invalid_src_modifiers()
229 inst->src[i].type != get_exec_type(inst))); in has_invalid_src_modifiers()
237 has_invalid_conversion(const intel_device_info *devinfo, const fs_inst *inst) in has_invalid_conversion() argument
239 switch (inst->opcode) { in has_invalid_conversion()
243 return inst->dst.type != get_exec_type(inst); in has_invalid_conversion()
249 return has_invalid_exec_type(devinfo, inst) && in has_invalid_conversion()
250 inst->dst.type != get_exec_type(inst); in has_invalid_conversion()
258 has_invalid_dst_modifiers(const intel_device_info *devinfo, const fs_inst *inst) in has_invalid_dst_modifiers() argument
260 return (has_invalid_exec_type(devinfo, inst) && in has_invalid_dst_modifiers()
261 (inst->saturate || inst->conditional_mod)) || in has_invalid_dst_modifiers()
262 has_invalid_conversion(devinfo, inst); in has_invalid_dst_modifiers()
271 has_inconsistent_cmod(const fs_inst *inst) in has_inconsistent_cmod() argument
273 return inst->opcode == BRW_OPCODE_SEL || in has_inconsistent_cmod()
274 inst->opcode == BRW_OPCODE_CSEL || in has_inconsistent_cmod()
275 inst->opcode == BRW_OPCODE_IF || in has_inconsistent_cmod()
276 inst->opcode == BRW_OPCODE_WHILE; in has_inconsistent_cmod()
280 lower_instruction(fs_visitor *v, bblock_t *block, fs_inst *inst);
291 lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i) in lower_src_modifiers() argument
293 assert(inst->components_read(i) == 1); in lower_src_modifiers()
295 inst->opcode != BRW_OPCODE_MUL || in lower_src_modifiers()
296 brw_reg_type_is_floating_point(get_exec_type(inst)) || in lower_src_modifiers()
297 MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4 || in lower_src_modifiers()
298 type_sz(inst->src[i].type) == get_exec_type_size(inst)); in lower_src_modifiers()
300 const fs_builder ibld(v, block, inst); in lower_src_modifiers()
301 const fs_reg tmp = ibld.vgrf(get_exec_type(inst)); in lower_src_modifiers()
303 lower_instruction(v, block, ibld.MOV(tmp, inst->src[i])); in lower_src_modifiers()
304 inst->src[i] = tmp; in lower_src_modifiers()
319 lower_dst_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst) in lower_dst_modifiers() argument
321 const fs_builder ibld(v, block, inst); in lower_dst_modifiers()
322 const brw_reg_type type = get_exec_type(inst); in lower_dst_modifiers()
330 type_sz(inst->dst.type) * inst->dst.stride <= type_sz(type) ? 1 : in lower_dst_modifiers()
331 type_sz(inst->dst.type) * inst->dst.stride / type_sz(type); in lower_dst_modifiers()
337 fs_inst *mov = ibld.at(block, inst->next).MOV(inst->dst, tmp); in lower_dst_modifiers()
338 mov->saturate = inst->saturate; in lower_dst_modifiers()
339 if (!has_inconsistent_cmod(inst)) in lower_dst_modifiers()
340 mov->conditional_mod = inst->conditional_mod; in lower_dst_modifiers()
341 if (inst->opcode != BRW_OPCODE_SEL) { in lower_dst_modifiers()
342 mov->predicate = inst->predicate; in lower_dst_modifiers()
343 mov->predicate_inverse = inst->predicate_inverse; in lower_dst_modifiers()
345 mov->flag_subreg = inst->flag_subreg; in lower_dst_modifiers()
351 assert(inst->size_written == inst->dst.component_size(inst->exec_size)); in lower_dst_modifiers()
352 inst->dst = tmp; in lower_dst_modifiers()
353 inst->size_written = inst->dst.component_size(inst->exec_size); in lower_dst_modifiers()
354 inst->saturate = false; in lower_dst_modifiers()
355 if (!has_inconsistent_cmod(inst)) in lower_dst_modifiers()
356 inst->conditional_mod = BRW_CONDITIONAL_NONE; in lower_dst_modifiers()
358 assert(!inst->flags_written(v->devinfo) || !mov->predicate); in lower_dst_modifiers()
368 lower_src_region(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i) in lower_src_region() argument
370 assert(inst->components_read(i) == 1); in lower_src_region()
371 const fs_builder ibld(v, block, inst); in lower_src_region()
372 const unsigned stride = type_sz(inst->dst.type) * inst->dst.stride / in lower_src_region()
373 type_sz(inst->src[i].type); in lower_src_region()
375 fs_reg tmp = ibld.vgrf(inst->src[i].type, stride); in lower_src_region()
385 fs_reg raw_src = inst->src[i]; in lower_src_region()
396 lower_src.negate = inst->src[i].negate; in lower_src_region()
397 lower_src.abs = inst->src[i].abs; in lower_src_region()
398 inst->src[i] = lower_src; in lower_src_region()
410 lower_dst_region(fs_visitor *v, bblock_t *block, fs_inst *inst) in lower_dst_region() argument
417 assert(inst->opcode != BRW_OPCODE_MUL || !inst->dst.is_accumulator() || in lower_dst_region()
418 brw_reg_type_is_floating_point(inst->dst.type)); in lower_dst_region()
420 const fs_builder ibld(v, block, inst); in lower_dst_region()
421 const unsigned stride = required_dst_byte_stride(inst) / in lower_dst_region()
422 type_sz(inst->dst.type); in lower_dst_region()
424 fs_reg tmp = ibld.vgrf(inst->dst.type, stride); in lower_dst_region()
435 if (inst->predicate && inst->opcode != BRW_OPCODE_SEL) { in lower_dst_region()
444 subscript(inst->dst, raw_type, j)); in lower_dst_region()
448 ibld.at(block, inst->next).MOV(subscript(inst->dst, raw_type, j), in lower_dst_region()
454 assert(inst->size_written == inst->dst.component_size(inst->exec_size)); in lower_dst_region()
455 inst->dst = tmp; in lower_dst_region()
456 inst->size_written = inst->dst.component_size(inst->exec_size); in lower_dst_region()
467 lower_exec_type(fs_visitor *v, bblock_t *block, fs_inst *inst) in lower_exec_type() argument
469 assert(inst->dst.type == get_exec_type(inst)); in lower_exec_type()
470 const unsigned mask = has_invalid_exec_type(v->devinfo, inst); in lower_exec_type()
471 const brw_reg_type raw_type = brw_int_type(type_sz(inst->dst.type), false); in lower_exec_type()
473 for (unsigned i = 0; i < inst->sources; i++) { in lower_exec_type()
475 assert(inst->src[i].type == inst->dst.type); in lower_exec_type()
476 inst->src[i].type = raw_type; in lower_exec_type()
480 inst->dst.type = raw_type; in lower_exec_type()
490 lower_instruction(fs_visitor *v, bblock_t *block, fs_inst *inst) in lower_instruction() argument
495 if (has_invalid_dst_modifiers(devinfo, inst)) in lower_instruction()
496 progress |= lower_dst_modifiers(v, block, inst); in lower_instruction()
498 if (has_invalid_dst_region(devinfo, inst)) in lower_instruction()
499 progress |= lower_dst_region(v, block, inst); in lower_instruction()
501 for (unsigned i = 0; i < inst->sources; i++) { in lower_instruction()
502 if (has_invalid_src_modifiers(devinfo, inst, i)) in lower_instruction()
503 progress |= lower_src_modifiers(v, block, inst, i); in lower_instruction()
505 if (has_invalid_src_region(devinfo, inst, i)) in lower_instruction()
506 progress |= lower_src_region(v, block, inst, i); in lower_instruction()
509 if (has_invalid_exec_type(devinfo, inst)) in lower_instruction()
510 progress |= lower_exec_type(v, block, inst); in lower_instruction()
521 foreach_block_and_inst_safe(block, fs_inst, inst, cfg) in lower_regioning()
522 progress |= lower_instruction(this, block, inst); in lower_regioning()