1 /* -*- c++ -*- */
2 /*
3  * Copyright © 2010-2015 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #ifndef BRW_IR_FS_H
26 #define BRW_IR_FS_H
27 
28 #include "brw_shader.h"
29 
30 class fs_inst;
31 
/**
 * Register operand of the scalar backend IR.  Extends backend_reg with the
 * horizontal stride needed to describe FS register regions.
 */
class fs_reg : public backend_reg {
public:
   DECLARE_RALLOC_CXX_OPERATORS(fs_reg)

   /* Reset the register to a default state shared by all constructors. */
   void init();

   fs_reg();
   fs_reg(struct ::brw_reg reg);
   fs_reg(enum brw_reg_file file, int nr);
   fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type);

   /* Exact equality of the full register region. */
   bool equals(const fs_reg &r) const;
   /* Equality modulo the negation flag — presumably used to detect
    * a and -a pairs; confirm in the implementation. */
   bool negative_equals(const fs_reg &r) const;
   /* Whether components are tightly packed (no stride gaps). */
   bool is_contiguous() const;

   /**
    * Return the size in bytes of a single logical component of the
    * register assuming the given execution width.
    */
   unsigned component_size(unsigned width) const;

   /** Register region horizontal stride */
   uint8_t stride;
};
56 
57 static inline fs_reg
negate(fs_reg reg)58 negate(fs_reg reg)
59 {
60    assert(reg.file != IMM);
61    reg.negate = !reg.negate;
62    return reg;
63 }
64 
65 static inline fs_reg
retype(fs_reg reg,enum brw_reg_type type)66 retype(fs_reg reg, enum brw_reg_type type)
67 {
68    reg.type = type;
69    return reg;
70 }
71 
/**
 * Return a copy of \p reg advanced by \p delta bytes, normalized into
 * whatever offset representation the register file uses.
 */
static inline fs_reg
byte_offset(fs_reg reg, unsigned delta)
{
   switch (reg.file) {
   case BAD_FILE:
      break;
   case VGRF:
   case ATTR:
   case UNIFORM:
      /* These files carry a plain byte offset field. */
      reg.offset += delta;
      break;
   case MRF: {
      /* MRFs are addressed by register number plus sub-register byte
       * offset, so carry any whole-register overflow into the number.
       */
      const unsigned suboffset = reg.offset + delta;
      reg.nr += suboffset / REG_SIZE;
      reg.offset = suboffset % REG_SIZE;
      break;
   }
   case ARF:
   case FIXED_GRF: {
      /* Hardware registers encode the sub-register byte offset in subnr. */
      const unsigned suboffset = reg.subnr + delta;
      reg.nr += suboffset / REG_SIZE;
      reg.subnr = suboffset % REG_SIZE;
      break;
   }
   case IMM:
   default:
      /* Immediates (and anything unexpected) cannot be offset. */
      assert(delta == 0);
   }
   return reg;
}
102 
/**
 * Return a copy of \p reg advanced by \p delta scalar channels, scaling by
 * the region's horizontal stride and the size of its type.
 */
static inline fs_reg
horiz_offset(const fs_reg &reg, unsigned delta)
{
   switch (reg.file) {
   case BAD_FILE:
   case UNIFORM:
   case IMM:
      /* These only have a single component that is implicitly splatted.  A
       * horizontal offset should be a harmless no-op.
       * XXX - Handle vector immediates correctly.
       */
      return reg;
   case VGRF:
   case MRF:
   case ATTR:
      return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
   case ARF:
   case FIXED_GRF:
      if (reg.is_null()) {
         return reg;
      } else {
         /* Hardware registers encode the stride as log2(stride) + 1, with
          * zero meaning a stride of zero (see reg_padding() below).
          */
         const unsigned stride = reg.hstride ? 1 << (reg.hstride - 1) : 0;
         return byte_offset(reg, delta * stride * type_sz(reg.type));
      }
   }
   unreachable("Invalid register file");
}
130 
131 static inline fs_reg
offset(fs_reg reg,unsigned width,unsigned delta)132 offset(fs_reg reg, unsigned width, unsigned delta)
133 {
134    switch (reg.file) {
135    case BAD_FILE:
136       break;
137    case ARF:
138    case FIXED_GRF:
139    case MRF:
140    case VGRF:
141    case ATTR:
142    case UNIFORM:
143       return byte_offset(reg, delta * reg.component_size(width));
144    case IMM:
145       assert(delta == 0);
146    }
147    return reg;
148 }
149 
150 /**
151  * Get the scalar channel of \p reg given by \p idx and replicate it to all
152  * channels of the result.
153  */
154 static inline fs_reg
component(fs_reg reg,unsigned idx)155 component(fs_reg reg, unsigned idx)
156 {
157    reg = horiz_offset(reg, idx);
158    reg.stride = 0;
159    return reg;
160 }
161 
162 /**
163  * Return an integer identifying the discrete address space a register is
164  * contained in.  A register is by definition fully contained in the single
165  * reg_space it belongs to, so two registers with different reg_space ids are
166  * guaranteed not to overlap.  Most register files are a single reg_space of
167  * its own, only the VGRF file is composed of multiple discrete address
168  * spaces, one for each VGRF allocation.
169  */
170 static inline uint32_t
reg_space(const fs_reg & r)171 reg_space(const fs_reg &r)
172 {
173    return r.file << 16 | (r.file == VGRF ? r.nr : 0);
174 }
175 
176 /**
177  * Return the base offset in bytes of a register relative to the start of its
178  * reg_space().
179  */
180 static inline unsigned
reg_offset(const fs_reg & r)181 reg_offset(const fs_reg &r)
182 {
183    return (r.file == VGRF || r.file == IMM ? 0 : r.nr) *
184           (r.file == UNIFORM ? 4 : REG_SIZE) + r.offset +
185           (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0);
186 }
187 
188 /**
189  * Return the amount of padding in bytes left unused between individual
190  * components of register \p r due to a (horizontal) stride value greater than
191  * one, or zero if components are tightly packed in the register file.
192  */
193 static inline unsigned
reg_padding(const fs_reg & r)194 reg_padding(const fs_reg &r)
195 {
196    const unsigned stride = ((r.file != ARF && r.file != FIXED_GRF) ? r.stride :
197                             r.hstride == 0 ? 0 :
198                             1 << (r.hstride - 1));
199    return (MAX2(1, stride) - 1) * type_sz(r.type);
200 }
201 
/**
 * Return whether the register region starting at \p r and spanning \p dr
 * bytes could potentially overlap the register region starting at \p s and
 * spanning \p ds bytes.
 */
static inline bool
regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
{
   if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
      fs_reg t = r;
      t.nr &= ~BRW_MRF_COMPR4;
      /* COMPR4 regions are translated by the hardware during decompression
       * into two separate half-regions 4 MRFs apart from each other.
       */
      return regions_overlap(t, dr / 2, s, ds) ||
             regions_overlap(byte_offset(t, 4 * REG_SIZE), dr / 2, s, ds);

   } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
      /* Swap the operands so the COMPR4 case above handles \p s. */
      return regions_overlap(s, ds, r, dr);

   } else {
      /* Plain interval overlap test within a single address space. */
      return reg_space(r) == reg_space(s) &&
             !(reg_offset(r) + dr <= reg_offset(s) ||
               reg_offset(s) + ds <= reg_offset(r));
   }
}
228 
229 /**
230  * Check that the register region given by r [r.offset, r.offset + dr[
231  * is fully contained inside the register region given by s
232  * [s.offset, s.offset + ds[.
233  */
234 static inline bool
region_contained_in(const fs_reg & r,unsigned dr,const fs_reg & s,unsigned ds)235 region_contained_in(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
236 {
237    return reg_space(r) == reg_space(s) &&
238           reg_offset(r) >= reg_offset(s) &&
239           reg_offset(r) + dr <= reg_offset(s) + ds;
240 }
241 
242 /**
243  * Return whether the given register region is n-periodic, i.e. whether the
244  * original region remains invariant after shifting it by \p n scalar
245  * channels.
246  */
247 static inline bool
is_periodic(const fs_reg & reg,unsigned n)248 is_periodic(const fs_reg &reg, unsigned n)
249 {
250    if (reg.file == BAD_FILE || reg.is_null()) {
251       return true;
252 
253    } else if (reg.file == IMM) {
254       const unsigned period = (reg.type == BRW_REGISTER_TYPE_UV ||
255                                reg.type == BRW_REGISTER_TYPE_V ? 8 :
256                                reg.type == BRW_REGISTER_TYPE_VF ? 4 :
257                                1);
258       return n % period == 0;
259 
260    } else if (reg.file == ARF || reg.file == FIXED_GRF) {
261       const unsigned period = (reg.hstride == 0 && reg.vstride == 0 ? 1 :
262                                reg.vstride == 0 ? 1 << reg.width :
263                                ~0);
264       return n % period == 0;
265 
266    } else {
267       return reg.stride == 0;
268    }
269 }
270 
/* Return whether the region reads the same value in every channel, i.e.
 * whether it is invariant under a shift of a single scalar channel.
 */
static inline bool
is_uniform(const fs_reg &reg)
{
   return is_periodic(reg, 1);
}
276 
277 /**
278  * Get the specified 8-component quarter of a register.
279  */
280 static inline fs_reg
quarter(const fs_reg & reg,unsigned idx)281 quarter(const fs_reg &reg, unsigned idx)
282 {
283    assert(idx < 4);
284    return horiz_offset(reg, 8 * idx);
285 }
286 
/**
 * Reinterpret each channel of register \p reg as a vector of values of the
 * given smaller type and take the i-th subcomponent from each.
 */
static inline fs_reg
subscript(fs_reg reg, brw_reg_type type, unsigned i)
{
   /* The requested subcomponent must lie within the original channel. */
   assert((i + 1) * type_sz(type) <= type_sz(reg.type));

   if (reg.file == ARF || reg.file == FIXED_GRF) {
      /* The stride is encoded inconsistently for fixed GRF and ARF registers
       * as the log2 of the actual vertical and horizontal strides.
       */
      const int delta = util_logbase2(type_sz(reg.type)) -
                        util_logbase2(type_sz(type));
      reg.hstride += (reg.hstride ? delta : 0);
      reg.vstride += (reg.vstride ? delta : 0);

   } else if (reg.file == IMM) {
      /* Extract the i-th subcomponent of the immediate value. */
      unsigned bit_size = type_sz(type) * 8;
      reg.u64 >>= i * bit_size;
      reg.u64 &= BITFIELD64_MASK(bit_size);
      /* Replicate sub-word values into the second word — presumably the
       * packed layout the hardware expects for sub-dword immediates;
       * confirm against the immediate encoding docs.
       */
      if (bit_size <= 16)
         reg.u64 |= reg.u64 << 16;
      return retype(reg, type);
   } else {
      /* Virtual strides are in units of the register type (see
       * horiz_offset()), so scale to keep the byte stride unchanged.
       */
      reg.stride *= type_sz(reg.type) / type_sz(type);
   }

   return byte_offset(retype(reg, type), i * type_sz(type));
}
318 
319 static inline fs_reg
horiz_stride(fs_reg reg,unsigned s)320 horiz_stride(fs_reg reg, unsigned s)
321 {
322    reg.stride *= s;
323    return reg;
324 }
325 
/* Default-constructed register used to mark an absent operand — presumably
 * BAD_FILE; confirm in the fs_reg() constructor. */
static const fs_reg reg_undef;
327 
/**
 * Instruction of the scalar backend IR: an opcode with a destination and a
 * variable-length array of source registers.
 */
class fs_inst : public backend_instruction {
   /* Copy assignment is intentionally declared but not implemented. */
   fs_inst &operator=(const fs_inst &);

   /* Shared initialization used by all constructors. */
   void init(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
             const fs_reg *src, unsigned sources);

public:
   DECLARE_RALLOC_CXX_OPERATORS(fs_inst)

   /* Constructors for zero through three sources plus an array form. */
   fs_inst();
   fs_inst(enum opcode opcode, uint8_t exec_size);
   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst);
   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
           const fs_reg &src0);
   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
           const fs_reg &src0, const fs_reg &src1);
   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
           const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
           const fs_reg src[], unsigned sources);
   fs_inst(const fs_inst &that);
   ~fs_inst();

   /* Grow or shrink the source array to \p num_sources entries. */
   void resize_sources(uint8_t num_sources);

   bool is_send_from_grf() const;
   bool is_payload(unsigned arg) const;
   /* Whether the instruction leaves part of its destination unwritten. */
   bool is_partial_write() const;
   /* Number of logical components read from source \p i. */
   unsigned components_read(unsigned i) const;
   /* Number of bytes read from source \p arg. */
   unsigned size_read(int arg) const;
   bool can_do_source_mods(const struct intel_device_info *devinfo) const;
   bool can_do_cmod();
   bool can_change_types() const;
   bool has_source_and_destination_hazard() const;
   unsigned implied_mrf_writes() const;

   /**
    * Return whether \p arg is a control source of a virtual instruction which
    * shouldn't contribute to the execution type and usual regioning
    * restriction calculations of arithmetic instructions.
    */
   bool is_control_source(unsigned arg) const;

   /**
    * Return the subset of flag registers read by the instruction as a bitset
    * with byte granularity.
    */
   unsigned flags_read(const intel_device_info *devinfo) const;

   /**
    * Return the subset of flag registers updated by the instruction (either
    * partially or fully) as a bitset with byte granularity.
    */
   unsigned flags_written(const intel_device_info *devinfo) const;

   fs_reg dst;            /**< Destination register. */
   fs_reg *src;           /**< Source register array (sources entries). */

   uint8_t sources; /**< Number of fs_reg sources. */

   /* Whether this is the last render-target write of the shader. */
   bool last_rt:1;
   bool pi_noperspective:1;   /**< Pixel interpolator noperspective flag */

   tgl_swsb sched; /**< Scheduling info. */
};
393 
394 /**
395  * Make the execution of \p inst dependent on the evaluation of a possibly
396  * inverted predicate.
397  */
398 static inline fs_inst *
set_predicate_inv(enum brw_predicate pred,bool inverse,fs_inst * inst)399 set_predicate_inv(enum brw_predicate pred, bool inverse,
400                   fs_inst *inst)
401 {
402    inst->predicate = pred;
403    inst->predicate_inverse = inverse;
404    return inst;
405 }
406 
407 /**
408  * Make the execution of \p inst dependent on the evaluation of a predicate.
409  */
410 static inline fs_inst *
set_predicate(enum brw_predicate pred,fs_inst * inst)411 set_predicate(enum brw_predicate pred, fs_inst *inst)
412 {
413    return set_predicate_inv(pred, false, inst);
414 }
415 
416 /**
417  * Write the result of evaluating the condition given by \p mod to a flag
418  * register.
419  */
420 static inline fs_inst *
set_condmod(enum brw_conditional_mod mod,fs_inst * inst)421 set_condmod(enum brw_conditional_mod mod, fs_inst *inst)
422 {
423    inst->conditional_mod = mod;
424    return inst;
425 }
426 
427 /**
428  * Clamp the result of \p inst to the saturation range of its destination
429  * datatype.
430  */
431 static inline fs_inst *
set_saturate(bool saturate,fs_inst * inst)432 set_saturate(bool saturate, fs_inst *inst)
433 {
434    inst->saturate = saturate;
435    return inst;
436 }
437 
/**
 * Return the number of dataflow registers written by the instruction (either
 * fully or partially) counted from 'floor(reg_offset(inst->dst) /
 * register_size)'.  The somewhat arbitrary register size unit is 4B for the
 * UNIFORM and IMM files and 32B for all other files.
 */
inline unsigned
regs_written(const fs_inst *inst)
{
   /* UNIFORM and IMM destinations don't occur, so the unit is always 32B. */
   assert(inst->dst.file != UNIFORM && inst->dst.file != IMM);
   /* Trailing stride padding after the last component isn't actually
    * written, so subtract it before rounding up to whole registers.
    */
   return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE +
                       inst->size_written -
                       MIN2(inst->size_written, reg_padding(inst->dst)),
                       REG_SIZE);
}
453 
/**
 * Return the number of dataflow registers read by the instruction (either
 * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
 * register_size)'.  The somewhat arbitrary register size unit is 4B for the
 * UNIFORM files and 32B for all other files.
 */
inline unsigned
regs_read(const fs_inst *inst, unsigned i)
{
   /* Immediates live in the instruction itself and count as one unit. */
   if (inst->src[i].file == IMM)
      return 1;

   const unsigned reg_size = inst->src[i].file == UNIFORM ? 4 : REG_SIZE;
   /* Trailing stride padding after the last component isn't actually read,
    * so subtract it before rounding up to whole registers.
    */
   return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size +
                       inst->size_read(i) -
                       MIN2(inst->size_read(i), reg_padding(inst->src[i])),
                       reg_size);
}
472 
/**
 * Return the execution type of \p inst: the widest type among its non-control
 * sources, with ties broken in favor of floating-point types, falling back to
 * the destination type when no such source exists, and promoted to 32 bits
 * for mixed conversions involving half-float.
 */
static inline enum brw_reg_type
get_exec_type(const fs_inst *inst)
{
   /* BRW_REGISTER_TYPE_B serves as a "none found yet" sentinel since byte
    * execution types don't occur (see the assert below).
    */
   brw_reg_type exec_type = BRW_REGISTER_TYPE_B;

   for (int i = 0; i < inst->sources; i++) {
      if (inst->src[i].file != BAD_FILE &&
          !inst->is_control_source(i)) {
         const brw_reg_type t = get_exec_type(inst->src[i].type);
         if (type_sz(t) > type_sz(exec_type))
            exec_type = t;
         else if (type_sz(t) == type_sz(exec_type) &&
                  brw_reg_type_is_floating_point(t))
            exec_type = t;
      }
   }

   if (exec_type == BRW_REGISTER_TYPE_B)
      exec_type = inst->dst.type;

   assert(exec_type != BRW_REGISTER_TYPE_B);

   /* Promotion of the execution type to 32-bit for conversions from or to
    * half-float seems to be consistent with the following text from the
    * Cherryview PRM Vol. 7, "Execution Data Type":
    *
    * "When single precision and half precision floats are mixed between
    *  source operands or between source and destination operand [..] single
    *  precision float is the execution datatype."
    *
    * and from "Register Region Restrictions":
    *
    * "Conversion between Integer and HF (Half Float) must be DWord aligned
    *  and strided by a DWord on the destination."
    */
   if (type_sz(exec_type) == 2 &&
       inst->dst.type != exec_type) {
      if (exec_type == BRW_REGISTER_TYPE_HF)
         exec_type = BRW_REGISTER_TYPE_F;
      else if (inst->dst.type == BRW_REGISTER_TYPE_HF)
         exec_type = BRW_REGISTER_TYPE_D;
   }

   return exec_type;
}
518 
519 static inline unsigned
get_exec_type_size(const fs_inst * inst)520 get_exec_type_size(const fs_inst *inst)
521 {
522    return type_sz(get_exec_type(inst));
523 }
524 
525 static inline bool
is_send(const fs_inst * inst)526 is_send(const fs_inst *inst)
527 {
528    return inst->mlen || inst->is_send_from_grf();
529 }
530 
531 /**
532  * Return whether the instruction isn't an ALU instruction and cannot be
533  * assumed to complete in-order.
534  */
535 static inline bool
is_unordered(const fs_inst * inst)536 is_unordered(const fs_inst *inst)
537 {
538    return is_send(inst) || inst->is_math();
539 }
540 
/**
 * Return whether the following regioning restriction applies to the specified
 * instruction.  From the Cherryview PRM Vol 7. "Register Region
 * Restrictions":
 *
 * "When source or destination datatype is 64b or operation is integer DWord
 *  multiply, regioning in Align1 must follow these rules:
 *
 *  1. Source and Destination horizontal stride must be aligned to the same qword.
 *  2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
 *  3. Source and Destination offset must be the same, except the case of
 *     scalar source."
 */
static inline bool
has_dst_aligned_region_restriction(const intel_device_info *devinfo,
                                   const fs_inst *inst,
                                   brw_reg_type dst_type)
{
   const brw_reg_type exec_type = get_exec_type(inst);
   /* Even though the hardware spec claims that "integer DWord multiply"
    * operations are restricted, empirical evidence and the behavior of the
    * simulator suggest that only 32x32-bit integer multiplication is
    * restricted.
    */
   const bool is_dword_multiply = !brw_reg_type_is_floating_point(exec_type) &&
      ((inst->opcode == BRW_OPCODE_MUL &&
        MIN2(type_sz(inst->src[0].type), type_sz(inst->src[1].type)) >= 4) ||
       (inst->opcode == BRW_OPCODE_MAD &&
        MIN2(type_sz(inst->src[1].type), type_sz(inst->src[2].type)) >= 4));

   /* 64-bit types and DWord multiplies are restricted on CHV, gen9 LP parts
    * and gfx12.5+ devices.
    */
   if (type_sz(dst_type) > 4 || type_sz(exec_type) > 4 ||
       (type_sz(exec_type) == 4 && is_dword_multiply))
      return devinfo->is_cherryview || intel_device_info_is_9lp(devinfo) ||
             devinfo->verx10 >= 125;

   /* Floating-point destinations are additionally restricted on gfx12.5+. */
   else if (brw_reg_type_is_floating_point(dst_type))
      return devinfo->verx10 >= 125;

   else
      return false;
}
582 
583 static inline bool
has_dst_aligned_region_restriction(const intel_device_info * devinfo,const fs_inst * inst)584 has_dst_aligned_region_restriction(const intel_device_info *devinfo,
585                                    const fs_inst *inst)
586 {
587    return has_dst_aligned_region_restriction(devinfo, inst, inst->dst.type);
588 }
589 
590 /**
591  * Return whether the LOAD_PAYLOAD instruction is a plain copy of bits from
592  * the specified register file into a VGRF.
593  *
594  * This implies identity register regions without any source-destination
595  * overlap, but otherwise has no implications on the location of sources and
596  * destination in the register file: Gathering any number of portions from
597  * multiple virtual registers in any order is allowed.
598  */
599 inline bool
is_copy_payload(brw_reg_file file,const fs_inst * inst)600 is_copy_payload(brw_reg_file file, const fs_inst *inst)
601 {
602    if (inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD ||
603        inst->is_partial_write() || inst->saturate ||
604        inst->dst.file != VGRF)
605       return false;
606 
607    for (unsigned i = 0; i < inst->sources; i++) {
608       if (inst->src[i].file != file ||
609           inst->src[i].abs || inst->src[i].negate)
610          return false;
611 
612       if (!inst->src[i].is_contiguous())
613          return false;
614 
615       if (regions_overlap(inst->dst, inst->size_written,
616                           inst->src[i], inst->size_read(i)))
617          return false;
618    }
619 
620    return true;
621 }
622 
623 /**
624  * Like is_copy_payload(), but the instruction is required to copy a single
625  * contiguous block of registers from the given register file into the
626  * destination without any reordering.
627  */
628 inline bool
is_identity_payload(brw_reg_file file,const fs_inst * inst)629 is_identity_payload(brw_reg_file file, const fs_inst *inst) {
630    if (is_copy_payload(file, inst)) {
631       fs_reg reg = inst->src[0];
632 
633       for (unsigned i = 0; i < inst->sources; i++) {
634          reg.type = inst->src[i].type;
635          if (!inst->src[i].equals(reg))
636             return false;
637 
638          reg = byte_offset(reg, inst->size_read(i));
639       }
640 
641       return true;
642    } else {
643       return false;
644    }
645 }
646 
647 /**
648  * Like is_copy_payload(), but the instruction is required to source data from
649  * at least two disjoint VGRFs.
650  *
651  * This doesn't necessarily rule out the elimination of this instruction
652  * through register coalescing, but due to limitations of the register
653  * coalesce pass it might be impossible to do so directly until a later stage,
654  * when the LOAD_PAYLOAD instruction is unrolled into a sequence of MOV
655  * instructions.
656  */
657 inline bool
is_multi_copy_payload(const fs_inst * inst)658 is_multi_copy_payload(const fs_inst *inst) {
659    if (is_copy_payload(VGRF, inst)) {
660       for (unsigned i = 0; i < inst->sources; i++) {
661             if (inst->src[i].nr != inst->src[0].nr)
662                return true;
663       }
664    }
665 
666    return false;
667 }
668 
669 /**
670  * Like is_identity_payload(), but the instruction is required to copy the
671  * whole contents of a single VGRF into the destination.
672  *
673  * This means that there is a good chance that the instruction will be
674  * eliminated through register coalescing, but it's neither a necessary nor a
675  * sufficient condition for that to happen -- E.g. consider the case where
676  * source and destination registers diverge due to other instructions in the
677  * program overwriting part of their contents, which isn't something we can
678  * predict up front based on a cheap strictly local test of the copy
679  * instruction.
680  */
681 inline bool
is_coalescing_payload(const brw::simple_allocator & alloc,const fs_inst * inst)682 is_coalescing_payload(const brw::simple_allocator &alloc, const fs_inst *inst)
683 {
684    return is_identity_payload(VGRF, inst) &&
685           inst->src[0].offset == 0 &&
686           alloc.sizes[inst->src[0].nr] * REG_SIZE == inst->size_written;
687 }
688 
/* Defined out of line — presumably in the register bank conflict mitigation
 * pass; returns whether \p inst suffers a GRF bank conflict on this device.
 */
bool
has_bank_conflict(const intel_device_info *devinfo, const fs_inst *inst);
691 
692 #endif
693