1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #ifndef __LATENCY_TABLE_H 10 #define __LATENCY_TABLE_H 11 12 #include "../BuildIR.h" 13 14 namespace vISA 15 { 16 17 enum LegacyLatencies : uint16_t 18 { 19 // 20 // General instruction latencies 21 // 22 // To be comptabile with send cycles, don't normalized them to 1 23 UNCOMPR_LATENCY = 2, // Latency of an uncompressed instruction 24 COMPR_LATENCY = 4, // Latency of a compressed instruction 25 ACC_BUBBLE = 4, // Accumulator back-to-back stall 26 IVB_PIPELINE_LENGTH = 14, 27 EDGE_LATENCY_MATH = 22, 28 EDGE_LATENCY_MATH_TYPE2 = 30, 29 EDGE_LATENCY_SEND_WAR = 36 30 }; 31 32 // 33 // Message latencies 34 // 35 static const uint16_t LegacyFFLatency[] = { 36 2, // 0: SFID_NULL 37 2, // 1: Useless 38 300, // 2: SFID_SAMPLER 39 200, // 3: SFID_GATEWAY 40 400, // 4: SFID_DP_READ, SFID_DP_DC2 41 200, // 5: SFID_DP_WRITE 42 50, // 6: SFID_URB 43 50, // 7: SFID_SPAWNER 44 50, // 8: SFID_VME 45 60, // 9: SFID_DP_CC 46 400, //10: SFID_DP_DC 47 50, //11: SFID_DP_PI 48 400, //12: SFID_DP_DC1 49 200, //13: SFID_CRE 50 200 //14: unknown, SFID_NUM 51 }; 52 53 54 enum LatenciesXe : uint16_t 55 { 56 // 57 // General instruction latencies 58 // 59 FPU_ACC = 6, // SIMD8 latency if dst is acc. 60 FPU = 10, // SIMD8 latency for general FPU ops. 61 MATH = 17, // Math latency. 62 BRANCH = 23, // Latency for SIMD16 branch. 63 BARRIER = 30, // Latency for barrier. 64 DELTA = 1, // Extra cycles for wider SIMD sizes, compute only. 65 DELTA_MATH = 4, 66 ARF = 16, // latency for ARF dependencies (flag, address, etc.) 67 // Latency for dpas 8x1 68 // Latency for dpas 8x8 is 21 + 7 = 28 69 DPAS = 21, 70 71 // 72 // Message latencies 73 // 74 75 // Latency for SIMD16 SLM messages. If accessing 76 // the same location, it takes 28 cycles. For the 77 // sequential access pattern, it takes 26 cycles. 78 SLM = 28, 79 SEND_OTHERS = 50, // Latency for other messages. 80 DP_L3 = 146, // Dataport L3 hit 81 SAMPLER_L3 = 214, // Sampler L3 hit 82 SLM_FENCE = 23, // Fence SLM 83 LSC_UNTYPED_L1 = 45, // LSC untyped L1 cache hit 84 LSC_UNTYPED_L3 = 200, // LSC untyped L3 cache hit 85 LSC_UNTYPED_FENCE = 35, // LSC untyped fence (best case) 86 LSC_TYPED_L1 = 75, // LSC typed L1 cache hit 87 LSC_TYPED_L3 = 200, // LSC typed L3 cache hit 88 LSC_TYPED_FENCE = 60, // LSC typed fence 89 }; 90 91 92 class LatencyTable 93 { 94 public: LatencyTable(const IR_Builder * builder)95 explicit LatencyTable(const IR_Builder* builder) 96 : m_builder(builder) 97 { 98 } 99 // Functions to get latencies/occupancy based on platforms 100 uint16_t getOccupancy(G4_INST* Inst) const; 101 uint16_t getLatency(G4_INST* Inst) const; 102 uint16_t getDPAS8x8Latency() const; 103 104 private: 105 uint16_t getLatencyLegacy(G4_INST* Inst) const; 106 uint16_t getOccupancyLegacy(G4_INST* Inst) const; 107 108 uint16_t getLatencyG12(const G4_INST* Inst) const; 109 110 uint16_t getOccupancyG12(G4_INST* Inst) const; 111 112 const IR_Builder* m_builder; 113 }; 114 115 } // namespace vISA 116 117 #endif 118