1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #ifndef __LATENCY_TABLE_H
10 #define __LATENCY_TABLE_H
11 
12 #include "../BuildIR.h"
13 
14 namespace vISA
15 {
16 
17     enum LegacyLatencies : uint16_t
18     {
19         //
20         //  General instruction latencies
21         //
22         // To be comptabile with send cycles, don't normalized them to 1
23         UNCOMPR_LATENCY         = 2,    // Latency of an uncompressed instruction
24         COMPR_LATENCY           = 4,    // Latency of a compressed instruction
25         ACC_BUBBLE              = 4,    // Accumulator back-to-back stall
26         IVB_PIPELINE_LENGTH     = 14,
27         EDGE_LATENCY_MATH       = 22,
28         EDGE_LATENCY_MATH_TYPE2 = 30,
29         EDGE_LATENCY_SEND_WAR   = 36
30     };
31 
32     //
33     // Message latencies
34     //
35     static const uint16_t LegacyFFLatency[] = {
36         2,   // 0: SFID_NULL
37         2,   // 1: Useless
38         300, // 2: SFID_SAMPLER
39         200, // 3: SFID_GATEWAY
40         400, // 4: SFID_DP_READ, SFID_DP_DC2
41         200, // 5: SFID_DP_WRITE
42         50,  // 6: SFID_URB
43         50,  // 7: SFID_SPAWNER
44         50,  // 8: SFID_VME
45         60,  // 9: SFID_DP_CC
46         400, //10: SFID_DP_DC
47         50,  //11: SFID_DP_PI
48         400, //12: SFID_DP_DC1
49         200, //13: SFID_CRE
50         200  //14: unknown, SFID_NUM
51     };
52 
53 
54     enum LatenciesXe : uint16_t
55     {
56         //
57         // General instruction latencies
58         //
59         FPU_ACC                 = 6,    // SIMD8 latency if dst is acc.
60         FPU                     = 10,   // SIMD8 latency for general FPU ops.
61         MATH                    = 17,   // Math latency.
62         BRANCH                  = 23,   // Latency for SIMD16 branch.
63         BARRIER                 = 30,   // Latency for barrier.
64         DELTA                   = 1,    // Extra cycles for wider SIMD sizes, compute only.
65         DELTA_MATH              = 4,
66         ARF                     = 16,   // latency for ARF dependencies (flag, address, etc.)
67         // Latency for dpas 8x1
68         // Latency for dpas 8x8 is 21 + 7 = 28
69         DPAS = 21,
70 
71         //
72         // Message latencies
73         //
74 
75         // Latency for SIMD16 SLM messages. If accessing
76         // the same location, it takes 28 cycles. For the
77         // sequential access pattern, it takes 26 cycles.
78         SLM                     = 28,
79         SEND_OTHERS             = 50,   // Latency for other messages.
80         DP_L3                   = 146,  // Dataport L3 hit
81         SAMPLER_L3              = 214,  // Sampler L3 hit
82         SLM_FENCE               = 23,   // Fence SLM
83         LSC_UNTYPED_L1          = 45,   // LSC untyped L1 cache hit
84         LSC_UNTYPED_L3          = 200,  // LSC untyped L3 cache hit
85         LSC_UNTYPED_FENCE       = 35,   // LSC untyped fence (best case)
86         LSC_TYPED_L1            = 75,   // LSC typed L1 cache hit
87         LSC_TYPED_L3            = 200,  // LSC typed L3 cache hit
88         LSC_TYPED_FENCE         = 60,   // LSC typed fence
89     };
90 
91 
92     class LatencyTable
93     {
94     public:
LatencyTable(const IR_Builder * builder)95         explicit LatencyTable(const IR_Builder* builder)
96             : m_builder(builder)
97         {
98         }
99         // Functions to get latencies/occupancy based on platforms
100         uint16_t getOccupancy(G4_INST* Inst) const;
101         uint16_t getLatency(G4_INST* Inst) const;
102         uint16_t getDPAS8x8Latency() const;
103 
104     private:
105         uint16_t getLatencyLegacy(G4_INST* Inst) const;
106         uint16_t getOccupancyLegacy(G4_INST* Inst) const;
107 
108         uint16_t getLatencyG12(const G4_INST* Inst) const;
109 
110         uint16_t getOccupancyG12(G4_INST* Inst) const;
111 
112         const IR_Builder* m_builder;
113     };
114 
115 } // namespace vISA
116 
117 #endif
118