1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2019 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_callstack.h"
28 
29 namespace r600 {
30 
CallStack(r600_bytecode & bc)31 CallStack::CallStack(r600_bytecode& bc):
32    m_bc(bc)
33 {
34 
35 }
36 
~CallStack()37 CallStack::~CallStack()
38 {
39 }
40 
push(unsigned type)41 int CallStack::push(unsigned type)
42 {
43    switch (type) {
44    case FC_PUSH_VPM:
45       ++m_bc.stack.push;
46       break;
47    case FC_PUSH_WQM:
48       ++m_bc.stack.push_wqm;
49       break;
50    case FC_LOOP:
51       ++m_bc.stack.loop;
52       break;
53    default:
54       assert(0);
55 	}
56 
57    return update_max_depth(type);
58 }
59 
pop(unsigned type)60 void CallStack::pop(unsigned type)
61 {
62    switch(type) {
63    case FC_PUSH_VPM:
64       --m_bc.stack.push;
65       assert(m_bc.stack.push >= 0);
66       break;
67    case FC_PUSH_WQM:
68       --m_bc.stack.push_wqm;
69       assert(m_bc.stack.push_wqm >= 0);
70       break;
71    case FC_LOOP:
72       --m_bc.stack.loop;
73       assert(m_bc.stack.loop >= 0);
74       break;
75    default:
76       assert(0);
77       break;
78    }
79 }
80 
update_max_depth(unsigned type)81 int CallStack::update_max_depth(unsigned type)
82 {
83 
84    r600_stack_info& stack = m_bc.stack;
85    int elements;
86    int entries;
87 
88    int entry_size = stack.entry_size;
89 
90    elements = (stack.loop + stack.push_wqm ) * entry_size;
91    elements += stack.push;
92 
93    switch (m_bc.chip_class) {
94    case R600:
95    case R700:
96      /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
97       * the stack must be reserved to hold the current active/continue
98       * masks */
99      if (type == FC_PUSH_VPM || stack.push > 0) {
100        elements += 2;
101      }
102      break;
103    case CAYMAN:
104      /* r9xx: any stack operation on empty stack consumes 2 additional
105       * elements */
106      elements += 2;
107      break;
108    case EVERGREEN:
109      /* r8xx+: 2 extra elements are not always required, but one extra
110       * element must be added for each of the following cases:
111       * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest
112       *    stack usage.
113       *    (Currently we don't use ALU_ELSE_AFTER.)
114       * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM
115       *    PUSH instruction executed.
116       *
117       *    NOTE: it seems we also need to reserve additional element in some
118       *    other cases, e.g. when we have 4 levels of PUSH_VPM in the shader,
119       *    then STACK_SIZE should be 2 instead of 1 */
120      if (type == FC_PUSH_VPM || stack.push > 0) {
121        elements += 1;
122      }
123      break;
124    default:
125      assert(0);
126      break;
127    }
128 
129    entry_size = 4;
130 
131    entries = (elements + (entry_size - 1)) / entry_size;
132 
133    if (entries > stack.max_entries)
134       stack.max_entries = entries;
135 
136    return elements;
137 }
138 
139 }
140