1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Mika Kuoppala <mika.kuoppala@intel.com>
25  *
26  */
27 
28 #include "i915_drv.h"
29 #include "intel_renderstate.h"
30 
31 static const struct intel_renderstate_rodata *
32 render_state_get_rodata(const int gen)
33 {
34 	switch (gen) {
35 	case 6:
36 		return &gen6_null_state;
37 	case 7:
38 		return &gen7_null_state;
39 	case 8:
40 		return &gen8_null_state;
41 	case 9:
42 		return &gen9_null_state;
43 	}
44 
45 	return NULL;
46 }
47 
48 static int render_state_init(struct render_state *so,
49 			     struct drm_i915_private *dev_priv)
50 {
51 	int ret;
52 
53 	so->gen = INTEL_GEN(dev_priv);
54 	so->rodata = render_state_get_rodata(so->gen);
55 	if (so->rodata == NULL)
56 		return 0;
57 
58 	if (so->rodata->batch_items * 4 > 4096)
59 		return -EINVAL;
60 
61 	so->obj = i915_gem_object_create(&dev_priv->drm, 4096);
62 	if (IS_ERR(so->obj))
63 		return PTR_ERR(so->obj);
64 
65 	ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0);
66 	if (ret)
67 		goto free_gem;
68 
69 	so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj);
70 	return 0;
71 
72 free_gem:
73 	drm_gem_object_unreference(&so->obj->base);
74 	return ret;
75 }
76 
/*
 * Append one dword to the auxiliary batch.
 *
 * The only check made here is for running past the end of the page before
 * the dword is stored. That is sufficient because the null state generator
 * builds the final batch in two passes, commands and state separately; with
 * both sizes known it compacts them by relocating the state right after the
 * commands (taking care of alignment), so there should be sufficient space
 * below them for adding new commands.
 *
 * Note: on overflow this assigns -ENOSPC to the enclosing function's local
 * "ret" and jumps to its "err_out" label, so both must exist at the point of
 * use. @i is evaluated more than once and is incremented on success.
 */
#define OUT_BATCH(batch, i, val)					\
	do {								\
		if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {		\
			ret = -ENOSPC;					\
			goto err_out;					\
		}							\
		(batch)[(i)] = (val);					\
		(i)++;							\
	} while (0)
94 
95 static int render_state_setup(struct render_state *so)
96 {
97 	struct drm_device *dev = so->obj->base.dev;
98 	const struct intel_renderstate_rodata *rodata = so->rodata;
99 	unsigned int i = 0, reloc_index = 0;
100 	struct page *page;
101 	u32 *d;
102 	int ret;
103 
104 	ret = i915_gem_object_set_to_cpu_domain(so->obj, true);
105 	if (ret)
106 		return ret;
107 
108 	page = i915_gem_object_get_dirty_page(so->obj, 0);
109 	d = kmap(page);
110 
111 	while (i < rodata->batch_items) {
112 		u32 s = rodata->batch[i];
113 
114 		if (i * 4  == rodata->reloc[reloc_index]) {
115 			u64 r = s + so->ggtt_offset;
116 			s = lower_32_bits(r);
117 			if (so->gen >= 8) {
118 				if (i + 1 >= rodata->batch_items ||
119 				    rodata->batch[i + 1] != 0) {
120 					ret = -EINVAL;
121 					goto err_out;
122 				}
123 
124 				d[i++] = s;
125 				s = upper_32_bits(r);
126 			}
127 
128 			reloc_index++;
129 		}
130 
131 		d[i++] = s;
132 	}
133 
134 	while (i % CACHELINE_DWORDS)
135 		OUT_BATCH(d, i, MI_NOOP);
136 
137 	so->aux_batch_offset = i * sizeof(u32);
138 
139 	if (HAS_POOLED_EU(dev)) {
140 		/*
141 		 * We always program 3x6 pool config but depending upon which
142 		 * subslice is disabled HW drops down to appropriate config
143 		 * shown below.
144 		 *
145 		 * In the below table 2x6 config always refers to
146 		 * fused-down version, native 2x6 is not available and can
147 		 * be ignored
148 		 *
149 		 * SNo  subslices config                eu pool configuration
150 		 * -----------------------------------------------------------
151 		 * 1    3 subslices enabled (3x6)  -    0x00777000  (9+9)
152 		 * 2    ss0 disabled (2x6)         -    0x00777000  (3+9)
153 		 * 3    ss1 disabled (2x6)         -    0x00770000  (6+6)
154 		 * 4    ss2 disabled (2x6)         -    0x00007000  (9+3)
155 		 */
156 		u32 eu_pool_config = 0x00777000;
157 
158 		OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE);
159 		OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE);
160 		OUT_BATCH(d, i, eu_pool_config);
161 		OUT_BATCH(d, i, 0);
162 		OUT_BATCH(d, i, 0);
163 		OUT_BATCH(d, i, 0);
164 	}
165 
166 	OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
167 	so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
168 
169 	/*
170 	 * Since we are sending length, we need to strictly conform to
171 	 * all requirements. For Gen2 this must be a multiple of 8.
172 	 */
173 	so->aux_batch_size = ALIGN(so->aux_batch_size, 8);
174 
175 	kunmap(page);
176 
177 	ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
178 	if (ret)
179 		return ret;
180 
181 	if (rodata->reloc[reloc_index] != -1) {
182 		DRM_ERROR("only %d relocs resolved\n", reloc_index);
183 		return -EINVAL;
184 	}
185 
186 	return 0;
187 
188 err_out:
189 	kunmap(page);
190 	return ret;
191 }
192 
193 #undef OUT_BATCH
194 
195 void i915_gem_render_state_fini(struct render_state *so)
196 {
197 	i915_gem_object_ggtt_unpin(so->obj);
198 	drm_gem_object_unreference(&so->obj->base);
199 }
200 
201 int i915_gem_render_state_prepare(struct intel_engine_cs *engine,
202 				  struct render_state *so)
203 {
204 	int ret;
205 
206 	if (WARN_ON(engine->id != RCS))
207 		return -ENOENT;
208 
209 	ret = render_state_init(so, engine->i915);
210 	if (ret)
211 		return ret;
212 
213 	if (so->rodata == NULL)
214 		return 0;
215 
216 	ret = render_state_setup(so);
217 	if (ret) {
218 		i915_gem_render_state_fini(so);
219 		return ret;
220 	}
221 
222 	return 0;
223 }
224 
225 int i915_gem_render_state_init(struct drm_i915_gem_request *req)
226 {
227 	struct render_state so;
228 	int ret;
229 
230 	ret = i915_gem_render_state_prepare(req->engine, &so);
231 	if (ret)
232 		return ret;
233 
234 	if (so.rodata == NULL)
235 		return 0;
236 
237 	ret = req->engine->dispatch_execbuffer(req, so.ggtt_offset,
238 					     so.rodata->batch_items * 4,
239 					     I915_DISPATCH_SECURE);
240 	if (ret)
241 		goto out;
242 
243 	if (so.aux_batch_size > 8) {
244 		ret = req->engine->dispatch_execbuffer(req,
245 						     (so.ggtt_offset +
246 						      so.aux_batch_offset),
247 						     so.aux_batch_size,
248 						     I915_DISPATCH_SECURE);
249 		if (ret)
250 			goto out;
251 	}
252 
253 	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
254 
255 out:
256 	i915_gem_render_state_fini(&so);
257 	return ret;
258 }
259