/*
 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
 * Copyright 2013 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */

#define NINE_STATE

#include "device9.h"
#include "swapchain9.h"
#include "basetexture9.h"
#include "buffer9.h"
#include "indexbuffer9.h"
#include "surface9.h"
#include "vertexbuffer9.h"
#include "vertexdeclaration9.h"
#include "vertexshader9.h"
#include "pixelshader9.h"
#include "nine_pipe.h"
#include "nine_ff.h"
#include "nine_limits.h"
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "cso_cache/cso_context.h"
#include "util/u_atomic.h"
#include "util/u_upload_mgr.h"
#include "util/u_math.h"
#include "util/u_box.h"
#include "util/u_simple_shaders.h"
#include "util/u_gen_mipmap.h"

/* CSMT headers */
#include "nine_queue.h"
#include "nine_csmt_helper.h"
#include "os/os_thread.h"

#define DBG_CHANNEL DBG_DEVICE

/* Nine CSMT */

struct csmt_instruction {
    int (* func)(struct NineDevice9 *This, struct csmt_instruction *instr);
};

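/* Context of the CSMT (Command Stream Multi-Threading) worker.
 * The application thread encodes calls as instructions in the queue pool;
 * the worker thread decodes and executes them. event_processed and
 * mutex_processed signal completion of an instruction that requested a
 * wait, while thread_running/thread_resume implement the pause/resume
 * handshake used to temporarily hand the pipe back to the app thread. */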
struct csmt_context {
    thrd_t worker;
    struct nine_queue_pool* pool;
    BOOL terminate;
    cnd_t event_processed;
    mtx_t mutex_processed;
    struct NineDevice9 *device;
    BOOL processed;
    BOOL toPause;
    BOOL hasPaused;
    mtx_t thread_running;
    mtx_t thread_resume;
};

/* Wait for the instruction to be processed.
 * The caller has to ensure that only one thread waits at a time.
 */
static void
nine_csmt_wait_processed(struct csmt_context *ctx)
{
    mtx_lock(&ctx->mutex_processed);
    while (!p_atomic_read(&ctx->processed)) {
        cnd_wait(&ctx->event_processed, &ctx->mutex_processed);
    }
    mtx_unlock(&ctx->mutex_processed);
}

/* CSMT worker thread */
static
int
nine_csmt_worker(void *arg)
{
    struct csmt_context *ctx = arg;
    struct csmt_instruction *instr;
    DBG("CSMT worker spawned\n");

    u_thread_setname("CSMT-Worker");

    while (1) {
        nine_queue_wait_flush(ctx->pool);
        mtx_lock(&ctx->thread_running);

        /* Get instruction. NULL on empty cmdbuf. */
        while (!p_atomic_read(&ctx->terminate) &&
               (instr = (struct csmt_instruction *)nine_queue_get(ctx->pool))) {

            /* decode */
            if (instr->func(ctx->device, instr)) {
                mtx_lock(&ctx->mutex_processed);
                p_atomic_set(&ctx->processed, TRUE);
                cnd_signal(&ctx->event_processed);
                mtx_unlock(&ctx->mutex_processed);
            }
            if (p_atomic_read(&ctx->toPause)) {
                mtx_unlock(&ctx->thread_running);
                /* will wait here until the thread can be resumed */
                mtx_lock(&ctx->thread_resume);
                mtx_lock(&ctx->thread_running);
                mtx_unlock(&ctx->thread_resume);
            }
        }

        mtx_unlock(&ctx->thread_running);
        if (p_atomic_read(&ctx->terminate)) {
            mtx_lock(&ctx->mutex_processed);
            p_atomic_set(&ctx->processed, TRUE);
            cnd_signal(&ctx->event_processed);
            mtx_unlock(&ctx->mutex_processed);
            break;
        }
    }

    DBG("CSMT worker destroyed\n");
    return 0;
}

/* Create a CSMT context.
 * Spawns a worker thread.
 */
struct csmt_context *
nine_csmt_create( struct NineDevice9 *This )
{
    struct csmt_context *ctx;

    ctx = CALLOC_STRUCT(csmt_context);
    if (!ctx)
        return NULL;

    ctx->pool = nine_queue_create();
    if (!ctx->pool) {
        FREE(ctx);
        return NULL;
    }
    cnd_init(&ctx->event_processed);
    (void) mtx_init(&ctx->mutex_processed, mtx_plain);
    (void) mtx_init(&ctx->thread_running, mtx_plain);
    (void) mtx_init(&ctx->thread_resume, mtx_plain);

#if defined(DEBUG) || !defined(NDEBUG)
    u_thread_setname("Main thread");
#endif

    ctx->device = This;

    ctx->worker = u_thread_create(nine_csmt_worker, ctx);
    if (!ctx->worker) {
        nine_queue_delete(ctx->pool);
        FREE(ctx);
        return NULL;
    }

    DBG("Returning context %p\n", ctx);

    return ctx;
}

static int
nop_func( struct NineDevice9 *This, struct csmt_instruction *instr )
{
    (void) This;
    (void) instr;

    return 1;
}

/* Push nop instruction and flush the queue.
 * Waits for the worker to complete. */
void
nine_csmt_process( struct NineDevice9 *device )
{
    struct csmt_instruction* instr;
    struct csmt_context *ctx = device->csmt_ctx;

    if (!device->csmt_active)
        return;

    if (nine_queue_isempty(ctx->pool))
        return;

    DBG("device=%p\n", device);

    /* NOP */
    instr = nine_queue_alloc(ctx->pool, sizeof(struct csmt_instruction));
    assert(instr);
    instr->func = nop_func;

    p_atomic_set(&ctx->processed, FALSE);
    nine_queue_flush(ctx->pool);

    nine_csmt_wait_processed(ctx);
}

void
nine_csmt_flush( struct NineDevice9* device )
{
    if (!device->csmt_active)
        return;

    nine_queue_flush(device->csmt_ctx->pool);
}


/* Destroys a CSMT context.
 * Waits for the worker thread to terminate.
 */
void
nine_csmt_destroy( struct NineDevice9 *device, struct csmt_context *ctx )
{
    struct csmt_instruction* instr;
    thrd_t render_thread = ctx->worker;

    DBG("device=%p ctx=%p\n", device, ctx);

    /* Push nop and flush the queue. */
    instr = nine_queue_alloc(ctx->pool, sizeof(struct csmt_instruction));
    assert(instr);
    instr->func = nop_func;

    p_atomic_set(&ctx->processed, FALSE);
    /* Signal worker to terminate. */
    p_atomic_set(&ctx->terminate, TRUE);
    nine_queue_flush(ctx->pool);

    nine_csmt_wait_processed(ctx);
    nine_queue_delete(ctx->pool);

    mtx_destroy(&ctx->thread_resume);
    mtx_destroy(&ctx->thread_running);

    mtx_destroy(&ctx->mutex_processed);
    cnd_destroy(&ctx->event_processed);

    FREE(ctx);

    thrd_join(render_thread, NULL);
}

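/* Pause/resume protocol: the worker holds thread_running while executing
 * instructions. To pause, the app thread takes thread_resume and sets
 * toPause; the worker then releases thread_running (which the app thread
 * grabs) and blocks on thread_resume until nine_csmt_resume unlocks both
 * mutexes. This lets the app thread use the pipe_context directly. */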
static void
nine_csmt_pause( struct NineDevice9 *device )
{
    struct csmt_context *ctx = device->csmt_ctx;

    if (!device->csmt_active)
        return;

    /* No need to pause the thread */
    if (nine_queue_no_flushed_work(ctx->pool))
        return;

    mtx_lock(&ctx->thread_resume);
    p_atomic_set(&ctx->toPause, TRUE);

    /* Wait until the thread is paused */
    mtx_lock(&ctx->thread_running);
    ctx->hasPaused = TRUE;
    p_atomic_set(&ctx->toPause, FALSE);
}

static void
nine_csmt_resume( struct NineDevice9 *device )
{
    struct csmt_context *ctx = device->csmt_ctx;

    if (!device->csmt_active)
        return;

    if (!ctx->hasPaused)
        return;

    ctx->hasPaused = FALSE;
    mtx_unlock(&ctx->thread_running);
    mtx_unlock(&ctx->thread_resume);
}

struct pipe_context *
nine_context_get_pipe( struct NineDevice9 *device )
{
    nine_csmt_process(device);
    return device->context.pipe;
}

struct pipe_context *
nine_context_get_pipe_multithread( struct NineDevice9 *device )
{
    struct csmt_context *ctx = device->csmt_ctx;

    if (!device->csmt_active)
        return device->context.pipe;

    if (!u_thread_is_self(ctx->worker))
        nine_csmt_process(device);

    return device->context.pipe;
}

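/* Acquire/release pair: acquire pauses the CSMT worker so the caller can
 * use the returned pipe_context directly from the application thread;
 * release resumes the worker. The two calls are meant to be used as a pair. */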
struct pipe_context *
nine_context_get_pipe_acquire( struct NineDevice9 *device )
{
    nine_csmt_pause(device);
    return device->context.pipe;
}

void
nine_context_get_pipe_release( struct NineDevice9 *device )
{
    nine_csmt_resume(device);
}

bool
nine_context_is_worker( struct NineDevice9 *device )
{
    struct csmt_context *ctx = device->csmt_ctx;

    if (!device->csmt_active)
        return false;

    return u_thread_is_self(ctx->worker);
}

/* Nine state functions */

/* Check if some states need to be set dirty */

static inline DWORD
check_multisample(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;
    DWORD *rs = context->rs;
    struct NineSurface9 *rt0 = context->rt[0];
    bool multisampled_target;
    DWORD new_value;

    multisampled_target = rt0 && rt0->desc.MultiSampleType >= 1;
    if (rt0 && rt0->desc.Format == D3DFMT_NULL && context->ds)
        multisampled_target = context->ds->desc.MultiSampleType >= 1;
    new_value = (multisampled_target && rs[D3DRS_MULTISAMPLEANTIALIAS]) ? 1 : 0;
    if (rs[NINED3DRS_MULTISAMPLE] != new_value) {
        rs[NINED3DRS_MULTISAMPLE] = new_value;
        return NINE_STATE_RASTERIZER;
    }
    return 0;
}

/* State preparation only */

static inline void
prepare_blend(struct NineDevice9 *device)
{
    nine_convert_blend_state(&device->context.pipe_data.blend, device->context.rs);
    device->context.commit |= NINE_STATE_COMMIT_BLEND;
}

static inline void
prepare_dsa(struct NineDevice9 *device)
{
    nine_convert_dsa_state(&device->context.pipe_data.dsa, device->context.rs);
    device->context.commit |= NINE_STATE_COMMIT_DSA;
}

static inline void
prepare_rasterizer(struct NineDevice9 *device)
{
    nine_convert_rasterizer_state(device, &device->context.pipe_data.rast, device->context.rs);
    device->context.commit |= NINE_STATE_COMMIT_RASTERIZER;
}

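/* Software vertex processing (SWVP) exposes many more constant registers
 * than hardware vertex processing, so the constants are passed as user
 * constant buffers: cb0/cb1 each cover 4096 float[4] registers (cb1
 * starting right after cb0 in vs_const_f_swvp), cb2 holds the integer
 * registers and cb3 the boolean registers. */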
static void
prepare_vs_constants_userbuf_swvp(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;

    if (context->changed.vs_const_f || context->changed.group & NINE_STATE_SWVP) {
        struct pipe_constant_buffer cb;

        cb.buffer_offset = 0;
        cb.buffer_size = 4096 * sizeof(float[4]);
        cb.user_buffer = context->vs_const_f_swvp;

        if (context->vs->lconstf.ranges) {
            const struct nine_lconstf *lconstf = &(context->vs->lconstf);
            const struct nine_range *r = lconstf->ranges;
            unsigned n = 0;
            float *dst = context->vs_lconstf_temp;
            float *src = (float *)cb.user_buffer;
            memcpy(dst, src, cb.buffer_size);
            while (r) {
                unsigned p = r->bgn;
                unsigned c = r->end - r->bgn;
                memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
                n += c;
                r = r->next;
            }
            cb.user_buffer = dst;
        }

        context->pipe_data.cb0_swvp.buffer_offset = cb.buffer_offset;
        context->pipe_data.cb0_swvp.buffer_size = cb.buffer_size;
        context->pipe_data.cb0_swvp.user_buffer = cb.user_buffer;

        cb.user_buffer = (char *)cb.user_buffer + 4096 * sizeof(float[4]);
        context->pipe_data.cb1_swvp.buffer_offset = cb.buffer_offset;
        context->pipe_data.cb1_swvp.buffer_size = cb.buffer_size;
        context->pipe_data.cb1_swvp.user_buffer = cb.user_buffer;

        context->changed.vs_const_f = 0;
    }

    if (context->changed.vs_const_i || context->changed.group & NINE_STATE_SWVP) {
        struct pipe_constant_buffer cb;

        cb.buffer_offset = 0;
        cb.buffer_size = 2048 * sizeof(float[4]);
        cb.user_buffer = context->vs_const_i;

        context->pipe_data.cb2_swvp.buffer_offset = cb.buffer_offset;
        context->pipe_data.cb2_swvp.buffer_size = cb.buffer_size;
        context->pipe_data.cb2_swvp.user_buffer = cb.user_buffer;
        context->changed.vs_const_i = 0;
    }

    if (context->changed.vs_const_b || context->changed.group & NINE_STATE_SWVP) {
        struct pipe_constant_buffer cb;

        cb.buffer_offset = 0;
        cb.buffer_size = 512 * sizeof(float[4]);
        cb.user_buffer = context->vs_const_b;

        context->pipe_data.cb3_swvp.buffer_offset = cb.buffer_offset;
        context->pipe_data.cb3_swvp.buffer_size = cb.buffer_size;
        context->pipe_data.cb3_swvp.user_buffer = cb.user_buffer;
        context->changed.vs_const_b = 0;
    }

    context->changed.group &= ~NINE_STATE_VS_CONST;
    context->commit |= NINE_STATE_COMMIT_CONST_VS;
}

static void
prepare_vs_constants_userbuf(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;
    uint8_t *upload_ptr = NULL;
    struct pipe_constant_buffer cb;
    cb.buffer = NULL;
    cb.buffer_offset = 0;
    cb.buffer_size = context->cso_shader.vs_const_used_size;
    cb.user_buffer = context->vs_const_f;

    if (context->swvp) {
        prepare_vs_constants_userbuf_swvp(device);
        return;
    }

    if (context->changed.vs_const_i || context->changed.group & NINE_STATE_SWVP) {
        int *idst = (int *)&context->vs_const_f[4 * device->max_vs_const_f];
        memcpy(idst, context->vs_const_i, NINE_MAX_CONST_I * sizeof(int[4]));
        context->changed.vs_const_i = 0;
    }

    if (context->changed.vs_const_b || context->changed.group & NINE_STATE_SWVP) {
        int *idst = (int *)&context->vs_const_f[4 * device->max_vs_const_f];
        uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
        memcpy(bdst, context->vs_const_b, NINE_MAX_CONST_B * sizeof(BOOL));
        context->changed.vs_const_b = 0;
    }

    if (!cb.buffer_size)
        return;

    if (context->vs->lconstf.ranges) {
        /* TODO: Can we make it so that we don't have to copy everything? */
        const struct nine_lconstf *lconstf =  &(context->vs->lconstf);
        const struct nine_range *r = lconstf->ranges;
        unsigned n = 0;
        float *dst = context->vs_lconstf_temp;
        float *src = (float *)cb.user_buffer;
        memcpy(dst, src, cb.buffer_size);
        while (r) {
            unsigned p = r->bgn;
            unsigned c = r->end - r->bgn;
            memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
            n += c;
            r = r->next;
        }
        cb.user_buffer = dst;
    }

    /* Note: if we have to copy some constants to random locations
     * (context->vs->lconstf.ranges), doing separate memcpys directly to
     * upload_ptr would not give efficient WC writes, so for that case we
     * really want the intermediate buffer. */

    u_upload_alloc(context->pipe->const_uploader,
                  0,
                  cb.buffer_size,
                  256, /* Be conservative about alignment */
                  &(cb.buffer_offset),
                  &(cb.buffer),
                  (void**)&upload_ptr);

    assert(cb.buffer && upload_ptr);

    if (!context->cso_shader.vs_const_ranges) {
        memcpy(upload_ptr, cb.user_buffer, cb.buffer_size);
    } else {
        unsigned i = 0;
        unsigned offset = 0;
        while (context->cso_shader.vs_const_ranges[i*2+1] != 0) {
            memcpy(upload_ptr+offset,
                   &((float*)cb.user_buffer)[4*context->cso_shader.vs_const_ranges[i*2]],
                   context->cso_shader.vs_const_ranges[i*2+1] * sizeof(float[4]));
            offset += context->cso_shader.vs_const_ranges[i*2+1] * sizeof(float[4]);
            i++;
        }
    }

    u_upload_unmap(context->pipe->const_uploader);
    cb.user_buffer = NULL;

    /* Free previous resource */
    pipe_resource_reference(&context->pipe_data.cb_vs.buffer, NULL);

    context->pipe_data.cb_vs = cb;
    context->changed.vs_const_f = 0;

    context->changed.group &= ~NINE_STATE_VS_CONST;
    context->commit |= NINE_STATE_COMMIT_CONST_VS;
}

static void
prepare_ps_constants_userbuf(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;
    uint8_t *upload_ptr = NULL;
    struct pipe_constant_buffer cb;
    cb.buffer = NULL;
    cb.buffer_offset = 0;
    cb.buffer_size = context->cso_shader.ps_const_used_size;
    cb.user_buffer = context->ps_const_f;

    if (context->changed.ps_const_i) {
        int *idst = (int *)&context->ps_const_f[4 * device->max_ps_const_f];
        memcpy(idst, context->ps_const_i, sizeof(context->ps_const_i));
        context->changed.ps_const_i = 0;
    }
    if (context->changed.ps_const_b) {
        int *idst = (int *)&context->ps_const_f[4 * device->max_ps_const_f];
        uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
        memcpy(bdst, context->ps_const_b, sizeof(context->ps_const_b));
        context->changed.ps_const_b = 0;
    }

    /* Upload special constants needed to implement PS1.x instructions like TEXBEM, TEXBEML and BEM */
    if (context->ps->bumpenvmat_needed) {
        memcpy(context->ps_lconstf_temp, cb.user_buffer, 8 * sizeof(float[4]));
        memcpy(&context->ps_lconstf_temp[4 * 8], &device->context.bumpmap_vars, sizeof(device->context.bumpmap_vars));

        cb.user_buffer = context->ps_lconstf_temp;
    }

    if (context->ps->byte_code.version < 0x30 &&
        context->rs[D3DRS_FOGENABLE]) {
        float *dst = &context->ps_lconstf_temp[4 * 32];
        if (cb.user_buffer != context->ps_lconstf_temp) {
            memcpy(context->ps_lconstf_temp, cb.user_buffer, 32 * sizeof(float[4]));
            cb.user_buffer = context->ps_lconstf_temp;
        }

        d3dcolor_to_rgba(dst, context->rs[D3DRS_FOGCOLOR]);
        if (context->rs[D3DRS_FOGTABLEMODE] == D3DFOG_LINEAR) {
            dst[4] = asfloat(context->rs[D3DRS_FOGEND]);
            dst[5] = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));
        } else if (context->rs[D3DRS_FOGTABLEMODE] != D3DFOG_NONE) {
            dst[4] = asfloat(context->rs[D3DRS_FOGDENSITY]);
        }
    }

    if (!cb.buffer_size)
        return;

    u_upload_alloc(context->pipe->const_uploader,
                  0,
                  cb.buffer_size,
                  256, /* Be conservative about alignment */
                  &(cb.buffer_offset),
                  &(cb.buffer),
                  (void**)&upload_ptr);

    assert(cb.buffer && upload_ptr);

    if (!context->cso_shader.ps_const_ranges) {
        memcpy(upload_ptr, cb.user_buffer, cb.buffer_size);
    } else {
        unsigned i = 0;
        unsigned offset = 0;
        while (context->cso_shader.ps_const_ranges[i*2+1] != 0) {
            memcpy(upload_ptr+offset,
                   &((float*)cb.user_buffer)[4*context->cso_shader.ps_const_ranges[i*2]],
                   context->cso_shader.ps_const_ranges[i*2+1] * sizeof(float[4]));
            offset += context->cso_shader.ps_const_ranges[i*2+1] * sizeof(float[4]);
            i++;
        }
    }

    u_upload_unmap(context->pipe->const_uploader);
    cb.user_buffer = NULL;

    /* Free previous resource */
    pipe_resource_reference(&context->pipe_data.cb_ps.buffer, NULL);

    context->pipe_data.cb_ps = cb;
    context->changed.ps_const_f = 0;

    context->changed.group &= ~NINE_STATE_PS_CONST;
    context->commit |= NINE_STATE_COMMIT_CONST_PS;
}

static inline uint32_t
prepare_vs(struct NineDevice9 *device, uint8_t shader_changed)
{
    struct nine_context *context = &device->context;
    struct NineVertexShader9 *vs = context->vs;
    uint32_t changed_group = 0;
    int has_key_changed = 0;

    if (likely(context->programmable_vs))
        has_key_changed = NineVertexShader9_UpdateKey(vs, device);

    if (!shader_changed && !has_key_changed)
        return 0;

    /* likely because we dislike FF */
    if (likely(context->programmable_vs)) {
        context->cso_shader.vs = NineVertexShader9_GetVariant(vs,
                                                              &context->cso_shader.vs_const_ranges,
                                                              &context->cso_shader.vs_const_used_size);
    } else {
        vs = device->ff.vs;
        context->cso_shader.vs = vs->ff_cso;
    }

    if (context->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) {
        context->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size;
        changed_group |= NINE_STATE_RASTERIZER;
    }

    if ((context->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask)
        /* A dummy sampler needs to be bound. */
        changed_group |= NINE_STATE_SAMPLER;

    context->commit |= NINE_STATE_COMMIT_VS;
    return changed_group;
}

static inline uint32_t
prepare_ps(struct NineDevice9 *device, uint8_t shader_changed)
{
    struct nine_context *context = &device->context;
    struct NinePixelShader9 *ps = context->ps;
    uint32_t changed_group = 0;
    int has_key_changed = 0;

    if (likely(ps))
        has_key_changed = NinePixelShader9_UpdateKey(ps, context);

    if (!shader_changed && !has_key_changed)
        return 0;

    if (likely(ps)) {
        context->cso_shader.ps = NinePixelShader9_GetVariant(ps,
                                                             &context->cso_shader.ps_const_ranges,
                                                             &context->cso_shader.ps_const_used_size);
    } else {
        ps = device->ff.ps;
        context->cso_shader.ps = ps->ff_cso;
    }

    if ((context->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask)
        /* A dummy sampler needs to be bound. */
        changed_group |= NINE_STATE_SAMPLER;

    context->commit |= NINE_STATE_COMMIT_PS;
    return changed_group;
}

/* State preparation incremental */

/* State preparation + State commit */

static void
update_framebuffer(struct NineDevice9 *device, bool is_clear)
{
    struct nine_context *context = &device->context;
    struct pipe_context *pipe = context->pipe;
    struct pipe_framebuffer_state *fb = &context->pipe_data.fb;
    unsigned i;
    struct NineSurface9 *rt0 = context->rt[0];
    unsigned w = rt0->desc.Width;
    unsigned h = rt0->desc.Height;
    unsigned nr_samples = rt0->base.info.nr_samples;
    unsigned ps_mask = context->ps ? context->ps->rt_mask : 1;
    unsigned mask = is_clear ? 0xf : ps_mask;
    const int sRGB = context->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;

    DBG("\n");

    context->rt_mask = 0x0;
    fb->nr_cbufs = 0;

    /* All render targets must have the same size, and the depth buffer must
     * be at least as big. The multisample type has to match, according to
     * the spec, but some apps get this wrong and no error is returned. The
     * behaviour they get apparently is that the depth buffer is disabled if
     * it doesn't match, and presumably the same happens for render targets. */

    /* Special case: D3DFMT_NULL is used to bind no real render target while
     * still rendering to the depth buffer. In that case we must not take the
     * render target info into account. TODO: figure out what should happen
     * when there are several render targets and the first one is D3DFMT_NULL */
    if (rt0->desc.Format == D3DFMT_NULL && context->ds) {
        w = context->ds->desc.Width;
        h = context->ds->desc.Height;
        nr_samples = context->ds->base.info.nr_samples;
    }

    for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
        struct NineSurface9 *rt = context->rt[i];

        if (rt && rt->desc.Format != D3DFMT_NULL && (mask & (1 << i)) &&
            rt->desc.Width == w && rt->desc.Height == h &&
            rt->base.info.nr_samples == nr_samples) {
            fb->cbufs[i] = NineSurface9_GetSurface(rt, sRGB);
            context->rt_mask |= 1 << i;
            fb->nr_cbufs = i + 1;
        } else {
            /* Color outputs must match RT slot,
             * drivers will have to handle NULL entries for GL, too.
             */
            fb->cbufs[i] = NULL;
        }
    }

    if (context->ds && context->ds->desc.Width >= w &&
        context->ds->desc.Height >= h &&
        context->ds->base.info.nr_samples == nr_samples) {
        fb->zsbuf = NineSurface9_GetSurface(context->ds, 0);
    } else {
        fb->zsbuf = NULL;
    }

    fb->width = w;
    fb->height = h;

    pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */

    if (is_clear && context->rt_mask == ps_mask)
        context->changed.group &= ~NINE_STATE_FB;
}

static void
update_viewport(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;
    const D3DVIEWPORT9 *vport = &context->viewport;
    struct pipe_viewport_state pvport;

    /* D3D coordinates are:
     * -1 .. +1 for X,Y and
     *  0 .. +1 for Z (we use pipe_rasterizer_state.clip_halfz)
     */
    pvport.scale[0] = (float)vport->Width * 0.5f;
    pvport.scale[1] = (float)vport->Height * -0.5f;
    pvport.scale[2] = vport->MaxZ - vport->MinZ;
    pvport.translate[0] = (float)vport->Width * 0.5f + (float)vport->X;
    pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y;
    pvport.translate[2] = vport->MinZ;
    pvport.swizzle_x = PIPE_VIEWPORT_SWIZZLE_POSITIVE_X;
    pvport.swizzle_y = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Y;
    pvport.swizzle_z = PIPE_VIEWPORT_SWIZZLE_POSITIVE_Z;
    pvport.swizzle_w = PIPE_VIEWPORT_SWIZZLE_POSITIVE_W;

    /* We found that R600 and SI cards have some imprecision
     * in the barycentric coordinates used for interpolation.
     * Some shaders rely on having something precise.
     * We found that the proprietary driver has the imprecision issue,
     * except when the render target width and height are powers of two.
     * It is using some sort of workaround for these cases
     * which likely covers all the cases where applications rely
     * on something precise.
     * We haven't found the workaround, but it seems like it's better
     * for applications if the imprecision is biased towards infinity
     * instead of -infinity (which is what we measured). So shift the
     * viewport slightly: not enough to change the rasterization result
     * (in particular for multisampling), but enough to make the imprecision
     * biased towards infinity. We do this shift only if the render target
     * width and height are powers of two.
     * Solves the 'red shadows' bug in UE3 games.
     */
    if (device->driver_bugs.buggy_barycentrics &&
        ((vport->Width & (vport->Width-1)) == 0) &&
        ((vport->Height & (vport->Height-1)) == 0)) {
        pvport.translate[0] -= 1.0f / 128.0f;
        pvport.translate[1] -= 1.0f / 128.0f;
    }

    cso_set_viewport(context->cso, &pvport);
}

/* Loop through VS inputs and pick the vertex elements with the declared
 * usage from the vertex declaration, then insert the instance divisor from
 * the stream source frequency setting.
 */
static void
update_vertex_elements(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;
    const struct NineVertexDeclaration9 *vdecl = device->context.vdecl;
    const struct NineVertexShader9 *vs;
    unsigned n, b, i;
    int index;
    char vdecl_index_map[16]; /* vs->num_inputs <= 16 */
    uint16_t used_streams = 0;
    int dummy_vbo_stream = -1;
    BOOL need_dummy_vbo = FALSE;
    struct cso_velems_state ve;

    context->stream_usage_mask = 0;
    memset(vdecl_index_map, -1, 16);
    vs = context->programmable_vs ? context->vs : device->ff.vs;

    if (vdecl) {
        for (n = 0; n < vs->num_inputs; ++n) {
            DBG("looking up input %u (usage %u) from vdecl(%p)\n",
                n, vs->input_map[n].ndecl, vdecl);

            for (i = 0; i < vdecl->nelems; i++) {
                if (vdecl->usage_map[i] == vs->input_map[n].ndecl) {
                    vdecl_index_map[n] = i;
                    used_streams |= BITFIELD_BIT(vdecl->elems[i].vertex_buffer_index);
                    break;
                }
            }
            if (vdecl_index_map[n] < 0)
                need_dummy_vbo = TRUE;
        }
    } else {
        /* No vertex declaration. This is unlikely to ever happen in practice,
         * but we must not crash on it. */
        need_dummy_vbo = TRUE;
    }

    if (need_dummy_vbo) {
        u_foreach_bit(bit, BITFIELD_MASK(device->caps.MaxStreams) & ~used_streams) {
                dummy_vbo_stream = bit;
                break;
        }
    }
    /* There are fewer vertex shader inputs than stream slots,
     * so if we need a slot for the dummy vbo, we should have found one. */
    assert (!need_dummy_vbo || dummy_vbo_stream != -1);

    for (n = 0; n < vs->num_inputs; ++n) {
        index = vdecl_index_map[n];
        if (index >= 0) {
            ve.velems[n] = vdecl->elems[index];
            b = ve.velems[n].vertex_buffer_index;
            context->stream_usage_mask |= 1 << b;
            /* XXX wine just uses 1 here: */
            if (context->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
                ve.velems[n].instance_divisor = context->stream_freq[b] & 0x7FFFFF;
        } else {
            /* If the vertex declaration is incomplete compared to what the
             * vertex shader needs, we bind a dummy vbo with 0 0 0 0.
             * This is not specified by the spec, but it is the behaviour
             * observed on Windows. */
            ve.velems[n].vertex_buffer_index = dummy_vbo_stream;
            ve.velems[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
            ve.velems[n].src_offset = 0;
            ve.velems[n].instance_divisor = 0;
            ve.velems[n].dual_slot = false;
        }
    }

    if (context->dummy_vbo_bound_at != dummy_vbo_stream) {
        if (context->dummy_vbo_bound_at >= 0)
            context->changed.vtxbuf |= 1 << context->dummy_vbo_bound_at;
        if (dummy_vbo_stream >= 0) {
            context->changed.vtxbuf |= 1 << dummy_vbo_stream;
            context->vbo_bound_done = FALSE;
        }
        context->dummy_vbo_bound_at = dummy_vbo_stream;
    }

    ve.count = vs->num_inputs;
    cso_set_vertex_elements(context->cso, &ve);
}

static void
update_vertex_buffers(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;
    struct pipe_context *pipe = context->pipe;
    struct pipe_vertex_buffer dummy_vtxbuf;
    uint32_t mask = context->changed.vtxbuf;
    unsigned i;

    DBG("mask=%x\n", mask);

    if (context->dummy_vbo_bound_at >= 0) {
        if (!context->vbo_bound_done) {
            dummy_vtxbuf.buffer.resource = device->dummy_vbo;
            dummy_vtxbuf.stride = 0;
            dummy_vtxbuf.is_user_buffer = false;
            dummy_vtxbuf.buffer_offset = 0;
            pipe->set_vertex_buffers(pipe, context->dummy_vbo_bound_at,
                                     1, 0, false, &dummy_vtxbuf);
            context->vbo_bound_done = TRUE;
        }
        mask &= ~(1 << context->dummy_vbo_bound_at);
    }

    for (i = 0; mask; mask >>= 1, ++i) {
        if (mask & 1) {
            if (context->vtxbuf[i].buffer.resource)
                pipe->set_vertex_buffers(pipe, i, 1, 0, false, &context->vtxbuf[i]);
            else
                pipe->set_vertex_buffers(pipe, i, 0, 1, false, NULL);
        }
    }

    context->changed.vtxbuf = 0;
}

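/* Recompute sampler state that is derived from both the bound texture and
 * the D3D sampler states: shadow sampling, cube-texture handling and the
 * effective minimum LOD computed from D3DSAMP_MAXMIPLEVEL and the texture's
 * LOD. Returns TRUE if the CSO sampler state needs to be updated. */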
static inline boolean
update_sampler_derived(struct nine_context *context, unsigned s)
{
    boolean changed = FALSE;

    if (context->samp[s][NINED3DSAMP_SHADOW] != context->texture[s].shadow) {
        changed = TRUE;
        context->samp[s][NINED3DSAMP_SHADOW] = context->texture[s].shadow;
    }

    if (context->samp[s][NINED3DSAMP_CUBETEX] !=
        (context->texture[s].type == D3DRTYPE_CUBETEXTURE)) {
        changed = TRUE;
        context->samp[s][NINED3DSAMP_CUBETEX] =
                context->texture[s].type == D3DRTYPE_CUBETEXTURE;
    }

    if (context->samp[s][D3DSAMP_MIPFILTER] != D3DTEXF_NONE) {
        int lod = context->samp[s][D3DSAMP_MAXMIPLEVEL] - context->texture[s].lod;
        if (lod < 0)
            lod = 0;
        if (context->samp[s][NINED3DSAMP_MINLOD] != lod) {
            changed = TRUE;
            context->samp[s][NINED3DSAMP_MINLOD] = lod;
        }
    } else {
        context->changed.sampler[s] &= ~0x300; /* lod changes irrelevant */
    }

    return changed;
}

/* TODO: add sRGB override to pipe_sampler_state ? */
static void
update_textures_and_samplers(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;
    struct pipe_context *pipe = context->pipe;
    struct pipe_sampler_view *view[NINE_MAX_SAMPLERS];
    unsigned num_textures = 0;
    boolean commit_samplers;
    uint16_t sampler_mask = context->ps ? context->ps->sampler_mask :
                            device->ff.ps->sampler_mask;

    commit_samplers = FALSE;
    const uint16_t ps_mask = sampler_mask | context->enabled_samplers_mask_ps;
    context->bound_samplers_mask_ps = ps_mask;
    num_textures = util_last_bit(ps_mask) + 1;
    /* iterate over the enabled samplers */
    u_foreach_bit(i, context->enabled_samplers_mask_ps) {
        const unsigned s = NINE_SAMPLER_PS(i);
        int sRGB = context->samp[s][D3DSAMP_SRGBTEXTURE] ? 1 : 0;

        view[i] = context->texture[s].view[sRGB];

        if (update_sampler_derived(context, s) || (context->changed.sampler[s] & 0x05fe)) {
            context->changed.sampler[s] = 0;
            commit_samplers = TRUE;
            nine_convert_sampler_state(context->cso, s, context->samp[s]);
        }
    }
    /* iterate over the dummy samplers */
    u_foreach_bit(i, sampler_mask & ~context->enabled_samplers_mask_ps) {
        const unsigned s = NINE_SAMPLER_PS(i);
        /* Bind a dummy sampler. We do not bind the dummy sampler when
         * it is not needed because it could add overhead. The
         * dummy sampler should have r=g=b=0 and a=1. We do not
         * unbind dummy samplers directly when they are not needed
         * anymore; they get removed as soon as texture
         * or sampler states change. */
        view[i] = device->dummy_sampler_view;

        cso_single_sampler(context->cso, PIPE_SHADER_FRAGMENT,
                           s - NINE_SAMPLER_PS(0), &device->dummy_sampler_state);

        commit_samplers = TRUE;
        context->changed.sampler[s] = ~0;
    }
    /* fill in unused samplers */
    u_foreach_bit(i, BITFIELD_MASK(num_textures) & ~ps_mask)
       view[i] = NULL;

    pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, num_textures,
                            num_textures < context->enabled_sampler_count_ps ? context->enabled_sampler_count_ps - num_textures : 0,
                            false, view);
    context->enabled_sampler_count_ps = num_textures;

    if (commit_samplers)
        cso_single_sampler_done(context->cso, PIPE_SHADER_FRAGMENT);

    commit_samplers = FALSE;
    sampler_mask = context->programmable_vs ? context->vs->sampler_mask : 0;
    const uint16_t vs_mask = sampler_mask | context->enabled_samplers_mask_vs;
    context->bound_samplers_mask_vs = vs_mask;
    num_textures = util_last_bit(vs_mask) + 1;
    u_foreach_bit(i, context->enabled_samplers_mask_vs) {
        const unsigned s = NINE_SAMPLER_VS(i);
        int sRGB = context->samp[s][D3DSAMP_SRGBTEXTURE] ? 1 : 0;

        view[i] = context->texture[s].view[sRGB];

        if (update_sampler_derived(context, s) || (context->changed.sampler[s] & 0x05fe)) {
            context->changed.sampler[s] = 0;
            commit_samplers = TRUE;
            nine_convert_sampler_state(context->cso, s, context->samp[s]);
        }
    }
    u_foreach_bit(i, sampler_mask & ~context->enabled_samplers_mask_vs) {
        const unsigned s = NINE_SAMPLER_VS(i);
        /* Bind a dummy sampler. We do not bind the dummy sampler when
         * it is not needed because it could add overhead. The
         * dummy sampler should have r=g=b=0 and a=1. We do not
         * unbind dummy samplers directly when they are not needed
         * anymore; they get removed as soon as texture
         * or sampler states change. */
        view[i] = device->dummy_sampler_view;

        cso_single_sampler(context->cso, PIPE_SHADER_VERTEX,
                           s - NINE_SAMPLER_VS(0), &device->dummy_sampler_state);

        commit_samplers = TRUE;
        context->changed.sampler[s] = ~0;
    }
    /* fill in unused samplers */
    u_foreach_bit(i, BITFIELD_MASK(num_textures) & ~vs_mask)
       view[i] = NULL;

    pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0, num_textures,
                            num_textures < context->enabled_sampler_count_vs ? context->enabled_sampler_count_vs - num_textures : 0,
                            false, view);
    context->enabled_sampler_count_vs = num_textures;

    if (commit_samplers)
        cso_single_sampler_done(context->cso, PIPE_SHADER_VERTEX);
}

/* State commit only */

static inline void
commit_blend(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;

    cso_set_blend(context->cso, &context->pipe_data.blend);
}

static inline void
commit_dsa(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;

    cso_set_depth_stencil_alpha(context->cso, &context->pipe_data.dsa);
}

static inline void
commit_scissor(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;
    struct pipe_context *pipe = context->pipe;

    pipe->set_scissor_states(pipe, 0, 1, &context->scissor);
}

static inline void
commit_rasterizer(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;

    cso_set_rasterizer(context->cso, &context->pipe_data.rast);
}

static inline void
commit_vs_constants(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;
    struct pipe_context *pipe = context->pipe;

    if (unlikely(!context->programmable_vs))
        pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, false, &context->pipe_data.cb_vs_ff);
    else {
        if (context->swvp) {
            pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, false, &context->pipe_data.cb0_swvp);
            pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 1, false, &context->pipe_data.cb1_swvp);
            pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 2, false, &context->pipe_data.cb2_swvp);
            pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 3, false, &context->pipe_data.cb3_swvp);
        } else {
            pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, false, &context->pipe_data.cb_vs);
        }
    }
}

static inline void
commit_ps_constants(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;
    struct pipe_context *pipe = context->pipe;

    if (unlikely(!context->ps))
        pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, false, &context->pipe_data.cb_ps_ff);
    else
        pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, false, &context->pipe_data.cb_ps);
}

static inline void
commit_vs(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;

    context->pipe->bind_vs_state(context->pipe, context->cso_shader.vs);
}


static inline void
commit_ps(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;

    context->pipe->bind_fs_state(context->pipe, context->cso_shader.ps);
}

/* State Update */

#define NINE_STATE_SHADER_CHANGE_VS \
   (NINE_STATE_VS |         \
    NINE_STATE_TEXTURE |    \
    NINE_STATE_VS_PARAMS_MISC | \
    NINE_STATE_SWVP)

#define NINE_STATE_SHADER_CHANGE_PS \
   (NINE_STATE_PS |         \
    NINE_STATE_TEXTURE |    \
    NINE_STATE_PS_PARAMS_MISC)

#define NINE_STATE_FREQUENT \
   (NINE_STATE_RASTERIZER | \
    NINE_STATE_TEXTURE |    \
    NINE_STATE_SAMPLER |    \
    NINE_STATE_VS_CONST |   \
    NINE_STATE_PS_CONST |   \
    NINE_STATE_MULTISAMPLE)

#define NINE_STATE_COMMON \
   (NINE_STATE_FB |       \
    NINE_STATE_BLEND |    \
    NINE_STATE_DSA |      \
    NINE_STATE_VIEWPORT | \
    NINE_STATE_VDECL |    \
    NINE_STATE_IDXBUF |   \
    NINE_STATE_STREAMFREQ)

#define NINE_STATE_RARE      \
   (NINE_STATE_SCISSOR |     \
    NINE_STATE_BLEND_COLOR | \
    NINE_STATE_STENCIL_REF | \
    NINE_STATE_SAMPLE_MASK)

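/* Validate all dirty state before a draw call: run the fixed-function
 * update if needed, let the prepare_*() helpers fill pipe_data and set
 * commit bits, then push the prepared state to the pipe/cso via the
 * commit_*() calls. Only the NINE_STATE_FF / *_CONST bits are preserved
 * in changed.group at the end; the rest is considered validated. */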
static void
nine_update_state(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;
    struct pipe_context *pipe = context->pipe;
    uint32_t group;

    DBG("changed state groups: %x\n", context->changed.group);

    /* NOTE: We may want to use the cso cache for everything, or let
     * NineDevice9.RestoreNonCSOState actually set the states, then we wouldn't
     * have to care about state being clobbered here and could merge this back
     * into update_textures. Except, we also need to re-validate textures that
     * may be dirty anyway, even if no texture bindings changed.
     */

    /* ff_update may change VS/PS dirty bits */
    if (unlikely(!context->programmable_vs || !context->ps))
        nine_ff_update(device);
    group = context->changed.group;

    if (group & (NINE_STATE_SHADER_CHANGE_VS | NINE_STATE_SHADER_CHANGE_PS)) {
        if (group & NINE_STATE_SHADER_CHANGE_VS)
            group |= prepare_vs(device, (group & NINE_STATE_VS) != 0); /* may set NINE_STATE_RASTERIZER and NINE_STATE_SAMPLER */
        if (group & NINE_STATE_SHADER_CHANGE_PS)
            group |= prepare_ps(device, (group & NINE_STATE_PS) != 0);
    }

    if (group & (NINE_STATE_COMMON | NINE_STATE_VS)) {
        if (group & NINE_STATE_FB)
            update_framebuffer(device, FALSE);
        if (group & NINE_STATE_BLEND)
            prepare_blend(device);
        if (group & NINE_STATE_DSA)
            prepare_dsa(device);
        if (group & NINE_STATE_VIEWPORT)
            update_viewport(device);
        if (group & (NINE_STATE_VDECL | NINE_STATE_VS | NINE_STATE_STREAMFREQ))
            update_vertex_elements(device);
    }

    if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS | NINE_STATE_SWVP))) {
        if (group & NINE_STATE_MULTISAMPLE)
            group |= check_multisample(device);
        if (group & NINE_STATE_RASTERIZER)
            prepare_rasterizer(device);
        if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER))
            update_textures_and_samplers(device);
        if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS | NINE_STATE_SWVP)) && context->programmable_vs)
            prepare_vs_constants_userbuf(device);
        if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && context->ps)
            prepare_ps_constants_userbuf(device);
    }

    if (context->changed.vtxbuf)
        update_vertex_buffers(device);

    if (context->commit & NINE_STATE_COMMIT_BLEND)
        commit_blend(device);
    if (context->commit & NINE_STATE_COMMIT_DSA)
        commit_dsa(device);
    if (context->commit & NINE_STATE_COMMIT_RASTERIZER)
        commit_rasterizer(device);
    if (context->commit & NINE_STATE_COMMIT_CONST_VS)
        commit_vs_constants(device);
    if (context->commit & NINE_STATE_COMMIT_CONST_PS)
        commit_ps_constants(device);
    if (context->commit & NINE_STATE_COMMIT_VS)
        commit_vs(device);
    if (context->commit & NINE_STATE_COMMIT_PS)
        commit_ps(device);

    context->commit = 0;

    if (unlikely(context->changed.ucp)) {
        pipe->set_clip_state(pipe, &context->clip);
        context->changed.ucp = FALSE;
    }

    if (unlikely(group & NINE_STATE_RARE)) {
        if (group & NINE_STATE_SCISSOR)
            commit_scissor(device);
        if (group & NINE_STATE_BLEND_COLOR) {
            struct pipe_blend_color color;
            d3dcolor_to_rgba(&color.color[0], context->rs[D3DRS_BLENDFACTOR]);
            pipe->set_blend_color(pipe, &color);
        }
        if (group & NINE_STATE_SAMPLE_MASK) {
            if (context->rt[0]->desc.MultiSampleType <= D3DMULTISAMPLE_NONMASKABLE) {
                pipe->set_sample_mask(pipe, ~0);
            } else {
                pipe->set_sample_mask(pipe, context->rs[D3DRS_MULTISAMPLEMASK]);
            }
        }
        if (group & NINE_STATE_STENCIL_REF) {
            struct pipe_stencil_ref ref;
            ref.ref_value[0] = context->rs[D3DRS_STENCILREF];
            ref.ref_value[1] = ref.ref_value[0];
            pipe->set_stencil_ref(pipe, ref);
        }
    }

    context->changed.group &=
        (NINE_STATE_FF | NINE_STATE_VS_CONST | NINE_STATE_PS_CONST);

    DBG("finished\n");
}

#define RESZ_CODE 0x7fa05000

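/* RESZ is an AMD driver hack: writing RESZ_CODE to D3DRS_POINTSIZE asks the
 * driver to resolve the currently bound (possibly multisampled) depth
 * surface into the depth texture bound at sampler 0, which we implement
 * with a plain blit below. */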
static void
NineDevice9_ResolveZ( struct NineDevice9 *device )
{
    struct nine_context *context = &device->context;
    const struct util_format_description *desc;
    struct NineSurface9 *source = context->ds;
    struct pipe_resource *src, *dst;
    struct pipe_blit_info blit;

    DBG("RESZ resolve\n");

    if (!source || !context->texture[0].enabled ||
        context->texture[0].type != D3DRTYPE_TEXTURE)
        return;

    src = source->base.resource;
    dst = context->texture[0].resource;

    if (!src || !dst)
        return;

    /* Check that dst is a depth format. We already know it for src. */
    desc = util_format_description(dst->format);
    if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
        return;

    memset(&blit, 0, sizeof(blit));
    blit.src.resource = src;
    blit.src.level = 0;
    blit.src.format = src->format;
    blit.src.box.z = 0;
    blit.src.box.depth = 1;
    blit.src.box.x = 0;
    blit.src.box.y = 0;
    blit.src.box.width = src->width0;
    blit.src.box.height = src->height0;

    blit.dst.resource = dst;
    blit.dst.level = 0;
    blit.dst.format = dst->format;
    blit.dst.box.z = 0;
    blit.dst.box.depth = 1;
    blit.dst.box.x = 0;
    blit.dst.box.y = 0;
    blit.dst.box.width = dst->width0;
    blit.dst.box.height = dst->height0;

    blit.mask = PIPE_MASK_ZS;
    blit.filter = PIPE_TEX_FILTER_NEAREST;
    blit.scissor_enable = FALSE;

    context->pipe->blit(context->pipe, &blit);
}

1375 #define ALPHA_TO_COVERAGE_ENABLE   MAKEFOURCC('A', '2', 'M', '1')
1376 #define ALPHA_TO_COVERAGE_DISABLE  MAKEFOURCC('A', '2', 'M', '0')
1377 #define FETCH4_ENABLE              MAKEFOURCC('G', 'E', 'T', '4')
1378 #define FETCH4_DISABLE             MAKEFOURCC('G', 'E', 'T', '1')
1379 
1380 /* Nine_context functions.
1381  * Serialized through CSMT macros.
1382  */
1383 
1384 static void
1385 nine_context_set_texture_apply(struct NineDevice9 *device,
1386                                DWORD stage,
1387                                DWORD fetch4_shadow_enabled,
1388                                DWORD lod,
1389                                D3DRESOURCETYPE type,
1390                                uint8_t pstype,
1391                                struct pipe_resource *res,
1392                                struct pipe_sampler_view *view0,
1393                                struct pipe_sampler_view *view1);
1394 
1395 static void
1396 nine_context_set_pixel_shader_constant_i_transformed(struct NineDevice9 *device,
1397                                                      UINT StartRegister,
1398                                                      const int *pConstantData,
1399                                                      unsigned pConstantData_size,
1400                                                      UINT Vector4iCount);
1401 
CSMT_ITEM_NO_WAIT(nine_context_set_render_state,ARG_VAL (D3DRENDERSTATETYPE,State),ARG_VAL (DWORD,Value))1402 CSMT_ITEM_NO_WAIT(nine_context_set_render_state,
1403                   ARG_VAL(D3DRENDERSTATETYPE, State),
1404                   ARG_VAL(DWORD, Value))
1405 {
1406     struct nine_context *context = &device->context;
1407 
1408     /* Amd hacks (equivalent to GL extensions) */
1409     if (unlikely(State == D3DRS_POINTSIZE)) {
1410         if (Value == RESZ_CODE) {
1411             NineDevice9_ResolveZ(device);
1412             return;
1413         }
1414 
1415         /* NINED3DRS_ALPHACOVERAGE:
1416          * bit 0: NVIDIA alpha to coverage
1417          * bit 1: NVIDIA ATOC state active
1418          * bit 2: AMD alpha to coverage
1419          * These need to be separate else the set of states to
1420          * disable NVIDIA alpha to coverage can disable the AMD one */
1421         if (Value == ALPHA_TO_COVERAGE_ENABLE ||
1422             Value == ALPHA_TO_COVERAGE_DISABLE) {
1423             context->rs[NINED3DRS_ALPHACOVERAGE] &= 3;
1424             context->rs[NINED3DRS_ALPHACOVERAGE] |= (Value == ALPHA_TO_COVERAGE_ENABLE) ? 4 : 0;
1425             context->changed.group |= NINE_STATE_BLEND;
1426             return;
1427         }
1428     }
1429 
1430     /* NV hack */
1431     if (unlikely(State == D3DRS_ADAPTIVETESS_Y)) {
1432         if (Value == D3DFMT_ATOC || (Value == D3DFMT_UNKNOWN && context->rs[NINED3DRS_ALPHACOVERAGE] & 3)) {
1433             context->rs[NINED3DRS_ALPHACOVERAGE] &= 4;
1434             context->rs[NINED3DRS_ALPHACOVERAGE] |=
1435                 ((Value == D3DFMT_ATOC) ? 3 : 0) & (context->rs[D3DRS_ALPHATESTENABLE] ? 3 : 2);
1436             context->changed.group |= NINE_STATE_BLEND;
1437             return;
1438         }
1439     }
1440     if (unlikely(State == D3DRS_ALPHATESTENABLE && (context->rs[NINED3DRS_ALPHACOVERAGE] & 2))) {
1441         DWORD alphacoverage_prev = context->rs[NINED3DRS_ALPHACOVERAGE];
1442         context->rs[NINED3DRS_ALPHACOVERAGE] &= 6;
1443         context->rs[NINED3DRS_ALPHACOVERAGE] |= (context->rs[D3DRS_ALPHATESTENABLE] ? 1 : 0);
1444         if (context->rs[NINED3DRS_ALPHACOVERAGE] != alphacoverage_prev)
1445             context->changed.group |= NINE_STATE_BLEND;
1446     }
1447 
1448     context->rs[State] = nine_fix_render_state_value(State, Value);
1449     context->changed.group |= nine_render_state_group[State];
1450 }
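
/* Worked example of the NINED3DRS_ALPHACOVERAGE tracking above (informational
 * only, not part of the original file): with alpha test enabled, an
 * application writing the ATOC fourcc to D3DRS_ADAPTIVETESS_Y, e.g.
 *
 *    IDirect3DDevice9_SetRenderState(dev, D3DRS_ALPHATESTENABLE, TRUE);
 *    IDirect3DDevice9_SetRenderState(dev, D3DRS_ADAPTIVETESS_Y,
 *                                    MAKEFOURCC('A', 'T', 'O', 'C'));
 *
 * ends up with rs[NINED3DRS_ALPHACOVERAGE] == 3 (bits 0 and 1: NVIDIA alpha
 * to coverage requested and applied). With alpha test disabled only bit 1 is
 * set. Writing D3DFMT_UNKNOWN afterwards clears bits 0-1 while leaving bit 2
 * (the AMD path driven through D3DRS_POINTSIZE) untouched.
 */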
1451 
1452 CSMT_ITEM_NO_WAIT(nine_context_set_texture_apply,
1453                   ARG_VAL(DWORD, stage),
1454                   ARG_VAL(DWORD, fetch4_shadow_enabled),
1455                   ARG_VAL(DWORD, lod),
1456                   ARG_VAL(D3DRESOURCETYPE, type),
1457                   ARG_VAL(uint8_t, pstype),
1458                   ARG_BIND_RES(struct pipe_resource, res),
1459                   ARG_BIND_VIEW(struct pipe_sampler_view, view0),
1460                   ARG_BIND_VIEW(struct pipe_sampler_view, view1))
1461 {
1462     struct nine_context *context = &device->context;
1463     uint enabled = fetch4_shadow_enabled & 1;
1464     uint shadow = (fetch4_shadow_enabled >> 1) & 1;
1465     uint fetch4_compatible = (fetch4_shadow_enabled >> 2) & 1;
1466 
1467     context->texture[stage].enabled = enabled;
1468     if (enabled) {
1469        if (stage < NINE_MAX_SAMPLERS_PS)
1470           context->enabled_samplers_mask_ps |= BITFIELD_BIT(stage - NINE_SAMPLER_PS(0));
1471        else if (stage >= NINE_SAMPLER_VS(0))
1472           context->enabled_samplers_mask_vs |= BITFIELD_BIT(stage - NINE_SAMPLER_VS(0));
1473     } else {
1474        if (stage < NINE_MAX_SAMPLERS_PS)
1475           context->enabled_samplers_mask_ps &= ~BITFIELD_BIT(stage - NINE_SAMPLER_PS(0));
1476        else if (stage >= NINE_SAMPLER_VS(0))
1477           context->enabled_samplers_mask_vs &= ~BITFIELD_BIT(stage - NINE_SAMPLER_VS(0));
1478     }
1479     context->samplers_shadow &= ~(1 << stage);
1480     context->samplers_shadow |= shadow << stage;
1481     context->samplers_fetch4 &= ~(1 << stage);
1482     context->samplers_fetch4 |= fetch4_compatible << stage;
1483     context->texture[stage].shadow = shadow;
1484     context->texture[stage].lod = lod;
1485     context->texture[stage].type = type;
1486     context->texture[stage].pstype = pstype;
1487     pipe_resource_reference(&context->texture[stage].resource, res);
1488     pipe_sampler_view_reference(&context->texture[stage].view[0], view0);
1489     pipe_sampler_view_reference(&context->texture[stage].view[1], view1);
1490 
1491     context->changed.group |= NINE_STATE_TEXTURE;
1492 }
1493 
1494 void
1495 nine_context_set_texture(struct NineDevice9 *device,
1496                          DWORD Stage,
1497                          struct NineBaseTexture9 *tex)
1498 {
1499     DWORD fetch4_shadow_enabled = 0;
1500     DWORD lod = 0;
1501     D3DRESOURCETYPE type = D3DRTYPE_TEXTURE;
1502     uint8_t pstype = 0;
1503     struct pipe_resource *res = NULL;
1504     struct pipe_sampler_view *view0 = NULL, *view1 = NULL;
1505 
1506     /* For managed pool, the data can be initially incomplete.
1507      * In that case, the texture is rebound later
1508      * (in NineBaseTexture9_Validate/NineBaseTexture9_UploadSelf). */
1509     if (tex && tex->base.resource) {
1510         fetch4_shadow_enabled = 1;
1511         fetch4_shadow_enabled |= tex->shadow << 1;
1512         fetch4_shadow_enabled |= tex->fetch4_compatible << 2;
1513         lod = tex->managed.lod;
1514         type = tex->base.type;
1515         pstype = tex->pstype;
1516         res = tex->base.resource;
1517         view0 = NineBaseTexture9_GetSamplerView(tex, 0);
1518         view1 = NineBaseTexture9_GetSamplerView(tex, 1);
1519     }
1520 
1521     nine_context_set_texture_apply(device, Stage,
1522                                    fetch4_shadow_enabled,
1523                                    lod, type, pstype,
1524                                    res, view0, view1);
1525 }
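
/* Sketch of the fetch4_shadow_enabled packing shared by
 * nine_context_set_texture and nine_context_set_texture_apply above
 * (informational only):
 *   bit 0 - a texture with a valid resource is bound (enabled)
 *   bit 1 - tex->shadow
 *   bit 2 - tex->fetch4_compatible
 * e.g. a bound shadow, FETCH4-compatible texture is passed as
 * (1 << 0) | (1 << 1) | (1 << 2) == 7 and unpacked again in the apply call.
 */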
1526 
1527 CSMT_ITEM_NO_WAIT(nine_context_set_sampler_state,
1528                   ARG_VAL(DWORD, Sampler),
1529                   ARG_VAL(D3DSAMPLERSTATETYPE, Type),
1530                   ARG_VAL(DWORD, Value))
1531 {
1532     struct nine_context *context = &device->context;
1533 
1534     if (unlikely(Type == D3DSAMP_MIPMAPLODBIAS)) {
1535         if (Value == FETCH4_ENABLE ||
1536             Value == FETCH4_DISABLE) {
1537             context->rs[NINED3DRS_FETCH4] &= ~(1 << Sampler);
1538             context->rs[NINED3DRS_FETCH4] |= (Value == FETCH4_ENABLE) << Sampler;
1539             context->changed.group |= NINE_STATE_PS_PARAMS_MISC;
1540             if (Value == FETCH4_ENABLE)
1541                 WARN_ONCE("FETCH4 support is incomplete. Please report if buggy shadows.");
1542             return;
1543         }
1544     }
1545 
1546     if (unlikely(!nine_check_sampler_state_value(Type, Value)))
1547         return;
1548 
1549     context->samp[Sampler][Type] = Value;
1550     context->changed.group |= NINE_STATE_SAMPLER;
1551     context->changed.sampler[Sampler] |= 1 << Type;
1552 }
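
/* Illustrative app-side trigger for the FETCH4 path handled above (assumed
 * usage, not part of the original file; `dev` is a hypothetical
 * IDirect3DDevice9 pointer):
 *
 *    IDirect3DDevice9_SetSamplerState(dev, 0, D3DSAMP_MIPMAPLODBIAS,
 *                                     MAKEFOURCC('G', 'E', 'T', '4'));
 *    ... draws sampling a single-channel texture with Fetch4 ...
 *    IDirect3DDevice9_SetSamplerState(dev, 0, D3DSAMP_MIPMAPLODBIAS,
 *                                     MAKEFOURCC('G', 'E', 'T', '1'));
 *
 * Any other value written to D3DSAMP_MIPMAPLODBIAS is stored as a regular
 * LOD bias below.
 */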
1553 
1554 CSMT_ITEM_NO_WAIT(nine_context_set_stream_source_apply,
1555                   ARG_VAL(UINT, StreamNumber),
1556                   ARG_BIND_RES(struct pipe_resource, res),
1557                   ARG_VAL(UINT, OffsetInBytes),
1558                   ARG_VAL(UINT, Stride))
1559 {
1560     struct nine_context *context = &device->context;
1561     const unsigned i = StreamNumber;
1562 
1563     /* For normal draws these checks are redundant,
1564      * but not for the *Up draw paths */
1565     if (context->vtxbuf[i].buffer.resource == res &&
1566         context->vtxbuf[i].buffer_offset == OffsetInBytes &&
1567         context->vtxbuf[i].stride == Stride)
1568         return;
1569 
1570     context->vtxbuf[i].stride = Stride;
1571     context->vtxbuf[i].buffer_offset = OffsetInBytes;
1572     pipe_resource_reference(&context->vtxbuf[i].buffer.resource, res);
1573 
1574     context->changed.vtxbuf |= 1 << StreamNumber;
1575 }
1576 
1577 void
1578 nine_context_set_stream_source(struct NineDevice9 *device,
1579                                UINT StreamNumber,
1580                                struct NineVertexBuffer9 *pVBuf9,
1581                                UINT OffsetInBytes,
1582                                UINT Stride)
1583 {
1584     struct pipe_resource *res = NULL;
1585     unsigned offset = 0;
1586 
1587     if (pVBuf9)
1588         res = NineVertexBuffer9_GetResource(pVBuf9, &offset);
1589     /* NineVertexBuffer9_GetResource returns an internal offset,
1590      * which is added to OffsetInBytes below */
1591 
1592     nine_context_set_stream_source_apply(device, StreamNumber,
1593                                          res, offset + OffsetInBytes,
1594                                          Stride);
1595 }
1596 
1597 CSMT_ITEM_NO_WAIT(nine_context_set_stream_source_freq,
1598                   ARG_VAL(UINT, StreamNumber),
1599                   ARG_VAL(UINT, Setting))
1600 {
1601     struct nine_context *context = &device->context;
1602 
1603     context->stream_freq[StreamNumber] = Setting;
1604 
1605     if (Setting & D3DSTREAMSOURCE_INSTANCEDATA)
1606         context->stream_instancedata_mask |= 1 << StreamNumber;
1607     else
1608         context->stream_instancedata_mask &= ~(1 << StreamNumber);
1609 
1610     if (StreamNumber != 0)
1611         context->changed.group |= NINE_STATE_STREAMFREQ;
1612 }
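
/* Illustrative D3D9 instancing setup that eventually reaches
 * nine_context_set_stream_source_freq above (assumed app-side usage, not
 * part of the original file): drawing 10 instances with per-vertex data on
 * stream 0 and per-instance data on stream 1:
 *
 *    IDirect3DDevice9_SetStreamSourceFreq(dev, 0,
 *                                         D3DSTREAMSOURCE_INDEXEDDATA | 10);
 *    IDirect3DDevice9_SetStreamSourceFreq(dev, 1,
 *                                         D3DSTREAMSOURCE_INSTANCEDATA | 1);
 *
 * Stream 1 is recorded in stream_instancedata_mask, and the instance count
 * (10) is later recovered from stream_freq[0] & 0x7FFFFF in init_draw_info().
 */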
1613 
1614 CSMT_ITEM_NO_WAIT(nine_context_set_indices_apply,
1615                   ARG_BIND_RES(struct pipe_resource, res),
1616                   ARG_VAL(UINT, IndexSize),
1617                   ARG_VAL(UINT, OffsetInBytes))
1618 {
1619     struct nine_context *context = &device->context;
1620 
1621     context->index_size = IndexSize;
1622     context->index_offset = OffsetInBytes;
1623     pipe_resource_reference(&context->idxbuf, res);
1624 
1625     context->changed.group |= NINE_STATE_IDXBUF;
1626 }
1627 
1628 void
1629 nine_context_set_indices(struct NineDevice9 *device,
1630                          struct NineIndexBuffer9 *idxbuf)
1631 {
1632     struct pipe_resource *res = NULL;
1633     UINT IndexSize = 0;
1634     unsigned OffsetInBytes = 0;
1635 
1636     if (idxbuf) {
1637         res = NineIndexBuffer9_GetBuffer(idxbuf, &OffsetInBytes);
1638         IndexSize = idxbuf->index_size;
1639     }
1640 
1641     nine_context_set_indices_apply(device, res, IndexSize, OffsetInBytes);
1642 }
1643 
1644 CSMT_ITEM_NO_WAIT(nine_context_set_vertex_declaration,
1645                   ARG_BIND_REF(struct NineVertexDeclaration9, vdecl))
1646 {
1647     struct nine_context *context = &device->context;
1648     BOOL was_programmable_vs = context->programmable_vs;
1649 
1650     nine_bind(&context->vdecl, vdecl);
1651 
1652     context->programmable_vs = context->vs && !(context->vdecl && context->vdecl->position_t);
1653     if (was_programmable_vs != context->programmable_vs) {
1654         context->commit |= NINE_STATE_COMMIT_CONST_VS;
1655         context->changed.group |= NINE_STATE_VS;
1656     }
1657 
1658     context->changed.group |= NINE_STATE_VDECL;
1659 }
1660 
1661 CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader,
1662                   ARG_BIND_REF(struct NineVertexShader9, pShader))
1663 {
1664     struct nine_context *context = &device->context;
1665     BOOL was_programmable_vs = context->programmable_vs;
1666 
1667     nine_bind(&context->vs, pShader);
1668 
1669     context->programmable_vs = context->vs && !(context->vdecl && context->vdecl->position_t);
1670 
1671     /* ff -> non-ff: commit back non-ff constants */
1672     if (!was_programmable_vs && context->programmable_vs)
1673         context->commit |= NINE_STATE_COMMIT_CONST_VS;
1674 
1675     context->changed.group |= NINE_STATE_VS;
1676 }
1677 
1678 CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_f,
1679                   ARG_VAL(UINT, StartRegister),
1680                   ARG_MEM(float, pConstantData),
1681                   ARG_MEM_SIZE(unsigned, pConstantData_size),
1682                   ARG_VAL(UINT, Vector4fCount))
1683 {
1684     struct nine_context *context = &device->context;
1685     float *vs_const_f = device->may_swvp ? context->vs_const_f_swvp : context->vs_const_f;
1686 
1687     memcpy(&vs_const_f[StartRegister * 4],
1688            pConstantData,
1689            pConstantData_size);
1690 
1691     if (device->may_swvp) {
1692         Vector4fCount = MIN2(StartRegister + Vector4fCount, NINE_MAX_CONST_F) - StartRegister;
1693         if (StartRegister < NINE_MAX_CONST_F)
1694             memcpy(&context->vs_const_f[StartRegister * 4],
1695                    pConstantData,
1696                    Vector4fCount * 4 * sizeof(context->vs_const_f[0]));
1697     }
1698 
1699     context->changed.vs_const_f = TRUE;
1700     context->changed.group |= NINE_STATE_VS_CONST;
1701 }
1702 
1703 CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_i,
1704                   ARG_VAL(UINT, StartRegister),
1705                   ARG_MEM(int, pConstantData),
1706                   ARG_MEM_SIZE(unsigned, pConstantData_size),
1707                   ARG_VAL(UINT, Vector4iCount))
1708 {
1709     struct nine_context *context = &device->context;
1710     int i;
1711 
1712     if (device->driver_caps.vs_integer) {
1713         memcpy(&context->vs_const_i[4 * StartRegister],
1714                pConstantData,
1715                pConstantData_size);
1716     } else {
1717         for (i = 0; i < Vector4iCount; i++) {
1718             context->vs_const_i[4 * (StartRegister + i)] = fui((float)(pConstantData[4 * i]));
1719             context->vs_const_i[4 * (StartRegister + i) + 1] = fui((float)(pConstantData[4 * i + 1]));
1720             context->vs_const_i[4 * (StartRegister + i) + 2] = fui((float)(pConstantData[4 * i + 2]));
1721             context->vs_const_i[4 * (StartRegister + i) + 3] = fui((float)(pConstantData[4 * i + 3]));
1722         }
1723     }
1724 
1725     context->changed.vs_const_i = TRUE;
1726     context->changed.group |= NINE_STATE_VS_CONST | NINE_STATE_VS_PARAMS_MISC;
1727 }
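
/* Example of the non-integer fallback above (informational only): on a
 * driver without vs_integer support, an integer constant {1, 2, 3, 4} is
 * stored as the bit patterns of the floats 1.0f, 2.0f, 3.0f and 4.0f via
 * fui(), so the register can be consumed as a float vector by the
 * translated shader.
 */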
1728 
1729 CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_b,
1730                   ARG_VAL(UINT, StartRegister),
1731                   ARG_MEM(BOOL, pConstantData),
1732                   ARG_MEM_SIZE(unsigned, pConstantData_size),
1733                   ARG_VAL(UINT, BoolCount))
1734 {
1735     struct nine_context *context = &device->context;
1736     int i;
1737     uint32_t bool_true = device->driver_caps.vs_integer ? 0xFFFFFFFF : fui(1.0f);
1738 
1739     (void) pConstantData_size;
1740 
1741     for (i = 0; i < BoolCount; i++)
1742         context->vs_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
1743 
1744     context->changed.vs_const_b = TRUE;
1745     context->changed.group |= NINE_STATE_VS_CONST | NINE_STATE_VS_PARAMS_MISC;
1746 }
1747 
1748 CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader,
1749                   ARG_BIND_REF(struct NinePixelShader9, ps))
1750 {
1751     struct nine_context *context = &device->context;
1752     unsigned old_mask = context->ps ? context->ps->rt_mask : 1;
1753     unsigned mask;
1754 
1755     /* ff -> non-ff: commit back non-ff constants */
1756     if (!context->ps && ps)
1757         context->commit |= NINE_STATE_COMMIT_CONST_PS;
1758 
1759     nine_bind(&context->ps, ps);
1760 
1761     context->changed.group |= NINE_STATE_PS;
1762 
1763     mask = context->ps ? context->ps->rt_mask : 1;
1764     /* We need to update cbufs if the pixel shader would
1765      * write to different render targets */
1766     if (mask != old_mask)
1767         context->changed.group |= NINE_STATE_FB;
1768 }
1769 
1770 CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_f,
1771                   ARG_VAL(UINT, StartRegister),
1772                   ARG_MEM(float, pConstantData),
1773                   ARG_MEM_SIZE(unsigned, pConstantData_size),
1774                   ARG_VAL(UINT, Vector4fCount))
1775 {
1776     struct nine_context *context = &device->context;
1777 
1778     memcpy(&context->ps_const_f[StartRegister * 4],
1779            pConstantData,
1780            pConstantData_size);
1781 
1782     context->changed.ps_const_f = TRUE;
1783     context->changed.group |= NINE_STATE_PS_CONST;
1784 }
1785 
1786 /* For stateblocks */
1787 CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_i_transformed,
1788                   ARG_VAL(UINT, StartRegister),
1789                   ARG_MEM(int, pConstantData),
1790                   ARG_MEM_SIZE(unsigned, pConstantData_size),
1791                   ARG_VAL(UINT, Vector4iCount))
1792 {
1793     struct nine_context *context = &device->context;
1794 
1795     memcpy(&context->ps_const_i[StartRegister][0],
1796            pConstantData,
1797            Vector4iCount * sizeof(context->ps_const_i[0]));
1798 
1799     context->changed.ps_const_i = TRUE;
1800     context->changed.group |= NINE_STATE_PS_CONST | NINE_STATE_PS_PARAMS_MISC;
1801 }
1802 
1803 CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_i,
1804                   ARG_VAL(UINT, StartRegister),
1805                   ARG_MEM(int, pConstantData),
1806                   ARG_MEM_SIZE(unsigned, pConstantData_size),
1807                   ARG_VAL(UINT, Vector4iCount))
1808 {
1809     struct nine_context *context = &device->context;
1810     int i;
1811 
1812     if (device->driver_caps.ps_integer) {
1813         memcpy(&context->ps_const_i[StartRegister][0],
1814                pConstantData,
1815                pConstantData_size);
1816     } else {
1817         for (i = 0; i < Vector4iCount; i++) {
1818             context->ps_const_i[StartRegister+i][0] = fui((float)(pConstantData[4*i]));
1819             context->ps_const_i[StartRegister+i][1] = fui((float)(pConstantData[4*i+1]));
1820             context->ps_const_i[StartRegister+i][2] = fui((float)(pConstantData[4*i+2]));
1821             context->ps_const_i[StartRegister+i][3] = fui((float)(pConstantData[4*i+3]));
1822         }
1823     }
1824     context->changed.ps_const_i = TRUE;
1825     context->changed.group |= NINE_STATE_PS_CONST | NINE_STATE_PS_PARAMS_MISC;
1826 }
1827 
1828 CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_b,
1829                   ARG_VAL(UINT, StartRegister),
1830                   ARG_MEM(BOOL, pConstantData),
1831                   ARG_MEM_SIZE(unsigned, pConstantData_size),
1832                   ARG_VAL(UINT, BoolCount))
1833 {
1834     struct nine_context *context = &device->context;
1835     int i;
1836     uint32_t bool_true = device->driver_caps.ps_integer ? 0xFFFFFFFF : fui(1.0f);
1837 
1838     (void) pConstantData_size;
1839 
1840     for (i = 0; i < BoolCount; i++)
1841         context->ps_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
1842 
1843     context->changed.ps_const_b = TRUE;
1844     context->changed.group |= NINE_STATE_PS_CONST | NINE_STATE_PS_PARAMS_MISC;
1845 }
1846 
1847 /* XXX: use resource, as resource might change */
1848 CSMT_ITEM_NO_WAIT(nine_context_set_render_target,
1849                   ARG_VAL(DWORD, RenderTargetIndex),
1850                   ARG_BIND_REF(struct NineSurface9, rt))
1851 {
1852     struct nine_context *context = &device->context;
1853     const unsigned i = RenderTargetIndex;
1854 
1855     if (i == 0) {
1856         context->changed.group |= NINE_STATE_MULTISAMPLE;
1857 
1858         if (context->rt[0] &&
1859             (context->rt[0]->desc.MultiSampleType <= D3DMULTISAMPLE_NONMASKABLE) !=
1860             (rt->desc.MultiSampleType <= D3DMULTISAMPLE_NONMASKABLE))
1861             context->changed.group |= NINE_STATE_SAMPLE_MASK;
1862     }
1863 
1864     if (context->rt[i] != rt) {
1865        nine_bind(&context->rt[i], rt);
1866        context->changed.group |= NINE_STATE_FB;
1867     }
1868 }
1869 
1870 /* XXX: use resource instead of ds, as resource might change */
1871 CSMT_ITEM_NO_WAIT(nine_context_set_depth_stencil,
1872                   ARG_BIND_REF(struct NineSurface9, ds))
1873 {
1874     struct nine_context *context = &device->context;
1875 
1876     nine_bind(&context->ds, ds);
1877     context->changed.group |= NINE_STATE_FB;
1878 }
1879 
1880 CSMT_ITEM_NO_WAIT(nine_context_set_viewport,
1881                   ARG_COPY_REF(D3DVIEWPORT9, viewport))
1882 {
1883     struct nine_context *context = &device->context;
1884 
1885     if (!memcmp(viewport, &context->viewport, sizeof(context->viewport)))
1886         return;
1887 
1888     context->viewport = *viewport;
1889     context->changed.group |= NINE_STATE_VIEWPORT;
1890 }
1891 
1892 CSMT_ITEM_NO_WAIT(nine_context_set_scissor,
1893                   ARG_COPY_REF(struct pipe_scissor_state, scissor))
1894 {
1895     struct nine_context *context = &device->context;
1896 
1897     if (!memcmp(scissor, &context->scissor, sizeof(context->scissor)))
1898         return;
1899 
1900     context->scissor = *scissor;
1901     context->changed.group |= NINE_STATE_SCISSOR;
1902 }
1903 
1904 CSMT_ITEM_NO_WAIT(nine_context_set_transform,
1905                   ARG_VAL(D3DTRANSFORMSTATETYPE, State),
1906                   ARG_COPY_REF(D3DMATRIX, pMatrix))
1907 {
1908     struct nine_context *context = &device->context;
1909     D3DMATRIX *M = nine_state_access_transform(&context->ff, State, TRUE);
1910 
1911     *M = *pMatrix;
1912     context->ff.changed.transform[State / 32] |= 1 << (State % 32);
1913     context->changed.group |= NINE_STATE_FF;
1914 }
1915 
1916 CSMT_ITEM_NO_WAIT(nine_context_set_material,
1917                   ARG_COPY_REF(D3DMATERIAL9, pMaterial))
1918 {
1919     struct nine_context *context = &device->context;
1920 
1921     context->ff.material = *pMaterial;
1922     context->changed.group |= NINE_STATE_FF_MATERIAL;
1923 }
1924 
1925 CSMT_ITEM_NO_WAIT(nine_context_set_light,
1926                   ARG_VAL(DWORD, Index),
1927                   ARG_COPY_REF(D3DLIGHT9, pLight))
1928 {
1929     struct nine_context *context = &device->context;
1930 
1931     (void)nine_state_set_light(&context->ff, Index, pLight);
1932     context->changed.group |= NINE_STATE_FF_LIGHTING;
1933 }
1934 
1935 
1936 /* For stateblocks */
1937 static void
1938 nine_context_light_enable_stateblock(struct NineDevice9 *device,
1939                                      const uint16_t active_light[NINE_MAX_LIGHTS_ACTIVE], /* TODO: use a pointer that conveys the size for csmt */
1940                                      unsigned int num_lights_active)
1941 {
1942     struct nine_context *context = &device->context;
1943 
1944     /* TODO: Use CSMT_* to avoid calling nine_csmt_process */
1945     nine_csmt_process(device);
1946     memcpy(context->ff.active_light, active_light, NINE_MAX_LIGHTS_ACTIVE * sizeof(context->ff.active_light[0]));
1947     context->ff.num_lights_active = num_lights_active;
1948     context->changed.group |= NINE_STATE_FF_LIGHTING;
1949 }
1950 
1951 CSMT_ITEM_NO_WAIT(nine_context_light_enable,
1952                   ARG_VAL(DWORD, Index),
1953                   ARG_VAL(BOOL, Enable))
1954 {
1955     struct nine_context *context = &device->context;
1956 
1957     nine_state_light_enable(&context->ff, Index, Enable);
1958     context->changed.group |= NINE_STATE_FF_LIGHTING;
1959 }
1960 
1961 CSMT_ITEM_NO_WAIT(nine_context_set_texture_stage_state,
1962                   ARG_VAL(DWORD, Stage),
1963                   ARG_VAL(D3DTEXTURESTAGESTATETYPE, Type),
1964                   ARG_VAL(DWORD, Value))
1965 {
1966     struct nine_context *context = &device->context;
1967     int bumpmap_index = -1;
1968 
1969     context->ff.tex_stage[Stage][Type] = Value;
1970     switch (Type) {
1971     case D3DTSS_BUMPENVMAT00:
1972         bumpmap_index = 4 * Stage;
1973         break;
1974     case D3DTSS_BUMPENVMAT01:
1975         bumpmap_index = 4 * Stage + 1;
1976         break;
1977     case D3DTSS_BUMPENVMAT10:
1978         bumpmap_index = 4 * Stage + 2;
1979         break;
1980     case D3DTSS_BUMPENVMAT11:
1981         bumpmap_index = 4 * Stage + 3;
1982         break;
1983     case D3DTSS_BUMPENVLSCALE:
1984         bumpmap_index = 4 * 8 + 2 * Stage;
1985         break;
1986     case D3DTSS_BUMPENVLOFFSET:
1987         bumpmap_index = 4 * 8 + 2 * Stage + 1;
1988         break;
1989     case D3DTSS_TEXTURETRANSFORMFLAGS:
1990         context->changed.group |= NINE_STATE_PS_PARAMS_MISC;
1991         break;
1992     default:
1993         break;
1994     }
1995 
1996     if (bumpmap_index >= 0) {
1997         context->bumpmap_vars[bumpmap_index] = Value;
1998         context->changed.group |= NINE_STATE_PS_CONST;
1999     }
2000 
2001     context->changed.group |= NINE_STATE_FF_PS_CONSTS;
2002     context->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32);
2003 }
2004 
2005 CSMT_ITEM_NO_WAIT(nine_context_set_clip_plane,
2006                   ARG_VAL(DWORD, Index),
2007                   ARG_COPY_REF(struct nine_clipplane, pPlane))
2008 {
2009     struct nine_context *context = &device->context;
2010 
2011     memcpy(&context->clip.ucp[Index][0], pPlane, sizeof(context->clip.ucp[0]));
2012     context->changed.ucp = TRUE;
2013 }
2014 
2015 CSMT_ITEM_NO_WAIT(nine_context_set_swvp,
2016                   ARG_VAL(boolean, swvp))
2017 {
2018     struct nine_context *context = &device->context;
2019 
2020     context->swvp = swvp;
2021     context->changed.group |= NINE_STATE_SWVP;
2022 }
2023 
2024 /* Do not write to nine_context directly: going through the serialized
2025  * calls is slower, but works with CSMT. TODO: write a special CSMT
2026  * version that would record the list of commands as much as possible,
2027  * and use the individual calls above otherwise.
2028  */
2029 void
2030 nine_context_apply_stateblock(struct NineDevice9 *device,
2031                               const struct nine_state *src)
2032 {
2033     int i;
2034 
2035     /* No need to apply src->changed.group, since all calls do
2036      * set context->changed.group */
2037 
2038     for (i = 0; i < ARRAY_SIZE(src->changed.rs); ++i) {
2039         uint32_t m = src->changed.rs[i];
2040         while (m) {
2041             const int r = ffs(m) - 1;
2042             m &= ~(1 << r);
2043             nine_context_set_render_state(device, i * 32 + r, src->rs_advertised[i * 32 + r]);
2044         }
2045     }
2046 
2047     /* Textures */
2048     if (src->changed.texture) {
2049         uint32_t m = src->changed.texture;
2050         unsigned s;
2051 
2052         for (s = 0; m; ++s, m >>= 1) {
2053             struct NineBaseTexture9 *tex = src->texture[s];
2054             if (!(m & 1))
2055                 continue;
2056             nine_context_set_texture(device, s, tex);
2057         }
2058     }
2059 
2060     /* Sampler state */
2061     if (src->changed.group & NINE_STATE_SAMPLER) {
2062         unsigned s;
2063 
2064         for (s = 0; s < NINE_MAX_SAMPLERS; ++s) {
2065             uint32_t m = src->changed.sampler[s];
2066             while (m) {
2067                 const int i = ffs(m) - 1;
2068                 m &= ~(1 << i);
2069                 nine_context_set_sampler_state(device, s, i, src->samp_advertised[s][i]);
2070             }
2071         }
2072     }
2073 
2074     /* Vertex buffers */
2075     if (src->changed.vtxbuf | src->changed.stream_freq) {
2076         uint32_t m = src->changed.vtxbuf | src->changed.stream_freq;
2077         for (i = 0; m; ++i, m >>= 1) {
2078             if (src->changed.vtxbuf & (1 << i))
2079                 nine_context_set_stream_source(device, i, src->stream[i], src->vtxbuf[i].buffer_offset, src->vtxbuf[i].stride);
2080             if (src->changed.stream_freq & (1 << i))
2081                 nine_context_set_stream_source_freq(device, i, src->stream_freq[i]);
2082         }
2083     }
2084 
2085     /* Index buffer */
2086     if (src->changed.group & NINE_STATE_IDXBUF)
2087         nine_context_set_indices(device, src->idxbuf);
2088 
2089     /* Vertex declaration */
2090     if ((src->changed.group & NINE_STATE_VDECL) && src->vdecl)
2091         nine_context_set_vertex_declaration(device, src->vdecl);
2092 
2093     /* Vertex shader */
2094     if (src->changed.group & NINE_STATE_VS)
2095         nine_context_set_vertex_shader(device, src->vs);
2096 
2097     /* Pixel shader */
2098     if (src->changed.group & NINE_STATE_PS)
2099         nine_context_set_pixel_shader(device, src->ps);
2100 
2101     /* Vertex constants */
2102     if (src->changed.group & NINE_STATE_VS_CONST) {
2103         struct nine_range *r;
2104         for (r = src->changed.vs_const_f; r; r = r->next)
2105             nine_context_set_vertex_shader_constant_f(device, r->bgn,
2106                                                       &src->vs_const_f[r->bgn * 4],
2107                                                       sizeof(float[4]) * (r->end - r->bgn),
2108                                                       r->end - r->bgn);
2109         for (r = src->changed.vs_const_i; r; r = r->next)
2110             nine_context_set_vertex_shader_constant_i(device, r->bgn,
2111                                                       &src->vs_const_i[r->bgn * 4],
2112                                                       sizeof(int[4]) * (r->end - r->bgn),
2113                                                       r->end - r->bgn);
2114         for (r = src->changed.vs_const_b; r; r = r->next)
2115             nine_context_set_vertex_shader_constant_b(device, r->bgn,
2116                                                       &src->vs_const_b[r->bgn * 4],
2117                                                       sizeof(BOOL) * (r->end - r->bgn),
2118                                                       r->end - r->bgn);
2119     }
2120 
2121     /* Pixel constants */
2122     if (src->changed.group & NINE_STATE_PS_CONST) {
2123         struct nine_range *r;
2124         for (r = src->changed.ps_const_f; r; r = r->next)
2125             nine_context_set_pixel_shader_constant_f(device, r->bgn,
2126                                                      &src->ps_const_f[r->bgn * 4],
2127                                                      sizeof(float[4]) * (r->end - r->bgn),
2128                                                      r->end - r->bgn);
2129         if (src->changed.ps_const_i) {
2130             uint16_t m = src->changed.ps_const_i;
2131             for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
2132                 if (m & 1)
2133                     nine_context_set_pixel_shader_constant_i_transformed(device, i,
2134                                                                          src->ps_const_i[i], sizeof(int[4]), 1);
2135         }
2136         if (src->changed.ps_const_b) {
2137             uint16_t m = src->changed.ps_const_b;
2138             for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
2139                 if (m & 1)
2140                     nine_context_set_pixel_shader_constant_b(device, i,
2141                                                              &src->ps_const_b[i], sizeof(BOOL), 1);
2142         }
2143     }
2144 
2145     /* Viewport */
2146     if (src->changed.group & NINE_STATE_VIEWPORT)
2147         nine_context_set_viewport(device, &src->viewport);
2148 
2149     /* Scissor */
2150     if (src->changed.group & NINE_STATE_SCISSOR)
2151         nine_context_set_scissor(device, &src->scissor);
2152 
2153     /* User Clip Planes */
2154     if (src->changed.ucp)
2155         for (i = 0; i < PIPE_MAX_CLIP_PLANES; ++i)
2156             if (src->changed.ucp & (1 << i))
2157                 nine_context_set_clip_plane(device, i, (struct nine_clipplane*)&src->clip.ucp[i][0]);
2158 
2159     if (!(src->changed.group & NINE_STATE_FF))
2160         return;
2161 
2162     /* Fixed function state. */
2163 
2164     if (src->changed.group & NINE_STATE_FF_MATERIAL)
2165         nine_context_set_material(device, &src->ff.material);
2166 
2167     if (src->changed.group & NINE_STATE_FF_PS_CONSTS) {
2168         unsigned s;
2169         for (s = 0; s < NINE_MAX_TEXTURE_STAGES; ++s) {
2170             for (i = 0; i < NINED3DTSS_COUNT; ++i)
2171                 if (src->ff.changed.tex_stage[s][i / 32] & (1 << (i % 32)))
2172                    nine_context_set_texture_stage_state(device, s, i, src->ff.tex_stage[s][i]);
2173         }
2174     }
2175     if (src->changed.group & NINE_STATE_FF_LIGHTING) {
2176         for (i = 0; i < src->ff.num_lights; ++i)
2177             if (src->ff.light[i].Type != NINED3DLIGHT_INVALID)
2178                 nine_context_set_light(device, i, &src->ff.light[i]);
2179 
2180         nine_context_light_enable_stateblock(device, src->ff.active_light, src->ff.num_lights_active);
2181     }
2182     if (src->changed.group & NINE_STATE_FF_VSTRANSF) {
2183         for (i = 0; i < ARRAY_SIZE(src->ff.changed.transform); ++i) {
2184             unsigned s;
2185             if (!src->ff.changed.transform[i])
2186                 continue;
2187             for (s = i * 32; s < (i * 32 + 32); ++s) {
2188                 if (!(src->ff.changed.transform[i] & (1 << (s % 32))))
2189                     continue;
2190                 /* MaxVertexBlendMatrixIndex is 8, which means
2191                  * we don't read past index D3DTS_WORLDMATRIX(8).
2192                  * swvp is supposed to allow all 256, but we don't
2193                  * implement it for now. */
2194                 if (s > D3DTS_WORLDMATRIX(8))
2195                     break;
2196                 nine_context_set_transform(device, s,
2197                                            nine_state_access_transform(
2198                                                (struct nine_ff_state *)&src->ff,
2199                                                                        s, FALSE));
2200             }
2201         }
2202     }
2203 }
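
/* The loops above walk dirty bitmasks with ffs(); spelled out with an
 * assumed value (informational only): if src->changed.rs[0] == 0x180
 * (bits 7 and 8 set), ffs() yields bit 7 then bit 8, so only render states
 * 7 (D3DRS_ZENABLE) and 8 (D3DRS_FILLMODE) are replayed into the context;
 * untouched states are skipped entirely.
 */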
2204 
2205 static void
2206 nine_update_state_framebuffer_clear(struct NineDevice9 *device)
2207 {
2208     struct nine_context *context = &device->context;
2209 
2210     if (context->changed.group & NINE_STATE_FB)
2211         update_framebuffer(device, TRUE);
2212 }
2213 
2214 CSMT_ITEM_NO_WAIT(nine_context_clear_fb,
2215                   ARG_VAL(DWORD, Count),
2216                   ARG_COPY_REF(D3DRECT, pRects),
2217                   ARG_VAL(DWORD, Flags),
2218                   ARG_VAL(D3DCOLOR, Color),
2219                   ARG_VAL(float, Z),
2220                   ARG_VAL(DWORD, Stencil))
2221 {
2222     struct nine_context *context = &device->context;
2223     const int sRGB = context->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
2224     struct pipe_surface *cbuf, *zsbuf;
2225     struct pipe_context *pipe = context->pipe;
2226     struct NineSurface9 *zsbuf_surf = context->ds;
2227     struct NineSurface9 *rt;
2228     unsigned bufs = 0;
2229     unsigned r, i;
2230     union pipe_color_union rgba;
2231     unsigned rt_mask = 0;
2232     D3DRECT rect;
2233 
2234     nine_update_state_framebuffer_clear(device);
2235 
2236     if (Flags & D3DCLEAR_TARGET) bufs |= PIPE_CLEAR_COLOR;
2237     /* Ignore Z buffer if not bound */
2238     if (context->pipe_data.fb.zsbuf != NULL) {
2239         if (Flags & D3DCLEAR_ZBUFFER) bufs |= PIPE_CLEAR_DEPTH;
2240         if (Flags & D3DCLEAR_STENCIL) bufs |= PIPE_CLEAR_STENCIL;
2241     }
2242     if (!bufs)
2243         return;
2244     d3dcolor_to_pipe_color_union(&rgba, Color);
2245 
2246     rect.x1 = context->viewport.X;
2247     rect.y1 = context->viewport.Y;
2248     rect.x2 = context->viewport.Width + rect.x1;
2249     rect.y2 = context->viewport.Height + rect.y1;
2250 
2251     /* Both rectangles apply, which is weird, but that's D3D9. */
2252     if (context->rs[D3DRS_SCISSORTESTENABLE]) {
2253         rect.x1 = MAX2(rect.x1, context->scissor.minx);
2254         rect.y1 = MAX2(rect.y1, context->scissor.miny);
2255         rect.x2 = MIN2(rect.x2, context->scissor.maxx);
2256         rect.y2 = MIN2(rect.y2, context->scissor.maxy);
2257     }
2258 
2259     if (Count) {
2260         /* Some apps pass a rect larger than the viewport; treat one that covers it as a full clear. */
2261         if (pRects[0].x1 <= rect.x1 && pRects[0].x2 >= rect.x2 &&
2262             pRects[0].y1 <= rect.y1 && pRects[0].y2 >= rect.y2) {
2263             DBG("First rect covers viewport.\n");
2264             Count = 0;
2265             pRects = NULL;
2266         }
2267     }
2268 
2269     if (rect.x1 >= context->pipe_data.fb.width || rect.y1 >= context->pipe_data.fb.height)
2270         return;
2271 
2272     for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
2273         if (context->rt[i] && context->rt[i]->desc.Format != D3DFMT_NULL)
2274             rt_mask |= 1 << i;
2275     }
2276 
2277     /* fast path, clears everything at once */
2278     if (!Count &&
2279         (!(bufs & PIPE_CLEAR_COLOR) || (rt_mask == context->rt_mask)) &&
2280         rect.x1 == 0 && rect.y1 == 0 &&
2281         /* Case where only the render target is cleared: check the clear region against the rt size. */
2282         ((!(bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
2283          rect.x2 >= context->pipe_data.fb.width &&
2284          rect.y2 >= context->pipe_data.fb.height) ||
2285         /* Case where the depth buffer is cleared (and possibly the rt too).
2286          * The depth buffer size is always >= the rt size; compare it to the clear region */
2287         ((bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
2288          rect.x2 >= zsbuf_surf->desc.Width &&
2289          rect.y2 >= zsbuf_surf->desc.Height))) {
2290         DBG("Clear fast path\n");
2291         pipe->clear(pipe, bufs, NULL, &rgba, Z, Stencil);
2292         return;
2293     }
2294 
2295     if (!Count) {
2296         Count = 1;
2297         pRects = &rect;
2298     }
2299 
2300     for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
2301         rt = context->rt[i];
2302         if (!rt || rt->desc.Format == D3DFMT_NULL ||
2303             !(bufs & PIPE_CLEAR_COLOR))
2304             continue; /* save space, compiler should hoist this */
2305         cbuf = NineSurface9_GetSurface(rt, sRGB);
2306         for (r = 0; r < Count; ++r) {
2307             /* Don't trust users to pass these in the right order. */
2308             unsigned x1 = MIN2(pRects[r].x1, pRects[r].x2);
2309             unsigned y1 = MIN2(pRects[r].y1, pRects[r].y2);
2310             unsigned x2 = MAX2(pRects[r].x1, pRects[r].x2);
2311             unsigned y2 = MAX2(pRects[r].y1, pRects[r].y2);
2312 #ifndef NINE_LAX
2313             /* Drop negative rectangles (like wine expects). */
2314             if (pRects[r].x1 > pRects[r].x2) continue;
2315             if (pRects[r].y1 > pRects[r].y2) continue;
2316 #endif
2317 
2318             x1 = MAX2(x1, rect.x1);
2319             y1 = MAX2(y1, rect.y1);
2320             x2 = MIN3(x2, rect.x2, rt->desc.Width);
2321             y2 = MIN3(y2, rect.y2, rt->desc.Height);
2322 
2323             DBG("Clearing (%u..%u)x(%u..%u)\n", x1, x2, y1, y2);
2324             pipe->clear_render_target(pipe, cbuf, &rgba,
2325                                       x1, y1, x2 - x1, y2 - y1, false);
2326         }
2327     }
2328     if (!(bufs & PIPE_CLEAR_DEPTHSTENCIL))
2329         return;
2330 
2331     bufs &= PIPE_CLEAR_DEPTHSTENCIL;
2332 
2333     for (r = 0; r < Count; ++r) {
2334         unsigned x1 = MIN2(pRects[r].x1, pRects[r].x2);
2335         unsigned y1 = MIN2(pRects[r].y1, pRects[r].y2);
2336         unsigned x2 = MAX2(pRects[r].x1, pRects[r].x2);
2337         unsigned y2 = MAX2(pRects[r].y1, pRects[r].y2);
2338 #ifndef NINE_LAX
2339         /* Drop negative rectangles. */
2340         if (pRects[r].x1 > pRects[r].x2) continue;
2341         if (pRects[r].y1 > pRects[r].y2) continue;
2342 #endif
2343 
2344         x1 = MIN2(x1, rect.x1);
2345         y1 = MIN2(y1, rect.y1);
2346         x2 = MIN3(x2, rect.x2, zsbuf_surf->desc.Width);
2347         y2 = MIN3(y2, rect.y2, zsbuf_surf->desc.Height);
2348 
2349         zsbuf = NineSurface9_GetSurface(zsbuf_surf, 0);
2350         assert(zsbuf);
2351         pipe->clear_depth_stencil(pipe, zsbuf, bufs, Z, Stencil,
2352                                   x1, y1, x2 - x1, y2 - y1, false);
2353     }
2354     return;
2355 }
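
/* Worked example of the clear-rect handling above (informational only,
 * using assumed values): with a 640x480 viewport at (0,0), scissor test
 * enabled with a scissor of (100,100)-(200,200), and no user rects, the
 * single clear rect becomes (100,100)-(200,200). Since it does not start at
 * (0,0), the fast pipe->clear() path is skipped and clear_render_target /
 * clear_depth_stencil are issued per rectangle instead.
 */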
2356 
2357 
2358 static inline void
2359 init_draw_info(struct pipe_draw_info *info,
2360                struct pipe_draw_start_count_bias *draw,
2361                struct NineDevice9 *dev, D3DPRIMITIVETYPE type, UINT count)
2362 {
2363     info->mode = d3dprimitivetype_to_pipe_prim(type);
2364     draw->count = prim_count_to_vertex_count(type, count);
2365     info->start_instance = 0;
2366     info->instance_count = 1;
2367     if (dev->context.stream_instancedata_mask & dev->context.stream_usage_mask)
2368         info->instance_count = MAX2(dev->context.stream_freq[0] & 0x7FFFFF, 1);
2369     info->primitive_restart = FALSE;
2370     info->has_user_indices = FALSE;
2371     info->take_index_buffer_ownership = FALSE;
2372     info->index_bias_varies = FALSE;
2373     info->increment_draw_id = FALSE;
2374     info->restart_index = 0;
2375 }
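
/* Example of the primitive-to-vertex conversion above (informational only):
 * for D3DPT_TRIANGLELIST with PrimitiveCount = 4,
 * prim_count_to_vertex_count() yields draw->count = 12, while
 * D3DPT_TRIANGLESTRIP with the same count yields 6 (count + 2).
 */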
2376 
2377 CSMT_ITEM_NO_WAIT(nine_context_draw_primitive,
2378                   ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
2379                   ARG_VAL(UINT, StartVertex),
2380                   ARG_VAL(UINT, PrimitiveCount))
2381 {
2382     struct nine_context *context = &device->context;
2383     struct pipe_draw_info info;
2384     struct pipe_draw_start_count_bias draw;
2385 
2386     nine_update_state(device);
2387 
2388     init_draw_info(&info, &draw, device, PrimitiveType, PrimitiveCount);
2389     info.index_size = 0;
2390     draw.start = StartVertex;
2391     draw.index_bias = 0;
2392     info.min_index = draw.start;
2393     info.max_index = draw.start + draw.count - 1;
2394     info.index.resource = NULL;
2395 
2396     context->pipe->draw_vbo(context->pipe, &info, 0, NULL, &draw, 1);
2397 }
2398 
2399 CSMT_ITEM_NO_WAIT(nine_context_draw_indexed_primitive,
2400                   ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
2401                   ARG_VAL(INT, BaseVertexIndex),
2402                   ARG_VAL(UINT, MinVertexIndex),
2403                   ARG_VAL(UINT, NumVertices),
2404                   ARG_VAL(UINT, StartIndex),
2405                   ARG_VAL(UINT, PrimitiveCount))
2406 {
2407     struct nine_context *context = &device->context;
2408     struct pipe_draw_info info;
2409     struct pipe_draw_start_count_bias draw;
2410 
2411     nine_update_state(device);
2412 
2413     init_draw_info(&info, &draw, device, PrimitiveType, PrimitiveCount);
2414     info.index_size = context->index_size;
2415     draw.start = context->index_offset / context->index_size + StartIndex;
2416     draw.index_bias = BaseVertexIndex;
2417     info.index_bounds_valid = true;
2418     /* These don't include index bias: */
2419     info.min_index = MinVertexIndex;
2420     info.max_index = MinVertexIndex + NumVertices - 1;
2421     info.index.resource = context->idxbuf;
2422 
2423     context->pipe->draw_vbo(context->pipe, &info, 0, NULL, &draw, 1);
2424 }
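
/* Example of the start-index computation above (informational only, assumed
 * values): with 16-bit indices (index_size = 2), an index buffer bound with
 * an offset of 24 bytes and StartIndex = 5, draw.start = 24 / 2 + 5 = 17,
 * i.e. byte offsets are converted to index units before reaching the pipe.
 */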
2425 
2426 CSMT_ITEM_NO_WAIT(nine_context_draw_indexed_primitive_from_vtxbuf_idxbuf,
2427                   ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
2428                   ARG_VAL(UINT, MinVertexIndex),
2429                   ARG_VAL(UINT, NumVertices),
2430                   ARG_VAL(UINT, PrimitiveCount),
2431                   ARG_BIND_VBUF(struct pipe_vertex_buffer, vbuf),
2432                   ARG_BIND_RES(struct pipe_resource, ibuf),
2433                   ARG_VAL(void *, user_ibuf),
2434                   ARG_VAL(UINT, index_offset),
2435                   ARG_VAL(UINT, index_size))
2436 {
2437     struct nine_context *context = &device->context;
2438     struct pipe_draw_info info;
2439     struct pipe_draw_start_count_bias draw;
2440 
2441     nine_update_state(device);
2442 
2443     init_draw_info(&info, &draw, device, PrimitiveType, PrimitiveCount);
2444     info.index_size = index_size;
2445     draw.start = index_offset / info.index_size;
2446     draw.index_bias = 0;
2447     info.index_bounds_valid = true;
2448     info.min_index = MinVertexIndex;
2449     info.max_index = MinVertexIndex + NumVertices - 1;
2450     info.has_user_indices = ibuf == NULL;
2451     if (ibuf)
2452         info.index.resource = ibuf;
2453     else
2454         info.index.user = user_ibuf;
2455 
2456     context->pipe->set_vertex_buffers(context->pipe, 0, 1, 0, false, vbuf);
2457     context->changed.vtxbuf |= 1;
2458 
2459     context->pipe->draw_vbo(context->pipe, &info, 0, NULL, &draw, 1);
2460 }
2461 
2462 CSMT_ITEM_NO_WAIT(nine_context_resource_copy_region,
2463                   ARG_BIND_REF(struct NineUnknown, dst),
2464                   ARG_BIND_REF(struct NineUnknown, src),
2465                   ARG_BIND_RES(struct pipe_resource, dst_res),
2466                   ARG_VAL(unsigned, dst_level),
2467                   ARG_COPY_REF(struct pipe_box, dst_box),
2468                   ARG_BIND_RES(struct pipe_resource, src_res),
2469                   ARG_VAL(unsigned, src_level),
2470                   ARG_COPY_REF(struct pipe_box, src_box))
2471 {
2472     struct nine_context *context = &device->context;
2473 
2474     (void) dst;
2475     (void) src;
2476 
2477     context->pipe->resource_copy_region(context->pipe,
2478             dst_res, dst_level,
2479             dst_box->x, dst_box->y, dst_box->z,
2480             src_res, src_level,
2481             src_box);
2482 }
2483 
2484 CSMT_ITEM_NO_WAIT(nine_context_blit,
2485                   ARG_BIND_REF(struct NineUnknown, dst),
2486                   ARG_BIND_REF(struct NineUnknown, src),
2487                   ARG_BIND_BLIT(struct pipe_blit_info, blit))
2488 {
2489     struct nine_context *context = &device->context;
2490 
2491     (void) dst;
2492     (void) src;
2493 
2494     context->pipe->blit(context->pipe, blit);
2495 }
2496 
2497 CSMT_ITEM_NO_WAIT(nine_context_clear_render_target,
2498                   ARG_BIND_REF(struct NineSurface9, surface),
2499                   ARG_VAL(D3DCOLOR, color),
2500                   ARG_VAL(UINT, x),
2501                   ARG_VAL(UINT, y),
2502                   ARG_VAL(UINT, width),
2503                   ARG_VAL(UINT, height))
2504 {
2505     struct nine_context *context = &device->context;
2506     struct pipe_surface *surf;
2507     union pipe_color_union rgba;
2508 
2509     d3dcolor_to_pipe_color_union(&rgba, color);
2510     surf = NineSurface9_GetSurface(surface, 0);
2511     context->pipe->clear_render_target(context->pipe, surf, &rgba, x, y, width, height, false);
2512 }
2513 
2514 CSMT_ITEM_NO_WAIT(nine_context_gen_mipmap,
2515                   ARG_BIND_REF(struct NineUnknown, dst),
2516                   ARG_BIND_RES(struct pipe_resource, res),
2517                   ARG_VAL(UINT, base_level),
2518                   ARG_VAL(UINT, last_level),
2519                   ARG_VAL(UINT, first_layer),
2520                   ARG_VAL(UINT, last_layer),
2521                   ARG_VAL(UINT, filter))
2522 {
2523     struct nine_context *context = &device->context;
2524 
2525     /* We just bind dst for the bind count */
2526     (void)dst;
2527 
2528     util_gen_mipmap(context->pipe, res, res->format, base_level,
2529                     last_level, first_layer, last_layer, filter);
2530 }
2531 
2532 CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_range_upload,
2533                                ARG_BIND_REF(struct NineUnknown, src_ref),
2534                                ARG_BIND_RES(struct pipe_resource, res),
2535                                ARG_VAL(unsigned, offset),
2536                                ARG_VAL(unsigned, size),
2537                                ARG_VAL(unsigned, usage),
2538                                ARG_VAL(const void *, data))
2539 {
2540     struct nine_context *context = &device->context;
2541 
2542     /* Binding src_ref avoids release before upload */
2543     (void)src_ref;
2544 
2545     context->pipe->buffer_subdata(context->pipe, res, usage, offset, size, data);
2546 }
2547 
2548 CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_box_upload,
2549                                ARG_BIND_REF(struct NineUnknown, src_ref),
2550                                ARG_BIND_RES(struct pipe_resource, res),
2551                                ARG_VAL(unsigned, level),
2552                                ARG_COPY_REF(struct pipe_box, dst_box),
2553                                ARG_VAL(enum pipe_format, src_format),
2554                                ARG_VAL(const void *, src),
2555                                ARG_VAL(unsigned, src_stride),
2556                                ARG_VAL(unsigned, src_layer_stride),
2557                                ARG_COPY_REF(struct pipe_box, src_box))
2558 {
2559     struct nine_context *context = &device->context;
2560     struct pipe_context *pipe = context->pipe;
2561     struct pipe_transfer *transfer = NULL;
2562     uint8_t *map;
2563 
2564     /* Binding src_ref avoids release before upload */
2565     (void)src_ref;
2566 
2567     if (is_ATI1_ATI2(src_format)) {
2568         const unsigned bw = util_format_get_blockwidth(src_format);
2569         const unsigned bh = util_format_get_blockheight(src_format);
2570         /* For these formats, the allocated surface can be too small to contain
2571          * a block. Yet we can be asked to upload such surfaces.
2572          * It is ok for these surfaces to have buggy content,
2573          * but we should avoid crashing.
2574          * Calling util_format_translate_3d would read out of bounds. */
2575         if (dst_box->width < bw || dst_box->height < bh)
2576             return;
2577     }
2578 
2579     map = pipe->texture_map(pipe,
2580                              res,
2581                              level,
2582                              PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE,
2583                              dst_box, &transfer);
2584     if (!map)
2585         return;
2586 
2587     /* Note: if the formats are the same, this falls back
2588      * to a plain memcpy */
2589     (void) util_format_translate_3d(res->format,
2590                                     map, transfer->stride,
2591                                     transfer->layer_stride,
2592                                     0, 0, 0,
2593                                     src_format,
2594                                     src, src_stride,
2595                                     src_layer_stride,
2596                                     src_box->x, src_box->y, src_box->z,
2597                                     dst_box->width, dst_box->height,
2598                                     dst_box->depth);
2599 
2600     pipe_texture_unmap(pipe, transfer);
2601 }
2602 
2603 struct pipe_query *
2604 nine_context_create_query(struct NineDevice9 *device, unsigned query_type)
2605 {
2606     struct pipe_context *pipe;
2607     struct pipe_query *res;
2608 
2609     pipe = nine_context_get_pipe_acquire(device);
2610     res = pipe->create_query(pipe, query_type, 0);
2611     nine_context_get_pipe_release(device);
2612     return res;
2613 }
2614 
2615 CSMT_ITEM_DO_WAIT(nine_context_destroy_query,
2616                   ARG_REF(struct pipe_query, query))
2617 {
2618     struct nine_context *context = &device->context;
2619 
2620     context->pipe->destroy_query(context->pipe, query);
2621 }
2622 
2623 CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_begin_query,
2624                                ARG_REF(struct pipe_query, query))
2625 {
2626     struct nine_context *context = &device->context;
2627 
2628     (void) context->pipe->begin_query(context->pipe, query);
2629 }
2630 
2631 CSMT_ITEM_NO_WAIT_WITH_COUNTER(nine_context_end_query,
2632                                ARG_REF(struct pipe_query, query))
2633 {
2634     struct nine_context *context = &device->context;
2635 
2636     (void) context->pipe->end_query(context->pipe, query);
2637 }
2638 
2639 boolean
2640 nine_context_get_query_result(struct NineDevice9 *device, struct pipe_query *query,
2641                               unsigned *counter, boolean flush, boolean wait,
2642                               union pipe_query_result *result)
2643 {
2644     struct pipe_context *pipe;
2645     boolean ret;
2646 
2647     if (wait)
2648         nine_csmt_process(device);
2649     else if (p_atomic_read(counter) > 0) {
2650         if (flush && device->csmt_active)
2651             nine_queue_flush(device->csmt_ctx->pool);
2652         DBG("Pending begin/end. Returning\n");
2653         return false;
2654     }
2655 
2656     pipe = nine_context_get_pipe_acquire(device);
2657     ret = pipe->get_query_result(pipe, query, wait, result);
2658     nine_context_get_pipe_release(device);
2659 
2660     DBG("Query result %s\n", ret ? "found" : "not yet available");
2661     return ret;
2662 }
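/* Minimal usage sketch of the query helpers above (illustrative only: `dev`
 * is a placeholder device pointer, and the begin/end wrappers are assumed to
 * expose the pending counter that CSMT_ITEM_NO_WAIT_WITH_COUNTER adds to
 * their public signature):
 *
 *     struct pipe_query *q =
 *         nine_context_create_query(dev, PIPE_QUERY_OCCLUSION_COUNTER);
 *     unsigned pending = 0;
 *     union pipe_query_result result;
 *
 *     nine_context_begin_query(dev, &pending, q);
 *     // ... draws queued through the CSMT context ...
 *     nine_context_end_query(dev, &pending, q);
 *
 *     // Poll first (flush so the worker sees the commands), then block only
 *     // if a result is really required.
 *     if (!nine_context_get_query_result(dev, q, &pending, TRUE, FALSE, &result))
 *         (void) nine_context_get_query_result(dev, q, &pending, FALSE, TRUE, &result);
 *
 *     nine_context_destroy_query(dev, q);
 */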
2663 
2664 CSMT_ITEM_NO_WAIT(nine_context_pipe_flush)
2665 {
2666     struct nine_context *context = &device->context;
2667 
2668     context->pipe->flush(context->pipe, NULL, PIPE_FLUSH_ASYNC);
2669 }
2670 
2671 /* State defaults */
2672 
2673 static const DWORD nine_render_state_defaults[NINED3DRS_LAST + 1] =
2674 {
2675  /* [D3DRS_ZENABLE] = D3DZB_TRUE; wine: auto_depth_stencil */
2676     [D3DRS_ZENABLE] = D3DZB_FALSE,
2677     [D3DRS_FILLMODE] = D3DFILL_SOLID,
2678     [D3DRS_SHADEMODE] = D3DSHADE_GOURAUD,
2679 /*  [D3DRS_LINEPATTERN] = 0x00000000, */
2680     [D3DRS_ZWRITEENABLE] = TRUE,
2681     [D3DRS_ALPHATESTENABLE] = FALSE,
2682     [D3DRS_LASTPIXEL] = TRUE,
2683     [D3DRS_SRCBLEND] = D3DBLEND_ONE,
2684     [D3DRS_DESTBLEND] = D3DBLEND_ZERO,
2685     [D3DRS_CULLMODE] = D3DCULL_CCW,
2686     [D3DRS_ZFUNC] = D3DCMP_LESSEQUAL,
2687     [D3DRS_ALPHAFUNC] = D3DCMP_ALWAYS,
2688     [D3DRS_ALPHAREF] = 0,
2689     [D3DRS_DITHERENABLE] = FALSE,
2690     [D3DRS_ALPHABLENDENABLE] = FALSE,
2691     [D3DRS_FOGENABLE] = FALSE,
2692     [D3DRS_SPECULARENABLE] = FALSE,
2693 /*  [D3DRS_ZVISIBLE] = 0, */
2694     [D3DRS_FOGCOLOR] = 0,
2695     [D3DRS_FOGTABLEMODE] = D3DFOG_NONE,
2696     [D3DRS_FOGSTART] = 0x00000000,
2697     [D3DRS_FOGEND] = 0x3F800000,
2698     [D3DRS_FOGDENSITY] = 0x3F800000,
2699 /*  [D3DRS_EDGEANTIALIAS] = FALSE, */
2700     [D3DRS_RANGEFOGENABLE] = FALSE,
2701     [D3DRS_STENCILENABLE] = FALSE,
2702     [D3DRS_STENCILFAIL] = D3DSTENCILOP_KEEP,
2703     [D3DRS_STENCILZFAIL] = D3DSTENCILOP_KEEP,
2704     [D3DRS_STENCILPASS] = D3DSTENCILOP_KEEP,
2705     [D3DRS_STENCILREF] = 0,
2706     [D3DRS_STENCILMASK] = 0xFFFFFFFF,
2707     [D3DRS_STENCILFUNC] = D3DCMP_ALWAYS,
2708     [D3DRS_STENCILWRITEMASK] = 0xFFFFFFFF,
2709     [D3DRS_TEXTUREFACTOR] = 0xFFFFFFFF,
2710     [D3DRS_WRAP0] = 0,
2711     [D3DRS_WRAP1] = 0,
2712     [D3DRS_WRAP2] = 0,
2713     [D3DRS_WRAP3] = 0,
2714     [D3DRS_WRAP4] = 0,
2715     [D3DRS_WRAP5] = 0,
2716     [D3DRS_WRAP6] = 0,
2717     [D3DRS_WRAP7] = 0,
2718     [D3DRS_CLIPPING] = TRUE,
2719     [D3DRS_LIGHTING] = TRUE,
2720     [D3DRS_AMBIENT] = 0,
2721     [D3DRS_FOGVERTEXMODE] = D3DFOG_NONE,
2722     [D3DRS_COLORVERTEX] = TRUE,
2723     [D3DRS_LOCALVIEWER] = TRUE,
2724     [D3DRS_NORMALIZENORMALS] = FALSE,
2725     [D3DRS_DIFFUSEMATERIALSOURCE] = D3DMCS_COLOR1,
2726     [D3DRS_SPECULARMATERIALSOURCE] = D3DMCS_COLOR2,
2727     [D3DRS_AMBIENTMATERIALSOURCE] = D3DMCS_MATERIAL,
2728     [D3DRS_EMISSIVEMATERIALSOURCE] = D3DMCS_MATERIAL,
2729     [D3DRS_VERTEXBLEND] = D3DVBF_DISABLE,
2730     [D3DRS_CLIPPLANEENABLE] = 0,
2731 /*  [D3DRS_SOFTWAREVERTEXPROCESSING] = FALSE, */
2732     [D3DRS_POINTSIZE] = 0x3F800000,
2733     [D3DRS_POINTSIZE_MIN] = 0x3F800000,
2734     [D3DRS_POINTSPRITEENABLE] = FALSE,
2735     [D3DRS_POINTSCALEENABLE] = FALSE,
2736     [D3DRS_POINTSCALE_A] = 0x3F800000,
2737     [D3DRS_POINTSCALE_B] = 0x00000000,
2738     [D3DRS_POINTSCALE_C] = 0x00000000,
2739     [D3DRS_MULTISAMPLEANTIALIAS] = TRUE,
2740     [D3DRS_MULTISAMPLEMASK] = 0xFFFFFFFF,
2741     [D3DRS_PATCHEDGESTYLE] = D3DPATCHEDGE_DISCRETE,
2742 /*  [D3DRS_PATCHSEGMENTS] = 0x3F800000, */
2743     [D3DRS_DEBUGMONITORTOKEN] = 0xDEADCAFE,
2744     [D3DRS_POINTSIZE_MAX] = 0x3F800000, /* depends on cap */
2745     [D3DRS_INDEXEDVERTEXBLENDENABLE] = FALSE,
2746     [D3DRS_COLORWRITEENABLE] = 0x0000000f,
2747     [D3DRS_TWEENFACTOR] = 0x00000000,
2748     [D3DRS_BLENDOP] = D3DBLENDOP_ADD,
2749     [D3DRS_POSITIONDEGREE] = D3DDEGREE_CUBIC,
2750     [D3DRS_NORMALDEGREE] = D3DDEGREE_LINEAR,
2751     [D3DRS_SCISSORTESTENABLE] = FALSE,
2752     [D3DRS_SLOPESCALEDEPTHBIAS] = 0,
2753     [D3DRS_MINTESSELLATIONLEVEL] = 0x3F800000,
2754     [D3DRS_MAXTESSELLATIONLEVEL] = 0x3F800000,
2755     [D3DRS_ANTIALIASEDLINEENABLE] = FALSE,
2756     [D3DRS_ADAPTIVETESS_X] = 0x00000000,
2757     [D3DRS_ADAPTIVETESS_Y] = 0x00000000,
2758     [D3DRS_ADAPTIVETESS_Z] = 0x3F800000,
2759     [D3DRS_ADAPTIVETESS_W] = 0x00000000,
2760     [D3DRS_ENABLEADAPTIVETESSELLATION] = FALSE,
2761     [D3DRS_TWOSIDEDSTENCILMODE] = FALSE,
2762     [D3DRS_CCW_STENCILFAIL] = D3DSTENCILOP_KEEP,
2763     [D3DRS_CCW_STENCILZFAIL] = D3DSTENCILOP_KEEP,
2764     [D3DRS_CCW_STENCILPASS] = D3DSTENCILOP_KEEP,
2765     [D3DRS_CCW_STENCILFUNC] = D3DCMP_ALWAYS,
2766     [D3DRS_COLORWRITEENABLE1] = 0x0000000F,
2767     [D3DRS_COLORWRITEENABLE2] = 0x0000000F,
2768     [D3DRS_COLORWRITEENABLE3] = 0x0000000F,
2769     [D3DRS_BLENDFACTOR] = 0xFFFFFFFF,
2770     [D3DRS_SRGBWRITEENABLE] = 0,
2771     [D3DRS_DEPTHBIAS] = 0,
2772     [D3DRS_WRAP8] = 0,
2773     [D3DRS_WRAP9] = 0,
2774     [D3DRS_WRAP10] = 0,
2775     [D3DRS_WRAP11] = 0,
2776     [D3DRS_WRAP12] = 0,
2777     [D3DRS_WRAP13] = 0,
2778     [D3DRS_WRAP14] = 0,
2779     [D3DRS_WRAP15] = 0,
2780     [D3DRS_SEPARATEALPHABLENDENABLE] = FALSE,
2781     [D3DRS_SRCBLENDALPHA] = D3DBLEND_ONE,
2782     [D3DRS_DESTBLENDALPHA] = D3DBLEND_ZERO,
2783     [D3DRS_BLENDOPALPHA] = D3DBLENDOP_ADD,
2784     [NINED3DRS_VSPOINTSIZE] = FALSE,
2785     [NINED3DRS_RTMASK] = 0xf,
2786     [NINED3DRS_ALPHACOVERAGE] = FALSE,
2787     [NINED3DRS_MULTISAMPLE] = FALSE,
2788     [NINED3DRS_FETCH4] = 0
2789 };
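/* The hexadecimal defaults above for floating-point render states are raw
 * IEEE-754 bit patterns, matching how such states are written with fui()
 * below: 0x3F800000 is 1.0f, so e.g. D3DRS_POINTSIZE, D3DRS_FOGEND and the
 * tessellation levels all default to 1.0. */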
2790 static const DWORD nine_tex_stage_state_defaults[NINED3DTSS_LAST + 1] =
2791 {
2792     [D3DTSS_COLOROP] = D3DTOP_DISABLE,
2793     [D3DTSS_ALPHAOP] = D3DTOP_DISABLE,
2794     [D3DTSS_COLORARG1] = D3DTA_TEXTURE,
2795     [D3DTSS_COLORARG2] = D3DTA_CURRENT,
2796     [D3DTSS_COLORARG0] = D3DTA_CURRENT,
2797     [D3DTSS_ALPHAARG1] = D3DTA_TEXTURE,
2798     [D3DTSS_ALPHAARG2] = D3DTA_CURRENT,
2799     [D3DTSS_ALPHAARG0] = D3DTA_CURRENT,
2800     [D3DTSS_RESULTARG] = D3DTA_CURRENT,
2801     [D3DTSS_BUMPENVMAT00] = 0,
2802     [D3DTSS_BUMPENVMAT01] = 0,
2803     [D3DTSS_BUMPENVMAT10] = 0,
2804     [D3DTSS_BUMPENVMAT11] = 0,
2805     [D3DTSS_BUMPENVLSCALE] = 0,
2806     [D3DTSS_BUMPENVLOFFSET] = 0,
2807     [D3DTSS_TEXCOORDINDEX] = 0,
2808     [D3DTSS_TEXTURETRANSFORMFLAGS] = D3DTTFF_DISABLE,
2809 };
2810 static const DWORD nine_samp_state_defaults[NINED3DSAMP_LAST + 1] =
2811 {
2812     [D3DSAMP_ADDRESSU] = D3DTADDRESS_WRAP,
2813     [D3DSAMP_ADDRESSV] = D3DTADDRESS_WRAP,
2814     [D3DSAMP_ADDRESSW] = D3DTADDRESS_WRAP,
2815     [D3DSAMP_BORDERCOLOR] = 0,
2816     [D3DSAMP_MAGFILTER] = D3DTEXF_POINT,
2817     [D3DSAMP_MINFILTER] = D3DTEXF_POINT,
2818     [D3DSAMP_MIPFILTER] = D3DTEXF_NONE,
2819     [D3DSAMP_MIPMAPLODBIAS] = 0,
2820     [D3DSAMP_MAXMIPLEVEL] = 0,
2821     [D3DSAMP_MAXANISOTROPY] = 1,
2822     [D3DSAMP_SRGBTEXTURE] = 0,
2823     [D3DSAMP_ELEMENTINDEX] = 0,
2824     [D3DSAMP_DMAPOFFSET] = 0,
2825     [NINED3DSAMP_MINLOD] = 0,
2826     [NINED3DSAMP_SHADOW] = 0,
2827     [NINED3DSAMP_CUBETEX] = 0
2828 };
2829 
2830 /* Note: the following 4 functions assume there are no
2831  * pending commands */
2832 
2833 void nine_state_restore_non_cso(struct NineDevice9 *device)
2834 {
2835     struct nine_context *context = &device->context;
2836 
2837     context->changed.group = NINE_STATE_ALL;
2838     context->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1;
2839     context->changed.ucp = TRUE;
2840     context->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS;
2841     context->enabled_sampler_count_vs = 0;
2842     context->enabled_sampler_count_ps = 0;
2843 }
2844 
2845 void
2846 nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
2847                         boolean is_reset)
2848 {
2849     struct nine_state *state = &device->state;
2850     struct nine_context *context = &device->context;
2851     unsigned s;
2852 
2853     /* Initialize defaults.
2854      */
2855     memcpy(context->rs, nine_render_state_defaults, sizeof(context->rs));
2856 
2857     for (s = 0; s < ARRAY_SIZE(state->ff.tex_stage); ++s) {
2858         memcpy(&state->ff.tex_stage[s], nine_tex_stage_state_defaults,
2859                sizeof(state->ff.tex_stage[s]));
2860         state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] = s;
2861     }
2862     state->ff.tex_stage[0][D3DTSS_COLOROP] = D3DTOP_MODULATE;
2863     state->ff.tex_stage[0][D3DTSS_ALPHAOP] = D3DTOP_SELECTARG1;
2864 
2865     for (s = 0; s < ARRAY_SIZE(state->ff.tex_stage); ++s)
2866         memcpy(&context->ff.tex_stage[s], state->ff.tex_stage[s],
2867                sizeof(state->ff.tex_stage[s]));
2868 
2869     memset(&context->bumpmap_vars, 0, sizeof(context->bumpmap_vars));
2870 
2871     for (s = 0; s < NINE_MAX_SAMPLERS; ++s) {
2872         memcpy(&context->samp[s], nine_samp_state_defaults,
2873                sizeof(context->samp[s]));
2874         memcpy(&state->samp_advertised[s], nine_samp_state_defaults,
2875                sizeof(state->samp_advertised[s]));
2876     }
2877 
2878     memset(state->vs_const_f, 0, VS_CONST_F_SIZE(device));
2879     memset(context->vs_const_f, 0, device->vs_const_size);
2880     if (context->vs_const_f_swvp)
2881         memset(context->vs_const_f_swvp, 0, NINE_MAX_CONST_F_SWVP * sizeof(float[4]));
2882     memset(state->vs_const_i, 0, VS_CONST_I_SIZE(device));
2883     memset(context->vs_const_i, 0, VS_CONST_I_SIZE(device));
2884     memset(state->vs_const_b, 0, VS_CONST_B_SIZE(device));
2885     memset(context->vs_const_b, 0, VS_CONST_B_SIZE(device));
2886     memset(state->ps_const_f, 0, device->ps_const_size);
2887     memset(context->ps_const_f, 0, device->ps_const_size);
2888     memset(state->ps_const_i, 0, sizeof(state->ps_const_i));
2889     memset(context->ps_const_i, 0, sizeof(context->ps_const_i));
2890     memset(state->ps_const_b, 0, sizeof(state->ps_const_b));
2891     memset(context->ps_const_b, 0, sizeof(context->ps_const_b));
2892 
2893     /* Cap dependent initial state:
2894      */
2895     context->rs[D3DRS_POINTSIZE_MAX] = fui(caps->MaxPointSize);
2896 
2897     memcpy(state->rs_advertised, context->rs, sizeof(context->rs));
2898 
2899     /* Set changed flags to initialize driver.
2900      */
2901     context->changed.group = NINE_STATE_ALL;
2902     context->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1;
2903     context->changed.ucp = TRUE;
2904 
2905     context->ff.changed.transform[0] = ~0;
2906     context->ff.changed.transform[D3DTS_WORLD / 32] |= 1 << (D3DTS_WORLD % 32);
2907 
2908     if (!is_reset) {
2909         state->viewport.MinZ = context->viewport.MinZ = 0.0f;
2910         state->viewport.MaxZ = context->viewport.MaxZ = 1.0f;
2911     }
2912 
2913     for (s = 0; s < NINE_MAX_SAMPLERS; ++s)
2914         context->changed.sampler[s] = ~0;
2915 
2916     if (!is_reset) {
2917         context->dummy_vbo_bound_at = -1;
2918         context->vbo_bound_done = FALSE;
2919     }
2920 }
2921 
2922 void
2923 nine_device_state_clear(struct NineDevice9 *device)
2924 {
2925     struct nine_state *state = &device->state;
2926     unsigned i;
2927 
2928     for (i = 0; i < ARRAY_SIZE(state->rt); ++i)
2929        nine_bind(&state->rt[i], NULL);
2930     nine_bind(&state->ds, NULL);
2931     nine_bind(&state->vs, NULL);
2932     nine_bind(&state->ps, NULL);
2933     nine_bind(&state->vdecl, NULL);
2934     for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
2935         NineBindBufferToDevice(device,
2936                                (struct NineBuffer9 **)&state->stream[i],
2937                                NULL);
2938     NineBindBufferToDevice(device,
2939                            (struct NineBuffer9 **)&state->idxbuf,
2940                            NULL);
2941 
2942     for (i = 0; i < NINE_MAX_SAMPLERS; ++i)
2943         NineBindTextureToDevice(device, &state->texture[i], NULL);
2944 }
2945 
2946 void
2947 nine_context_clear(struct NineDevice9 *device)
2948 {
2949     struct nine_context *context = &device->context;
2950     struct pipe_context *pipe = context->pipe;
2951     struct cso_context *cso = context->cso;
2952     unsigned i;
2953 
2954     /* Early device ctor failure. Nothing to do */
2955     if (!pipe || !cso)
2956         return;
2957 
2958     pipe->bind_vs_state(pipe, NULL);
2959     pipe->bind_fs_state(pipe, NULL);
2960 
2961     /* Don't unbind constant buffers, they're device-private and
2962      * do not change on Reset.
2963      */
2964 
2965     cso_set_samplers(cso, PIPE_SHADER_VERTEX, 0, NULL);
2966     cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 0, NULL);
2967     context->enabled_sampler_count_vs = 0;
2968     context->enabled_sampler_count_ps = 0;
2969 
2970     pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0, 0,
2971                             NINE_MAX_SAMPLERS_VS, false, NULL);
2972     pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 0,
2973                             NINE_MAX_SAMPLERS_PS, false, NULL);
2974 
2975     pipe->set_vertex_buffers(pipe, 0, 0, device->caps.MaxStreams, false, NULL);
2976 
2977     for (i = 0; i < ARRAY_SIZE(context->rt); ++i)
2978        nine_bind(&context->rt[i], NULL);
2979     nine_bind(&context->ds, NULL);
2980     nine_bind(&context->vs, NULL);
2981     nine_bind(&context->ps, NULL);
2982     nine_bind(&context->vdecl, NULL);
2983     for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
2984         pipe_vertex_buffer_unreference(&context->vtxbuf[i]);
2985     pipe_resource_reference(&context->idxbuf, NULL);
2986     pipe_resource_reference(&context->pipe_data.cb_vs.buffer, NULL);
2987     pipe_resource_reference(&context->pipe_data.cb_ps.buffer, NULL);
2988 
2989     for (i = 0; i < NINE_MAX_SAMPLERS; ++i) {
2990         context->texture[i].enabled = FALSE;
2991         pipe_resource_reference(&context->texture[i].resource,
2992                                 NULL);
2993         pipe_sampler_view_reference(&context->texture[i].view[0],
2994                                     NULL);
2995         pipe_sampler_view_reference(&context->texture[i].view[1],
2996                                     NULL);
2997     }
2998 }
2999 
3000 void
3001 nine_context_update_state(struct NineDevice9 *device)
3002 {
3003     nine_update_state(device);
3004 }
3005 
3006 void
3007 nine_state_init_sw(struct NineDevice9 *device)
3008 {
3009     struct pipe_context *pipe_sw = device->pipe_sw;
3010     struct pipe_rasterizer_state rast;
3011     struct pipe_blend_state blend;
3012     struct pipe_depth_stencil_alpha_state dsa;
3013     struct pipe_framebuffer_state fb;
3014 
3015     /* Only used with Streamout */
3016     memset(&rast, 0, sizeof(rast));
3017     rast.rasterizer_discard = true;
3018     rast.point_quad_rasterization = 1; /* to make llvmpipe happy */
3019     cso_set_rasterizer(device->cso_sw, &rast);
3020 
3021     /* dummy settings */
3022     memset(&blend, 0, sizeof(blend));
3023     memset(&dsa, 0, sizeof(dsa));
3024     memset(&fb, 0, sizeof(fb));
3025     cso_set_blend(device->cso_sw, &blend);
3026     cso_set_depth_stencil_alpha(device->cso_sw, &dsa);
3027     cso_set_framebuffer(device->cso_sw, &fb);
3028     cso_set_viewport_dims(device->cso_sw, 1.0, 1.0, false);
3029     cso_set_fragment_shader_handle(device->cso_sw, util_make_empty_fragment_shader(pipe_sw));
3030 }
3031 
3032 /* There is duplication with update_vertex_elements.
3033  * TODO: Share the code */
3034 
3035 static void
3036 update_vertex_elements_sw(struct NineDevice9 *device)
3037 {
3038     struct nine_state *state = &device->state;
3039     const struct NineVertexDeclaration9 *vdecl = device->state.vdecl;
3040     const struct NineVertexShader9 *vs;
3041     unsigned n, b, i;
3042     int index;
3043     char vdecl_index_map[16]; /* vs->num_inputs <= 16 */
3044     char used_streams[device->caps.MaxStreams];
3045     int dummy_vbo_stream = -1;
3046     BOOL need_dummy_vbo = FALSE;
3047     struct cso_velems_state ve;
3048     bool programmable_vs = state->vs && !(state->vdecl && state->vdecl->position_t);
3049 
3050     memset(vdecl_index_map, -1, 16);
3051     memset(used_streams, 0, device->caps.MaxStreams);
3052     vs = programmable_vs ? device->state.vs : device->ff.vs;
3053 
3054     if (vdecl) {
3055         for (n = 0; n < vs->num_inputs; ++n) {
3056             DBG("looking up input %u (usage %u) from vdecl(%p)\n",
3057                 n, vs->input_map[n].ndecl, vdecl);
3058 
3059             for (i = 0; i < vdecl->nelems; i++) {
3060                 if (vdecl->usage_map[i] == vs->input_map[n].ndecl) {
3061                     vdecl_index_map[n] = i;
3062                     used_streams[vdecl->elems[i].vertex_buffer_index] = 1;
3063                     break;
3064                 }
3065             }
3066             if (vdecl_index_map[n] < 0)
3067                 need_dummy_vbo = TRUE;
3068         }
3069     } else {
3070         /* No vertex declaration. This is unlikely to ever happen in
3071          * practice, but we must not crash on it */
3072         need_dummy_vbo = TRUE;
3073     }
3074 
3075     if (need_dummy_vbo) {
3076         for (i = 0; i < device->caps.MaxStreams; i++ ) {
3077             if (!used_streams[i]) {
3078                 dummy_vbo_stream = i;
3079                 break;
3080             }
3081         }
3082     }
3083     /* TODO handle dummy_vbo */
3084     assert (!need_dummy_vbo);
3085 
3086     for (n = 0; n < vs->num_inputs; ++n) {
3087         index = vdecl_index_map[n];
3088         if (index >= 0) {
3089             ve.velems[n] = vdecl->elems[index];
3090             b = ve.velems[n].vertex_buffer_index;
3091             /* XXX wine just uses 1 here: */
3092             if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
3093                 ve.velems[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF;
3094         } else {
3095             /* If the vertex declaration is incomplete compared to what the
3096              * vertex shader needs, we bind a dummy vbo filled with 0 0 0 0.
3097              * This is not specified by the spec, but it is the behaviour
3098              * observed on Windows */
3099             ve.velems[n].vertex_buffer_index = dummy_vbo_stream;
3100             ve.velems[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
3101             ve.velems[n].src_offset = 0;
3102             ve.velems[n].instance_divisor = 0;
3103             ve.velems[n].dual_slot = false;
3104         }
3105     }
3106 
3107     ve.count = vs->num_inputs;
3108     cso_set_vertex_elements(device->cso_sw, &ve);
3109 }
3110 
3111 static void
3112 update_vertex_buffers_sw(struct NineDevice9 *device, int start_vertice, int num_vertices)
3113 {
3114     struct pipe_context *pipe = nine_context_get_pipe_acquire(device);
3115     struct pipe_context *pipe_sw = device->pipe_sw;
3116     struct nine_state *state = &device->state;
3117     struct nine_state_sw_internal *sw_internal = &device->state_sw_internal;
3118     struct pipe_vertex_buffer vtxbuf;
3119     uint32_t mask = 0xf;
3120     unsigned i;
3121 
3122     DBG("mask=%x\n", mask);
3123 
3124     /* TODO: handle dummy_vbo_bound_at */
3125 
3126     for (i = 0; mask; mask >>= 1, ++i) {
3127         if (mask & 1) {
3128             if (state->stream[i]) {
3129                 unsigned offset;
3130                 struct pipe_resource *buf;
3131                 struct pipe_box box;
3132                 void *userbuf;
3133 
3134                 vtxbuf = state->vtxbuf[i];
3135                 buf = NineVertexBuffer9_GetResource(state->stream[i], &offset);
3136 
3137                 DBG("Locking %p (offset %d, length %d)\n", buf,
3138                     vtxbuf.buffer_offset, num_vertices * vtxbuf.stride);
3139 
3140                 u_box_1d(vtxbuf.buffer_offset + offset + start_vertice * vtxbuf.stride,
3141                          num_vertices * vtxbuf.stride, &box);
3142 
3143                 userbuf = pipe->buffer_map(pipe, buf, 0, PIPE_MAP_READ, &box,
3144                                              &(sw_internal->transfers_so[i]));
3145                 vtxbuf.is_user_buffer = true;
3146                 vtxbuf.buffer.user = userbuf;
3147 
3148                 if (!device->driver_caps.user_sw_vbufs) {
3149                     vtxbuf.buffer.resource = NULL;
3150                     vtxbuf.is_user_buffer = false;
3151                     u_upload_data(device->pipe_sw->stream_uploader,
3152                                   0,
3153                                   box.width,
3154                                   16,
3155                                   userbuf,
3156                                   &(vtxbuf.buffer_offset),
3157                                   &(vtxbuf.buffer.resource));
3158                     u_upload_unmap(device->pipe_sw->stream_uploader);
3159                 }
3160                 pipe_sw->set_vertex_buffers(pipe_sw, i, 1, 0, false, &vtxbuf);
3161                 pipe_vertex_buffer_unreference(&vtxbuf);
3162             } else
3163                 pipe_sw->set_vertex_buffers(pipe_sw, i, 0, 1, false, NULL);
3164         }
3165     }
3166     nine_context_get_pipe_release(device);
3167 }
3168 
3169 static void
3170 update_vs_constants_sw(struct NineDevice9 *device)
3171 {
3172     struct nine_state *state = &device->state;
3173     struct pipe_context *pipe_sw = device->pipe_sw;
3174 
3175     DBG("updating\n");
3176 
3177     {
3178         struct pipe_constant_buffer cb;
3179         const void *buf;
3180 
3181         cb.buffer = NULL;
3182         cb.buffer_offset = 0;
3183         cb.buffer_size = 4096 * sizeof(float[4]);
3184         cb.user_buffer = state->vs_const_f;
3185 
3186         if (state->vs->lconstf.ranges) {
3187             const struct nine_lconstf *lconstf =  &device->state.vs->lconstf;
3188             const struct nine_range *r = lconstf->ranges;
3189             unsigned n = 0;
3190             float *dst = device->state.vs_lconstf_temp;
3191             float *src = (float *)cb.user_buffer;
3192             memcpy(dst, src, 8192 * sizeof(float[4]));
3193             while (r) {
3194                 unsigned p = r->bgn;
3195                 unsigned c = r->end - r->bgn;
3196                 memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
3197                 n += c;
3198                 r = r->next;
3199             }
3200             cb.user_buffer = dst;
3201         }
3202 
3203         buf = cb.user_buffer;
3204 
3205         pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 0, false, &cb);
3206         if (cb.buffer)
3207             pipe_resource_reference(&cb.buffer, NULL);
3208 
3209         cb.user_buffer = (char *)buf + 4096 * sizeof(float[4]);
3210 
3211         pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 1, false, &cb);
3212         if (cb.buffer)
3213             pipe_resource_reference(&cb.buffer, NULL);
3214     }
3215 
3216     {
3217         struct pipe_constant_buffer cb;
3218 
3219         cb.buffer = NULL;
3220         cb.buffer_offset = 0;
3221         cb.buffer_size = 2048 * sizeof(float[4]);
3222         cb.user_buffer = state->vs_const_i;
3223 
3224         pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 2, false, &cb);
3225         if (cb.buffer)
3226             pipe_resource_reference(&cb.buffer, NULL);
3227     }
3228 
3229     {
3230         struct pipe_constant_buffer cb;
3231 
3232         cb.buffer = NULL;
3233         cb.buffer_offset = 0;
3234         cb.buffer_size = 512 * sizeof(float[4]);
3235         cb.user_buffer = state->vs_const_b;
3236 
3237         pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 3, false, &cb);
3238         if (cb.buffer)
3239             pipe_resource_reference(&cb.buffer, NULL);
3240     }
3241 
3242     {
3243         struct pipe_constant_buffer cb;
3244         const D3DVIEWPORT9 *vport = &device->state.viewport;
3245         float viewport_data[8] = {(float)vport->Width * 0.5f,
3246             (float)vport->Height * -0.5f, vport->MaxZ - vport->MinZ, 0.f,
3247             (float)vport->Width * 0.5f + (float)vport->X,
3248             (float)vport->Height * 0.5f + (float)vport->Y,
3249             vport->MinZ, 0.f};
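        /* This looks like the scale/offset pair of the standard D3D9
         * viewport transform, consumed by the ProcessVertices shader
         * variant (a reading of the packing above):
         *
         *   screen.x = ndc.x *  Width/2       + (Width/2  + X)
         *   screen.y = ndc.y * -Height/2      + (Height/2 + Y)
         *   screen.z = ndc.z * (MaxZ - MinZ)  +  MinZ
         */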
3250 
3251         cb.buffer = NULL;
3252         cb.buffer_offset = 0;
3253         cb.buffer_size = 2 * sizeof(float[4]);
3254         cb.user_buffer = viewport_data;
3255 
3256         {
3257             u_upload_data(device->pipe_sw->const_uploader,
3258                           0,
3259                           cb.buffer_size,
3260                           16,
3261                           cb.user_buffer,
3262                           &(cb.buffer_offset),
3263                           &(cb.buffer));
3264             u_upload_unmap(device->pipe_sw->const_uploader);
3265             cb.user_buffer = NULL;
3266         }
3267 
3268         pipe_sw->set_constant_buffer(pipe_sw, PIPE_SHADER_VERTEX, 4, false, &cb);
3269         if (cb.buffer)
3270             pipe_resource_reference(&cb.buffer, NULL);
3271     }
3272 
3273 }
3274 
3275 void
3276 nine_state_prepare_draw_sw(struct NineDevice9 *device, struct NineVertexDeclaration9 *vdecl_out,
3277                            int start_vertice, int num_vertices, struct pipe_stream_output_info *so)
3278 {
3279     struct nine_state *state = &device->state;
3280     bool programmable_vs = state->vs && !(state->vdecl && state->vdecl->position_t);
3281     struct NineVertexShader9 *vs = programmable_vs ? device->state.vs : device->ff.vs;
3282 
3283     assert(programmable_vs);
3284 
3285     DBG("Preparing draw\n");
3286     cso_set_vertex_shader_handle(device->cso_sw,
3287                                  NineVertexShader9_GetVariantProcessVertices(vs, vdecl_out, so));
3288     update_vertex_elements_sw(device);
3289     update_vertex_buffers_sw(device, start_vertice, num_vertices);
3290     update_vs_constants_sw(device);
3291     DBG("Preparation succeeded\n");
3292 }
3293 
3294 void
3295 nine_state_after_draw_sw(struct NineDevice9 *device)
3296 {
3297     struct nine_state_sw_internal *sw_internal = &device->state_sw_internal;
3298     struct pipe_context *pipe = nine_context_get_pipe_acquire(device);
3299     struct pipe_context *pipe_sw = device->pipe_sw;
3300     int i;
3301 
3302     for (i = 0; i < 4; i++) {
3303         pipe_sw->set_vertex_buffers(pipe_sw, i, 0, 1, false, NULL);
3304         if (sw_internal->transfers_so[i])
3305             pipe->buffer_unmap(pipe, sw_internal->transfers_so[i]);
3306         sw_internal->transfers_so[i] = NULL;
3307     }
3308     nine_context_get_pipe_release(device);
3309 }
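/* These two hooks bracket the software (ProcessVertices) draw path; a caller
 * is expected to pair them roughly like this (sketch only, the draw itself is
 * issued elsewhere on device->pipe_sw):
 *
 *     nine_state_prepare_draw_sw(device, vdecl_out, start, count, &so_info);
 *     // ... run the draw / stream output on device->pipe_sw ...
 *     nine_state_after_draw_sw(device);
 *
 * nine_state_after_draw_sw() unbinds the temporary vertex buffers and unmaps
 * the transfers created in update_vertex_buffers_sw(). */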
3310 
3311 void
3312 nine_state_destroy_sw(struct NineDevice9 *device)
3313 {
3314     (void) device;
3315     /* Everything destroyed with cso */
3316 }
3317 
3318 /*
3319 static const DWORD nine_render_states_pixel[] =
3320 {
3321     D3DRS_ALPHABLENDENABLE,
3322     D3DRS_ALPHAFUNC,
3323     D3DRS_ALPHAREF,
3324     D3DRS_ALPHATESTENABLE,
3325     D3DRS_ANTIALIASEDLINEENABLE,
3326     D3DRS_BLENDFACTOR,
3327     D3DRS_BLENDOP,
3328     D3DRS_BLENDOPALPHA,
3329     D3DRS_CCW_STENCILFAIL,
3330     D3DRS_CCW_STENCILPASS,
3331     D3DRS_CCW_STENCILZFAIL,
3332     D3DRS_COLORWRITEENABLE,
3333     D3DRS_COLORWRITEENABLE1,
3334     D3DRS_COLORWRITEENABLE2,
3335     D3DRS_COLORWRITEENABLE3,
3336     D3DRS_DEPTHBIAS,
3337     D3DRS_DESTBLEND,
3338     D3DRS_DESTBLENDALPHA,
3339     D3DRS_DITHERENABLE,
3340     D3DRS_FILLMODE,
3341     D3DRS_FOGDENSITY,
3342     D3DRS_FOGEND,
3343     D3DRS_FOGSTART,
3344     D3DRS_LASTPIXEL,
3345     D3DRS_SCISSORTESTENABLE,
3346     D3DRS_SEPARATEALPHABLENDENABLE,
3347     D3DRS_SHADEMODE,
3348     D3DRS_SLOPESCALEDEPTHBIAS,
3349     D3DRS_SRCBLEND,
3350     D3DRS_SRCBLENDALPHA,
3351     D3DRS_SRGBWRITEENABLE,
3352     D3DRS_STENCILENABLE,
3353     D3DRS_STENCILFAIL,
3354     D3DRS_STENCILFUNC,
3355     D3DRS_STENCILMASK,
3356     D3DRS_STENCILPASS,
3357     D3DRS_STENCILREF,
3358     D3DRS_STENCILWRITEMASK,
3359     D3DRS_STENCILZFAIL,
3360     D3DRS_TEXTUREFACTOR,
3361     D3DRS_TWOSIDEDSTENCILMODE,
3362     D3DRS_WRAP0,
3363     D3DRS_WRAP1,
3364     D3DRS_WRAP10,
3365     D3DRS_WRAP11,
3366     D3DRS_WRAP12,
3367     D3DRS_WRAP13,
3368     D3DRS_WRAP14,
3369     D3DRS_WRAP15,
3370     D3DRS_WRAP2,
3371     D3DRS_WRAP3,
3372     D3DRS_WRAP4,
3373     D3DRS_WRAP5,
3374     D3DRS_WRAP6,
3375     D3DRS_WRAP7,
3376     D3DRS_WRAP8,
3377     D3DRS_WRAP9,
3378     D3DRS_ZENABLE,
3379     D3DRS_ZFUNC,
3380     D3DRS_ZWRITEENABLE
3381 };
3382 */
3383 const uint32_t nine_render_states_pixel[(NINED3DRS_LAST + 31) / 32] =
3384 {
3385     0x0f99c380, 0x1ff00070, 0x00000000, 0x00000000,
3386     0x000000ff, 0xde01c900, 0x0003ffcf
3387 };
3388 
3389 /*
3390 static const DWORD nine_render_states_vertex[] =
3391 {
3392     D3DRS_ADAPTIVETESS_W,
3393     D3DRS_ADAPTIVETESS_X,
3394     D3DRS_ADAPTIVETESS_Y,
3395     D3DRS_ADAPTIVETESS_Z,
3396     D3DRS_AMBIENT,
3397     D3DRS_AMBIENTMATERIALSOURCE,
3398     D3DRS_CLIPPING,
3399     D3DRS_CLIPPLANEENABLE,
3400     D3DRS_COLORVERTEX,
3401     D3DRS_CULLMODE,
3402     D3DRS_DIFFUSEMATERIALSOURCE,
3403     D3DRS_EMISSIVEMATERIALSOURCE,
3404     D3DRS_ENABLEADAPTIVETESSELLATION,
3405     D3DRS_FOGCOLOR,
3406     D3DRS_FOGDENSITY,
3407     D3DRS_FOGENABLE,
3408     D3DRS_FOGEND,
3409     D3DRS_FOGSTART,
3410     D3DRS_FOGTABLEMODE,
3411     D3DRS_FOGVERTEXMODE,
3412     D3DRS_INDEXEDVERTEXBLENDENABLE,
3413     D3DRS_LIGHTING,
3414     D3DRS_LOCALVIEWER,
3415     D3DRS_MAXTESSELLATIONLEVEL,
3416     D3DRS_MINTESSELLATIONLEVEL,
3417     D3DRS_MULTISAMPLEANTIALIAS,
3418     D3DRS_MULTISAMPLEMASK,
3419     D3DRS_NORMALDEGREE,
3420     D3DRS_NORMALIZENORMALS,
3421     D3DRS_PATCHEDGESTYLE,
3422     D3DRS_POINTSCALE_A,
3423     D3DRS_POINTSCALE_B,
3424     D3DRS_POINTSCALE_C,
3425     D3DRS_POINTSCALEENABLE,
3426     D3DRS_POINTSIZE,
3427     D3DRS_POINTSIZE_MAX,
3428     D3DRS_POINTSIZE_MIN,
3429     D3DRS_POINTSPRITEENABLE,
3430     D3DRS_POSITIONDEGREE,
3431     D3DRS_RANGEFOGENABLE,
3432     D3DRS_SHADEMODE,
3433     D3DRS_SPECULARENABLE,
3434     D3DRS_SPECULARMATERIALSOURCE,
3435     D3DRS_TWEENFACTOR,
3436     D3DRS_VERTEXBLEND
3437 };
3438 */
3439 const uint32_t nine_render_states_vertex[(NINED3DRS_LAST + 31) / 32] =
3440 {
3441     0x30400200, 0x0001007c, 0x00000000, 0x00000000,
3442     0xfd9efb00, 0x01fc34cf, 0x00000000
3443 };
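/* Both bitmasks above are indexed directly by the D3DRS_* value: bit
 * (State % 32) of word (State / 32) is set when the state appears in the
 * corresponding commented list. An illustrative check (the helper name is
 * ours, not something defined in this file):
 *
 *     static inline bool
 *     nine_rs_in_group(const uint32_t *group, DWORD State)
 *     {
 *         return group[State / 32] & (1u << (State % 32));
 *     }
 *
 *     // nine_rs_in_group(nine_render_states_pixel,  D3DRS_ZENABLE)  -> true
 *     // nine_rs_in_group(nine_render_states_vertex, D3DRS_LIGHTING) -> true
 */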
3444 
3445 /* TODO: put in the right values */
3446 const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] =
3447 {
3448     [D3DRS_ZENABLE] = NINE_STATE_DSA | NINE_STATE_MULTISAMPLE,
3449     [D3DRS_FILLMODE] = NINE_STATE_RASTERIZER,
3450     [D3DRS_SHADEMODE] = NINE_STATE_RASTERIZER,
3451     [D3DRS_ZWRITEENABLE] = NINE_STATE_DSA,
3452     [D3DRS_ALPHATESTENABLE] = NINE_STATE_DSA,
3453     [D3DRS_LASTPIXEL] = NINE_STATE_RASTERIZER,
3454     [D3DRS_SRCBLEND] = NINE_STATE_BLEND,
3455     [D3DRS_DESTBLEND] = NINE_STATE_BLEND,
3456     [D3DRS_CULLMODE] = NINE_STATE_RASTERIZER,
3457     [D3DRS_ZFUNC] = NINE_STATE_DSA,
3458     [D3DRS_ALPHAREF] = NINE_STATE_DSA,
3459     [D3DRS_ALPHAFUNC] = NINE_STATE_DSA,
3460     [D3DRS_DITHERENABLE] = NINE_STATE_BLEND,
3461     [D3DRS_ALPHABLENDENABLE] = NINE_STATE_BLEND,
3462     [D3DRS_FOGENABLE] = NINE_STATE_FF_SHADER | NINE_STATE_VS_PARAMS_MISC | NINE_STATE_PS_PARAMS_MISC | NINE_STATE_PS_CONST,
3463     [D3DRS_SPECULARENABLE] = NINE_STATE_FF_LIGHTING,
3464     [D3DRS_FOGCOLOR] = NINE_STATE_FF_PS_CONSTS | NINE_STATE_PS_CONST,
3465     [D3DRS_FOGTABLEMODE] = NINE_STATE_FF_SHADER | NINE_STATE_PS_PARAMS_MISC | NINE_STATE_PS_CONST,
3466     [D3DRS_FOGSTART] = NINE_STATE_FF_VS_OTHER | NINE_STATE_FF_PS_CONSTS | NINE_STATE_PS_CONST,
3467     [D3DRS_FOGEND] = NINE_STATE_FF_VS_OTHER | NINE_STATE_FF_PS_CONSTS | NINE_STATE_PS_CONST,
3468     [D3DRS_FOGDENSITY] = NINE_STATE_FF_VS_OTHER | NINE_STATE_FF_PS_CONSTS | NINE_STATE_PS_CONST,
3469     [D3DRS_RANGEFOGENABLE] = NINE_STATE_FF_SHADER,
3470     [D3DRS_STENCILENABLE] = NINE_STATE_DSA | NINE_STATE_MULTISAMPLE,
3471     [D3DRS_STENCILFAIL] = NINE_STATE_DSA,
3472     [D3DRS_STENCILZFAIL] = NINE_STATE_DSA,
3473     [D3DRS_STENCILPASS] = NINE_STATE_DSA,
3474     [D3DRS_STENCILFUNC] = NINE_STATE_DSA,
3475     [D3DRS_STENCILREF] = NINE_STATE_STENCIL_REF,
3476     [D3DRS_STENCILMASK] = NINE_STATE_DSA,
3477     [D3DRS_STENCILWRITEMASK] = NINE_STATE_DSA,
3478     [D3DRS_TEXTUREFACTOR] = NINE_STATE_FF_PS_CONSTS,
3479     [D3DRS_WRAP0] = NINE_STATE_UNHANDLED, /* cylindrical wrap is crazy */
3480     [D3DRS_WRAP1] = NINE_STATE_UNHANDLED,
3481     [D3DRS_WRAP2] = NINE_STATE_UNHANDLED,
3482     [D3DRS_WRAP3] = NINE_STATE_UNHANDLED,
3483     [D3DRS_WRAP4] = NINE_STATE_UNHANDLED,
3484     [D3DRS_WRAP5] = NINE_STATE_UNHANDLED,
3485     [D3DRS_WRAP6] = NINE_STATE_UNHANDLED,
3486     [D3DRS_WRAP7] = NINE_STATE_UNHANDLED,
3487     [D3DRS_CLIPPING] = 0, /* software vertex processing only */
3488     [D3DRS_LIGHTING] = NINE_STATE_FF_LIGHTING,
3489     [D3DRS_AMBIENT] = NINE_STATE_FF_LIGHTING | NINE_STATE_FF_MATERIAL,
3490     [D3DRS_FOGVERTEXMODE] = NINE_STATE_FF_SHADER,
3491     [D3DRS_COLORVERTEX] = NINE_STATE_FF_LIGHTING,
3492     [D3DRS_LOCALVIEWER] = NINE_STATE_FF_LIGHTING,
3493     [D3DRS_NORMALIZENORMALS] = NINE_STATE_FF_SHADER,
3494     [D3DRS_DIFFUSEMATERIALSOURCE] = NINE_STATE_FF_LIGHTING,
3495     [D3DRS_SPECULARMATERIALSOURCE] = NINE_STATE_FF_LIGHTING,
3496     [D3DRS_AMBIENTMATERIALSOURCE] = NINE_STATE_FF_LIGHTING,
3497     [D3DRS_EMISSIVEMATERIALSOURCE] = NINE_STATE_FF_LIGHTING,
3498     [D3DRS_VERTEXBLEND] = NINE_STATE_FF_SHADER,
3499     [D3DRS_CLIPPLANEENABLE] = NINE_STATE_RASTERIZER,
3500     [D3DRS_POINTSIZE] = NINE_STATE_RASTERIZER | NINE_STATE_FF_VS_OTHER,
3501     [D3DRS_POINTSIZE_MIN] = NINE_STATE_RASTERIZER | NINE_STATE_FF_VS_OTHER | NINE_STATE_VS_PARAMS_MISC,
3502     [D3DRS_POINTSPRITEENABLE] = NINE_STATE_RASTERIZER,
3503     [D3DRS_POINTSCALEENABLE] = NINE_STATE_FF_SHADER,
3504     [D3DRS_POINTSCALE_A] = NINE_STATE_FF_VS_OTHER,
3505     [D3DRS_POINTSCALE_B] = NINE_STATE_FF_VS_OTHER,
3506     [D3DRS_POINTSCALE_C] = NINE_STATE_FF_VS_OTHER,
3507     [D3DRS_MULTISAMPLEANTIALIAS] = NINE_STATE_MULTISAMPLE,
3508     [D3DRS_MULTISAMPLEMASK] = NINE_STATE_SAMPLE_MASK,
3509     [D3DRS_PATCHEDGESTYLE] = NINE_STATE_UNHANDLED,
3510     [D3DRS_DEBUGMONITORTOKEN] = NINE_STATE_UNHANDLED,
3511     [D3DRS_POINTSIZE_MAX] = NINE_STATE_RASTERIZER | NINE_STATE_FF_VS_OTHER | NINE_STATE_VS_PARAMS_MISC,
3512     [D3DRS_INDEXEDVERTEXBLENDENABLE] = NINE_STATE_FF_SHADER,
3513     [D3DRS_COLORWRITEENABLE] = NINE_STATE_BLEND,
3514     [D3DRS_TWEENFACTOR] = NINE_STATE_FF_VS_OTHER,
3515     [D3DRS_BLENDOP] = NINE_STATE_BLEND,
3516     [D3DRS_POSITIONDEGREE] = NINE_STATE_UNHANDLED,
3517     [D3DRS_NORMALDEGREE] = NINE_STATE_UNHANDLED,
3518     [D3DRS_SCISSORTESTENABLE] = NINE_STATE_RASTERIZER,
3519     [D3DRS_SLOPESCALEDEPTHBIAS] = NINE_STATE_RASTERIZER,
3520     [D3DRS_ANTIALIASEDLINEENABLE] = NINE_STATE_RASTERIZER,
3521     [D3DRS_MINTESSELLATIONLEVEL] = NINE_STATE_UNHANDLED,
3522     [D3DRS_MAXTESSELLATIONLEVEL] = NINE_STATE_UNHANDLED,
3523     [D3DRS_ADAPTIVETESS_X] = NINE_STATE_UNHANDLED,
3524     [D3DRS_ADAPTIVETESS_Y] = NINE_STATE_UNHANDLED,
3525     [D3DRS_ADAPTIVETESS_Z] = NINE_STATE_UNHANDLED,
3526     [D3DRS_ADAPTIVETESS_W] = NINE_STATE_UNHANDLED,
3527     [D3DRS_ENABLEADAPTIVETESSELLATION] = NINE_STATE_UNHANDLED,
3528     [D3DRS_TWOSIDEDSTENCILMODE] = NINE_STATE_DSA,
3529     [D3DRS_CCW_STENCILFAIL] = NINE_STATE_DSA,
3530     [D3DRS_CCW_STENCILZFAIL] = NINE_STATE_DSA,
3531     [D3DRS_CCW_STENCILPASS] = NINE_STATE_DSA,
3532     [D3DRS_CCW_STENCILFUNC] = NINE_STATE_DSA,
3533     [D3DRS_COLORWRITEENABLE1] = NINE_STATE_BLEND,
3534     [D3DRS_COLORWRITEENABLE2] = NINE_STATE_BLEND,
3535     [D3DRS_COLORWRITEENABLE3] = NINE_STATE_BLEND,
3536     [D3DRS_BLENDFACTOR] = NINE_STATE_BLEND_COLOR,
3537     [D3DRS_SRGBWRITEENABLE] = NINE_STATE_FB,
3538     [D3DRS_DEPTHBIAS] = NINE_STATE_RASTERIZER,
3539     [D3DRS_WRAP8] = NINE_STATE_UNHANDLED, /* cylwrap has to be done via GP */
3540     [D3DRS_WRAP9] = NINE_STATE_UNHANDLED,
3541     [D3DRS_WRAP10] = NINE_STATE_UNHANDLED,
3542     [D3DRS_WRAP11] = NINE_STATE_UNHANDLED,
3543     [D3DRS_WRAP12] = NINE_STATE_UNHANDLED,
3544     [D3DRS_WRAP13] = NINE_STATE_UNHANDLED,
3545     [D3DRS_WRAP14] = NINE_STATE_UNHANDLED,
3546     [D3DRS_WRAP15] = NINE_STATE_UNHANDLED,
3547     [D3DRS_SEPARATEALPHABLENDENABLE] = NINE_STATE_BLEND,
3548     [D3DRS_SRCBLENDALPHA] = NINE_STATE_BLEND,
3549     [D3DRS_DESTBLENDALPHA] = NINE_STATE_BLEND,
3550     [D3DRS_BLENDOPALPHA] = NINE_STATE_BLEND
3551 };
3552 
3553 /* Misc */
3554 
3555 static D3DMATRIX nine_state_identity = { .m[0] = { 1, 0, 0, 0 },
3556                                          .m[1] = { 0, 1, 0, 0 },
3557                                          .m[2] = { 0, 0, 1, 0 },
3558                                          .m[3] = { 0, 0, 0, 1 } };
3559 
3560 void
3561 nine_state_resize_transform(struct nine_ff_state *ff_state, unsigned N)
3562 {
3563     unsigned n = ff_state->num_transforms;
3564 
3565     if (N <= n)
3566         return;
3567 
3568     ff_state->transform = REALLOC(ff_state->transform,
3569                                   n * sizeof(D3DMATRIX),
3570                                   N * sizeof(D3DMATRIX));
3571     for (; n < N; ++n)
3572         ff_state->transform[n] = nine_state_identity;
3573     ff_state->num_transforms = N;
3574 }
3575 
3576 D3DMATRIX *
3577 nine_state_access_transform(struct nine_ff_state *ff_state, D3DTRANSFORMSTATETYPE t,
3578                             boolean alloc)
3579 {
3580     unsigned index;
3581 
3582     switch (t) {
3583     case D3DTS_VIEW: index = 0; break;
3584     case D3DTS_PROJECTION: index = 1; break;
3585     case D3DTS_TEXTURE0: index = 2; break;
3586     case D3DTS_TEXTURE1: index = 3; break;
3587     case D3DTS_TEXTURE2: index = 4; break;
3588     case D3DTS_TEXTURE3: index = 5; break;
3589     case D3DTS_TEXTURE4: index = 6; break;
3590     case D3DTS_TEXTURE5: index = 7; break;
3591     case D3DTS_TEXTURE6: index = 8; break;
3592     case D3DTS_TEXTURE7: index = 9; break;
3593     default:
3594         if (!(t >= D3DTS_WORLDMATRIX(0) && t <= D3DTS_WORLDMATRIX(255)))
3595             return NULL;
3596         index = 10 + (t - D3DTS_WORLDMATRIX(0));
3597         break;
3598     }
3599 
3600     if (index >= ff_state->num_transforms) {
3601         if (!alloc)
3602             return &nine_state_identity;
3603         nine_state_resize_transform(ff_state, index + 1);
3604     }
3605     return &ff_state->transform[index];
3606 }
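/* Illustrative use of the accessor above (`ff` and `new_world` are
 * placeholders): D3DTS_WORLD is D3DTS_WORLDMATRIX(0), so it maps to index 10
 * and the transform array grows on demand when alloc is TRUE:
 *
 *     D3DMATRIX *world = nine_state_access_transform(ff, D3DTS_WORLD, TRUE);
 *     if (world)
 *         *world = new_world;
 *
 * With alloc == FALSE, slots that were never allocated read back as the
 * identity matrix defined above instead of growing the array. */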
3607 
3608 HRESULT
3609 nine_state_set_light(struct nine_ff_state *ff_state, DWORD Index,
3610                      const D3DLIGHT9 *pLight)
3611 {
3612     if (Index >= ff_state->num_lights) {
3613         unsigned n = ff_state->num_lights;
3614         unsigned N = Index + 1;
3615 
3616         ff_state->light = REALLOC(ff_state->light, n * sizeof(D3DLIGHT9),
3617                                                    N * sizeof(D3DLIGHT9));
3618         if (!ff_state->light)
3619             return E_OUTOFMEMORY;
3620         ff_state->num_lights = N;
3621 
3622         for (; n < Index; ++n) {
3623             memset(&ff_state->light[n], 0, sizeof(D3DLIGHT9));
3624             ff_state->light[n].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID;
3625         }
3626     }
3627     ff_state->light[Index] = *pLight;
3628 
3629     if (pLight->Type == D3DLIGHT_SPOT && pLight->Theta >= pLight->Phi) {
3630         DBG("Warning: clamping D3DLIGHT9.Theta\n");
3631         ff_state->light[Index].Theta = ff_state->light[Index].Phi;
3632     }
3633     return D3D_OK;
3634 }
3635 
3636 HRESULT
3637 nine_state_light_enable(struct nine_ff_state *ff_state,
3638                         DWORD Index, BOOL Enable)
3639 {
3640     unsigned i;
3641 
3642     user_assert(Index < ff_state->num_lights, D3DERR_INVALIDCALL);
3643 
3644     for (i = 0; i < ff_state->num_lights_active; ++i) {
3645         if (ff_state->active_light[i] == Index)
3646             break;
3647     }
3648 
3649     if (Enable) {
3650         if (i < ff_state->num_lights_active)
3651             return D3D_OK;
3652         /* XXX wine thinks this should still succeed:
3653          */
3654         user_assert(i < NINE_MAX_LIGHTS_ACTIVE, D3DERR_INVALIDCALL);
3655 
3656         ff_state->active_light[i] = Index;
3657         ff_state->num_lights_active++;
3658     } else {
3659         if (i == ff_state->num_lights_active)
3660             return D3D_OK;
3661         --ff_state->num_lights_active;
3662         for (; i < ff_state->num_lights_active; ++i)
3663             ff_state->active_light[i] = ff_state->active_light[i + 1];
3664     }
3665 
3666     return D3D_OK;
3667 }
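/* Behaviour sketch for the two light helpers above (`ff` and `light` are
 * placeholders, indices purely illustrative): nine_state_set_light() grows
 * the light array on demand and marks skipped slots NINED3DLIGHT_INVALID,
 * while nine_state_light_enable() keeps active_light[] as a compact list:
 *
 *     nine_state_set_light(ff, 5, &light);   // num_lights becomes 6,
 *                                            // slots 0..4 marked invalid
 *     nine_state_light_enable(ff, 5, TRUE);  // active_light = { 5 }
 *     nine_state_light_enable(ff, 5, FALSE); // active_light = { }
 */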
3668 
3669 #define D3DRS_TO_STRING_CASE(n) case D3DRS_##n: return "D3DRS_"#n
3670 const char *nine_d3drs_to_string(DWORD State)
3671 {
3672     switch (State) {
3673     D3DRS_TO_STRING_CASE(ZENABLE);
3674     D3DRS_TO_STRING_CASE(FILLMODE);
3675     D3DRS_TO_STRING_CASE(SHADEMODE);
3676     D3DRS_TO_STRING_CASE(ZWRITEENABLE);
3677     D3DRS_TO_STRING_CASE(ALPHATESTENABLE);
3678     D3DRS_TO_STRING_CASE(LASTPIXEL);
3679     D3DRS_TO_STRING_CASE(SRCBLEND);
3680     D3DRS_TO_STRING_CASE(DESTBLEND);
3681     D3DRS_TO_STRING_CASE(CULLMODE);
3682     D3DRS_TO_STRING_CASE(ZFUNC);
3683     D3DRS_TO_STRING_CASE(ALPHAREF);
3684     D3DRS_TO_STRING_CASE(ALPHAFUNC);
3685     D3DRS_TO_STRING_CASE(DITHERENABLE);
3686     D3DRS_TO_STRING_CASE(ALPHABLENDENABLE);
3687     D3DRS_TO_STRING_CASE(FOGENABLE);
3688     D3DRS_TO_STRING_CASE(SPECULARENABLE);
3689     D3DRS_TO_STRING_CASE(FOGCOLOR);
3690     D3DRS_TO_STRING_CASE(FOGTABLEMODE);
3691     D3DRS_TO_STRING_CASE(FOGSTART);
3692     D3DRS_TO_STRING_CASE(FOGEND);
3693     D3DRS_TO_STRING_CASE(FOGDENSITY);
3694     D3DRS_TO_STRING_CASE(RANGEFOGENABLE);
3695     D3DRS_TO_STRING_CASE(STENCILENABLE);
3696     D3DRS_TO_STRING_CASE(STENCILFAIL);
3697     D3DRS_TO_STRING_CASE(STENCILZFAIL);
3698     D3DRS_TO_STRING_CASE(STENCILPASS);
3699     D3DRS_TO_STRING_CASE(STENCILFUNC);
3700     D3DRS_TO_STRING_CASE(STENCILREF);
3701     D3DRS_TO_STRING_CASE(STENCILMASK);
3702     D3DRS_TO_STRING_CASE(STENCILWRITEMASK);
3703     D3DRS_TO_STRING_CASE(TEXTUREFACTOR);
3704     D3DRS_TO_STRING_CASE(WRAP0);
3705     D3DRS_TO_STRING_CASE(WRAP1);
3706     D3DRS_TO_STRING_CASE(WRAP2);
3707     D3DRS_TO_STRING_CASE(WRAP3);
3708     D3DRS_TO_STRING_CASE(WRAP4);
3709     D3DRS_TO_STRING_CASE(WRAP5);
3710     D3DRS_TO_STRING_CASE(WRAP6);
3711     D3DRS_TO_STRING_CASE(WRAP7);
3712     D3DRS_TO_STRING_CASE(CLIPPING);
3713     D3DRS_TO_STRING_CASE(LIGHTING);
3714     D3DRS_TO_STRING_CASE(AMBIENT);
3715     D3DRS_TO_STRING_CASE(FOGVERTEXMODE);
3716     D3DRS_TO_STRING_CASE(COLORVERTEX);
3717     D3DRS_TO_STRING_CASE(LOCALVIEWER);
3718     D3DRS_TO_STRING_CASE(NORMALIZENORMALS);
3719     D3DRS_TO_STRING_CASE(DIFFUSEMATERIALSOURCE);
3720     D3DRS_TO_STRING_CASE(SPECULARMATERIALSOURCE);
3721     D3DRS_TO_STRING_CASE(AMBIENTMATERIALSOURCE);
3722     D3DRS_TO_STRING_CASE(EMISSIVEMATERIALSOURCE);
3723     D3DRS_TO_STRING_CASE(VERTEXBLEND);
3724     D3DRS_TO_STRING_CASE(CLIPPLANEENABLE);
3725     D3DRS_TO_STRING_CASE(POINTSIZE);
3726     D3DRS_TO_STRING_CASE(POINTSIZE_MIN);
3727     D3DRS_TO_STRING_CASE(POINTSPRITEENABLE);
3728     D3DRS_TO_STRING_CASE(POINTSCALEENABLE);
3729     D3DRS_TO_STRING_CASE(POINTSCALE_A);
3730     D3DRS_TO_STRING_CASE(POINTSCALE_B);
3731     D3DRS_TO_STRING_CASE(POINTSCALE_C);
3732     D3DRS_TO_STRING_CASE(MULTISAMPLEANTIALIAS);
3733     D3DRS_TO_STRING_CASE(MULTISAMPLEMASK);
3734     D3DRS_TO_STRING_CASE(PATCHEDGESTYLE);
3735     D3DRS_TO_STRING_CASE(DEBUGMONITORTOKEN);
3736     D3DRS_TO_STRING_CASE(POINTSIZE_MAX);
3737     D3DRS_TO_STRING_CASE(INDEXEDVERTEXBLENDENABLE);
3738     D3DRS_TO_STRING_CASE(COLORWRITEENABLE);
3739     D3DRS_TO_STRING_CASE(TWEENFACTOR);
3740     D3DRS_TO_STRING_CASE(BLENDOP);
3741     D3DRS_TO_STRING_CASE(POSITIONDEGREE);
3742     D3DRS_TO_STRING_CASE(NORMALDEGREE);
3743     D3DRS_TO_STRING_CASE(SCISSORTESTENABLE);
3744     D3DRS_TO_STRING_CASE(SLOPESCALEDEPTHBIAS);
3745     D3DRS_TO_STRING_CASE(ANTIALIASEDLINEENABLE);
3746     D3DRS_TO_STRING_CASE(MINTESSELLATIONLEVEL);
3747     D3DRS_TO_STRING_CASE(MAXTESSELLATIONLEVEL);
3748     D3DRS_TO_STRING_CASE(ADAPTIVETESS_X);
3749     D3DRS_TO_STRING_CASE(ADAPTIVETESS_Y);
3750     D3DRS_TO_STRING_CASE(ADAPTIVETESS_Z);
3751     D3DRS_TO_STRING_CASE(ADAPTIVETESS_W);
3752     D3DRS_TO_STRING_CASE(ENABLEADAPTIVETESSELLATION);
3753     D3DRS_TO_STRING_CASE(TWOSIDEDSTENCILMODE);
3754     D3DRS_TO_STRING_CASE(CCW_STENCILFAIL);
3755     D3DRS_TO_STRING_CASE(CCW_STENCILZFAIL);
3756     D3DRS_TO_STRING_CASE(CCW_STENCILPASS);
3757     D3DRS_TO_STRING_CASE(CCW_STENCILFUNC);
3758     D3DRS_TO_STRING_CASE(COLORWRITEENABLE1);
3759     D3DRS_TO_STRING_CASE(COLORWRITEENABLE2);
3760     D3DRS_TO_STRING_CASE(COLORWRITEENABLE3);
3761     D3DRS_TO_STRING_CASE(BLENDFACTOR);
3762     D3DRS_TO_STRING_CASE(SRGBWRITEENABLE);
3763     D3DRS_TO_STRING_CASE(DEPTHBIAS);
3764     D3DRS_TO_STRING_CASE(WRAP8);
3765     D3DRS_TO_STRING_CASE(WRAP9);
3766     D3DRS_TO_STRING_CASE(WRAP10);
3767     D3DRS_TO_STRING_CASE(WRAP11);
3768     D3DRS_TO_STRING_CASE(WRAP12);
3769     D3DRS_TO_STRING_CASE(WRAP13);
3770     D3DRS_TO_STRING_CASE(WRAP14);
3771     D3DRS_TO_STRING_CASE(WRAP15);
3772     D3DRS_TO_STRING_CASE(SEPARATEALPHABLENDENABLE);
3773     D3DRS_TO_STRING_CASE(SRCBLENDALPHA);
3774     D3DRS_TO_STRING_CASE(DESTBLENDALPHA);
3775     D3DRS_TO_STRING_CASE(BLENDOPALPHA);
3776     default:
3777         return "(invalid)";
3778     }
3779 }
3780