/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */
28 
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33 
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41 #include "i965_yuv_coefs.h"
42 #include "intel_media.h"
43 #include "intel_gen_vppapi.h"
44 
45 #include "gen75_picture_process.h"
46 
47 extern VAStatus
48 vpp_surface_convert(VADriverContextP ctx,
49                     struct object_surface *src_obj_surf,
50                     struct object_surface *dst_obj_surf);
51 
/* Reads the has_vpp flag from the codec description reachable through ctx. */
#define HAS_VPP(ctx) ((ctx)->codec_info->has_vpp)

/*
 * Largest of the Gen6/Gen7/Gen8 padded surface-state sizes, so a single
 * slot stride can be used whichever layout is in effect.
 */
#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\
            MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))

/*
 * Offset of surface-state slot `index`.  The argument is parenthesized so
 * that an expression such as `i + 1` is multiplied as a whole.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* Offset just past MAX_PP_SURFACES surface-state slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)

/*
 * Block geometry constants.
 * NOTE(review): presumably the pixel block handled per kernel invocation
 * and the required x-offset alignment -- confirm against their users.
 */
#define GPU_ASM_BLOCK_WIDTH         16
#define GPU_ASM_BLOCK_HEIGHT        8
#define GPU_ASM_X_OFFSET_ALIGNMENT  4

/* Driver-private status value; its users are not in this part of the file. */
#define VA_STATUS_SUCCESS_1                     0xFFFFFFFE

/*
 * Passes x through an anonymous union and reads back the signed 8-bit
 * bit-field b.  Relies on the GNU cast-to-union extension, so x must have
 * the type of member a (unsigned int).
 * NOTE(review): which bits of x end up in b depends on the ABI's
 * bit-field layout.
 */
#define BIT_CAST(x) (((union{unsigned int a;int b:8;})(x)).b)
67 
68 static const uint32_t pp_null_gen5[][4] = {
69 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
70 };
71 
72 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
73 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
74 };
75 
76 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
77 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
78 };
79 
80 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
81 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
82 };
83 
84 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
85 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
86 };
87 
88 static const uint32_t pp_nv12_scaling_gen5[][4] = {
89 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
90 };
91 
92 static const uint32_t pp_nv12_avs_gen5[][4] = {
93 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
94 };
95 
96 static const uint32_t pp_nv12_dndi_gen5[][4] = {
97 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
98 };
99 
100 static const uint32_t pp_nv12_dn_gen5[][4] = {
101 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
102 };
103 
104 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
105 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
106 };
107 
108 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
109 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
110 };
111 
112 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
113 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
114 };
115 
116 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
117 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
118 };
119 
120 static const uint32_t pp_pa_load_save_pa_gen5[][4] = {
121 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5"
122 };
123 
124 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
125 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
126 };
127 
128 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
129 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
130 };
131 
132 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
133                                    const struct i965_surface *src_surface,
134                                    const VARectangle *src_rect,
135                                    struct i965_surface *dst_surface,
136                                    const VARectangle *dst_rect,
137                                    void *filter_param);
138 static VAStatus
139 pp_nv12_avs_initialize(VADriverContextP ctx,
140                        struct i965_post_processing_context *pp_context,
141                        const struct i965_surface *src_surface, const VARectangle *src_rect,
142                        struct i965_surface *dst_surface, const VARectangle *dst_rect,
143                        void *filter_param);
144 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
145                                            const struct i965_surface *src_surface,
146                                            const VARectangle *src_rect,
147                                            struct i965_surface *dst_surface,
148                                            const VARectangle *dst_rect,
149                                            void *filter_param);
150 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
151                                              const struct i965_surface *src_surface,
152                                              const VARectangle *src_rect,
153                                              struct i965_surface *dst_surface,
154                                              const VARectangle *dst_rect,
155                                              void *filter_param);
156 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
157                                                 const struct i965_surface *src_surface,
158                                                 const VARectangle *src_rect,
159                                                 struct i965_surface *dst_surface,
160                                                 const VARectangle *dst_rect,
161                                                 void *filter_param);
162 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
163                                         const struct i965_surface *src_surface,
164                                         const VARectangle *src_rect,
165                                         struct i965_surface *dst_surface,
166                                         const VARectangle *dst_rect,
167                                         void *filter_param);
168 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
169                                       const struct i965_surface *src_surface,
170                                       const VARectangle *src_rect,
171                                       struct i965_surface *dst_surface,
172                                       const VARectangle *dst_rect,
173                                       void *filter_param);
174 
175 static struct pp_module pp_modules_gen5[] = {
176     {
177         {
178             "NULL module (for testing)",
179             PP_NULL,
180             pp_null_gen5,
181             sizeof(pp_null_gen5),
182             NULL,
183         },
184 
185         pp_null_initialize,
186     },
187 
188     {
189         {
190             "NV12_NV12",
191             PP_NV12_LOAD_SAVE_N12,
192             pp_nv12_load_save_nv12_gen5,
193             sizeof(pp_nv12_load_save_nv12_gen5),
194             NULL,
195         },
196 
197         pp_plx_load_save_plx_initialize,
198     },
199 
200     {
201         {
202             "NV12_PL3",
203             PP_NV12_LOAD_SAVE_PL3,
204             pp_nv12_load_save_pl3_gen5,
205             sizeof(pp_nv12_load_save_pl3_gen5),
206             NULL,
207         },
208 
209         pp_plx_load_save_plx_initialize,
210     },
211 
212     {
213         {
214             "PL3_NV12",
215             PP_PL3_LOAD_SAVE_N12,
216             pp_pl3_load_save_nv12_gen5,
217             sizeof(pp_pl3_load_save_nv12_gen5),
218             NULL,
219         },
220 
221         pp_plx_load_save_plx_initialize,
222     },
223 
224     {
225         {
226             "PL3_PL3",
227             PP_PL3_LOAD_SAVE_PL3,
228             pp_pl3_load_save_pl3_gen5,
229             sizeof(pp_pl3_load_save_pl3_gen5),
230             NULL,
231         },
232 
233         pp_plx_load_save_plx_initialize
234     },
235 
236     {
237         {
238             "NV12 Scaling module",
239             PP_NV12_SCALING,
240             pp_nv12_scaling_gen5,
241             sizeof(pp_nv12_scaling_gen5),
242             NULL,
243         },
244 
245         pp_nv12_scaling_initialize,
246     },
247 
248     {
249         {
250             "NV12 AVS module",
251             PP_NV12_AVS,
252             pp_nv12_avs_gen5,
253             sizeof(pp_nv12_avs_gen5),
254             NULL,
255         },
256 
257         pp_nv12_avs_initialize,
258     },
259 
260     {
261         {
262             "NV12 DNDI module",
263             PP_NV12_DNDI,
264             pp_nv12_dndi_gen5,
265             sizeof(pp_nv12_dndi_gen5),
266             NULL,
267         },
268 
269         pp_nv12_dndi_initialize,
270     },
271 
272     {
273         {
274             "NV12 DN module",
275             PP_NV12_DN,
276             pp_nv12_dn_gen5,
277             sizeof(pp_nv12_dn_gen5),
278             NULL,
279         },
280 
281         pp_nv12_dn_initialize,
282     },
283 
284     {
285         {
286             "NV12_PA module",
287             PP_NV12_LOAD_SAVE_PA,
288             pp_nv12_load_save_pa_gen5,
289             sizeof(pp_nv12_load_save_pa_gen5),
290             NULL,
291         },
292 
293         pp_plx_load_save_plx_initialize,
294     },
295 
296     {
297         {
298             "PL3_PA module",
299             PP_PL3_LOAD_SAVE_PA,
300             pp_pl3_load_save_pa_gen5,
301             sizeof(pp_pl3_load_save_pa_gen5),
302             NULL,
303         },
304 
305         pp_plx_load_save_plx_initialize,
306     },
307 
308     {
309         {
310             "PA_NV12 module",
311             PP_PA_LOAD_SAVE_NV12,
312             pp_pa_load_save_nv12_gen5,
313             sizeof(pp_pa_load_save_nv12_gen5),
314             NULL,
315         },
316 
317         pp_plx_load_save_plx_initialize,
318     },
319 
320     {
321         {
322             "PA_PL3 module",
323             PP_PA_LOAD_SAVE_PL3,
324             pp_pa_load_save_pl3_gen5,
325             sizeof(pp_pa_load_save_pl3_gen5),
326             NULL,
327         },
328 
329         pp_plx_load_save_plx_initialize,
330     },
331 
332     {
333         {
334             "PA_PA module",
335             PP_PA_LOAD_SAVE_PA,
336             pp_pa_load_save_pa_gen5,
337             sizeof(pp_pa_load_save_pa_gen5),
338             NULL,
339         },
340 
341         pp_plx_load_save_plx_initialize,
342     },
343 
344     {
345         {
346             "RGBX_NV12 module",
347             PP_RGBX_LOAD_SAVE_NV12,
348             pp_rgbx_load_save_nv12_gen5,
349             sizeof(pp_rgbx_load_save_nv12_gen5),
350             NULL,
351         },
352 
353         pp_plx_load_save_plx_initialize,
354     },
355 
356     {
357         {
358             "NV12_RGBX module",
359             PP_NV12_LOAD_SAVE_RGBX,
360             pp_nv12_load_save_rgbx_gen5,
361             sizeof(pp_nv12_load_save_rgbx_gen5),
362             NULL,
363         },
364 
365         pp_plx_load_save_plx_initialize,
366     },
367 };
368 
369 static const uint32_t pp_null_gen6[][4] = {
370 #include "shaders/post_processing/gen5_6/null.g6b"
371 };
372 
373 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
374 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
375 };
376 
377 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
378 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
379 };
380 
381 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
382 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
383 };
384 
385 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
386 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
387 };
388 
389 static const uint32_t pp_nv12_scaling_gen6[][4] = {
390 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
391 };
392 
393 static const uint32_t pp_nv12_avs_gen6[][4] = {
394 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
395 };
396 
397 static const uint32_t pp_nv12_dndi_gen6[][4] = {
398 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
399 };
400 
401 static const uint32_t pp_nv12_dn_gen6[][4] = {
402 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
403 };
404 
405 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
406 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
407 };
408 
409 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
410 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
411 };
412 
413 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
414 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
415 };
416 
417 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
418 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
419 };
420 
421 static const uint32_t pp_pa_load_save_pa_gen6[][4] = {
422 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g6b"
423 };
424 
425 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
426 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
427 };
428 
429 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
430 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
431 };
432 
433 static struct pp_module pp_modules_gen6[] = {
434     {
435         {
436             "NULL module (for testing)",
437             PP_NULL,
438             pp_null_gen6,
439             sizeof(pp_null_gen6),
440             NULL,
441         },
442 
443         pp_null_initialize,
444     },
445 
446     {
447         {
448             "NV12_NV12",
449             PP_NV12_LOAD_SAVE_N12,
450             pp_nv12_load_save_nv12_gen6,
451             sizeof(pp_nv12_load_save_nv12_gen6),
452             NULL,
453         },
454 
455         pp_plx_load_save_plx_initialize,
456     },
457 
458     {
459         {
460             "NV12_PL3",
461             PP_NV12_LOAD_SAVE_PL3,
462             pp_nv12_load_save_pl3_gen6,
463             sizeof(pp_nv12_load_save_pl3_gen6),
464             NULL,
465         },
466 
467         pp_plx_load_save_plx_initialize,
468     },
469 
470     {
471         {
472             "PL3_NV12",
473             PP_PL3_LOAD_SAVE_N12,
474             pp_pl3_load_save_nv12_gen6,
475             sizeof(pp_pl3_load_save_nv12_gen6),
476             NULL,
477         },
478 
479         pp_plx_load_save_plx_initialize,
480     },
481 
482     {
483         {
484             "PL3_PL3",
485             PP_PL3_LOAD_SAVE_PL3,
486             pp_pl3_load_save_pl3_gen6,
487             sizeof(pp_pl3_load_save_pl3_gen6),
488             NULL,
489         },
490 
491         pp_plx_load_save_plx_initialize,
492     },
493 
494     {
495         {
496             "NV12 Scaling module",
497             PP_NV12_SCALING,
498             pp_nv12_scaling_gen6,
499             sizeof(pp_nv12_scaling_gen6),
500             NULL,
501         },
502 
503         gen6_nv12_scaling_initialize,
504     },
505 
506     {
507         {
508             "NV12 AVS module",
509             PP_NV12_AVS,
510             pp_nv12_avs_gen6,
511             sizeof(pp_nv12_avs_gen6),
512             NULL,
513         },
514 
515         pp_nv12_avs_initialize,
516     },
517 
518     {
519         {
520             "NV12 DNDI module",
521             PP_NV12_DNDI,
522             pp_nv12_dndi_gen6,
523             sizeof(pp_nv12_dndi_gen6),
524             NULL,
525         },
526 
527         pp_nv12_dndi_initialize,
528     },
529 
530     {
531         {
532             "NV12 DN module",
533             PP_NV12_DN,
534             pp_nv12_dn_gen6,
535             sizeof(pp_nv12_dn_gen6),
536             NULL,
537         },
538 
539         pp_nv12_dn_initialize,
540     },
541     {
542         {
543             "NV12_PA module",
544             PP_NV12_LOAD_SAVE_PA,
545             pp_nv12_load_save_pa_gen6,
546             sizeof(pp_nv12_load_save_pa_gen6),
547             NULL,
548         },
549 
550         pp_plx_load_save_plx_initialize,
551     },
552 
553     {
554         {
555             "PL3_PA module",
556             PP_PL3_LOAD_SAVE_PA,
557             pp_pl3_load_save_pa_gen6,
558             sizeof(pp_pl3_load_save_pa_gen6),
559             NULL,
560         },
561 
562         pp_plx_load_save_plx_initialize,
563     },
564 
565     {
566         {
567             "PA_NV12 module",
568             PP_PA_LOAD_SAVE_NV12,
569             pp_pa_load_save_nv12_gen6,
570             sizeof(pp_pa_load_save_nv12_gen6),
571             NULL,
572         },
573 
574         pp_plx_load_save_plx_initialize,
575     },
576 
577     {
578         {
579             "PA_PL3 module",
580             PP_PA_LOAD_SAVE_PL3,
581             pp_pa_load_save_pl3_gen6,
582             sizeof(pp_pa_load_save_pl3_gen6),
583             NULL,
584         },
585 
586         pp_plx_load_save_plx_initialize,
587     },
588 
589     {
590         {
591             "PA_PA module",
592             PP_PA_LOAD_SAVE_PA,
593             pp_pa_load_save_pa_gen6,
594             sizeof(pp_pa_load_save_pa_gen6),
595             NULL,
596         },
597 
598         pp_plx_load_save_plx_initialize,
599     },
600 
601     {
602         {
603             "RGBX_NV12 module",
604             PP_RGBX_LOAD_SAVE_NV12,
605             pp_rgbx_load_save_nv12_gen6,
606             sizeof(pp_rgbx_load_save_nv12_gen6),
607             NULL,
608         },
609 
610         pp_plx_load_save_plx_initialize,
611     },
612 
613     {
614         {
615             "NV12_RGBX module",
616             PP_NV12_LOAD_SAVE_RGBX,
617             pp_nv12_load_save_rgbx_gen6,
618             sizeof(pp_nv12_load_save_rgbx_gen6),
619             NULL,
620         },
621 
622         pp_plx_load_save_plx_initialize,
623     },
624 };
625 
626 static const uint32_t pp_null_gen7[][4] = {
627 };
628 
629 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
630 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
631 };
632 
633 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
634 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
635 };
636 
637 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
638 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
639 };
640 
641 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
642 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
643 };
644 
645 static const uint32_t pp_nv12_scaling_gen7[][4] = {
646 #include "shaders/post_processing/gen7/avs.g7b"
647 };
648 
649 static const uint32_t pp_nv12_avs_gen7[][4] = {
650 #include "shaders/post_processing/gen7/avs.g7b"
651 };
652 
653 static const uint32_t pp_nv12_dndi_gen7[][4] = {
654 #include "shaders/post_processing/gen7/dndi.g7b"
655 };
656 
657 static const uint32_t pp_nv12_dn_gen7[][4] = {
658 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
659 };
660 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
661 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
662 };
663 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
664 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
665 };
666 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
667 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
668 };
669 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
670 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
671 };
672 static const uint32_t pp_pa_load_save_pa_gen7[][4] = {
673 #include "shaders/post_processing/gen7/pa_to_pa.g7b"
674 };
675 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
676 #include "shaders/post_processing/gen7/rgbx_to_nv12.g7b"
677 };
678 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
679 #include "shaders/post_processing/gen7/pl2_to_rgbx.g7b"
680 };
681 
682 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
683                                            const struct i965_surface *src_surface,
684                                            const VARectangle *src_rect,
685                                            struct i965_surface *dst_surface,
686                                            const VARectangle *dst_rect,
687                                            void *filter_param);
688 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
689                                              const struct i965_surface *src_surface,
690                                              const VARectangle *src_rect,
691                                              struct i965_surface *dst_surface,
692                                              const VARectangle *dst_rect,
693                                              void *filter_param);
694 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
695                                            const struct i965_surface *src_surface,
696                                            const VARectangle *src_rect,
697                                            struct i965_surface *dst_surface,
698                                            const VARectangle *dst_rect,
699                                            void *filter_param);
700 
701 static struct pp_module pp_modules_gen7[] = {
702     {
703         {
704             "NULL module (for testing)",
705             PP_NULL,
706             pp_null_gen7,
707             sizeof(pp_null_gen7),
708             NULL,
709         },
710 
711         pp_null_initialize,
712     },
713 
714     {
715         {
716             "NV12_NV12",
717             PP_NV12_LOAD_SAVE_N12,
718             pp_nv12_load_save_nv12_gen7,
719             sizeof(pp_nv12_load_save_nv12_gen7),
720             NULL,
721         },
722 
723         gen7_pp_plx_avs_initialize,
724     },
725 
726     {
727         {
728             "NV12_PL3",
729             PP_NV12_LOAD_SAVE_PL3,
730             pp_nv12_load_save_pl3_gen7,
731             sizeof(pp_nv12_load_save_pl3_gen7),
732             NULL,
733         },
734 
735         gen7_pp_plx_avs_initialize,
736     },
737 
738     {
739         {
740             "PL3_NV12",
741             PP_PL3_LOAD_SAVE_N12,
742             pp_pl3_load_save_nv12_gen7,
743             sizeof(pp_pl3_load_save_nv12_gen7),
744             NULL,
745         },
746 
747         gen7_pp_plx_avs_initialize,
748     },
749 
750     {
751         {
752             "PL3_PL3",
753             PP_PL3_LOAD_SAVE_PL3,
754             pp_pl3_load_save_pl3_gen7,
755             sizeof(pp_pl3_load_save_pl3_gen7),
756             NULL,
757         },
758 
759         gen7_pp_plx_avs_initialize,
760     },
761 
762     {
763         {
764             "NV12 Scaling module",
765             PP_NV12_SCALING,
766             pp_nv12_scaling_gen7,
767             sizeof(pp_nv12_scaling_gen7),
768             NULL,
769         },
770 
771         gen7_pp_plx_avs_initialize,
772     },
773 
774     {
775         {
776             "NV12 AVS module",
777             PP_NV12_AVS,
778             pp_nv12_avs_gen7,
779             sizeof(pp_nv12_avs_gen7),
780             NULL,
781         },
782 
783         gen7_pp_plx_avs_initialize,
784     },
785 
786     {
787         {
788             "NV12 DNDI module",
789             PP_NV12_DNDI,
790             pp_nv12_dndi_gen7,
791             sizeof(pp_nv12_dndi_gen7),
792             NULL,
793         },
794 
795         gen7_pp_nv12_dndi_initialize,
796     },
797 
798     {
799         {
800             "NV12 DN module",
801             PP_NV12_DN,
802             pp_nv12_dn_gen7,
803             sizeof(pp_nv12_dn_gen7),
804             NULL,
805         },
806 
807         gen7_pp_nv12_dn_initialize,
808     },
809     {
810         {
811             "NV12_PA module",
812             PP_NV12_LOAD_SAVE_PA,
813             pp_nv12_load_save_pa_gen7,
814             sizeof(pp_nv12_load_save_pa_gen7),
815             NULL,
816         },
817 
818         gen7_pp_plx_avs_initialize,
819     },
820 
821     {
822         {
823             "PL3_PA module",
824             PP_PL3_LOAD_SAVE_PA,
825             pp_pl3_load_save_pa_gen7,
826             sizeof(pp_pl3_load_save_pa_gen7),
827             NULL,
828         },
829 
830         gen7_pp_plx_avs_initialize,
831     },
832 
833     {
834         {
835             "PA_NV12 module",
836             PP_PA_LOAD_SAVE_NV12,
837             pp_pa_load_save_nv12_gen7,
838             sizeof(pp_pa_load_save_nv12_gen7),
839             NULL,
840         },
841 
842         gen7_pp_plx_avs_initialize,
843     },
844 
845     {
846         {
847             "PA_PL3 module",
848             PP_PA_LOAD_SAVE_PL3,
849             pp_pa_load_save_pl3_gen7,
850             sizeof(pp_pa_load_save_pl3_gen7),
851             NULL,
852         },
853 
854         gen7_pp_plx_avs_initialize,
855     },
856 
857     {
858         {
859             "PA_PA module",
860             PP_PA_LOAD_SAVE_PA,
861             pp_pa_load_save_pa_gen7,
862             sizeof(pp_pa_load_save_pa_gen7),
863             NULL,
864         },
865 
866         gen7_pp_plx_avs_initialize,
867     },
868 
869     {
870         {
871             "RGBX_NV12 module",
872             PP_RGBX_LOAD_SAVE_NV12,
873             pp_rgbx_load_save_nv12_gen7,
874             sizeof(pp_rgbx_load_save_nv12_gen7),
875             NULL,
876         },
877 
878         gen7_pp_plx_avs_initialize,
879     },
880 
881     {
882         {
883             "NV12_RGBX module",
884             PP_NV12_LOAD_SAVE_RGBX,
885             pp_nv12_load_save_rgbx_gen7,
886             sizeof(pp_nv12_load_save_rgbx_gen7),
887             NULL,
888         },
889 
890         gen7_pp_plx_avs_initialize,
891     },
892 
893 };
894 
895 static const uint32_t pp_null_gen75[][4] = {
896 };
897 
898 static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
899 #include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
900 };
901 
902 static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
903 #include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
904 };
905 
906 static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
907 #include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
908 };
909 
910 static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
911 #include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
912 };
913 
914 static const uint32_t pp_nv12_scaling_gen75[][4] = {
915 #include "shaders/post_processing/gen7/avs.g75b"
916 };
917 
918 static const uint32_t pp_nv12_avs_gen75[][4] = {
919 #include "shaders/post_processing/gen7/avs.g75b"
920 };
921 
922 static const uint32_t pp_nv12_dndi_gen75[][4] = {
923 // #include "shaders/post_processing/gen7/dndi.g75b"
924 };
925 
926 static const uint32_t pp_nv12_dn_gen75[][4] = {
927 // #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
928 };
929 static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
930 #include "shaders/post_processing/gen7/pl2_to_pa.g75b"
931 };
932 static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
933 #include "shaders/post_processing/gen7/pl3_to_pa.g75b"
934 };
935 static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
936 #include "shaders/post_processing/gen7/pa_to_pl2.g75b"
937 };
938 static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
939 #include "shaders/post_processing/gen7/pa_to_pl3.g75b"
940 };
941 static const uint32_t pp_pa_load_save_pa_gen75[][4] = {
942 #include "shaders/post_processing/gen7/pa_to_pa.g75b"
943 };
944 static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
945 #include "shaders/post_processing/gen7/rgbx_to_nv12.g75b"
946 };
947 static const uint32_t pp_nv12_load_save_rgbx_gen75[][4] = {
948 #include "shaders/post_processing/gen7/pl2_to_rgbx.g75b"
949 };
950 
951 static struct pp_module pp_modules_gen75[] = {
952     {
953         {
954             "NULL module (for testing)",
955             PP_NULL,
956             pp_null_gen75,
957             sizeof(pp_null_gen75),
958             NULL,
959         },
960 
961         pp_null_initialize,
962     },
963 
964     {
965         {
966             "NV12_NV12",
967             PP_NV12_LOAD_SAVE_N12,
968             pp_nv12_load_save_nv12_gen75,
969             sizeof(pp_nv12_load_save_nv12_gen75),
970             NULL,
971         },
972 
973         gen7_pp_plx_avs_initialize,
974     },
975 
976     {
977         {
978             "NV12_PL3",
979             PP_NV12_LOAD_SAVE_PL3,
980             pp_nv12_load_save_pl3_gen75,
981             sizeof(pp_nv12_load_save_pl3_gen75),
982             NULL,
983         },
984 
985         gen7_pp_plx_avs_initialize,
986     },
987 
988     {
989         {
990             "PL3_NV12",
991             PP_PL3_LOAD_SAVE_N12,
992             pp_pl3_load_save_nv12_gen75,
993             sizeof(pp_pl3_load_save_nv12_gen75),
994             NULL,
995         },
996 
997         gen7_pp_plx_avs_initialize,
998     },
999 
1000     {
1001         {
1002             "PL3_PL3",
1003             PP_PL3_LOAD_SAVE_PL3,
1004             pp_pl3_load_save_pl3_gen75,
1005             sizeof(pp_pl3_load_save_pl3_gen75),
1006             NULL,
1007         },
1008 
1009         gen7_pp_plx_avs_initialize,
1010     },
1011 
1012     {
1013         {
1014             "NV12 Scaling module",
1015             PP_NV12_SCALING,
1016             pp_nv12_scaling_gen75,
1017             sizeof(pp_nv12_scaling_gen75),
1018             NULL,
1019         },
1020 
1021         gen7_pp_plx_avs_initialize,
1022     },
1023 
1024     {
1025         {
1026             "NV12 AVS module",
1027             PP_NV12_AVS,
1028             pp_nv12_avs_gen75,
1029             sizeof(pp_nv12_avs_gen75),
1030             NULL,
1031         },
1032 
1033         gen7_pp_plx_avs_initialize,
1034     },
1035 
1036     {
1037         {
1038             "NV12 DNDI module",
1039             PP_NV12_DNDI,
1040             pp_nv12_dndi_gen75,
1041             sizeof(pp_nv12_dndi_gen75),
1042             NULL,
1043         },
1044 
1045         gen7_pp_nv12_dn_initialize,
1046     },
1047 
1048     {
1049         {
1050             "NV12 DN module",
1051             PP_NV12_DN,
1052             pp_nv12_dn_gen75,
1053             sizeof(pp_nv12_dn_gen75),
1054             NULL,
1055         },
1056 
1057         gen7_pp_nv12_dn_initialize,
1058     },
1059 
1060     {
1061         {
1062             "NV12_PA module",
1063             PP_NV12_LOAD_SAVE_PA,
1064             pp_nv12_load_save_pa_gen75,
1065             sizeof(pp_nv12_load_save_pa_gen75),
1066             NULL,
1067         },
1068 
1069         gen7_pp_plx_avs_initialize,
1070     },
1071 
1072     {
1073         {
1074             "PL3_PA module",
1075             PP_PL3_LOAD_SAVE_PA,
1076             pp_pl3_load_save_pa_gen75,
1077             sizeof(pp_pl3_load_save_pa_gen75),
1078             NULL,
1079         },
1080 
1081         gen7_pp_plx_avs_initialize,
1082     },
1083 
1084     {
1085         {
1086             "PA_NV12 module",
1087             PP_PA_LOAD_SAVE_NV12,
1088             pp_pa_load_save_nv12_gen75,
1089             sizeof(pp_pa_load_save_nv12_gen75),
1090             NULL,
1091         },
1092 
1093         gen7_pp_plx_avs_initialize,
1094     },
1095 
1096     {
1097         {
1098             "PA_PL3 module",
1099             PP_PA_LOAD_SAVE_PL3,
1100             pp_pa_load_save_pl3_gen75,
1101             sizeof(pp_pa_load_save_pl3_gen75),
1102             NULL,
1103         },
1104 
1105         gen7_pp_plx_avs_initialize,
1106     },
1107 
1108     {
1109         {
1110             "PA_PA module",
1111             PP_PA_LOAD_SAVE_PA,
1112             pp_pa_load_save_pa_gen75,
1113             sizeof(pp_pa_load_save_pa_gen75),
1114             NULL,
1115         },
1116 
1117         gen7_pp_plx_avs_initialize,
1118     },
1119 
1120     {
1121         {
1122             "RGBX_NV12 module",
1123             PP_RGBX_LOAD_SAVE_NV12,
1124             pp_rgbx_load_save_nv12_gen75,
1125             sizeof(pp_rgbx_load_save_nv12_gen75),
1126             NULL,
1127         },
1128 
1129         gen7_pp_plx_avs_initialize,
1130     },
1131 
1132     {
1133         {
1134             "NV12_RGBX module",
1135             PP_NV12_LOAD_SAVE_RGBX,
1136             pp_nv12_load_save_rgbx_gen75,
1137             sizeof(pp_nv12_load_save_rgbx_gen75),
1138             NULL,
1139         },
1140 
1141         gen7_pp_plx_avs_initialize,
1142     },
1143 
1144 };
1145 
1146 static void
pp_dndi_frame_store_reset(DNDIFrameStore * fs)1147 pp_dndi_frame_store_reset(DNDIFrameStore *fs)
1148 {
1149     fs->obj_surface = NULL;
1150     fs->surface_id = VA_INVALID_ID;
1151     fs->is_scratch_surface = 0;
1152 }
1153 
1154 static inline void
pp_dndi_frame_store_swap(DNDIFrameStore * fs1,DNDIFrameStore * fs2)1155 pp_dndi_frame_store_swap(DNDIFrameStore *fs1, DNDIFrameStore *fs2)
1156 {
1157     const DNDIFrameStore tmpfs = *fs1;
1158     *fs1 = *fs2;
1159     *fs2 = tmpfs;
1160 }
1161 
1162 static inline void
pp_dndi_frame_store_clear(DNDIFrameStore * fs,VADriverContextP ctx)1163 pp_dndi_frame_store_clear(DNDIFrameStore *fs, VADriverContextP ctx)
1164 {
1165     if (fs->obj_surface && fs->is_scratch_surface) {
1166         VASurfaceID va_surface = fs->obj_surface->base.id;
1167         i965_DestroySurfaces(ctx, &va_surface, 1);
1168     }
1169     pp_dndi_frame_store_reset(fs);
1170 }
1171 
1172 static void
pp_dndi_context_init(struct pp_dndi_context * dndi_ctx)1173 pp_dndi_context_init(struct pp_dndi_context *dndi_ctx)
1174 {
1175     int i;
1176 
1177     memset(dndi_ctx, 0, sizeof(*dndi_ctx));
1178     for (i = 0; i < ARRAY_ELEMS(dndi_ctx->frame_store); i++)
1179         pp_dndi_frame_store_reset(&dndi_ctx->frame_store[i]);
1180 }
1181 
1182 static VAStatus
pp_dndi_context_init_surface_params(struct pp_dndi_context * dndi_ctx,struct object_surface * obj_surface,const VAProcPipelineParameterBuffer * pipe_params,const VAProcFilterParameterBufferDeinterlacing * deint_params)1183 pp_dndi_context_init_surface_params(struct pp_dndi_context *dndi_ctx,
1184                                     struct object_surface *obj_surface,
1185                                     const VAProcPipelineParameterBuffer *pipe_params,
1186                                     const VAProcFilterParameterBufferDeinterlacing *deint_params)
1187 {
1188     DNDIFrameStore *fs;
1189 
1190     dndi_ctx->is_di_enabled = 1;
1191     dndi_ctx->is_di_adv_enabled = 0;
1192     dndi_ctx->is_first_frame = 0;
1193     dndi_ctx->is_second_field = 0;
1194 
1195     /* Check whether we are deinterlacing the second field */
1196     if (dndi_ctx->is_di_enabled) {
1197         const unsigned int tff =
1198             !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD_FIRST);
1199         const unsigned int is_top_field =
1200             !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
1201 
1202         if ((tff ^ is_top_field) != 0) {
1203             fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1204             if (fs->surface_id != obj_surface->base.id) {
1205                 WARN_ONCE("invalid surface provided for second field\n");
1206                 return VA_STATUS_ERROR_INVALID_PARAMETER;
1207             }
1208             dndi_ctx->is_second_field = 1;
1209         }
1210     }
1211 
1212     /* Check whether we are deinterlacing the first frame */
1213     if (dndi_ctx->is_di_enabled) {
1214         switch (deint_params->algorithm) {
1215         case VAProcDeinterlacingBob:
1216             dndi_ctx->is_first_frame = 1;
1217             break;
1218         case VAProcDeinterlacingMotionAdaptive:
1219         case VAProcDeinterlacingMotionCompensated:
1220             fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1221             if (fs->surface_id == VA_INVALID_ID)
1222                 dndi_ctx->is_first_frame = 1;
1223             else if (dndi_ctx->is_second_field) {
1224                 /* At this stage, we have already deinterlaced the
1225                    first field successfully. So, the first frame flag
1226                    is trigerred if the previous field was deinterlaced
1227                    without reference frame */
1228                 fs = &dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS];
1229                 if (fs->surface_id == VA_INVALID_ID)
1230                     dndi_ctx->is_first_frame = 1;
1231             } else {
1232                 if (pipe_params->num_forward_references < 1 ||
1233                     pipe_params->forward_references[0] == VA_INVALID_ID) {
1234                     WARN_ONCE("A forward temporal reference is needed for Motion adaptive/compensated deinterlacing !!!\n");
1235                     return VA_STATUS_ERROR_INVALID_PARAMETER;
1236                 }
1237             }
1238             dndi_ctx->is_di_adv_enabled = 1;
1239             break;
1240         default:
1241             WARN_ONCE("unsupported deinterlacing algorithm (%d)\n",
1242                       deint_params->algorithm);
1243             return VA_STATUS_ERROR_UNSUPPORTED_FILTER;
1244         }
1245     }
1246     return VA_STATUS_SUCCESS;
1247 }
1248 
1249 static VAStatus
pp_dndi_context_ensure_surfaces_storage(VADriverContextP ctx,struct i965_post_processing_context * pp_context,struct object_surface * src_surface,struct object_surface * dst_surface)1250 pp_dndi_context_ensure_surfaces_storage(VADriverContextP ctx,
1251                                         struct i965_post_processing_context *pp_context,
1252                                         struct object_surface *src_surface, struct object_surface *dst_surface)
1253 {
1254     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1255     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
1256     unsigned int src_fourcc, dst_fourcc;
1257     unsigned int src_sampling, dst_sampling;
1258     unsigned int src_tiling, dst_tiling;
1259     unsigned int i, swizzle;
1260     VAStatus status;
1261 
1262     /* Determine input surface info. Always use NV12 Y-tiled */
1263     if (src_surface->bo) {
1264         src_fourcc = src_surface->fourcc;
1265         src_sampling = src_surface->subsampling;
1266         dri_bo_get_tiling(src_surface->bo, &src_tiling, &swizzle);
1267         src_tiling = !!src_tiling;
1268     } else {
1269         src_fourcc = VA_FOURCC_NV12;
1270         src_sampling = SUBSAMPLE_YUV420;
1271         src_tiling = 1;
1272         status = i965_check_alloc_surface_bo(ctx, src_surface,
1273                                              src_tiling, src_fourcc, src_sampling);
1274         if (status != VA_STATUS_SUCCESS)
1275             return status;
1276     }
1277 
1278     /* Determine output surface info. Always use NV12 Y-tiled */
1279     if (dst_surface->bo) {
1280         dst_fourcc   = dst_surface->fourcc;
1281         dst_sampling = dst_surface->subsampling;
1282         dri_bo_get_tiling(dst_surface->bo, &dst_tiling, &swizzle);
1283         dst_tiling = !!dst_tiling;
1284     } else {
1285         dst_fourcc = VA_FOURCC_NV12;
1286         dst_sampling = SUBSAMPLE_YUV420;
1287         dst_tiling = 1;
1288         status = i965_check_alloc_surface_bo(ctx, dst_surface,
1289                                              dst_tiling, dst_fourcc, dst_sampling);
1290         if (status != VA_STATUS_SUCCESS)
1291             return status;
1292     }
1293 
1294     /* Create pipeline surfaces */
1295     for (i = 0; i < ARRAY_ELEMS(dndi_ctx->frame_store); i ++) {
1296         struct object_surface *obj_surface;
1297         VASurfaceID new_surface;
1298         unsigned int width, height;
1299 
1300         if (dndi_ctx->frame_store[i].obj_surface &&
1301             dndi_ctx->frame_store[i].obj_surface->bo)
1302             continue; // user allocated surface, not VPP internal
1303 
1304         if (dndi_ctx->frame_store[i].obj_surface) {
1305             obj_surface = dndi_ctx->frame_store[i].obj_surface;
1306             dndi_ctx->frame_store[i].is_scratch_surface = 0;
1307         } else {
1308             if (i <= DNDI_FRAME_IN_STMM) {
1309                 width = src_surface->orig_width;
1310                 height = src_surface->orig_height;
1311             } else {
1312                 width = dst_surface->orig_width;
1313                 height = dst_surface->orig_height;
1314             }
1315 
1316             status = i965_CreateSurfaces(ctx, width, height, VA_RT_FORMAT_YUV420,
1317                                          1, &new_surface);
1318             if (status != VA_STATUS_SUCCESS)
1319                 return status;
1320 
1321             obj_surface = SURFACE(new_surface);
1322             assert(obj_surface != NULL);
1323             dndi_ctx->frame_store[i].is_scratch_surface = 1;
1324         }
1325 
1326         if (i <= DNDI_FRAME_IN_PREVIOUS) {
1327             status = i965_check_alloc_surface_bo(ctx, obj_surface,
1328                                                  src_tiling, src_fourcc, src_sampling);
1329         } else if (i == DNDI_FRAME_IN_STMM || i == DNDI_FRAME_OUT_STMM) {
1330             status = i965_check_alloc_surface_bo(ctx, obj_surface,
1331                                                  1, VA_FOURCC_Y800, SUBSAMPLE_YUV400);
1332         } else if (i >= DNDI_FRAME_OUT_CURRENT) {
1333             status = i965_check_alloc_surface_bo(ctx, obj_surface,
1334                                                  dst_tiling, dst_fourcc, dst_sampling);
1335         }
1336         if (status != VA_STATUS_SUCCESS)
1337             return status;
1338 
1339         dndi_ctx->frame_store[i].obj_surface = obj_surface;
1340     }
1341     return VA_STATUS_SUCCESS;
1342 }
1343 
1344 static VAStatus
pp_dndi_context_ensure_surfaces(VADriverContextP ctx,struct i965_post_processing_context * pp_context,struct object_surface * src_surface,struct object_surface * dst_surface)1345 pp_dndi_context_ensure_surfaces(VADriverContextP ctx,
1346                                 struct i965_post_processing_context *pp_context,
1347                                 struct object_surface *src_surface, struct object_surface *dst_surface)
1348 {
1349     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1350     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
1351     DNDIFrameStore *ifs, *ofs;
1352     bool is_new_frame = false;
1353 
1354     /* Update the previous input surface */
1355     is_new_frame = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].surface_id !=
1356                    src_surface->base.id;
1357     if (is_new_frame) {
1358         ifs = &dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS];
1359         ofs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1360         do {
1361             const VAProcPipelineParameterBuffer * const pipe_params =
1362                 pp_context->pipeline_param;
1363             struct object_surface *obj_surface;
1364 
1365             if (pipe_params->num_forward_references < 1)
1366                 break;
1367             if (pipe_params->forward_references[0] == VA_INVALID_ID)
1368                 break;
1369 
1370             obj_surface = SURFACE(pipe_params->forward_references[0]);
1371             if (!obj_surface || obj_surface->base.id == ifs->surface_id)
1372                 break;
1373 
1374             pp_dndi_frame_store_clear(ifs, ctx);
1375             if (obj_surface->base.id == ofs->surface_id) {
1376                 *ifs = *ofs;
1377                 pp_dndi_frame_store_reset(ofs);
1378             } else {
1379                 ifs->obj_surface = obj_surface;
1380                 ifs->surface_id = obj_surface->base.id;
1381             }
1382         } while (0);
1383     }
1384 
1385     /* Update the input surface */
1386     ifs = &dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT];
1387     pp_dndi_frame_store_clear(ifs, ctx);
1388     ifs->obj_surface = src_surface;
1389     ifs->surface_id = src_surface->base.id;
1390 
1391     /* Update the Spatial Temporal Motion Measure (STMM) surfaces */
1392     if (is_new_frame)
1393         pp_dndi_frame_store_swap(&dndi_ctx->frame_store[DNDI_FRAME_IN_STMM],
1394                                  &dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM]);
1395 
1396     /* Update the output surfaces */
1397     ofs = &dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT];
1398     if (dndi_ctx->is_di_adv_enabled && !dndi_ctx->is_first_frame) {
1399         pp_dndi_frame_store_swap(ofs,
1400                                  &dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS]);
1401         if (!dndi_ctx->is_second_field)
1402             ofs = &dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS];
1403     }
1404     pp_dndi_frame_store_clear(ofs, ctx);
1405     ofs->obj_surface = dst_surface;
1406     ofs->surface_id = dst_surface->base.id;
1407 
1408     return VA_STATUS_SUCCESS;
1409 }
1410 
1411 int
pp_get_surface_fourcc(VADriverContextP ctx,const struct i965_surface * surface)1412 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
1413 {
1414     int fourcc;
1415 
1416     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1417         struct object_image *obj_image = (struct object_image *)surface->base;
1418         fourcc = obj_image->image.format.fourcc;
1419     } else {
1420         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1421         fourcc = obj_surface->fourcc;
1422     }
1423 
1424     return fourcc;
1425 }
1426 
1427 static void
pp_get_surface_size(VADriverContextP ctx,const struct i965_surface * surface,int * width,int * height)1428 pp_get_surface_size(VADriverContextP ctx, const struct i965_surface *surface, int *width, int *height)
1429 {
1430     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1431         struct object_image *obj_image = (struct object_image *)surface->base;
1432 
1433         *width = obj_image->image.width;
1434         *height = obj_image->image.height;
1435     } else {
1436         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1437 
1438         *width = obj_surface->orig_width;
1439         *height = obj_surface->orig_height;
1440     }
1441 }
1442 
1443 static void
pp_set_surface_tiling(struct i965_surface_state * ss,unsigned int tiling)1444 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
1445 {
1446     switch (tiling) {
1447     case I915_TILING_NONE:
1448         ss->ss3.tiled_surface = 0;
1449         ss->ss3.tile_walk = 0;
1450         break;
1451     case I915_TILING_X:
1452         ss->ss3.tiled_surface = 1;
1453         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
1454         break;
1455     case I915_TILING_Y:
1456         ss->ss3.tiled_surface = 1;
1457         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
1458         break;
1459     }
1460 }
1461 
1462 static void
pp_set_surface2_tiling(struct i965_surface_state2 * ss,unsigned int tiling)1463 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
1464 {
1465     switch (tiling) {
1466     case I915_TILING_NONE:
1467         ss->ss2.tiled_surface = 0;
1468         ss->ss2.tile_walk = 0;
1469         break;
1470     case I915_TILING_X:
1471         ss->ss2.tiled_surface = 1;
1472         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1473         break;
1474     case I915_TILING_Y:
1475         ss->ss2.tiled_surface = 1;
1476         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1477         break;
1478     }
1479 }
1480 
1481 static void
gen7_pp_set_surface_tiling(struct gen7_surface_state * ss,unsigned int tiling)1482 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
1483 {
1484     switch (tiling) {
1485     case I915_TILING_NONE:
1486         ss->ss0.tiled_surface = 0;
1487         ss->ss0.tile_walk = 0;
1488         break;
1489     case I915_TILING_X:
1490         ss->ss0.tiled_surface = 1;
1491         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1492         break;
1493     case I915_TILING_Y:
1494         ss->ss0.tiled_surface = 1;
1495         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1496         break;
1497     }
1498 }
1499 
1500 static void
gen7_pp_set_surface2_tiling(struct gen7_surface_state2 * ss,unsigned int tiling)1501 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
1502 {
1503     switch (tiling) {
1504     case I915_TILING_NONE:
1505         ss->ss2.tiled_surface = 0;
1506         ss->ss2.tile_walk = 0;
1507         break;
1508     case I915_TILING_X:
1509         ss->ss2.tiled_surface = 1;
1510         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1511         break;
1512     case I915_TILING_Y:
1513         ss->ss2.tiled_surface = 1;
1514         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1515         break;
1516     }
1517 }
1518 
1519 static void
ironlake_pp_interface_descriptor_table(struct i965_post_processing_context * pp_context)1520 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1521 {
1522     struct i965_interface_descriptor *desc;
1523     dri_bo *bo;
1524     int pp_index = pp_context->current_pp;
1525 
1526     bo = pp_context->idrt.bo;
1527     dri_bo_map(bo, 1);
1528     assert(bo->virtual);
1529     desc = bo->virtual;
1530     memset(desc, 0, sizeof(*desc));
1531     desc->desc0.grf_reg_blocks = 10;
1532     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1533     desc->desc1.const_urb_entry_read_offset = 0;
1534     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
1535     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
1536     desc->desc2.sampler_count = 0;
1537     desc->desc3.binding_table_entry_count = 0;
1538     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1539 
1540     dri_bo_emit_reloc(bo,
1541                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1542                       desc->desc0.grf_reg_blocks,
1543                       offsetof(struct i965_interface_descriptor, desc0),
1544                       pp_context->pp_modules[pp_index].kernel.bo);
1545 
1546     dri_bo_emit_reloc(bo,
1547                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1548                       desc->desc2.sampler_count << 2,
1549                       offsetof(struct i965_interface_descriptor, desc2),
1550                       pp_context->sampler_state_table.bo);
1551 
1552     dri_bo_unmap(bo);
1553     pp_context->idrt.num_interface_descriptors++;
1554 }
1555 
1556 static void
ironlake_pp_vfe_state(struct i965_post_processing_context * pp_context)1557 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
1558 {
1559     struct i965_vfe_state *vfe_state;
1560     dri_bo *bo;
1561 
1562     bo = pp_context->vfe_state.bo;
1563     dri_bo_map(bo, 1);
1564     assert(bo->virtual);
1565     vfe_state = bo->virtual;
1566     memset(vfe_state, 0, sizeof(*vfe_state));
1567     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
1568     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
1569     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
1570     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
1571     vfe_state->vfe1.children_present = 0;
1572     vfe_state->vfe2.interface_descriptor_base =
1573         pp_context->idrt.bo->offset >> 4; /* reloc */
1574     dri_bo_emit_reloc(bo,
1575                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1576                       0,
1577                       offsetof(struct i965_vfe_state, vfe2),
1578                       pp_context->idrt.bo);
1579     dri_bo_unmap(bo);
1580 }
1581 
1582 static void
ironlake_pp_upload_constants(struct i965_post_processing_context * pp_context)1583 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
1584 {
1585     unsigned char *constant_buffer;
1586     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1587 
1588     assert(sizeof(*pp_static_parameter) == 128);
1589     dri_bo_map(pp_context->curbe.bo, 1);
1590     assert(pp_context->curbe.bo->virtual);
1591     constant_buffer = pp_context->curbe.bo->virtual;
1592     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1593     dri_bo_unmap(pp_context->curbe.bo);
1594 }
1595 
1596 static void
ironlake_pp_states_setup(VADriverContextP ctx,struct i965_post_processing_context * pp_context)1597 ironlake_pp_states_setup(VADriverContextP ctx,
1598                          struct i965_post_processing_context *pp_context)
1599 {
1600     ironlake_pp_interface_descriptor_table(pp_context);
1601     ironlake_pp_vfe_state(pp_context);
1602     ironlake_pp_upload_constants(pp_context);
1603 }
1604 
1605 static void
ironlake_pp_pipeline_select(VADriverContextP ctx,struct i965_post_processing_context * pp_context)1606 ironlake_pp_pipeline_select(VADriverContextP ctx,
1607                             struct i965_post_processing_context *pp_context)
1608 {
1609     struct intel_batchbuffer *batch = pp_context->batch;
1610 
1611     BEGIN_BATCH(batch, 1);
1612     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1613     ADVANCE_BATCH(batch);
1614 }
1615 
1616 static void
ironlake_pp_urb_layout(VADriverContextP ctx,struct i965_post_processing_context * pp_context)1617 ironlake_pp_urb_layout(VADriverContextP ctx,
1618                        struct i965_post_processing_context *pp_context)
1619 {
1620     struct intel_batchbuffer *batch = pp_context->batch;
1621     unsigned int vfe_fence, cs_fence;
1622 
1623     vfe_fence = pp_context->urb.cs_start;
1624     cs_fence = pp_context->urb.size;
1625 
1626     BEGIN_BATCH(batch, 3);
1627     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1628     OUT_BATCH(batch, 0);
1629     OUT_BATCH(batch,
1630               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1631               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1632     ADVANCE_BATCH(batch);
1633 }
1634 
1635 static void
ironlake_pp_state_base_address(VADriverContextP ctx,struct i965_post_processing_context * pp_context)1636 ironlake_pp_state_base_address(VADriverContextP ctx,
1637                                struct i965_post_processing_context *pp_context)
1638 {
1639     struct intel_batchbuffer *batch = pp_context->batch;
1640 
1641     BEGIN_BATCH(batch, 8);
1642     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1643     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1644     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1645     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1646     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1647     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1648     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1649     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1650     ADVANCE_BATCH(batch);
1651 }
1652 
1653 static void
ironlake_pp_state_pointers(VADriverContextP ctx,struct i965_post_processing_context * pp_context)1654 ironlake_pp_state_pointers(VADriverContextP ctx,
1655                            struct i965_post_processing_context *pp_context)
1656 {
1657     struct intel_batchbuffer *batch = pp_context->batch;
1658 
1659     BEGIN_BATCH(batch, 3);
1660     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1661     OUT_BATCH(batch, 0);
1662     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1663     ADVANCE_BATCH(batch);
1664 }
1665 
1666 static void
ironlake_pp_cs_urb_layout(VADriverContextP ctx,struct i965_post_processing_context * pp_context)1667 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1668                           struct i965_post_processing_context *pp_context)
1669 {
1670     struct intel_batchbuffer *batch = pp_context->batch;
1671 
1672     BEGIN_BATCH(batch, 2);
1673     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1674     OUT_BATCH(batch,
1675               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1676               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1677     ADVANCE_BATCH(batch);
1678 }
1679 
1680 static void
ironlake_pp_constant_buffer(VADriverContextP ctx,struct i965_post_processing_context * pp_context)1681 ironlake_pp_constant_buffer(VADriverContextP ctx,
1682                             struct i965_post_processing_context *pp_context)
1683 {
1684     struct intel_batchbuffer *batch = pp_context->batch;
1685 
1686     BEGIN_BATCH(batch, 2);
1687     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1688     OUT_RELOC(batch, pp_context->curbe.bo,
1689               I915_GEM_DOMAIN_INSTRUCTION, 0,
1690               pp_context->urb.size_cs_entry - 1);
1691     ADVANCE_BATCH(batch);
1692 }
1693 
1694 static void
ironlake_pp_object_walker(VADriverContextP ctx,struct i965_post_processing_context * pp_context)1695 ironlake_pp_object_walker(VADriverContextP ctx,
1696                           struct i965_post_processing_context *pp_context)
1697 {
1698     struct intel_batchbuffer *batch = pp_context->batch;
1699     int x, x_steps, y, y_steps;
1700     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1701 
1702     x_steps = pp_context->pp_x_steps(pp_context->private_context);
1703     y_steps = pp_context->pp_y_steps(pp_context->private_context);
1704 
1705     for (y = 0; y < y_steps; y++) {
1706         for (x = 0; x < x_steps; x++) {
1707             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1708                 BEGIN_BATCH(batch, 20);
1709                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1710                 OUT_BATCH(batch, 0);
1711                 OUT_BATCH(batch, 0); /* no indirect data */
1712                 OUT_BATCH(batch, 0);
1713 
1714                 /* inline data grf 5-6 */
1715                 assert(sizeof(*pp_inline_parameter) == 64);
1716                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1717 
1718                 ADVANCE_BATCH(batch);
1719             }
1720         }
1721     }
1722 }
1723 
1724 static void
ironlake_pp_pipeline_setup(VADriverContextP ctx,struct i965_post_processing_context * pp_context)1725 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1726                            struct i965_post_processing_context *pp_context)
1727 {
1728     struct intel_batchbuffer *batch = pp_context->batch;
1729 
1730     intel_batchbuffer_start_atomic(batch, 0x1000);
1731     intel_batchbuffer_emit_mi_flush(batch);
1732     ironlake_pp_pipeline_select(ctx, pp_context);
1733     ironlake_pp_state_base_address(ctx, pp_context);
1734     ironlake_pp_state_pointers(ctx, pp_context);
1735     ironlake_pp_urb_layout(ctx, pp_context);
1736     ironlake_pp_cs_urb_layout(ctx, pp_context);
1737     ironlake_pp_constant_buffer(ctx, pp_context);
1738     ironlake_pp_object_walker(ctx, pp_context);
1739     intel_batchbuffer_end_atomic(batch);
1740 }
1741 
1742 // update u/v offset when the surface format are packed yuv
i965_update_src_surface_static_parameter(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * surface)1743 static void i965_update_src_surface_static_parameter(
1744     VADriverContextP    ctx,
1745     struct i965_post_processing_context *pp_context,
1746     const struct i965_surface *surface)
1747 {
1748     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1749     int fourcc = pp_get_surface_fourcc(ctx, surface);
1750 
1751     switch (fourcc) {
1752     case VA_FOURCC_YUY2:
1753         pp_static_parameter->grf1.source_packed_u_offset = 1;
1754         pp_static_parameter->grf1.source_packed_v_offset = 3;
1755         break;
1756     case VA_FOURCC_UYVY:
1757         pp_static_parameter->grf1.source_packed_y_offset = 1;
1758         pp_static_parameter->grf1.source_packed_v_offset = 2;
1759         break;
1760     case VA_FOURCC_BGRX:
1761     case VA_FOURCC_BGRA:
1762         pp_static_parameter->grf1.source_rgb_layout = 0;
1763         break;
1764     case VA_FOURCC_RGBX:
1765     case VA_FOURCC_RGBA:
1766         pp_static_parameter->grf1.source_rgb_layout = 1;
1767         break;
1768     default:
1769         break;
1770     }
1771 
1772 }
1773 
/*
 * Update the destination-related fields of grf1 in the static
 * parameters according to the fourcc of the destination surface:
 *  - packed YUV (YUY2, UYVY): component offsets in the load_and_save view;
 *  - RGB (BGRX/BGRA, RGBX/RGBA): the destination RGB layout in the csc view.
 * Any other fourcc leaves the static parameters unchanged.
 */
static void
i965_update_dst_surface_static_parameter(VADriverContextP ctx,
                                         struct i965_post_processing_context *pp_context,
                                         const struct i965_surface *surface)
{
    struct pp_static_parameter *static_params = pp_context->pp_static_parameter;
    const int dst_fourcc = pp_get_surface_fourcc(ctx, surface);

    switch (dst_fourcc) {
    case VA_FOURCC_RGBX:
    case VA_FOURCC_RGBA:
        static_params->grf1.r1_2.csc.destination_rgb_layout = 1;
        break;

    case VA_FOURCC_BGRX:
    case VA_FOURCC_BGRA:
        static_params->grf1.r1_2.csc.destination_rgb_layout = 0;
        break;

    case VA_FOURCC_UYVY:
        static_params->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
        static_params->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
        break;

    case VA_FOURCC_YUY2:
        static_params->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
        static_params->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
        break;

    default:
        break;
    }
}
1804 
1805 static void
i965_pp_set_surface_state(VADriverContextP ctx,struct i965_post_processing_context * pp_context,dri_bo * surf_bo,unsigned long surf_bo_offset,int width,int height,int pitch,int format,int index,int is_target)1806 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1807                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1808                           int width, int height, int pitch, int format,
1809                           int index, int is_target)
1810 {
1811     struct i965_surface_state *ss;
1812     dri_bo *ss_bo;
1813     unsigned int tiling;
1814     unsigned int swizzle;
1815 
1816     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1817     ss_bo = pp_context->surface_state_binding_table.bo;
1818     assert(ss_bo);
1819 
1820     dri_bo_map(ss_bo, True);
1821     assert(ss_bo->virtual);
1822     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1823     memset(ss, 0, sizeof(*ss));
1824     ss->ss0.surface_type = I965_SURFACE_2D;
1825     ss->ss0.surface_format = format;
1826     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1827     ss->ss2.width = width - 1;
1828     ss->ss2.height = height - 1;
1829     ss->ss3.pitch = pitch - 1;
1830     pp_set_surface_tiling(ss, tiling);
1831     dri_bo_emit_reloc(ss_bo,
1832                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1833                       surf_bo_offset,
1834                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1835                       surf_bo);
1836     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1837     dri_bo_unmap(ss_bo);
1838 }
1839 
1840 static void
i965_pp_set_surface2_state(VADriverContextP ctx,struct i965_post_processing_context * pp_context,dri_bo * surf_bo,unsigned long surf_bo_offset,int width,int height,int wpitch,int xoffset,int yoffset,int format,int interleave_chroma,int index)1841 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1842                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1843                            int width, int height, int wpitch,
1844                            int xoffset, int yoffset,
1845                            int format, int interleave_chroma,
1846                            int index)
1847 {
1848     struct i965_surface_state2 *ss2;
1849     dri_bo *ss2_bo;
1850     unsigned int tiling;
1851     unsigned int swizzle;
1852 
1853     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1854     ss2_bo = pp_context->surface_state_binding_table.bo;
1855     assert(ss2_bo);
1856 
1857     dri_bo_map(ss2_bo, True);
1858     assert(ss2_bo->virtual);
1859     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1860     memset(ss2, 0, sizeof(*ss2));
1861     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1862     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1863     ss2->ss1.width = width - 1;
1864     ss2->ss1.height = height - 1;
1865     ss2->ss2.pitch = wpitch - 1;
1866     ss2->ss2.interleave_chroma = interleave_chroma;
1867     ss2->ss2.surface_format = format;
1868     ss2->ss3.x_offset_for_cb = xoffset;
1869     ss2->ss3.y_offset_for_cb = yoffset;
1870     pp_set_surface2_tiling(ss2, tiling);
1871     dri_bo_emit_reloc(ss2_bo,
1872                       I915_GEM_DOMAIN_RENDER, 0,
1873                       surf_bo_offset,
1874                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1875                       surf_bo);
1876     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1877     dri_bo_unmap(ss2_bo);
1878 }
1879 
1880 static void
gen7_pp_set_surface_state(VADriverContextP ctx,struct i965_post_processing_context * pp_context,dri_bo * surf_bo,unsigned long surf_bo_offset,int width,int height,int pitch,int format,int index,int is_target)1881 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1882                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1883                           int width, int height, int pitch, int format,
1884                           int index, int is_target)
1885 {
1886     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1887     struct gen7_surface_state *ss;
1888     dri_bo *ss_bo;
1889     unsigned int tiling;
1890     unsigned int swizzle;
1891 
1892     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1893     ss_bo = pp_context->surface_state_binding_table.bo;
1894     assert(ss_bo);
1895 
1896     dri_bo_map(ss_bo, True);
1897     assert(ss_bo->virtual);
1898     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1899     memset(ss, 0, sizeof(*ss));
1900     ss->ss0.surface_type = I965_SURFACE_2D;
1901     ss->ss0.surface_format = format;
1902     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1903     ss->ss2.width = width - 1;
1904     ss->ss2.height = height - 1;
1905     ss->ss3.pitch = pitch - 1;
1906     gen7_pp_set_surface_tiling(ss, tiling);
1907     if (IS_HASWELL(i965->intel.device_info))
1908         gen7_render_set_surface_scs(ss);
1909     dri_bo_emit_reloc(ss_bo,
1910                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1911                       surf_bo_offset,
1912                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1913                       surf_bo);
1914     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1915     dri_bo_unmap(ss_bo);
1916 }
1917 
1918 static void
gen7_pp_set_surface2_state(VADriverContextP ctx,struct i965_post_processing_context * pp_context,dri_bo * surf_bo,unsigned long surf_bo_offset,int width,int height,int wpitch,int xoffset,int yoffset,int format,int interleave_chroma,int index)1919 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1920                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1921                            int width, int height, int wpitch,
1922                            int xoffset, int yoffset,
1923                            int format, int interleave_chroma,
1924                            int index)
1925 {
1926     struct gen7_surface_state2 *ss2;
1927     dri_bo *ss2_bo;
1928     unsigned int tiling;
1929     unsigned int swizzle;
1930 
1931     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1932     ss2_bo = pp_context->surface_state_binding_table.bo;
1933     assert(ss2_bo);
1934 
1935     dri_bo_map(ss2_bo, True);
1936     assert(ss2_bo->virtual);
1937     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1938     memset(ss2, 0, sizeof(*ss2));
1939     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1940     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1941     ss2->ss1.width = width - 1;
1942     ss2->ss1.height = height - 1;
1943     ss2->ss2.pitch = wpitch - 1;
1944     ss2->ss2.interleave_chroma = interleave_chroma;
1945     ss2->ss2.surface_format = format;
1946     ss2->ss3.x_offset_for_cb = xoffset;
1947     ss2->ss3.y_offset_for_cb = yoffset;
1948     gen7_pp_set_surface2_tiling(ss2, tiling);
1949     dri_bo_emit_reloc(ss2_bo,
1950                       I915_GEM_DOMAIN_RENDER, 0,
1951                       surf_bo_offset,
1952                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1953                       surf_bo);
1954     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1955     dri_bo_unmap(ss2_bo);
1956 }
1957 
1958 static void
pp_set_media_rw_message_surface(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * surface,int base_index,int is_target,int * width,int * height,int * pitch,int * offset)1959 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1960                                 const struct i965_surface *surface,
1961                                 int base_index, int is_target,
1962                                 int *width, int *height, int *pitch, int *offset)
1963 {
1964     struct object_surface *obj_surface;
1965     struct object_image *obj_image;
1966     dri_bo *bo;
1967     int fourcc = pp_get_surface_fourcc(ctx, surface);
1968     const int Y = 0;
1969     const int U = ((fourcc == VA_FOURCC_YV12) ||
1970                    (fourcc == VA_FOURCC_YV16))
1971                   ? 2 : 1;
1972     const int V = ((fourcc == VA_FOURCC_YV12) ||
1973                    (fourcc == VA_FOURCC_YV16))
1974                   ? 1 : 2;
1975     const int UV = 1;
1976     int interleaved_uv = fourcc == VA_FOURCC_NV12;
1977     int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY);
1978     int full_packed_format = (fourcc == VA_FOURCC_RGBA ||
1979                               fourcc == VA_FOURCC_RGBX ||
1980                               fourcc == VA_FOURCC_BGRA ||
1981                               fourcc == VA_FOURCC_BGRX);
1982     int scale_factor_of_1st_plane_width_in_byte = 1;
1983 
1984     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1985         obj_surface = (struct object_surface *)surface->base;
1986         bo = obj_surface->bo;
1987         width[0] = obj_surface->orig_width;
1988         height[0] = obj_surface->orig_height;
1989         pitch[0] = obj_surface->width;
1990         offset[0] = 0;
1991 
1992         if (full_packed_format) {
1993             scale_factor_of_1st_plane_width_in_byte = 4;
1994         } else if (packed_yuv) {
1995             scale_factor_of_1st_plane_width_in_byte =  2;
1996         } else if (interleaved_uv) {
1997             width[1] = obj_surface->orig_width;
1998             height[1] = obj_surface->orig_height / 2;
1999             pitch[1] = obj_surface->width;
2000             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
2001         } else {
2002             width[1] = obj_surface->orig_width / 2;
2003             height[1] = obj_surface->orig_height / 2;
2004             pitch[1] = obj_surface->width / 2;
2005             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
2006             width[2] = obj_surface->orig_width / 2;
2007             height[2] = obj_surface->orig_height / 2;
2008             pitch[2] = obj_surface->width / 2;
2009             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
2010         }
2011     } else {
2012         obj_image = (struct object_image *)surface->base;
2013         bo = obj_image->bo;
2014         width[0] = obj_image->image.width;
2015         height[0] = obj_image->image.height;
2016         pitch[0] = obj_image->image.pitches[0];
2017         offset[0] = obj_image->image.offsets[0];
2018 
2019         if (full_packed_format) {
2020             scale_factor_of_1st_plane_width_in_byte = 4;
2021         } else if (packed_yuv) {
2022             scale_factor_of_1st_plane_width_in_byte = 2;
2023         } else if (interleaved_uv) {
2024             width[1] = obj_image->image.width;
2025             height[1] = obj_image->image.height / 2;
2026             pitch[1] = obj_image->image.pitches[1];
2027             offset[1] = obj_image->image.offsets[1];
2028         } else {
2029             width[1] = obj_image->image.width / 2;
2030             height[1] = obj_image->image.height / 2;
2031             pitch[1] = obj_image->image.pitches[1];
2032             offset[1] = obj_image->image.offsets[1];
2033             width[2] = obj_image->image.width / 2;
2034             height[2] = obj_image->image.height / 2;
2035             pitch[2] = obj_image->image.pitches[2];
2036             offset[2] = obj_image->image.offsets[2];
2037             if (fourcc == VA_FOURCC_YV16) {
2038                 width[1] = obj_image->image.width / 2;
2039                 height[1] = obj_image->image.height;
2040                 width[2] = obj_image->image.width / 2;
2041                 height[2] = obj_image->image.height;
2042             }
2043         }
2044     }
2045 
2046     /* Y surface */
2047     i965_pp_set_surface_state(ctx, pp_context,
2048                               bo, offset[Y],
2049                               ALIGN(width[Y] *scale_factor_of_1st_plane_width_in_byte, 4) / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
2050                               base_index, is_target);
2051 
2052     if (!packed_yuv && !full_packed_format) {
2053         if (interleaved_uv) {
2054             i965_pp_set_surface_state(ctx, pp_context,
2055                                       bo, offset[UV],
2056                                       ALIGN(width[UV], 4) / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
2057                                       base_index + 1, is_target);
2058         } else {
2059             /* U surface */
2060             i965_pp_set_surface_state(ctx, pp_context,
2061                                       bo, offset[U],
2062                                       ALIGN(width[U], 4) / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
2063                                       base_index + 1, is_target);
2064 
2065             /* V surface */
2066             i965_pp_set_surface_state(ctx, pp_context,
2067                                       bo, offset[V],
2068                                       ALIGN(width[V], 4) / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
2069                                       base_index + 2, is_target);
2070         }
2071     }
2072 
2073 }
2074 
2075 static void
gen7_pp_set_media_rw_message_surface(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * surface,int base_index,int is_target,const VARectangle * rect,int * width,int * height,int * pitch,int * offset)2076 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2077                                      const struct i965_surface *surface,
2078                                      int base_index, int is_target,
2079                                      const VARectangle *rect,
2080                                      int *width, int *height, int *pitch, int *offset)
2081 {
2082     struct object_surface *obj_surface;
2083     struct object_image *obj_image;
2084     dri_bo *bo;
2085     int fourcc = pp_get_surface_fourcc(ctx, surface);
2086     const i965_fourcc_info *fourcc_info = get_fourcc_info(fourcc);
2087 
2088     if (fourcc_info == NULL)
2089         return;
2090 
2091     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
2092         obj_surface = (struct object_surface *)surface->base;
2093         bo = obj_surface->bo;
2094         width[0] = MIN(rect->x + rect->width, obj_surface->orig_width);
2095         height[0] = MIN(rect->y + rect->height, obj_surface->orig_height);
2096         pitch[0] = obj_surface->width;
2097         offset[0] = 0;
2098 
2099         if (fourcc_info->num_planes == 1 && is_target)
2100             width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
2101 
2102         width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
2103         height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
2104         pitch[1] = obj_surface->cb_cr_pitch;
2105         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
2106 
2107         width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
2108         height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
2109         pitch[2] = obj_surface->cb_cr_pitch;
2110         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
2111     } else {
2112         int U = 0, V = 0;
2113 
2114         /* FIXME: add support for ARGB/ABGR image */
2115         obj_image = (struct object_image *)surface->base;
2116         bo = obj_image->bo;
2117         width[0] = MIN(rect->x + rect->width, obj_image->image.width);
2118         height[0] = MIN(rect->y + rect->height, obj_image->image.height);
2119         pitch[0] = obj_image->image.pitches[0];
2120         offset[0] = obj_image->image.offsets[0];
2121 
2122         if (fourcc_info->num_planes == 1) {
2123             if (is_target)
2124                 width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
2125         } else if (fourcc_info->num_planes == 2) {
2126             U = 1, V = 1;
2127         } else {
2128             assert(fourcc_info->num_components == 3);
2129 
2130             U = fourcc_info->components[1].plane;
2131             V = fourcc_info->components[2].plane;
2132             assert((U == 1 && V == 2) ||
2133                    (U == 2 && V == 1));
2134         }
2135 
2136         /* Always set width/height although they aren't used for fourcc_info->num_planes == 1 */
2137         width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
2138         height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
2139         pitch[1] = obj_image->image.pitches[U];
2140         offset[1] = obj_image->image.offsets[U];
2141 
2142         width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
2143         height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
2144         pitch[2] = obj_image->image.pitches[V];
2145         offset[2] = obj_image->image.offsets[V];
2146     }
2147 
2148     if (is_target) {
2149         gen7_pp_set_surface_state(ctx, pp_context,
2150                                   bo, 0,
2151                                   ALIGN(width[0], 4) / 4, height[0], pitch[0],
2152                                   I965_SURFACEFORMAT_R8_UINT,
2153                                   base_index, 1);
2154 
2155         if (fourcc_info->num_planes == 2) {
2156             gen7_pp_set_surface_state(ctx, pp_context,
2157                                       bo, offset[1],
2158                                       ALIGN(width[1], 2) / 2, height[1], pitch[1],
2159                                       I965_SURFACEFORMAT_R8G8_SINT,
2160                                       base_index + 1, 1);
2161         } else if (fourcc_info->num_planes == 3) {
2162             gen7_pp_set_surface_state(ctx, pp_context,
2163                                       bo, offset[1],
2164                                       ALIGN(width[1], 4) / 4, height[1], pitch[1],
2165                                       I965_SURFACEFORMAT_R8_SINT,
2166                                       base_index + 1, 1);
2167             gen7_pp_set_surface_state(ctx, pp_context,
2168                                       bo, offset[2],
2169                                       ALIGN(width[2], 4) / 4, height[2], pitch[2],
2170                                       I965_SURFACEFORMAT_R8_SINT,
2171                                       base_index + 2, 1);
2172         }
2173 
2174         if (fourcc_info->format == I965_COLOR_RGB) {
2175             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2176             /* the format is MSB: X-B-G-R */
2177             pp_static_parameter->grf2.save_avs_rgb_swap = 0;
2178             if ((fourcc == VA_FOURCC_BGRA) ||
2179                 (fourcc == VA_FOURCC_BGRX)) {
2180                 /* It is stored as MSB: X-R-G-B */
2181                 pp_static_parameter->grf2.save_avs_rgb_swap = 1;
2182             }
2183         }
2184     } else {
2185         int format0 = SURFACE_FORMAT_Y8_UNORM;
2186 
2187         switch (fourcc) {
2188         case VA_FOURCC_YUY2:
2189             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
2190             break;
2191 
2192         case VA_FOURCC_UYVY:
2193             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
2194             break;
2195 
2196         default:
2197             break;
2198         }
2199 
2200         if (fourcc_info->format == I965_COLOR_RGB) {
2201             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2202             /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
2203             format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
2204             pp_static_parameter->grf2.src_avs_rgb_swap = 0;
2205             if ((fourcc == VA_FOURCC_BGRA) ||
2206                 (fourcc == VA_FOURCC_BGRX)) {
2207                 pp_static_parameter->grf2.src_avs_rgb_swap = 1;
2208             }
2209         }
2210 
2211         gen7_pp_set_surface2_state(ctx, pp_context,
2212                                    bo, offset[0],
2213                                    width[0], height[0], pitch[0],
2214                                    0, 0,
2215                                    format0, 0,
2216                                    base_index);
2217 
2218         if (fourcc_info->num_planes == 2) {
2219             gen7_pp_set_surface2_state(ctx, pp_context,
2220                                        bo, offset[1],
2221                                        width[1], height[1], pitch[1],
2222                                        0, 0,
2223                                        SURFACE_FORMAT_R8B8_UNORM, 0,
2224                                        base_index + 1);
2225         } else if (fourcc_info->num_planes == 3) {
2226             gen7_pp_set_surface2_state(ctx, pp_context,
2227                                        bo, offset[1],
2228                                        width[1], height[1], pitch[1],
2229                                        0, 0,
2230                                        SURFACE_FORMAT_R8_UNORM, 0,
2231                                        base_index + 1);
2232             gen7_pp_set_surface2_state(ctx, pp_context,
2233                                        bo, offset[2],
2234                                        width[2], height[2], pitch[2],
2235                                        0, 0,
2236                                        SURFACE_FORMAT_R8_UNORM, 0,
2237                                        base_index + 2);
2238         }
2239     }
2240 }
2241 
/* x-step callback of the null module: always reports a single step. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context; /* unused */

    return 1;
}
2247 
/* y-step callback of the null module: always reports a single step. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context; /* unused */

    return 1;
}
2253 
/*
 * Block-parameter callback of the null module: nothing is programmed for
 * block (x, y) and 0 is returned unconditionally.
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context; /* unused */
    (void)x;          /* unused */
    (void)y;          /* unused */

    return 0;
}
2259 
2260 static VAStatus
pp_null_initialize(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect,void * filter_param)2261 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2262                    const struct i965_surface *src_surface,
2263                    const VARectangle *src_rect,
2264                    struct i965_surface *dst_surface,
2265                    const VARectangle *dst_rect,
2266                    void *filter_param)
2267 {
2268     /* private function & data */
2269     pp_context->pp_x_steps = pp_null_x_steps;
2270     pp_context->pp_y_steps = pp_null_y_steps;
2271     pp_context->private_context = NULL;
2272     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
2273 
2274     dst_surface->flags = src_surface->flags;
2275 
2276     return VA_STATUS_SUCCESS;
2277 }
2278 
/* x-step callback of the load/save module: always reports a single step. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context; /* unused */

    return 1;
}
2284 
2285 static int
pp_load_save_y_steps(void * private_context)2286 pp_load_save_y_steps(void *private_context)
2287 {
2288     struct pp_load_save_context *pp_load_save_context = private_context;
2289 
2290     return pp_load_save_context->dest_h / 8;
2291 }
2292 
2293 static int
pp_load_save_set_block_parameter(struct i965_post_processing_context * pp_context,int x,int y)2294 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2295 {
2296     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2297     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)pp_context->private_context;
2298 
2299     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
2300     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
2301 
2302     return 0;
2303 }
2304 
calculate_boundary_block_mask(struct i965_post_processing_context * pp_context,const VARectangle * dst_rect)2305 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
2306 {
2307     int i;
2308     /* x offset of dest surface must be dword aligned.
2309      * so we have to extend dst surface on left edge, and mask out pixels not interested
2310      */
2311     if (dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT) {
2312         pp_context->block_horizontal_mask_left = 0;
2313         for (i = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT; i < GPU_ASM_BLOCK_WIDTH; i++) {
2314             pp_context->block_horizontal_mask_left |= 1 << i;
2315         }
2316     } else {
2317         pp_context->block_horizontal_mask_left = 0xffff;
2318     }
2319 
2320     int dst_width_adjust = dst_rect->width + dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
2321     if (dst_width_adjust % GPU_ASM_BLOCK_WIDTH) {
2322         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust % GPU_ASM_BLOCK_WIDTH)) - 1;
2323     } else {
2324         pp_context->block_horizontal_mask_right = 0xffff;
2325     }
2326 
2327     if (dst_rect->height % GPU_ASM_BLOCK_HEIGHT) {
2328         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height % GPU_ASM_BLOCK_HEIGHT)) - 1;
2329     } else {
2330         pp_context->block_vertical_mask_bottom = 0xff;
2331     }
2332 
2333 }
2334 static VAStatus
pp_plx_load_save_plx_initialize(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect,void * filter_param)2335 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2336                                 const struct i965_surface *src_surface,
2337                                 const VARectangle *src_rect,
2338                                 struct i965_surface *dst_surface,
2339                                 const VARectangle *dst_rect,
2340                                 void *filter_param)
2341 {
2342     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->pp_load_save_context;
2343     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2344     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2345     int width[3], height[3], pitch[3], offset[3];
2346 
2347     /* source surface */
2348     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
2349                                     width, height, pitch, offset);
2350 
2351     /* destination surface */
2352     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
2353                                     width, height, pitch, offset);
2354 
2355     /* private function & data */
2356     pp_context->pp_x_steps = pp_load_save_x_steps;
2357     pp_context->pp_y_steps = pp_load_save_y_steps;
2358     pp_context->private_context = &pp_context->pp_load_save_context;
2359     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
2360 
2361     int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;;
2362     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
2363     pp_load_save_context->dest_y = dst_rect->y;
2364     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
2365     pp_load_save_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2366 
2367     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
2368     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
2369 
2370     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
2371     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
2372 
2373     // update u/v offset for packed yuv
2374     i965_update_src_surface_static_parameter(ctx, pp_context, src_surface);
2375     i965_update_dst_surface_static_parameter(ctx, pp_context, dst_surface);
2376 
2377     dst_surface->flags = src_surface->flags;
2378 
2379     return VA_STATUS_SUCCESS;
2380 }
2381 
/* x-step callback of the scaling module: always reports a single step. */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context; /* unused */

    return 1;
}
2387 
2388 static int
pp_scaling_y_steps(void * private_context)2389 pp_scaling_y_steps(void *private_context)
2390 {
2391     struct pp_scaling_context *pp_scaling_context = private_context;
2392 
2393     return pp_scaling_context->dest_h / 8;
2394 }
2395 
2396 static int
pp_scaling_set_block_parameter(struct i965_post_processing_context * pp_context,int x,int y)2397 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2398 {
2399     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)pp_context->private_context;
2400     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2401     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2402     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2403     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2404 
2405     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
2406     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
2407     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
2408     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
2409 
2410     return 0;
2411 }
2412 
2413 static VAStatus
pp_nv12_scaling_initialize(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect,void * filter_param)2414 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2415                            const struct i965_surface *src_surface,
2416                            const VARectangle *src_rect,
2417                            struct i965_surface *dst_surface,
2418                            const VARectangle *dst_rect,
2419                            void *filter_param)
2420 {
2421     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->pp_scaling_context;
2422     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2423     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2424     struct object_surface *obj_surface;
2425     struct i965_sampler_state *sampler_state;
2426     int in_w, in_h, in_wpitch, in_hpitch;
2427     int out_w, out_h, out_wpitch, out_hpitch;
2428 
2429     /* source surface */
2430     obj_surface = (struct object_surface *)src_surface->base;
2431     in_w = obj_surface->orig_width;
2432     in_h = obj_surface->orig_height;
2433     in_wpitch = obj_surface->width;
2434     in_hpitch = obj_surface->height;
2435 
2436     /* source Y surface index 1 */
2437     i965_pp_set_surface_state(ctx, pp_context,
2438                               obj_surface->bo, 0,
2439                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2440                               1, 0);
2441 
2442     /* source UV surface index 2 */
2443     i965_pp_set_surface_state(ctx, pp_context,
2444                               obj_surface->bo, in_wpitch * in_hpitch,
2445                               ALIGN(in_w, 2) / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2446                               2, 0);
2447 
2448     /* destination surface */
2449     obj_surface = (struct object_surface *)dst_surface->base;
2450     out_w = obj_surface->orig_width;
2451     out_h = obj_surface->orig_height;
2452     out_wpitch = obj_surface->width;
2453     out_hpitch = obj_surface->height;
2454 
2455     /* destination Y surface index 7 */
2456     i965_pp_set_surface_state(ctx, pp_context,
2457                               obj_surface->bo, 0,
2458                               ALIGN(out_w, 4) / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2459                               7, 1);
2460 
2461     /* destination UV surface index 8 */
2462     i965_pp_set_surface_state(ctx, pp_context,
2463                               obj_surface->bo, out_wpitch * out_hpitch,
2464                               ALIGN(out_w, 4) / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2465                               8, 1);
2466 
2467     /* sampler state */
2468     dri_bo_map(pp_context->sampler_state_table.bo, True);
2469     assert(pp_context->sampler_state_table.bo->virtual);
2470     sampler_state = pp_context->sampler_state_table.bo->virtual;
2471 
2472     /* SIMD16 Y index 1 */
2473     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
2474     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2475     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2476     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2477     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2478 
2479     /* SIMD16 UV index 2 */
2480     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
2481     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2482     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2483     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2484     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2485 
2486     dri_bo_unmap(pp_context->sampler_state_table.bo);
2487 
2488     /* private function & data */
2489     pp_context->pp_x_steps = pp_scaling_x_steps;
2490     pp_context->pp_y_steps = pp_scaling_y_steps;
2491     pp_context->private_context = &pp_context->pp_scaling_context;
2492     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
2493 
2494     int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
2495     float src_left_edge_extend = (float)dst_left_edge_extend * src_rect->width / dst_rect->width;
2496     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
2497     pp_scaling_context->dest_y = dst_rect->y;
2498     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2499     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
2500     pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend) / in_w;
2501     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
2502 
2503     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2504 
2505     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float)(src_rect->width + src_left_edge_extend) / in_w / (dst_rect->width + dst_left_edge_extend);
2506     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
2507     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
2508 
2509     dst_surface->flags = src_surface->flags;
2510 
2511     return VA_STATUS_SUCCESS;
2512 }
2513 
2514 static int
pp_avs_x_steps(void * private_context)2515 pp_avs_x_steps(void *private_context)
2516 {
2517     struct pp_avs_context *pp_avs_context = private_context;
2518 
2519     return pp_avs_context->dest_w / 16;
2520 }
2521 
/*
 * Vertical step count for the AVS pass.  The context is not consulted;
 * the result is always 1.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2527 
2528 static int
pp_avs_set_block_parameter(struct i965_post_processing_context * pp_context,int x,int y)2529 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2530 {
2531     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
2532     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2533     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2534     float src_x_steping, src_y_steping, video_step_delta;
2535     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
2536 
2537     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
2538         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2539         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
2540     } else if (tmp_w >= pp_avs_context->dest_w) {
2541         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2542         pp_inline_parameter->grf6.video_step_delta = 0;
2543 
2544         if (x == 0) {
2545             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
2546                                                                                                pp_avs_context->src_normalized_x;
2547         } else {
2548             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2549             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2550             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2551                                                                                                 16 * 15 * video_step_delta / 2;
2552         }
2553     } else {
2554         int n0, n1, n2, nls_left, nls_right;
2555         int factor_a = 5, factor_b = 4;
2556         float f;
2557 
2558         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
2559         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
2560         n2 = tmp_w / (16 * factor_a);
2561         nls_left = n0 + n2;
2562         nls_right = n1 + n2;
2563         f = (float) n2 * 16 / tmp_w;
2564 
2565         if (n0 < 5) {
2566             pp_inline_parameter->grf6.video_step_delta = 0.0;
2567 
2568             if (x == 0) {
2569                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
2570                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2571             } else {
2572                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2573                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2574                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2575                                                                                                     16 * 15 * video_step_delta / 2;
2576             }
2577         } else {
2578             if (x < nls_left) {
2579                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
2580                 float a = f / (nls_left * 16 * factor_b);
2581                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
2582 
2583                 pp_inline_parameter->grf6.video_step_delta = b;
2584 
2585                 if (x == 0) {
2586                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2587                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
2588                 } else {
2589                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2590                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2591                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2592                                                                                                         16 * 15 * video_step_delta / 2;
2593                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
2594                 }
2595             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
2596                 /* scale the center linearly */
2597                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2598                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2599                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2600                                                                                                     16 * 15 * video_step_delta / 2;
2601                 pp_inline_parameter->grf6.video_step_delta = 0.0;
2602                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2603             } else {
2604                 float a = f / (nls_right * 16 * factor_b);
2605                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
2606 
2607                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2608                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2609                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2610                                                                                                     16 * 15 * video_step_delta / 2;
2611                 pp_inline_parameter->grf6.video_step_delta = -b;
2612 
2613                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2614                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
2615                 else
2616                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
2617             }
2618         }
2619     }
2620 
2621     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2622     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2623     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2624     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
2625 
2626     return 0;
2627 }
2628 
2629 static const AVSConfig gen5_avs_config = {
2630     .coeff_frac_bits = 6,
2631     .coeff_epsilon = 1.0f / (1U << 6),
2632     .num_phases = 16,
2633     .num_luma_coeffs = 8,
2634     .num_chroma_coeffs = 4,
2635 
2636     .coeff_range = {
2637         .lower_bound = {
2638             .y_k_h = { -0.25f, -0.5f, -1, 0, 0, -1, -0.5f, -0.25f },
2639             .y_k_v = { -0.25f, -0.5f, -1, 0, 0, -1, -0.5f, -0.25f },
2640             .uv_k_h = { -1, 0, 0, -1 },
2641             .uv_k_v = { -1, 0, 0, -1 },
2642         },
2643         .upper_bound = {
2644             .y_k_h = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2645             .y_k_v = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2646             .uv_k_h = { 1, 2, 2, 1 },
2647             .uv_k_v = { 1, 2, 2, 1 },
2648         },
2649     },
2650 };
2651 
2652 static const AVSConfig gen6_avs_config = {
2653     .coeff_frac_bits = 6,
2654     .coeff_epsilon = 1.0f / (1U << 6),
2655     .num_phases = 16,
2656     .num_luma_coeffs = 8,
2657     .num_chroma_coeffs = 4,
2658 
2659     .coeff_range = {
2660         .lower_bound = {
2661             .y_k_h = { -0.25f, -0.5f, -1, -2, -2, -1, -0.5f, -0.25f },
2662             .y_k_v = { -0.25f, -0.5f, -1, -2, -2, -1, -0.5f, -0.25f },
2663             .uv_k_h = { -1, 0, 0, -1 },
2664             .uv_k_v = { -1, 0, 0, -1 },
2665         },
2666         .upper_bound = {
2667             .y_k_h = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2668             .y_k_v = { 0.25f, 0.5f, 1, 2, 2, 1, 0.5f, 0.25f },
2669             .uv_k_h = { 1, 2, 2, 1 },
2670             .uv_k_v = { 1, 2, 2, 1 },
2671         },
2672     },
2673 };
2674 
2675 static VAStatus
pp_nv12_avs_initialize(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect,void * filter_param)2676 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2677                        const struct i965_surface *src_surface,
2678                        const VARectangle *src_rect,
2679                        struct i965_surface *dst_surface,
2680                        const VARectangle *dst_rect,
2681                        void *filter_param)
2682 {
2683     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
2684     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2685     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2686     struct object_surface *obj_surface;
2687     struct i965_sampler_8x8 *sampler_8x8;
2688     struct i965_sampler_8x8_state *sampler_8x8_state;
2689     int index;
2690     int in_w, in_h, in_wpitch, in_hpitch;
2691     int out_w, out_h, out_wpitch, out_hpitch;
2692     int i;
2693     AVSState * const avs = &pp_avs_context->state;
2694     float sx, sy;
2695 
2696     const int nlas = (pp_context->filter_flags & VA_FILTER_SCALING_MASK) ==
2697                      VA_FILTER_SCALING_NL_ANAMORPHIC;
2698 
2699     /* surface */
2700     obj_surface = (struct object_surface *)src_surface->base;
2701     in_w = obj_surface->orig_width;
2702     in_h = obj_surface->orig_height;
2703     in_wpitch = obj_surface->width;
2704     in_hpitch = obj_surface->height;
2705 
2706     /* source Y surface index 1 */
2707     i965_pp_set_surface2_state(ctx, pp_context,
2708                                obj_surface->bo, 0,
2709                                in_w, in_h, in_wpitch,
2710                                0, 0,
2711                                SURFACE_FORMAT_Y8_UNORM, 0,
2712                                1);
2713 
2714     /* source UV surface index 2 */
2715     i965_pp_set_surface2_state(ctx, pp_context,
2716                                obj_surface->bo, in_wpitch * in_hpitch,
2717                                in_w / 2, in_h / 2, in_wpitch,
2718                                0, 0,
2719                                SURFACE_FORMAT_R8B8_UNORM, 0,
2720                                2);
2721 
2722     /* destination surface */
2723     obj_surface = (struct object_surface *)dst_surface->base;
2724     out_w = obj_surface->orig_width;
2725     out_h = obj_surface->orig_height;
2726     out_wpitch = obj_surface->width;
2727     out_hpitch = obj_surface->height;
2728     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2729 
2730     /* destination Y surface index 7 */
2731     i965_pp_set_surface_state(ctx, pp_context,
2732                               obj_surface->bo, 0,
2733                               ALIGN(out_w, 4) / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2734                               7, 1);
2735 
2736     /* destination UV surface index 8 */
2737     i965_pp_set_surface_state(ctx, pp_context,
2738                               obj_surface->bo, out_wpitch * out_hpitch,
2739                               ALIGN(out_w, 4) / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2740                               8, 1);
2741 
2742     /* sampler 8x8 state */
2743     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2744     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2745     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2746     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2747     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2748 
2749     sx = (float)dst_rect->width / src_rect->width;
2750     sy = (float)dst_rect->height / src_rect->height;
2751     avs_update_coefficients(avs, sx, sy, pp_context->filter_flags);
2752 
2753     assert(avs->config->num_phases == 16);
2754     for (i = 0; i <= 16; i++) {
2755         const AVSCoeffs * const coeffs = &avs->coeffs[i];
2756 
2757         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
2758             intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
2759         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 =
2760             intel_format_convert(coeffs->y_k_h[1], 1, 6, 1);
2761         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 =
2762             intel_format_convert(coeffs->y_k_h[2], 1, 6, 1);
2763         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 =
2764             intel_format_convert(coeffs->y_k_h[3], 1, 6, 1);
2765         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 =
2766             intel_format_convert(coeffs->y_k_h[4], 1, 6, 1);
2767         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 =
2768             intel_format_convert(coeffs->y_k_h[5], 1, 6, 1);
2769         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 =
2770             intel_format_convert(coeffs->y_k_h[6], 1, 6, 1);
2771         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 =
2772             intel_format_convert(coeffs->y_k_h[7], 1, 6, 1);
2773 
2774         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 =
2775             intel_format_convert(coeffs->uv_k_h[0], 1, 6, 1);
2776         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 =
2777             intel_format_convert(coeffs->uv_k_h[1], 1, 6, 1);
2778         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 =
2779             intel_format_convert(coeffs->uv_k_h[2], 1, 6, 1);
2780         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 =
2781             intel_format_convert(coeffs->uv_k_h[3], 1, 6, 1);
2782 
2783         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 =
2784             intel_format_convert(coeffs->y_k_v[0], 1, 6, 1);
2785         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 =
2786             intel_format_convert(coeffs->y_k_v[1], 1, 6, 1);
2787         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 =
2788             intel_format_convert(coeffs->y_k_v[2], 1, 6, 1);
2789         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 =
2790             intel_format_convert(coeffs->y_k_v[3], 1, 6, 1);
2791         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 =
2792             intel_format_convert(coeffs->y_k_v[4], 1, 6, 1);
2793         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 =
2794             intel_format_convert(coeffs->y_k_v[5], 1, 6, 1);
2795         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 =
2796             intel_format_convert(coeffs->y_k_v[6], 1, 6, 1);
2797         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 =
2798             intel_format_convert(coeffs->y_k_v[7], 1, 6, 1);
2799 
2800         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 =
2801             intel_format_convert(coeffs->uv_k_v[0], 1, 6, 1);
2802         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 =
2803             intel_format_convert(coeffs->uv_k_v[1], 1, 6, 1);
2804         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 =
2805             intel_format_convert(coeffs->uv_k_v[2], 1, 6, 1);
2806         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 =
2807             intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
2808     }
2809 
2810     /* Adaptive filter for all channels (DW4.15) */
2811     sampler_8x8_state->coefficients[0].dw4.table_1x_filter_c1 = BIT_CAST((1U << 7));
2812 
2813     sampler_8x8_state->dw136.default_sharpness_level =
2814         -avs_is_needed(pp_context->filter_flags);
2815     sampler_8x8_state->dw137.ilk.bypass_y_adaptive_filtering = 1;
2816     sampler_8x8_state->dw137.ilk.bypass_x_adaptive_filtering = 1;
2817     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2818 
2819     /* sampler 8x8 */
2820     dri_bo_map(pp_context->sampler_state_table.bo, True);
2821     assert(pp_context->sampler_state_table.bo->virtual);
2822     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2823     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2824 
2825     /* sample_8x8 Y index 1 */
2826     index = 1;
2827     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2828     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2829     sampler_8x8[index].dw0.ief_bypass = 1;
2830     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2831     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2832     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2833     sampler_8x8[index].dw2.global_noise_estimation = 22;
2834     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2835     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2836     sampler_8x8[index].dw3.strong_edge_weight = 7;
2837     sampler_8x8[index].dw3.regular_weight = 2;
2838     sampler_8x8[index].dw3.non_edge_weight = 0;
2839     sampler_8x8[index].dw3.gain_factor = 40;
2840     sampler_8x8[index].dw4.steepness_boost = 0;
2841     sampler_8x8[index].dw4.steepness_threshold = 0;
2842     sampler_8x8[index].dw4.mr_boost = 0;
2843     sampler_8x8[index].dw4.mr_threshold = 5;
2844     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2845     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2846     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2847     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2848     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2849     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2850     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2851     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2852     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2853     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2854     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2855     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2856     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2857     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2858     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2859     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2860     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2861     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2862     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2863     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2864     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2865     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2866     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2867     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2868     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2869     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2870     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2871     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2872     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2873     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2874     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2875     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2876     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2877     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2878     sampler_8x8[index].dw13.limiter_boost = 0;
2879     sampler_8x8[index].dw13.minimum_limiter = 10;
2880     sampler_8x8[index].dw13.maximum_limiter = 11;
2881     sampler_8x8[index].dw14.clip_limiter = 130;
2882     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2883                       I915_GEM_DOMAIN_RENDER,
2884                       0,
2885                       0,
2886                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2887                       pp_context->sampler_state_table.bo_8x8);
2888 
2889     /* sample_8x8 UV index 2 */
2890     index = 2;
2891     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2892     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2893     sampler_8x8[index].dw0.ief_bypass = 1;
2894     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2895     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2896     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2897     sampler_8x8[index].dw2.global_noise_estimation = 22;
2898     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2899     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2900     sampler_8x8[index].dw3.strong_edge_weight = 7;
2901     sampler_8x8[index].dw3.regular_weight = 2;
2902     sampler_8x8[index].dw3.non_edge_weight = 0;
2903     sampler_8x8[index].dw3.gain_factor = 40;
2904     sampler_8x8[index].dw4.steepness_boost = 0;
2905     sampler_8x8[index].dw4.steepness_threshold = 0;
2906     sampler_8x8[index].dw4.mr_boost = 0;
2907     sampler_8x8[index].dw4.mr_threshold = 5;
2908     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2909     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2910     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2911     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2912     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2913     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2914     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2915     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2916     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2917     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2918     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2919     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2920     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2921     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2922     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2923     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2924     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2925     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2926     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2927     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2928     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2929     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2930     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2931     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2932     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2933     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2934     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2935     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2936     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2937     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2938     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2939     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2940     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2941     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2942     sampler_8x8[index].dw13.limiter_boost = 0;
2943     sampler_8x8[index].dw13.minimum_limiter = 10;
2944     sampler_8x8[index].dw13.maximum_limiter = 11;
2945     sampler_8x8[index].dw14.clip_limiter = 130;
2946     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2947                       I915_GEM_DOMAIN_RENDER,
2948                       0,
2949                       0,
2950                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2951                       pp_context->sampler_state_table.bo_8x8);
2952 
2953     dri_bo_unmap(pp_context->sampler_state_table.bo);
2954 
2955     /* private function & data */
2956     pp_context->pp_x_steps = pp_avs_x_steps;
2957     pp_context->pp_y_steps = pp_avs_y_steps;
2958     pp_context->private_context = &pp_context->pp_avs_context;
2959     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2960 
2961     int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
2962     float src_left_edge_extend = (float)dst_left_edge_extend * src_rect->width / dst_rect->width;
2963     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2964     pp_avs_context->dest_y = dst_rect->y;
2965     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2966     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2967     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend) / in_w;
2968     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2969     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2970     pp_avs_context->src_h = src_rect->height;
2971 
2972     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2973     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2974 
2975     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float)(src_rect->width + src_left_edge_extend) / in_w / (dst_rect->width + dst_left_edge_extend);
2976     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2977     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2978     pp_inline_parameter->grf6.video_step_delta = 0.0;
2979 
2980     dst_surface->flags = src_surface->flags;
2981 
2982     return VA_STATUS_SUCCESS;
2983 }
2984 
2985 static VAStatus
gen6_nv12_scaling_initialize(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect,void * filter_param)2986 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2987                              const struct i965_surface *src_surface,
2988                              const VARectangle *src_rect,
2989                              struct i965_surface *dst_surface,
2990                              const VARectangle *dst_rect,
2991                              void *filter_param)
2992 {
2993     return pp_nv12_avs_initialize(ctx, pp_context,
2994                                   src_surface,
2995                                   src_rect,
2996                                   dst_surface,
2997                                   dst_rect,
2998                                   filter_param);
2999 }
3000 
3001 static int
gen7_pp_avs_x_steps(void * private_context)3002 gen7_pp_avs_x_steps(void *private_context)
3003 {
3004     struct pp_avs_context *pp_avs_context = private_context;
3005 
3006     return pp_avs_context->dest_w / 16;
3007 }
3008 
3009 static int
gen7_pp_avs_y_steps(void * private_context)3010 gen7_pp_avs_y_steps(void *private_context)
3011 {
3012     struct pp_avs_context *pp_avs_context = private_context;
3013 
3014     return pp_avs_context->dest_h / 16;
3015 }
3016 
3017 static int
gen7_pp_avs_set_block_parameter(struct i965_post_processing_context * pp_context,int x,int y)3018 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3019 {
3020     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
3021     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3022 
3023     pp_inline_parameter->grf9.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
3024     pp_inline_parameter->grf9.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
3025     pp_inline_parameter->grf9.constant_0 = 0xffffffff;
3026     pp_inline_parameter->grf9.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w;
3027 
3028     return 0;
3029 }
3030 
/*
 * Program the packed-format component offsets in the static parameters
 * (grf2.di_destination_packed_{y,u,v}_component_offset) according to the
 * fourcc of `surface`:
 *
 *   VA_FOURCC_YUY2: Y = 0, U = 1, V = 3
 *   VA_FOURCC_UYVY: Y = 1, U = 0, V = 2
 *
 * For any other fourcc the fields are left untouched.
 *
 * NOTE: despite "src" in the name, the fields written are the
 * "di_destination_*" ones, and gen7_pp_plx_avs_initialize() calls this
 * with the destination surface.
 */
static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx,
                                              struct i965_post_processing_context *pp_context,
                                              const struct i965_surface *surface)
{
    struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
    int fourcc = pp_get_surface_fourcc(ctx, surface);

    if (fourcc == VA_FOURCC_YUY2) {
        pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
        pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
        pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
    } else if (fourcc == VA_FOURCC_UYVY) {
        pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
        pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
        pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
    }
}
3048 
3049 static VAStatus
gen7_pp_plx_avs_initialize(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect,void * filter_param)3050 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3051                            const struct i965_surface *src_surface,
3052                            const VARectangle *src_rect,
3053                            struct i965_surface *dst_surface,
3054                            const VARectangle *dst_rect,
3055                            void *filter_param)
3056 {
3057     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
3058     struct i965_driver_data *i965 = i965_driver_data(ctx);
3059     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3060     struct gen7_sampler_8x8 *sampler_8x8;
3061     struct i965_sampler_8x8_state *sampler_8x8_state;
3062     int index, i;
3063     int width[3], height[3], pitch[3], offset[3];
3064     int src_width, src_height;
3065     AVSState * const avs = &pp_avs_context->state;
3066     float sx, sy;
3067     const float * yuv_to_rgb_coefs;
3068     size_t yuv_to_rgb_coefs_size;
3069 
3070     /* source surface */
3071     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
3072                                          src_rect,
3073                                          width, height, pitch, offset);
3074     src_width = width[0];
3075     src_height = height[0];
3076 
3077     /* destination surface */
3078     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
3079                                          dst_rect,
3080                                          width, height, pitch, offset);
3081 
3082     /* sampler 8x8 state */
3083     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
3084     assert(pp_context->sampler_state_table.bo_8x8->virtual);
3085     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
3086     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
3087     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
3088 
3089     sx = (float)dst_rect->width / src_rect->width;
3090     sy = (float)dst_rect->height / src_rect->height;
3091     avs_update_coefficients(avs, sx, sy, pp_context->filter_flags);
3092 
3093     assert(avs->config->num_phases == 16);
3094     for (i = 0; i <= 16; i++) {
3095         const AVSCoeffs * const coeffs = &avs->coeffs[i];
3096 
3097         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
3098             intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
3099         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 =
3100             intel_format_convert(coeffs->y_k_h[1], 1, 6, 1);
3101         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 =
3102             intel_format_convert(coeffs->y_k_h[2], 1, 6, 1);
3103         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 =
3104             intel_format_convert(coeffs->y_k_h[3], 1, 6, 1);
3105         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 =
3106             intel_format_convert(coeffs->y_k_h[4], 1, 6, 1);
3107         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 =
3108             intel_format_convert(coeffs->y_k_h[5], 1, 6, 1);
3109         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 =
3110             intel_format_convert(coeffs->y_k_h[6], 1, 6, 1);
3111         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 =
3112             intel_format_convert(coeffs->y_k_h[7], 1, 6, 1);
3113 
3114         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 =
3115             intel_format_convert(coeffs->uv_k_h[0], 1, 6, 1);
3116         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 =
3117             intel_format_convert(coeffs->uv_k_h[1], 1, 6, 1);
3118         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 =
3119             intel_format_convert(coeffs->uv_k_h[2], 1, 6, 1);
3120         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 =
3121             intel_format_convert(coeffs->uv_k_h[3], 1, 6, 1);
3122 
3123         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 =
3124             intel_format_convert(coeffs->y_k_v[0], 1, 6, 1);
3125         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 =
3126             intel_format_convert(coeffs->y_k_v[1], 1, 6, 1);
3127         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 =
3128             intel_format_convert(coeffs->y_k_v[2], 1, 6, 1);
3129         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 =
3130             intel_format_convert(coeffs->y_k_v[3], 1, 6, 1);
3131         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 =
3132             intel_format_convert(coeffs->y_k_v[4], 1, 6, 1);
3133         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 =
3134             intel_format_convert(coeffs->y_k_v[5], 1, 6, 1);
3135         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 =
3136             intel_format_convert(coeffs->y_k_v[6], 1, 6, 1);
3137         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 =
3138             intel_format_convert(coeffs->y_k_v[7], 1, 6, 1);
3139 
3140         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 =
3141             intel_format_convert(coeffs->uv_k_v[0], 1, 6, 1);
3142         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 =
3143             intel_format_convert(coeffs->uv_k_v[1], 1, 6, 1);
3144         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 =
3145             intel_format_convert(coeffs->uv_k_v[2], 1, 6, 1);
3146         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 =
3147             intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
3148     }
3149 
3150     sampler_8x8_state->dw136.default_sharpness_level =
3151         -avs_is_needed(pp_context->filter_flags);
3152     if (IS_HASWELL(i965->intel.device_info)) {
3153         sampler_8x8_state->dw137.hsw.adaptive_filter_for_all_channel = 1;
3154         sampler_8x8_state->dw137.hsw.bypass_y_adaptive_filtering = 1;
3155         sampler_8x8_state->dw137.hsw.bypass_x_adaptive_filtering = 1;
3156     } else {
3157         sampler_8x8_state->coefficients[0].dw4.table_1x_filter_c1 = BIT_CAST((1U << 7));
3158         sampler_8x8_state->dw137.ilk.bypass_y_adaptive_filtering = 1;
3159         sampler_8x8_state->dw137.ilk.bypass_x_adaptive_filtering = 1;
3160     }
3161     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
3162 
3163     /* sampler 8x8 */
3164     dri_bo_map(pp_context->sampler_state_table.bo, True);
3165     assert(pp_context->sampler_state_table.bo->virtual);
3166     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
3167     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
3168 
3169     /* sample_8x8 Y index 4 */
3170     index = 4;
3171     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3172     sampler_8x8[index].dw0.global_noise_estimation = 255;
3173     sampler_8x8[index].dw0.ief_bypass = 1;
3174 
3175     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3176 
3177     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3178     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3179     sampler_8x8[index].dw2.r5x_coefficient = 9;
3180     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3181     sampler_8x8[index].dw2.r5c_coefficient = 3;
3182 
3183     sampler_8x8[index].dw3.r3x_coefficient = 27;
3184     sampler_8x8[index].dw3.r3c_coefficient = 5;
3185     sampler_8x8[index].dw3.gain_factor = 40;
3186     sampler_8x8[index].dw3.non_edge_weight = 1;
3187     sampler_8x8[index].dw3.regular_weight = 2;
3188     sampler_8x8[index].dw3.strong_edge_weight = 7;
3189     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3190 
3191     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3192                       I915_GEM_DOMAIN_RENDER,
3193                       0,
3194                       0,
3195                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3196                       pp_context->sampler_state_table.bo_8x8);
3197 
3198     /* sample_8x8 UV index 8 */
3199     index = 8;
3200     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3201     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3202     sampler_8x8[index].dw0.global_noise_estimation = 255;
3203     sampler_8x8[index].dw0.ief_bypass = 1;
3204     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3205     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3206     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3207     sampler_8x8[index].dw2.r5x_coefficient = 9;
3208     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3209     sampler_8x8[index].dw2.r5c_coefficient = 3;
3210     sampler_8x8[index].dw3.r3x_coefficient = 27;
3211     sampler_8x8[index].dw3.r3c_coefficient = 5;
3212     sampler_8x8[index].dw3.gain_factor = 40;
3213     sampler_8x8[index].dw3.non_edge_weight = 1;
3214     sampler_8x8[index].dw3.regular_weight = 2;
3215     sampler_8x8[index].dw3.strong_edge_weight = 7;
3216     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3217 
3218     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3219                       I915_GEM_DOMAIN_RENDER,
3220                       0,
3221                       0,
3222                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3223                       pp_context->sampler_state_table.bo_8x8);
3224 
3225     /* sampler_8x8 V, index 12 */
3226     index = 12;
3227     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3228     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3229     sampler_8x8[index].dw0.global_noise_estimation = 255;
3230     sampler_8x8[index].dw0.ief_bypass = 1;
3231     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3232     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3233     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3234     sampler_8x8[index].dw2.r5x_coefficient = 9;
3235     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3236     sampler_8x8[index].dw2.r5c_coefficient = 3;
3237     sampler_8x8[index].dw3.r3x_coefficient = 27;
3238     sampler_8x8[index].dw3.r3c_coefficient = 5;
3239     sampler_8x8[index].dw3.gain_factor = 40;
3240     sampler_8x8[index].dw3.non_edge_weight = 1;
3241     sampler_8x8[index].dw3.regular_weight = 2;
3242     sampler_8x8[index].dw3.strong_edge_weight = 7;
3243     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3244 
3245     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3246                       I915_GEM_DOMAIN_RENDER,
3247                       0,
3248                       0,
3249                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3250                       pp_context->sampler_state_table.bo_8x8);
3251 
3252     dri_bo_unmap(pp_context->sampler_state_table.bo);
3253 
3254     /* private function & data */
3255     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
3256     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
3257     pp_context->private_context = &pp_context->pp_avs_context;
3258     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
3259 
3260     int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
3261     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
3262     pp_avs_context->dest_y = dst_rect->y;
3263     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
3264     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
3265     pp_avs_context->src_w = src_rect->width;
3266     pp_avs_context->src_h = src_rect->height;
3267     pp_avs_context->horiz_range = (float)src_rect->width / src_width;
3268 
3269     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
3270     dw = MAX(dw, dst_rect->width + dst_left_edge_extend);
3271 
3272     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3273     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
3274     if (IS_HASWELL(i965->intel.device_info))
3275         pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */
3276 
3277     if (pp_static_parameter->grf2.avs_wa_enable) {
3278         int src_fourcc = pp_get_surface_fourcc(ctx, src_surface);
3279         if ((src_fourcc == VA_FOURCC_RGBA) ||
3280             (src_fourcc == VA_FOURCC_RGBX) ||
3281             (src_fourcc == VA_FOURCC_BGRA) ||
3282             (src_fourcc == VA_FOURCC_BGRX)) {
3283             pp_static_parameter->grf2.avs_wa_enable = 0;
3284         }
3285     }
3286 
3287     pp_static_parameter->grf2.avs_wa_width = src_width;
3288     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width);
3289     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width);
3290     pp_static_parameter->grf2.alpha = 255;
3291 
3292     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
3293     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
3294     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
3295                                                                    (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
3296     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
3297                                                                      (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
3298 
3299     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
3300 
3301     yuv_to_rgb_coefs = i915_color_standard_to_coefs(i915_filter_to_color_standard(src_surface->flags &
3302                                                                                   VA_SRC_COLOR_MASK),
3303                                                     &yuv_to_rgb_coefs_size);
3304     memcpy(&pp_static_parameter->grf7, yuv_to_rgb_coefs, yuv_to_rgb_coefs_size);
3305 
3306     dst_surface->flags = src_surface->flags;
3307 
3308     return VA_STATUS_SUCCESS;
3309 }
3310 
/*
 * Horizontal step count for the DNDI pass: always 1.
 *
 * private_context is accepted only to match the pp_x_steps callback
 * signature and is never read.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context; /* intentionally unused */

    return 1;
}
3316 
3317 static int
pp_dndi_y_steps(void * private_context)3318 pp_dndi_y_steps(void *private_context)
3319 {
3320     struct pp_dndi_context *pp_dndi_context = private_context;
3321 
3322     return pp_dndi_context->dest_h / 4;
3323 }
3324 
3325 static int
pp_dndi_set_block_parameter(struct i965_post_processing_context * pp_context,int x,int y)3326 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3327 {
3328     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3329 
3330     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3331     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3332 
3333     return 0;
3334 }
3335 
3336 static VAStatus
pp_nv12_dndi_initialize(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect,void * filter_param)3337 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3338                         const struct i965_surface *src_surface,
3339                         const VARectangle *src_rect,
3340                         struct i965_surface *dst_surface,
3341                         const VARectangle *dst_rect,
3342                         void *filter_param)
3343 {
3344     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
3345     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3346     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3347     const VAProcPipelineParameterBuffer * const pipe_params =
3348         pp_context->pipeline_param;
3349     const VAProcFilterParameterBufferDeinterlacing * const deint_params =
3350         filter_param;
3351     struct object_surface * const src_obj_surface = (struct object_surface *)
3352                                                     src_surface->base;
3353     struct object_surface * const dst_obj_surface = (struct object_surface *)
3354                                                     dst_surface->base;
3355     struct object_surface *obj_surface;
3356     struct i965_sampler_dndi *sampler_dndi;
3357     int index, dndi_top_first;
3358     int w, h, orig_w, orig_h;
3359     VAStatus status;
3360 
3361     status = pp_dndi_context_init_surface_params(dndi_ctx, src_obj_surface,
3362                                                  pipe_params, deint_params);
3363     if (status != VA_STATUS_SUCCESS)
3364         return status;
3365 
3366     status = pp_dndi_context_ensure_surfaces(ctx, pp_context,
3367                                              src_obj_surface, dst_obj_surface);
3368     if (status != VA_STATUS_SUCCESS)
3369         return status;
3370 
3371     status = pp_dndi_context_ensure_surfaces_storage(ctx, pp_context,
3372                                                      src_obj_surface, dst_obj_surface);
3373     if (status != VA_STATUS_SUCCESS)
3374         return status;
3375 
3376     /* Current input surface (index = 4) */
3377     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].obj_surface;
3378     i965_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3379                                obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3380                                0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 4);
3381 
3382     /* Previous input surface (index = 5) */
3383     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS].obj_surface;
3384     i965_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3385                                obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3386                                0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 5);
3387 
3388     /* STMM input surface (index = 6) */
3389     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_STMM].obj_surface;
3390     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3391                               obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3392                               I965_SURFACEFORMAT_R8_UNORM, 6, 1);
3393 
3394     /* Previous output surfaces (index = { 7, 8 }) */
3395     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS].obj_surface;
3396     w = obj_surface->width;
3397     h = obj_surface->height;
3398     orig_w = obj_surface->orig_width;
3399     orig_h = obj_surface->orig_height;
3400 
3401     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3402                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 7, 1);
3403     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3404                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 8, 1);
3405 
3406     /* Current output surfaces (index = { 10, 11 }) */
3407     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT].obj_surface;
3408     w = obj_surface->width;
3409     h = obj_surface->height;
3410     orig_w = obj_surface->orig_width;
3411     orig_h = obj_surface->orig_height;
3412 
3413     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3414                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 10, 1);
3415     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3416                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 11, 1);
3417 
3418     /* STMM output surface (index = 20) */
3419     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM].obj_surface;
3420     i965_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3421                               obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3422                               I965_SURFACEFORMAT_R8_UNORM, 20, 1);
3423 
3424     dndi_top_first = !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
3425 
3426     /* sampler dndi */
3427     dri_bo_map(pp_context->sampler_state_table.bo, True);
3428     assert(pp_context->sampler_state_table.bo->virtual);
3429     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3430     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3431 
3432     /* sample dndi index 1 */
3433     index = 0;
3434     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3435     sampler_dndi[index].dw0.denoise_history_delta = 7;          // 0-15, default is 8
3436     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3437     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3438 
3439     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3440     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3441     sampler_dndi[index].dw1.stmm_c2 = 1;
3442     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3443     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3444 
3445     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3446     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 1;    // 0-15
3447     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3448     sampler_dndi[index].dw2.good_neighbor_threshold = 12;                // 0-63
3449 
3450     sampler_dndi[index].dw3.maximum_stmm = 150;
3451     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3452     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3453     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3454     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3455 
3456     sampler_dndi[index].dw4.sdi_delta = 5;
3457     sampler_dndi[index].dw4.sdi_threshold = 100;
3458     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3459     sampler_dndi[index].dw4.stmm_shift_up = 1;
3460     sampler_dndi[index].dw4.stmm_shift_down = 3;
3461     sampler_dndi[index].dw4.minimum_stmm = 118;
3462 
3463     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3464     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3465     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3466     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3467 
3468     sampler_dndi[index].dw6.dn_enable = 1;
3469     sampler_dndi[index].dw6.di_enable = 1;
3470     sampler_dndi[index].dw6.di_partial = 0;
3471     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3472     sampler_dndi[index].dw6.dndi_stream_id = 0;
3473     sampler_dndi[index].dw6.dndi_first_frame = dndi_ctx->is_first_frame;
3474     sampler_dndi[index].dw6.progressive_dn = 0;
3475     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3476     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3477     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3478 
3479     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3480     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3481     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3482     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3483 
3484     dri_bo_unmap(pp_context->sampler_state_table.bo);
3485 
3486     /* private function & data */
3487     pp_context->pp_x_steps = pp_dndi_x_steps;
3488     pp_context->pp_y_steps = pp_dndi_y_steps;
3489     pp_context->private_context = dndi_ctx;
3490     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
3491 
3492     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3493     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
3494     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
3495     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
3496 
3497     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3498     pp_inline_parameter->grf5.number_blocks = w / 16;
3499     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3500     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3501 
3502     dndi_ctx->dest_w = w;
3503     dndi_ctx->dest_h = h;
3504 
3505     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3506     return VA_STATUS_SUCCESS;
3507 }
3508 
/*
 * Horizontal step count for the DN pass: always 1.
 *
 * private_context is accepted only to match the pp_x_steps callback
 * signature and is never read.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context; /* intentionally unused */

    return 1;
}
3514 
3515 static int
pp_dn_y_steps(void * private_context)3516 pp_dn_y_steps(void *private_context)
3517 {
3518     struct pp_dn_context *pp_dn_context = private_context;
3519 
3520     return pp_dn_context->dest_h / 8;
3521 }
3522 
3523 static int
pp_dn_set_block_parameter(struct i965_post_processing_context * pp_context,int x,int y)3524 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3525 {
3526     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3527 
3528     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3529     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
3530 
3531     return 0;
3532 }
3533 
3534 static VAStatus
pp_nv12_dn_initialize(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect,void * filter_param)3535 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3536                       const struct i965_surface *src_surface,
3537                       const VARectangle *src_rect,
3538                       struct i965_surface *dst_surface,
3539                       const VARectangle *dst_rect,
3540                       void *filter_param)
3541 {
3542     struct i965_driver_data *i965 = i965_driver_data(ctx);
3543     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3544     struct object_surface *obj_surface;
3545     struct i965_sampler_dndi *sampler_dndi;
3546     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3547     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3548     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3549     int index;
3550     int w, h;
3551     int orig_w, orig_h;
3552     int dn_strength = 15;
3553     int dndi_top_first = 1;
3554     int dn_progressive = 0;
3555 
3556     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3557         dndi_top_first = 1;
3558         dn_progressive = 1;
3559     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3560         dndi_top_first = 1;
3561         dn_progressive = 0;
3562     } else {
3563         dndi_top_first = 0;
3564         dn_progressive = 0;
3565     }
3566 
3567     if (dn_filter_param) {
3568         float value = dn_filter_param->value;
3569 
3570         if (value > 1.0)
3571             value = 1.0;
3572 
3573         if (value < 0.0)
3574             value = 0.0;
3575 
3576         dn_strength = (int)(value * 31.0F);
3577     }
3578 
3579     /* surface */
3580     obj_surface = (struct object_surface *)src_surface->base;
3581     orig_w = obj_surface->orig_width;
3582     orig_h = obj_surface->orig_height;
3583     w = obj_surface->width;
3584     h = obj_surface->height;
3585 
3586     if (pp_dn_context->stmm_bo == NULL) {
3587         pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
3588                                               "STMM surface",
3589                                               w * h,
3590                                               4096);
3591         assert(pp_dn_context->stmm_bo);
3592     }
3593 
3594     /* source UV surface index 2 */
3595     i965_pp_set_surface_state(ctx, pp_context,
3596                               obj_surface->bo, w * h,
3597                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3598                               2, 0);
3599 
3600     /* source YUV surface index 4 */
3601     i965_pp_set_surface2_state(ctx, pp_context,
3602                                obj_surface->bo, 0,
3603                                orig_w, orig_h, w,
3604                                0, h,
3605                                SURFACE_FORMAT_PLANAR_420_8, 1,
3606                                4);
3607 
3608     /* source STMM surface index 20 */
3609     i965_pp_set_surface_state(ctx, pp_context,
3610                               pp_dn_context->stmm_bo, 0,
3611                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3612                               20, 1);
3613 
3614     /* destination surface */
3615     obj_surface = (struct object_surface *)dst_surface->base;
3616     orig_w = obj_surface->orig_width;
3617     orig_h = obj_surface->orig_height;
3618     w = obj_surface->width;
3619     h = obj_surface->height;
3620 
3621     /* destination Y surface index 7 */
3622     i965_pp_set_surface_state(ctx, pp_context,
3623                               obj_surface->bo, 0,
3624                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3625                               7, 1);
3626 
3627     /* destination UV surface index 8 */
3628     i965_pp_set_surface_state(ctx, pp_context,
3629                               obj_surface->bo, w * h,
3630                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3631                               8, 1);
3632     /* sampler dn */
3633     dri_bo_map(pp_context->sampler_state_table.bo, True);
3634     assert(pp_context->sampler_state_table.bo->virtual);
3635     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3636     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3637 
3638     /* sample dndi index 1 */
3639     index = 0;
3640     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3641     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
3642     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3643     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3644 
3645     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3646     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3647     sampler_dndi[index].dw1.stmm_c2 = 0;
3648     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3649     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3650 
3651     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3652     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
3653     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3654     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
3655 
3656     sampler_dndi[index].dw3.maximum_stmm = 128;
3657     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3658     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3659     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3660     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3661 
3662     sampler_dndi[index].dw4.sdi_delta = 8;
3663     sampler_dndi[index].dw4.sdi_threshold = 128;
3664     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3665     sampler_dndi[index].dw4.stmm_shift_up = 0;
3666     sampler_dndi[index].dw4.stmm_shift_down = 0;
3667     sampler_dndi[index].dw4.minimum_stmm = 0;
3668 
3669     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3670     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3671     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3672     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3673 
3674     sampler_dndi[index].dw6.dn_enable = 1;
3675     sampler_dndi[index].dw6.di_enable = 0;
3676     sampler_dndi[index].dw6.di_partial = 0;
3677     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3678     sampler_dndi[index].dw6.dndi_stream_id = 1;
3679     sampler_dndi[index].dw6.dndi_first_frame = 1;
3680     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
3681     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3682     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3683     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3684 
3685     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3686     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3687     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3688     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3689 
3690     dri_bo_unmap(pp_context->sampler_state_table.bo);
3691 
3692     /* private function & data */
3693     pp_context->pp_x_steps = pp_dn_x_steps;
3694     pp_context->pp_y_steps = pp_dn_y_steps;
3695     pp_context->private_context = &pp_context->pp_dn_context;
3696     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
3697 
3698     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3699     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
3700     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
3701     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
3702 
3703     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3704     pp_inline_parameter->grf5.number_blocks = w / 16;
3705     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3706     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3707 
3708     pp_dn_context->dest_w = w;
3709     pp_dn_context->dest_h = h;
3710 
3711     dst_surface->flags = src_surface->flags;
3712 
3713     return VA_STATUS_SUCCESS;
3714 }
3715 
3716 static int
gen7_pp_dndi_x_steps(void * private_context)3717 gen7_pp_dndi_x_steps(void *private_context)
3718 {
3719     struct pp_dndi_context *pp_dndi_context = private_context;
3720 
3721     return pp_dndi_context->dest_w / 16;
3722 }
3723 
3724 static int
gen7_pp_dndi_y_steps(void * private_context)3725 gen7_pp_dndi_y_steps(void *private_context)
3726 {
3727     struct pp_dndi_context *pp_dndi_context = private_context;
3728 
3729     return pp_dndi_context->dest_h / 4;
3730 }
3731 
3732 static int
gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context * pp_context,int x,int y)3733 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3734 {
3735     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3736 
3737     pp_inline_parameter->grf9.destination_block_horizontal_origin = x * 16;
3738     pp_inline_parameter->grf9.destination_block_vertical_origin = y * 4;
3739 
3740     return 0;
3741 }
3742 
3743 static VAStatus
gen7_pp_nv12_dndi_initialize(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect,void * filter_param)3744 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3745                              const struct i965_surface *src_surface,
3746                              const VARectangle *src_rect,
3747                              struct i965_surface *dst_surface,
3748                              const VARectangle *dst_rect,
3749                              void *filter_param)
3750 {
3751     struct pp_dndi_context * const dndi_ctx = &pp_context->pp_dndi_context;
3752     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3753     const VAProcPipelineParameterBuffer * const pipe_params =
3754         pp_context->pipeline_param;
3755     const VAProcFilterParameterBufferDeinterlacing * const deint_params =
3756         filter_param;
3757     struct object_surface * const src_obj_surface = (struct object_surface *)
3758                                                     src_surface->base;
3759     struct object_surface * const dst_obj_surface = (struct object_surface *)
3760                                                     dst_surface->base;
3761     struct object_surface *obj_surface;
3762     struct gen7_sampler_dndi *sampler_dndi;
3763     int index, dndi_top_first;
3764     int w, h, orig_w, orig_h;
3765     VAStatus status;
3766 
3767     status = pp_dndi_context_init_surface_params(dndi_ctx, src_obj_surface,
3768                                                  pipe_params, deint_params);
3769     if (status != VA_STATUS_SUCCESS)
3770         return status;
3771 
3772     status = pp_dndi_context_ensure_surfaces(ctx, pp_context,
3773                                              src_obj_surface, dst_obj_surface);
3774     if (status != VA_STATUS_SUCCESS)
3775         return status;
3776 
3777     status = pp_dndi_context_ensure_surfaces_storage(ctx, pp_context,
3778                                                      src_obj_surface, dst_obj_surface);
3779     if (status != VA_STATUS_SUCCESS)
3780         return status;
3781 
3782     /* Current input surface (index = 3) */
3783     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_CURRENT].obj_surface;
3784     gen7_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3785                                obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3786                                0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 3);
3787 
3788     /* Previous input surface (index = 4) */
3789     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_PREVIOUS].obj_surface;
3790     gen7_pp_set_surface2_state(ctx, pp_context, obj_surface->bo, 0,
3791                                obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3792                                0, obj_surface->y_cb_offset, SURFACE_FORMAT_PLANAR_420_8, 1, 4);
3793 
3794     /* STMM input surface (index = 5) */
3795     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_IN_STMM].obj_surface;
3796     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3797                               obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3798                               I965_SURFACEFORMAT_R8_UNORM, 5, 1);
3799 
3800     /* Previous output surfaces (index = { 27, 28 }) */
3801     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_PREVIOUS].obj_surface;
3802     w = obj_surface->width;
3803     h = obj_surface->height;
3804     orig_w = obj_surface->orig_width;
3805     orig_h = obj_surface->orig_height;
3806 
3807     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3808                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 27, 1);
3809     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3810                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 28, 1);
3811 
3812     /* Current output surfaces (index = { 30, 31 }) */
3813     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_CURRENT].obj_surface;
3814     w = obj_surface->width;
3815     h = obj_surface->height;
3816     orig_w = obj_surface->orig_width;
3817     orig_h = obj_surface->orig_height;
3818 
3819     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3820                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM, 30, 1);
3821     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, w * h,
3822                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM, 31, 1);
3823 
3824     /* STMM output surface (index = 33) */
3825     obj_surface = dndi_ctx->frame_store[DNDI_FRAME_OUT_STMM].obj_surface;
3826     gen7_pp_set_surface_state(ctx, pp_context, obj_surface->bo, 0,
3827                               obj_surface->orig_width, obj_surface->orig_height, obj_surface->width,
3828                               I965_SURFACEFORMAT_R8_UNORM, 33, 1);
3829 
3830     dndi_top_first = !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD);
3831 
3832     /* sampler dndi */
3833     dri_bo_map(pp_context->sampler_state_table.bo, True);
3834     assert(pp_context->sampler_state_table.bo->virtual);
3835     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3836     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3837 
3838     /* sample dndi index 0 */
3839     index = 0;
3840     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3841     sampler_dndi[index].dw0.dnmh_delt = 7;
3842     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3843     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3844     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3845     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3846 
3847     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3848     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3849     sampler_dndi[index].dw1.stmm_c2 = 2;
3850     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3851     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3852 
3853     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3854     sampler_dndi[index].dw2.bne_edge_th = 1;
3855     sampler_dndi[index].dw2.smooth_mv_th = 0;
3856     sampler_dndi[index].dw2.sad_tight_th = 5;
3857     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3858     sampler_dndi[index].dw2.good_neighbor_th = 12;
3859 
3860     sampler_dndi[index].dw3.maximum_stmm = 150;
3861     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3862     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3863     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3864     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3865 
3866     sampler_dndi[index].dw4.sdi_delta = 5;
3867     sampler_dndi[index].dw4.sdi_threshold = 100;
3868     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3869     sampler_dndi[index].dw4.stmm_shift_up = 1;
3870     sampler_dndi[index].dw4.stmm_shift_down = 3;
3871     sampler_dndi[index].dw4.minimum_stmm = 118;
3872 
3873     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3874     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3875     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3876     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3877     sampler_dndi[index].dw6.dn_enable = 0;
3878     sampler_dndi[index].dw6.di_enable = 1;
3879     sampler_dndi[index].dw6.di_partial = 0;
3880     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3881     sampler_dndi[index].dw6.dndi_stream_id = 1;
3882     sampler_dndi[index].dw6.dndi_first_frame = dndi_ctx->is_first_frame;
3883     sampler_dndi[index].dw6.progressive_dn = 0;
3884     sampler_dndi[index].dw6.mcdi_enable =
3885         (deint_params->algorithm == VAProcDeinterlacingMotionCompensated);
3886     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3887     sampler_dndi[index].dw6.cat_th1 = 0;
3888     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3889     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3890 
3891     sampler_dndi[index].dw7.sad_tha = 5;
3892     sampler_dndi[index].dw7.sad_thb = 10;
3893     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3894     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3895     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3896     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3897     sampler_dndi[index].dw7.neighborpixel_th = 10;
3898     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3899 
3900     dri_bo_unmap(pp_context->sampler_state_table.bo);
3901 
3902     /* private function & data */
3903     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3904     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3905     pp_context->private_context = dndi_ctx;
3906     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3907 
3908     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3909     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3910     pp_static_parameter->grf1.di_top_field_first = 0;
3911     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3912 
3913     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3914     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3915     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3916 
3917     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3918     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3919 
3920     dndi_ctx->dest_w = w;
3921     dndi_ctx->dest_h = h;
3922 
3923     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3924     return VA_STATUS_SUCCESS;
3925 }
3926 
3927 static int
gen7_pp_dn_x_steps(void * private_context)3928 gen7_pp_dn_x_steps(void *private_context)
3929 {
3930     struct pp_dn_context *pp_dn_context = private_context;
3931 
3932     return pp_dn_context->dest_w / 16;
3933 }
3934 
3935 static int
gen7_pp_dn_y_steps(void * private_context)3936 gen7_pp_dn_y_steps(void *private_context)
3937 {
3938     struct pp_dn_context *pp_dn_context = private_context;
3939 
3940     return pp_dn_context->dest_h / 4;
3941 }
3942 
3943 static int
gen7_pp_dn_set_block_parameter(struct i965_post_processing_context * pp_context,int x,int y)3944 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3945 {
3946     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3947 
3948     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3949     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3950 
3951     return 0;
3952 }
3953 
3954 static VAStatus
gen7_pp_nv12_dn_initialize(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect,void * filter_param)3955 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3956                            const struct i965_surface *src_surface,
3957                            const VARectangle *src_rect,
3958                            struct i965_surface *dst_surface,
3959                            const VARectangle *dst_rect,
3960                            void *filter_param)
3961 {
3962     struct i965_driver_data *i965 = i965_driver_data(ctx);
3963     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3964     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3965     struct object_surface *obj_surface;
3966     struct gen7_sampler_dndi *sampler_dn;
3967     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3968     int index;
3969     int w, h;
3970     int orig_w, orig_h;
3971     int dn_strength = 15;
3972     int dndi_top_first = 1;
3973     int dn_progressive = 0;
3974 
3975     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3976         dndi_top_first = 1;
3977         dn_progressive = 1;
3978     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3979         dndi_top_first = 1;
3980         dn_progressive = 0;
3981     } else {
3982         dndi_top_first = 0;
3983         dn_progressive = 0;
3984     }
3985 
3986     if (dn_filter_param) {
3987         float value = dn_filter_param->value;
3988 
3989         if (value > 1.0)
3990             value = 1.0;
3991 
3992         if (value < 0.0)
3993             value = 0.0;
3994 
3995         dn_strength = (int)(value * 31.0F);
3996     }
3997 
3998     /* surface */
3999     obj_surface = (struct object_surface *)src_surface->base;
4000     orig_w = obj_surface->orig_width;
4001     orig_h = obj_surface->orig_height;
4002     w = obj_surface->width;
4003     h = obj_surface->height;
4004 
4005     if (pp_dn_context->stmm_bo == NULL) {
4006         pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
4007                                               "STMM surface",
4008                                               w * h,
4009                                               4096);
4010         assert(pp_dn_context->stmm_bo);
4011     }
4012 
4013     /* source UV surface index 1 */
4014     gen7_pp_set_surface_state(ctx, pp_context,
4015                               obj_surface->bo, w * h,
4016                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4017                               1, 0);
4018 
4019     /* source YUV surface index 3 */
4020     gen7_pp_set_surface2_state(ctx, pp_context,
4021                                obj_surface->bo, 0,
4022                                orig_w, orig_h, w,
4023                                0, h,
4024                                SURFACE_FORMAT_PLANAR_420_8, 1,
4025                                3);
4026 
4027     /* source (temporal reference) YUV surface index 4 */
4028     gen7_pp_set_surface2_state(ctx, pp_context,
4029                                obj_surface->bo, 0,
4030                                orig_w, orig_h, w,
4031                                0, h,
4032                                SURFACE_FORMAT_PLANAR_420_8, 1,
4033                                4);
4034 
4035     /* STMM / History Statistics input surface, index 5 */
4036     gen7_pp_set_surface_state(ctx, pp_context,
4037                               pp_dn_context->stmm_bo, 0,
4038                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4039                               33, 1);
4040 
4041     /* destination surface */
4042     obj_surface = (struct object_surface *)dst_surface->base;
4043     orig_w = obj_surface->orig_width;
4044     orig_h = obj_surface->orig_height;
4045     w = obj_surface->width;
4046     h = obj_surface->height;
4047 
4048     /* destination Y surface index 24 */
4049     gen7_pp_set_surface_state(ctx, pp_context,
4050                               obj_surface->bo, 0,
4051                               ALIGN(orig_w, 4) / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4052                               24, 1);
4053 
4054     /* destination UV surface index 25 */
4055     gen7_pp_set_surface_state(ctx, pp_context,
4056                               obj_surface->bo, w * h,
4057                               ALIGN(orig_w, 4) / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4058                               25, 1);
4059 
4060     /* sampler dn */
4061     dri_bo_map(pp_context->sampler_state_table.bo, True);
4062     assert(pp_context->sampler_state_table.bo->virtual);
4063     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
4064     sampler_dn = pp_context->sampler_state_table.bo->virtual;
4065 
4066     /* sample dn index 1 */
4067     index = 0;
4068     sampler_dn[index].dw0.denoise_asd_threshold = 0;
4069     sampler_dn[index].dw0.dnmh_delt = 8;
4070     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
4071     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
4072     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
4073     sampler_dn[index].dw0.denoise_stad_threshold = 0;
4074 
4075     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
4076     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
4077     sampler_dn[index].dw1.stmm_c2 = 0;
4078     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
4079     sampler_dn[index].dw1.temporal_difference_threshold = 16;
4080 
4081     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
4082     sampler_dn[index].dw2.bne_edge_th = 1;
4083     sampler_dn[index].dw2.smooth_mv_th = 0;
4084     sampler_dn[index].dw2.sad_tight_th = 5;
4085     sampler_dn[index].dw2.cat_slope_minus1 = 9;
4086     sampler_dn[index].dw2.good_neighbor_th = 4;
4087 
4088     sampler_dn[index].dw3.maximum_stmm = 128;
4089     sampler_dn[index].dw3.multipler_for_vecm = 2;
4090     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
4091     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
4092     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
4093 
4094     sampler_dn[index].dw4.sdi_delta = 8;
4095     sampler_dn[index].dw4.sdi_threshold = 128;
4096     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
4097     sampler_dn[index].dw4.stmm_shift_up = 0;
4098     sampler_dn[index].dw4.stmm_shift_down = 0;
4099     sampler_dn[index].dw4.minimum_stmm = 0;
4100 
4101     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
4102     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
4103     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
4104     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
4105 
4106     sampler_dn[index].dw6.dn_enable = 1;
4107     sampler_dn[index].dw6.di_enable = 0;
4108     sampler_dn[index].dw6.di_partial = 0;
4109     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
4110     sampler_dn[index].dw6.dndi_stream_id = 1;
4111     sampler_dn[index].dw6.dndi_first_frame = 1;
4112     sampler_dn[index].dw6.progressive_dn = dn_progressive;
4113     sampler_dn[index].dw6.mcdi_enable = 0;
4114     sampler_dn[index].dw6.fmd_tear_threshold = 32;
4115     sampler_dn[index].dw6.cat_th1 = 0;
4116     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
4117     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
4118 
4119     sampler_dn[index].dw7.sad_tha = 5;
4120     sampler_dn[index].dw7.sad_thb = 10;
4121     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
4122     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
4123     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
4124     sampler_dn[index].dw7.vdi_walker_enable = 0;
4125     sampler_dn[index].dw7.neighborpixel_th = 10;
4126     sampler_dn[index].dw7.column_width_minus1 = w / 16;
4127 
4128     dri_bo_unmap(pp_context->sampler_state_table.bo);
4129 
4130     /* private function & data */
4131     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
4132     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
4133     pp_context->private_context = &pp_context->pp_dn_context;
4134     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
4135 
4136     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
4137     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
4138     pp_static_parameter->grf1.di_top_field_first = 0;
4139     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
4140 
4141     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
4142     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
4143     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
4144 
4145     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
4146     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
4147 
4148     pp_dn_context->dest_w = w;
4149     pp_dn_context->dest_h = h;
4150 
4151     dst_surface->flags = src_surface->flags;
4152 
4153     return VA_STATUS_SUCCESS;
4154 }
4155 
4156 static VAStatus
ironlake_pp_initialize(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect,int pp_index,void * filter_param)4157 ironlake_pp_initialize(
4158     VADriverContextP ctx,
4159     struct i965_post_processing_context *pp_context,
4160     const struct i965_surface *src_surface,
4161     const VARectangle *src_rect,
4162     struct i965_surface *dst_surface,
4163     const VARectangle *dst_rect,
4164     int pp_index,
4165     void *filter_param
4166 )
4167 {
4168     VAStatus va_status;
4169     struct i965_driver_data *i965 = i965_driver_data(ctx);
4170     struct pp_module *pp_module;
4171     dri_bo *bo;
4172     int static_param_size, inline_param_size;
4173 
4174     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4175     bo = dri_bo_alloc(i965->intel.bufmgr,
4176                       "surface state & binding table",
4177                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4178                       4096);
4179     assert(bo);
4180     pp_context->surface_state_binding_table.bo = bo;
4181 
4182     dri_bo_unreference(pp_context->curbe.bo);
4183     bo = dri_bo_alloc(i965->intel.bufmgr,
4184                       "constant buffer",
4185                       4096,
4186                       4096);
4187     assert(bo);
4188     pp_context->curbe.bo = bo;
4189 
4190     dri_bo_unreference(pp_context->idrt.bo);
4191     bo = dri_bo_alloc(i965->intel.bufmgr,
4192                       "interface discriptor",
4193                       sizeof(struct i965_interface_descriptor),
4194                       4096);
4195     assert(bo);
4196     pp_context->idrt.bo = bo;
4197     pp_context->idrt.num_interface_descriptors = 0;
4198 
4199     dri_bo_unreference(pp_context->sampler_state_table.bo);
4200     bo = dri_bo_alloc(i965->intel.bufmgr,
4201                       "sampler state table",
4202                       4096,
4203                       4096);
4204     assert(bo);
4205     dri_bo_map(bo, True);
4206     memset(bo->virtual, 0, bo->size);
4207     dri_bo_unmap(bo);
4208     pp_context->sampler_state_table.bo = bo;
4209 
4210     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4211     bo = dri_bo_alloc(i965->intel.bufmgr,
4212                       "sampler 8x8 state ",
4213                       4096,
4214                       4096);
4215     assert(bo);
4216     pp_context->sampler_state_table.bo_8x8 = bo;
4217 
4218     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4219     bo = dri_bo_alloc(i965->intel.bufmgr,
4220                       "sampler 8x8 state ",
4221                       4096,
4222                       4096);
4223     assert(bo);
4224     pp_context->sampler_state_table.bo_8x8_uv = bo;
4225 
4226     dri_bo_unreference(pp_context->vfe_state.bo);
4227     bo = dri_bo_alloc(i965->intel.bufmgr,
4228                       "vfe state",
4229                       sizeof(struct i965_vfe_state),
4230                       4096);
4231     assert(bo);
4232     pp_context->vfe_state.bo = bo;
4233 
4234     static_param_size = sizeof(struct pp_static_parameter);
4235     inline_param_size = sizeof(struct pp_inline_parameter);
4236 
4237     memset(pp_context->pp_static_parameter, 0, static_param_size);
4238     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4239 
4240     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4241     pp_context->current_pp = pp_index;
4242     pp_module = &pp_context->pp_modules[pp_index];
4243 
4244     if (pp_module->initialize)
4245         va_status = pp_module->initialize(ctx, pp_context,
4246                                           src_surface,
4247                                           src_rect,
4248                                           dst_surface,
4249                                           dst_rect,
4250                                           filter_param);
4251     else
4252         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4253 
4254     return va_status;
4255 }
4256 
4257 static VAStatus
ironlake_post_processing(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect,int pp_index,void * filter_param)4258 ironlake_post_processing(
4259     VADriverContextP   ctx,
4260     struct i965_post_processing_context *pp_context,
4261     const struct i965_surface *src_surface,
4262     const VARectangle *src_rect,
4263     struct i965_surface *dst_surface,
4264     const VARectangle *dst_rect,
4265     int                pp_index,
4266     void *filter_param
4267 )
4268 {
4269     VAStatus va_status;
4270 
4271     va_status = ironlake_pp_initialize(ctx, pp_context,
4272                                        src_surface,
4273                                        src_rect,
4274                                        dst_surface,
4275                                        dst_rect,
4276                                        pp_index,
4277                                        filter_param);
4278 
4279     if (va_status == VA_STATUS_SUCCESS) {
4280         ironlake_pp_states_setup(ctx, pp_context);
4281         ironlake_pp_pipeline_setup(ctx, pp_context);
4282     }
4283 
4284     return va_status;
4285 }
4286 
4287 static VAStatus
gen6_pp_initialize(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect,int pp_index,void * filter_param)4288 gen6_pp_initialize(
4289     VADriverContextP ctx,
4290     struct i965_post_processing_context *pp_context,
4291     const struct i965_surface *src_surface,
4292     const VARectangle *src_rect,
4293     struct i965_surface *dst_surface,
4294     const VARectangle *dst_rect,
4295     int pp_index,
4296     void *filter_param
4297 )
4298 {
4299     VAStatus va_status;
4300     struct i965_driver_data *i965 = i965_driver_data(ctx);
4301     struct pp_module *pp_module;
4302     dri_bo *bo;
4303     int static_param_size, inline_param_size;
4304 
4305     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4306     bo = dri_bo_alloc(i965->intel.bufmgr,
4307                       "surface state & binding table",
4308                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4309                       4096);
4310     assert(bo);
4311     pp_context->surface_state_binding_table.bo = bo;
4312 
4313     dri_bo_unreference(pp_context->curbe.bo);
4314     bo = dri_bo_alloc(i965->intel.bufmgr,
4315                       "constant buffer",
4316                       4096,
4317                       4096);
4318     assert(bo);
4319     pp_context->curbe.bo = bo;
4320 
4321     dri_bo_unreference(pp_context->idrt.bo);
4322     bo = dri_bo_alloc(i965->intel.bufmgr,
4323                       "interface discriptor",
4324                       sizeof(struct gen6_interface_descriptor_data),
4325                       4096);
4326     assert(bo);
4327     pp_context->idrt.bo = bo;
4328     pp_context->idrt.num_interface_descriptors = 0;
4329 
4330     dri_bo_unreference(pp_context->sampler_state_table.bo);
4331     bo = dri_bo_alloc(i965->intel.bufmgr,
4332                       "sampler state table",
4333                       4096,
4334                       4096);
4335     assert(bo);
4336     dri_bo_map(bo, True);
4337     memset(bo->virtual, 0, bo->size);
4338     dri_bo_unmap(bo);
4339     pp_context->sampler_state_table.bo = bo;
4340 
4341     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4342     bo = dri_bo_alloc(i965->intel.bufmgr,
4343                       "sampler 8x8 state ",
4344                       4096,
4345                       4096);
4346     assert(bo);
4347     pp_context->sampler_state_table.bo_8x8 = bo;
4348 
4349     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4350     bo = dri_bo_alloc(i965->intel.bufmgr,
4351                       "sampler 8x8 state ",
4352                       4096,
4353                       4096);
4354     assert(bo);
4355     pp_context->sampler_state_table.bo_8x8_uv = bo;
4356 
4357     dri_bo_unreference(pp_context->vfe_state.bo);
4358     bo = dri_bo_alloc(i965->intel.bufmgr,
4359                       "vfe state",
4360                       sizeof(struct i965_vfe_state),
4361                       4096);
4362     assert(bo);
4363     pp_context->vfe_state.bo = bo;
4364 
4365     if (IS_GEN7(i965->intel.device_info)) {
4366         static_param_size = sizeof(struct gen7_pp_static_parameter);
4367         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
4368     } else {
4369         static_param_size = sizeof(struct pp_static_parameter);
4370         inline_param_size = sizeof(struct pp_inline_parameter);
4371     }
4372 
4373     memset(pp_context->pp_static_parameter, 0, static_param_size);
4374     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4375 
4376     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4377     pp_context->current_pp = pp_index;
4378     pp_module = &pp_context->pp_modules[pp_index];
4379 
4380     if (pp_module->initialize)
4381         va_status = pp_module->initialize(ctx, pp_context,
4382                                           src_surface,
4383                                           src_rect,
4384                                           dst_surface,
4385                                           dst_rect,
4386                                           filter_param);
4387     else
4388         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4389 
4390     calculate_boundary_block_mask(pp_context, dst_rect);
4391 
4392     return va_status;
4393 }
4394 
4395 
4396 static void
gen6_pp_interface_descriptor_table(VADriverContextP ctx,struct i965_post_processing_context * pp_context)4397 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
4398                                    struct i965_post_processing_context *pp_context)
4399 {
4400     struct i965_driver_data *i965 = i965_driver_data(ctx);
4401     struct gen6_interface_descriptor_data *desc;
4402     dri_bo *bo;
4403     int pp_index = pp_context->current_pp;
4404 
4405     bo = pp_context->idrt.bo;
4406     dri_bo_map(bo, True);
4407     assert(bo->virtual);
4408     desc = bo->virtual;
4409     memset(desc, 0, sizeof(*desc));
4410     desc->desc0.kernel_start_pointer =
4411         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
4412     desc->desc1.single_program_flow = 1;
4413     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
4414     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
4415     desc->desc2.sampler_state_pointer =
4416         pp_context->sampler_state_table.bo->offset >> 5;
4417     desc->desc3.binding_table_entry_count = 0;
4418     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
4419     desc->desc4.constant_urb_entry_read_offset = 0;
4420 
4421     if (IS_GEN7(i965->intel.device_info))
4422         desc->desc4.constant_urb_entry_read_length = 8; /* grf 1-8 */
4423     else
4424         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
4425 
4426     dri_bo_emit_reloc(bo,
4427                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4428                       0,
4429                       offsetof(struct gen6_interface_descriptor_data, desc0),
4430                       pp_context->pp_modules[pp_index].kernel.bo);
4431 
4432     dri_bo_emit_reloc(bo,
4433                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4434                       desc->desc2.sampler_count << 2,
4435                       offsetof(struct gen6_interface_descriptor_data, desc2),
4436                       pp_context->sampler_state_table.bo);
4437 
4438     dri_bo_unmap(bo);
4439     pp_context->idrt.num_interface_descriptors++;
4440 }
4441 
4442 static void
gen6_pp_upload_constants(VADriverContextP ctx,struct i965_post_processing_context * pp_context)4443 gen6_pp_upload_constants(VADriverContextP ctx,
4444                          struct i965_post_processing_context *pp_context)
4445 {
4446     struct i965_driver_data *i965 = i965_driver_data(ctx);
4447     unsigned char *constant_buffer;
4448     int param_size;
4449 
4450     assert(sizeof(struct pp_static_parameter) == 128);
4451     assert(sizeof(struct gen7_pp_static_parameter) == 256);
4452 
4453     if (IS_GEN7(i965->intel.device_info))
4454         param_size = sizeof(struct gen7_pp_static_parameter);
4455     else
4456         param_size = sizeof(struct pp_static_parameter);
4457 
4458     dri_bo_map(pp_context->curbe.bo, 1);
4459     assert(pp_context->curbe.bo->virtual);
4460     constant_buffer = pp_context->curbe.bo->virtual;
4461     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
4462     dri_bo_unmap(pp_context->curbe.bo);
4463 }
4464 
4465 static void
gen6_pp_states_setup(VADriverContextP ctx,struct i965_post_processing_context * pp_context)4466 gen6_pp_states_setup(VADriverContextP ctx,
4467                      struct i965_post_processing_context *pp_context)
4468 {
4469     gen6_pp_interface_descriptor_table(ctx, pp_context);
4470     gen6_pp_upload_constants(ctx, pp_context);
4471 }
4472 
4473 static void
gen6_pp_pipeline_select(VADriverContextP ctx,struct i965_post_processing_context * pp_context)4474 gen6_pp_pipeline_select(VADriverContextP ctx,
4475                         struct i965_post_processing_context *pp_context)
4476 {
4477     struct intel_batchbuffer *batch = pp_context->batch;
4478 
4479     BEGIN_BATCH(batch, 1);
4480     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
4481     ADVANCE_BATCH(batch);
4482 }
4483 
4484 static void
gen6_pp_state_base_address(VADriverContextP ctx,struct i965_post_processing_context * pp_context)4485 gen6_pp_state_base_address(VADriverContextP ctx,
4486                            struct i965_post_processing_context *pp_context)
4487 {
4488     struct intel_batchbuffer *batch = pp_context->batch;
4489 
4490     BEGIN_BATCH(batch, 10);
4491     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
4492     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4493     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
4494     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4495     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4496     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4497     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4498     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4499     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4500     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4501     ADVANCE_BATCH(batch);
4502 }
4503 
4504 static void
gen6_pp_vfe_state(VADriverContextP ctx,struct i965_post_processing_context * pp_context)4505 gen6_pp_vfe_state(VADriverContextP ctx,
4506                   struct i965_post_processing_context *pp_context)
4507 {
4508     struct intel_batchbuffer *batch = pp_context->batch;
4509 
4510     BEGIN_BATCH(batch, 8);
4511     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
4512     OUT_BATCH(batch, 0);
4513     OUT_BATCH(batch,
4514               (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 |
4515               pp_context->vfe_gpu_state.num_urb_entries << 8);
4516     OUT_BATCH(batch, 0);
4517     OUT_BATCH(batch,
4518               (pp_context->vfe_gpu_state.urb_entry_size) << 16 |
4519               /* URB Entry Allocation Size, in 256 bits unit */
4520               (pp_context->vfe_gpu_state.curbe_allocation_size));
4521     /* CURBE Allocation Size, in 256 bits unit */
4522     OUT_BATCH(batch, 0);
4523     OUT_BATCH(batch, 0);
4524     OUT_BATCH(batch, 0);
4525     ADVANCE_BATCH(batch);
4526 }
4527 
4528 static void
gen6_pp_curbe_load(VADriverContextP ctx,struct i965_post_processing_context * pp_context)4529 gen6_pp_curbe_load(VADriverContextP ctx,
4530                    struct i965_post_processing_context *pp_context)
4531 {
4532     struct intel_batchbuffer *batch = pp_context->batch;
4533     struct i965_driver_data *i965 = i965_driver_data(ctx);
4534     int param_size;
4535 
4536     if (IS_GEN7(i965->intel.device_info))
4537         param_size = sizeof(struct gen7_pp_static_parameter);
4538     else
4539         param_size = sizeof(struct pp_static_parameter);
4540 
4541     BEGIN_BATCH(batch, 4);
4542     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
4543     OUT_BATCH(batch, 0);
4544     OUT_BATCH(batch,
4545               param_size);
4546     OUT_RELOC(batch,
4547               pp_context->curbe.bo,
4548               I915_GEM_DOMAIN_INSTRUCTION, 0,
4549               0);
4550     ADVANCE_BATCH(batch);
4551 }
4552 
4553 static void
gen6_interface_descriptor_load(VADriverContextP ctx,struct i965_post_processing_context * pp_context)4554 gen6_interface_descriptor_load(VADriverContextP ctx,
4555                                struct i965_post_processing_context *pp_context)
4556 {
4557     struct intel_batchbuffer *batch = pp_context->batch;
4558 
4559     BEGIN_BATCH(batch, 4);
4560     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
4561     OUT_BATCH(batch, 0);
4562     OUT_BATCH(batch,
4563               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
4564     OUT_RELOC(batch,
4565               pp_context->idrt.bo,
4566               I915_GEM_DOMAIN_INSTRUCTION, 0,
4567               0);
4568     ADVANCE_BATCH(batch);
4569 }
4570 
update_block_mask_parameter(struct i965_post_processing_context * pp_context,int x,int y,int x_steps,int y_steps)4571 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps)
4572 {
4573     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4574 
4575     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
4576     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
4577     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
4578     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
4579     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4580     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
4581 
4582     /* 1 x N */
4583     if (x_steps == 1) {
4584         if (y == y_steps - 1) {
4585             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
4586         } else {
4587             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
4588         }
4589     }
4590 
4591     /* M x 1 */
4592     if (y_steps == 1) {
4593         if (x == 0) { // all blocks in this group are on the left edge
4594             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
4595             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left;
4596         } else if (x == x_steps - 1) {
4597             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
4598             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
4599         } else {
4600             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
4601             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4602             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
4603         }
4604     }
4605 
4606 }
4607 
4608 static void
gen6_pp_object_walker(VADriverContextP ctx,struct i965_post_processing_context * pp_context)4609 gen6_pp_object_walker(VADriverContextP ctx,
4610                       struct i965_post_processing_context *pp_context)
4611 {
4612     struct i965_driver_data *i965 = i965_driver_data(ctx);
4613     struct intel_batchbuffer *batch = pp_context->batch;
4614     int x, x_steps, y, y_steps;
4615     int param_size, command_length_in_dws;
4616     dri_bo *command_buffer;
4617     unsigned int *command_ptr;
4618 
4619     if (IS_GEN7(i965->intel.device_info))
4620         param_size = sizeof(struct gen7_pp_inline_parameter);
4621     else
4622         param_size = sizeof(struct pp_inline_parameter);
4623 
4624     x_steps = pp_context->pp_x_steps(pp_context->private_context);
4625     y_steps = pp_context->pp_y_steps(pp_context->private_context);
4626     command_length_in_dws = 6 + (param_size >> 2);
4627     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
4628                                   "command objects buffer",
4629                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
4630                                   4096);
4631 
4632     dri_bo_map(command_buffer, 1);
4633     command_ptr = command_buffer->virtual;
4634 
4635     for (y = 0; y < y_steps; y++) {
4636         for (x = 0; x < x_steps; x++) {
4637             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
4638                 // some common block parameter update goes here, apply to all pp functions
4639                 if (IS_GEN6(i965->intel.device_info))
4640                     update_block_mask_parameter(pp_context, x, y, x_steps, y_steps);
4641 
4642                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
4643                 *command_ptr++ = 0;
4644                 *command_ptr++ = 0;
4645                 *command_ptr++ = 0;
4646                 *command_ptr++ = 0;
4647                 *command_ptr++ = 0;
4648                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
4649                 command_ptr += (param_size >> 2);
4650             }
4651         }
4652     }
4653 
4654     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
4655         *command_ptr++ = 0;
4656 
4657     *command_ptr = MI_BATCH_BUFFER_END;
4658 
4659     dri_bo_unmap(command_buffer);
4660 
4661     BEGIN_BATCH(batch, 2);
4662     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
4663     OUT_RELOC(batch, command_buffer,
4664               I915_GEM_DOMAIN_COMMAND, 0,
4665               0);
4666     ADVANCE_BATCH(batch);
4667 
4668     dri_bo_unreference(command_buffer);
4669 
4670     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
4671      * will cause control to pass back to ring buffer
4672      */
4673     intel_batchbuffer_end_atomic(batch);
4674     intel_batchbuffer_flush(batch);
4675     intel_batchbuffer_start_atomic(batch, 0x1000);
4676 }
4677 
4678 static void
gen6_pp_pipeline_setup(VADriverContextP ctx,struct i965_post_processing_context * pp_context)4679 gen6_pp_pipeline_setup(VADriverContextP ctx,
4680                        struct i965_post_processing_context *pp_context)
4681 {
4682     struct intel_batchbuffer *batch = pp_context->batch;
4683 
4684     intel_batchbuffer_start_atomic(batch, 0x1000);
4685     intel_batchbuffer_emit_mi_flush(batch);
4686     gen6_pp_pipeline_select(ctx, pp_context);
4687     gen6_pp_state_base_address(ctx, pp_context);
4688     gen6_pp_vfe_state(ctx, pp_context);
4689     gen6_pp_curbe_load(ctx, pp_context);
4690     gen6_interface_descriptor_load(ctx, pp_context);
4691     gen6_pp_object_walker(ctx, pp_context);
4692     intel_batchbuffer_end_atomic(batch);
4693 }
4694 
4695 static VAStatus
gen6_post_processing(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect,int pp_index,void * filter_param)4696 gen6_post_processing(
4697     VADriverContextP ctx,
4698     struct i965_post_processing_context *pp_context,
4699     const struct i965_surface *src_surface,
4700     const VARectangle *src_rect,
4701     struct i965_surface *dst_surface,
4702     const VARectangle *dst_rect,
4703     int pp_index,
4704     void *filter_param
4705 )
4706 {
4707     VAStatus va_status;
4708 
4709     va_status = gen6_pp_initialize(ctx, pp_context,
4710                                    src_surface,
4711                                    src_rect,
4712                                    dst_surface,
4713                                    dst_rect,
4714                                    pp_index,
4715                                    filter_param);
4716 
4717     if (va_status == VA_STATUS_SUCCESS) {
4718         gen6_pp_states_setup(ctx, pp_context);
4719         gen6_pp_pipeline_setup(ctx, pp_context);
4720     }
4721 
4722     if (va_status == VA_STATUS_SUCCESS_1)
4723         va_status = VA_STATUS_SUCCESS;
4724 
4725     return va_status;
4726 }
4727 
4728 static VAStatus
i965_post_processing_internal(VADriverContextP ctx,struct i965_post_processing_context * pp_context,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect,int pp_index,void * filter_param)4729 i965_post_processing_internal(
4730     VADriverContextP   ctx,
4731     struct i965_post_processing_context *pp_context,
4732     const struct i965_surface *src_surface,
4733     const VARectangle *src_rect,
4734     struct i965_surface *dst_surface,
4735     const VARectangle *dst_rect,
4736     int                pp_index,
4737     void *filter_param
4738 )
4739 {
4740     VAStatus va_status;
4741 
4742     if (pp_context && pp_context->intel_post_processing) {
4743         va_status = (pp_context->intel_post_processing)(ctx, pp_context,
4744                                                         src_surface, src_rect,
4745                                                         dst_surface, dst_rect,
4746                                                         pp_index, filter_param);
4747     } else {
4748         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4749     }
4750 
4751     return va_status;
4752 }
4753 
/*
 * Split a packed 0xAARRGGBB value into Y, U, V and alpha bytes.
 *
 * The colour conversion uses integer coefficients scaled by 1000 with
 * truncating division, then adds 16 to Y and 128 to U and V.  Alpha is
 * copied through from the top byte.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red   = (int)((argb >> 16) & 0xff);
    const int green = (int)((argb >> 8) & 0xff);
    const int blue  = (int)(argb & 0xff);

    /* Weighted sums in thousandths; the division truncates toward zero. */
    const int luma_milli = 257 * red + 504 * green + 98 * blue;
    const int cr_milli   = 439 * red - 368 * green - 71 * blue;
    const int cb_milli   = -148 * red - 291 * green + 439 * blue;

    *y = luma_milli / 1000 + 16;
    *v = cr_milli / 1000 + 128;
    *u = cb_milli / 1000 + 128;
    *a = ((argb >> 24) & 0xff);
}
4770 
4771 static void
i965_vpp_clear_surface(VADriverContextP ctx,struct i965_post_processing_context * pp_context,struct object_surface * obj_surface,unsigned int color)4772 i965_vpp_clear_surface(VADriverContextP ctx,
4773                        struct i965_post_processing_context *pp_context,
4774                        struct object_surface *obj_surface,
4775                        unsigned int color)
4776 {
4777     struct i965_driver_data *i965 = i965_driver_data(ctx);
4778     struct intel_batchbuffer *batch = pp_context->batch;
4779     unsigned int blt_cmd, br13;
4780     unsigned int tiling = 0, swizzle = 0;
4781     int pitch;
4782     unsigned char y, u, v, a = 0;
4783     int region_width, region_height;
4784 
4785     /* Currently only support NV12 surface */
4786     if (!obj_surface || obj_surface->fourcc != VA_FOURCC_NV12)
4787         return;
4788 
4789     rgb_to_yuv(color, &y, &u, &v, &a);
4790 
4791     if (a == 0)
4792         return;
4793 
4794     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4795     blt_cmd = XY_COLOR_BLT_CMD;
4796     pitch = obj_surface->width;
4797 
4798     if (tiling != I915_TILING_NONE) {
4799         assert(tiling == I915_TILING_Y);
4800         // blt_cmd |= XY_COLOR_BLT_DST_TILED;
4801         // pitch >>= 2;
4802     }
4803 
4804     br13 = 0xf0 << 16;
4805     br13 |= BR13_8;
4806     br13 |= pitch;
4807 
4808     if (IS_IRONLAKE(i965->intel.device_info)) {
4809         intel_batchbuffer_start_atomic(batch, 48);
4810         BEGIN_BATCH(batch, 12);
4811     } else {
4812         /* Will double-check the command if the new chipset is added */
4813         intel_batchbuffer_start_atomic_blt(batch, 48);
4814         BEGIN_BLT_BATCH(batch, 12);
4815     }
4816 
4817     region_width = obj_surface->width;
4818     region_height = obj_surface->height;
4819 
4820     OUT_BATCH(batch, blt_cmd);
4821     OUT_BATCH(batch, br13);
4822     OUT_BATCH(batch,
4823               0 << 16 |
4824               0);
4825     OUT_BATCH(batch,
4826               region_height << 16 |
4827               region_width);
4828     OUT_RELOC(batch, obj_surface->bo,
4829               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4830               0);
4831     OUT_BATCH(batch, y);
4832 
4833     br13 = 0xf0 << 16;
4834     br13 |= BR13_565;
4835     br13 |= pitch;
4836 
4837     region_width = obj_surface->width / 2;
4838     region_height = obj_surface->height / 2;
4839 
4840     if (tiling == I915_TILING_Y) {
4841         region_height = ALIGN(obj_surface->height / 2, 32);
4842     }
4843 
4844     OUT_BATCH(batch, blt_cmd);
4845     OUT_BATCH(batch, br13);
4846     OUT_BATCH(batch,
4847               0 << 16 |
4848               0);
4849     OUT_BATCH(batch,
4850               region_height << 16 |
4851               region_width);
4852     OUT_RELOC(batch, obj_surface->bo,
4853               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4854               obj_surface->width * obj_surface->y_cb_offset);
4855     OUT_BATCH(batch, v << 8 | u);
4856 
4857     ADVANCE_BATCH(batch);
4858     intel_batchbuffer_end_atomic(batch);
4859 }
4860 
4861 VAStatus
i965_scaling_processing(VADriverContextP ctx,struct object_surface * src_surface_obj,const VARectangle * src_rect,struct object_surface * dst_surface_obj,const VARectangle * dst_rect,unsigned int va_flags)4862 i965_scaling_processing(
4863     VADriverContextP   ctx,
4864     struct object_surface *src_surface_obj,
4865     const VARectangle *src_rect,
4866     struct object_surface *dst_surface_obj,
4867     const VARectangle *dst_rect,
4868     unsigned int       va_flags)
4869 {
4870     VAStatus va_status = VA_STATUS_SUCCESS;
4871     struct i965_driver_data *i965 = i965_driver_data(ctx);
4872 
4873     assert(src_surface_obj->fourcc == VA_FOURCC_NV12);
4874     assert(dst_surface_obj->fourcc == VA_FOURCC_NV12);
4875 
4876     if (HAS_VPP(i965)) {
4877         struct i965_surface src_surface;
4878         struct i965_surface dst_surface;
4879         struct i965_post_processing_context *pp_context;
4880         unsigned int filter_flags;
4881 
4882         _i965LockMutex(&i965->pp_mutex);
4883 
4884         src_surface.base = (struct object_base *)src_surface_obj;
4885         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4886         src_surface.flags = I965_SURFACE_FLAG_FRAME;
4887         dst_surface.base = (struct object_base *)dst_surface_obj;
4888         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4889         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4890 
4891         pp_context = i965->pp_context;
4892         filter_flags = pp_context->filter_flags;
4893         pp_context->filter_flags = va_flags;
4894 
4895         va_status = i965_post_processing_internal(ctx, pp_context,
4896                                                   &src_surface, src_rect, &dst_surface, dst_rect,
4897                                                   avs_is_needed(va_flags) ? PP_NV12_AVS : PP_NV12_SCALING, NULL);
4898 
4899         pp_context->filter_flags = filter_flags;
4900 
4901         _i965UnlockMutex(&i965->pp_mutex);
4902     }
4903 
4904     return va_status;
4905 }
4906 
4907 VASurfaceID
i965_post_processing(VADriverContextP ctx,struct object_surface * obj_surface,const VARectangle * src_rect,const VARectangle * dst_rect,unsigned int va_flags,int * has_done_scaling,VARectangle * calibrated_rect)4908 i965_post_processing(
4909     VADriverContextP   ctx,
4910     struct object_surface *obj_surface,
4911     const VARectangle *src_rect,
4912     const VARectangle *dst_rect,
4913     unsigned int       va_flags,
4914     int               *has_done_scaling,
4915     VARectangle *calibrated_rect
4916 )
4917 {
4918     struct i965_driver_data *i965 = i965_driver_data(ctx);
4919     VASurfaceID out_surface_id = VA_INVALID_ID;
4920 
4921     *has_done_scaling = 0;
4922 
4923     if (HAS_VPP(i965)) {
4924         VAStatus status;
4925         struct i965_surface src_surface;
4926         struct i965_surface dst_surface;
4927         struct i965_post_processing_context *pp_context;
4928 
4929         /* Currently only support post processing for NV12 surface */
4930         if (obj_surface->fourcc != VA_FOURCC_NV12)
4931             return out_surface_id;
4932 
4933         _i965LockMutex(&i965->pp_mutex);
4934 
4935         pp_context = i965->pp_context;
4936         pp_context->filter_flags = va_flags;
4937         if (avs_is_needed(va_flags)) {
4938             VARectangle tmp_dst_rect;
4939 
4940             tmp_dst_rect.x = 0;
4941             tmp_dst_rect.y = 0;
4942             tmp_dst_rect.width = dst_rect->width;
4943             tmp_dst_rect.height = dst_rect->height;
4944             src_surface.base = (struct object_base *)obj_surface;
4945             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4946             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4947 
4948             status = i965_CreateSurfaces(ctx,
4949                                          dst_rect->width,
4950                                          dst_rect->height,
4951                                          VA_RT_FORMAT_YUV420,
4952                                          1,
4953                                          &out_surface_id);
4954             assert(status == VA_STATUS_SUCCESS);
4955             obj_surface = SURFACE(out_surface_id);
4956             assert(obj_surface);
4957             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4958             i965_vpp_clear_surface(ctx, pp_context, obj_surface, 0);
4959 
4960             dst_surface.base = (struct object_base *)obj_surface;
4961             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4962             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4963 
4964             i965_post_processing_internal(ctx, pp_context,
4965                                           &src_surface,
4966                                           src_rect,
4967                                           &dst_surface,
4968                                           &tmp_dst_rect,
4969                                           PP_NV12_AVS,
4970                                           NULL);
4971 
4972             *has_done_scaling = 1;
4973             calibrated_rect->x = 0;
4974             calibrated_rect->y = 0;
4975             calibrated_rect->width = dst_rect->width;
4976             calibrated_rect->height = dst_rect->height;
4977         }
4978 
4979         _i965UnlockMutex(&i965->pp_mutex);
4980     }
4981 
4982     return out_surface_id;
4983 }
4984 
4985 static VAStatus
4986 i965_image_pl2_processing(VADriverContextP ctx,
4987                           const struct i965_surface *src_surface,
4988                           const VARectangle *src_rect,
4989                           struct i965_surface *dst_surface,
4990                           const VARectangle *dst_rect);
4991 
4992 static VAStatus
i965_image_plx_nv12_plx_processing(VADriverContextP ctx,VAStatus (* i965_image_plx_nv12_processing)(VADriverContextP,const struct i965_surface *,const VARectangle *,struct i965_surface *,const VARectangle *),const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect)4993 i965_image_plx_nv12_plx_processing(VADriverContextP ctx,
4994                                    VAStatus(*i965_image_plx_nv12_processing)(
4995                                        VADriverContextP,
4996                                        const struct i965_surface *,
4997                                        const VARectangle *,
4998                                        struct i965_surface *,
4999                                        const VARectangle *),
5000                                    const struct i965_surface *src_surface,
5001                                    const VARectangle *src_rect,
5002                                    struct i965_surface *dst_surface,
5003                                    const VARectangle *dst_rect)
5004 {
5005     struct i965_driver_data *i965 = i965_driver_data(ctx);
5006     VAStatus status;
5007     VASurfaceID tmp_surface_id = VA_INVALID_SURFACE;
5008     struct object_surface *obj_surface = NULL;
5009     struct i965_surface tmp_surface;
5010     int width, height;
5011 
5012     pp_get_surface_size(ctx, dst_surface, &width, &height);
5013     status = i965_CreateSurfaces(ctx,
5014                                  width,
5015                                  height,
5016                                  VA_RT_FORMAT_YUV420,
5017                                  1,
5018                                  &tmp_surface_id);
5019     assert(status == VA_STATUS_SUCCESS);
5020     obj_surface = SURFACE(tmp_surface_id);
5021     assert(obj_surface);
5022     i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5023 
5024     tmp_surface.base = (struct object_base *)obj_surface;
5025     tmp_surface.type = I965_SURFACE_TYPE_SURFACE;
5026     tmp_surface.flags = I965_SURFACE_FLAG_FRAME;
5027 
5028     status = i965_image_plx_nv12_processing(ctx,
5029                                             src_surface,
5030                                             src_rect,
5031                                             &tmp_surface,
5032                                             dst_rect);
5033 
5034     if (status == VA_STATUS_SUCCESS)
5035         status = i965_image_pl2_processing(ctx,
5036                                            &tmp_surface,
5037                                            dst_rect,
5038                                            dst_surface,
5039                                            dst_rect);
5040 
5041     i965_DestroySurfaces(ctx,
5042                          &tmp_surface_id,
5043                          1);
5044 
5045     return status;
5046 }
5047 
5048 
5049 static VAStatus
i965_image_pl1_rgbx_processing(VADriverContextP ctx,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect)5050 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
5051                                const struct i965_surface *src_surface,
5052                                const VARectangle *src_rect,
5053                                struct i965_surface *dst_surface,
5054                                const VARectangle *dst_rect)
5055 {
5056     struct i965_driver_data *i965 = i965_driver_data(ctx);
5057     struct i965_post_processing_context *pp_context = i965->pp_context;
5058     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5059     VAStatus vaStatus;
5060 
5061     vaStatus = intel_common_scaling_post_processing(ctx,
5062                                                     pp_context,
5063                                                     src_surface,
5064                                                     src_rect,
5065                                                     dst_surface,
5066                                                     dst_rect);
5067 
5068     if (vaStatus != VA_STATUS_ERROR_UNIMPLEMENTED)
5069         return vaStatus;
5070 
5071     switch (fourcc) {
5072     case VA_FOURCC_NV12:
5073         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5074                                                  src_surface,
5075                                                  src_rect,
5076                                                  dst_surface,
5077                                                  dst_rect,
5078                                                  PP_RGBX_LOAD_SAVE_NV12,
5079                                                  NULL);
5080         intel_batchbuffer_flush(pp_context->batch);
5081         break;
5082 
5083     default:
5084         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5085                                                       i965_image_pl1_rgbx_processing,
5086                                                       src_surface,
5087                                                       src_rect,
5088                                                       dst_surface,
5089                                                       dst_rect);
5090         break;
5091     }
5092 
5093     return vaStatus;
5094 }
5095 
5096 static VAStatus
i965_image_pl3_processing(VADriverContextP ctx,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect)5097 i965_image_pl3_processing(VADriverContextP ctx,
5098                           const struct i965_surface *src_surface,
5099                           const VARectangle *src_rect,
5100                           struct i965_surface *dst_surface,
5101                           const VARectangle *dst_rect)
5102 {
5103     struct i965_driver_data *i965 = i965_driver_data(ctx);
5104     struct i965_post_processing_context *pp_context = i965->pp_context;
5105     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5106     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5107 
5108     vaStatus = intel_common_scaling_post_processing(ctx,
5109                                                     pp_context,
5110                                                     src_surface,
5111                                                     src_rect,
5112                                                     dst_surface,
5113                                                     dst_rect);
5114 
5115     if (vaStatus != VA_STATUS_ERROR_UNIMPLEMENTED)
5116         return vaStatus;
5117 
5118     switch (fourcc) {
5119     case VA_FOURCC_NV12:
5120         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5121                                                  src_surface,
5122                                                  src_rect,
5123                                                  dst_surface,
5124                                                  dst_rect,
5125                                                  PP_PL3_LOAD_SAVE_N12,
5126                                                  NULL);
5127         intel_batchbuffer_flush(pp_context->batch);
5128         break;
5129 
5130     case VA_FOURCC_IMC1:
5131     case VA_FOURCC_IMC3:
5132     case VA_FOURCC_YV12:
5133     case VA_FOURCC_I420:
5134         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5135                                                  src_surface,
5136                                                  src_rect,
5137                                                  dst_surface,
5138                                                  dst_rect,
5139                                                  PP_PL3_LOAD_SAVE_PL3,
5140                                                  NULL);
5141         intel_batchbuffer_flush(pp_context->batch);
5142         break;
5143 
5144     case VA_FOURCC_YUY2:
5145     case VA_FOURCC_UYVY:
5146         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5147                                                  src_surface,
5148                                                  src_rect,
5149                                                  dst_surface,
5150                                                  dst_rect,
5151                                                  PP_PL3_LOAD_SAVE_PA,
5152                                                  NULL);
5153         intel_batchbuffer_flush(pp_context->batch);
5154         break;
5155 
5156     default:
5157         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5158                                                       i965_image_pl3_processing,
5159                                                       src_surface,
5160                                                       src_rect,
5161                                                       dst_surface,
5162                                                       dst_rect);
5163         break;
5164     }
5165 
5166     return vaStatus;
5167 }
5168 
5169 static VAStatus
i965_image_pl2_processing(VADriverContextP ctx,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect)5170 i965_image_pl2_processing(VADriverContextP ctx,
5171                           const struct i965_surface *src_surface,
5172                           const VARectangle *src_rect,
5173                           struct i965_surface *dst_surface,
5174                           const VARectangle *dst_rect)
5175 {
5176     struct i965_driver_data *i965 = i965_driver_data(ctx);
5177     struct i965_post_processing_context *pp_context = i965->pp_context;
5178     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5179     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5180 
5181     vaStatus = intel_common_scaling_post_processing(ctx,
5182                                                     pp_context,
5183                                                     src_surface,
5184                                                     src_rect,
5185                                                     dst_surface,
5186                                                     dst_rect);
5187 
5188     if (vaStatus != VA_STATUS_ERROR_UNIMPLEMENTED)
5189         return vaStatus;
5190 
5191     switch (fourcc) {
5192     case VA_FOURCC_NV12:
5193         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5194                                                  src_surface,
5195                                                  src_rect,
5196                                                  dst_surface,
5197                                                  dst_rect,
5198                                                  PP_NV12_LOAD_SAVE_N12,
5199                                                  NULL);
5200         break;
5201 
5202     case VA_FOURCC_IMC1:
5203     case VA_FOURCC_IMC3:
5204     case VA_FOURCC_YV12:
5205     case VA_FOURCC_I420:
5206         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5207                                                  src_surface,
5208                                                  src_rect,
5209                                                  dst_surface,
5210                                                  dst_rect,
5211                                                  PP_NV12_LOAD_SAVE_PL3,
5212                                                  NULL);
5213         break;
5214 
5215     case VA_FOURCC_YUY2:
5216     case VA_FOURCC_UYVY:
5217         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5218                                                  src_surface,
5219                                                  src_rect,
5220                                                  dst_surface,
5221                                                  dst_rect,
5222                                                  PP_NV12_LOAD_SAVE_PA,
5223                                                  NULL);
5224         break;
5225 
5226     case VA_FOURCC_BGRX:
5227     case VA_FOURCC_BGRA:
5228     case VA_FOURCC_RGBX:
5229     case VA_FOURCC_RGBA:
5230         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5231                                                  src_surface,
5232                                                  src_rect,
5233                                                  dst_surface,
5234                                                  dst_rect,
5235                                                  PP_NV12_LOAD_SAVE_RGBX,
5236                                                  NULL);
5237         break;
5238 
5239     default:
5240         return VA_STATUS_ERROR_UNIMPLEMENTED;
5241     }
5242 
5243     intel_batchbuffer_flush(pp_context->batch);
5244 
5245     return vaStatus;
5246 }
5247 
5248 static VAStatus
i965_image_pl1_processing(VADriverContextP ctx,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect)5249 i965_image_pl1_processing(VADriverContextP ctx,
5250                           const struct i965_surface *src_surface,
5251                           const VARectangle *src_rect,
5252                           struct i965_surface *dst_surface,
5253                           const VARectangle *dst_rect)
5254 {
5255     struct i965_driver_data *i965 = i965_driver_data(ctx);
5256     struct i965_post_processing_context *pp_context = i965->pp_context;
5257     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5258     VAStatus vaStatus;
5259 
5260     vaStatus = intel_common_scaling_post_processing(ctx,
5261                                                     pp_context,
5262                                                     src_surface,
5263                                                     src_rect,
5264                                                     dst_surface,
5265                                                     dst_rect);
5266 
5267     if (vaStatus != VA_STATUS_ERROR_UNIMPLEMENTED)
5268         return vaStatus;
5269 
5270     switch (fourcc) {
5271     case VA_FOURCC_NV12:
5272         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5273                                                  src_surface,
5274                                                  src_rect,
5275                                                  dst_surface,
5276                                                  dst_rect,
5277                                                  PP_PA_LOAD_SAVE_NV12,
5278                                                  NULL);
5279         intel_batchbuffer_flush(pp_context->batch);
5280         break;
5281 
5282     case VA_FOURCC_YV12:
5283         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5284                                                  src_surface,
5285                                                  src_rect,
5286                                                  dst_surface,
5287                                                  dst_rect,
5288                                                  PP_PA_LOAD_SAVE_PL3,
5289                                                  NULL);
5290         intel_batchbuffer_flush(pp_context->batch);
5291         break;
5292 
5293     case VA_FOURCC_YUY2:
5294     case VA_FOURCC_UYVY:
5295         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5296                                                  src_surface,
5297                                                  src_rect,
5298                                                  dst_surface,
5299                                                  dst_rect,
5300                                                  PP_PA_LOAD_SAVE_PA,
5301                                                  NULL);
5302         intel_batchbuffer_flush(pp_context->batch);
5303         break;
5304 
5305     default:
5306         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5307                                                       i965_image_pl1_processing,
5308                                                       src_surface,
5309                                                       src_rect,
5310                                                       dst_surface,
5311                                                       dst_rect);
5312         break;
5313     }
5314 
5315     return vaStatus;
5316 }
5317 
5318 // it only support NV12 and P010 for vebox proc ctx
derive_surface(VADriverContextP ctx,struct object_image * obj_image,struct object_surface * obj_surface)5319 static struct object_surface *derive_surface(VADriverContextP ctx,
5320                                              struct object_image *obj_image,
5321                                              struct object_surface *obj_surface)
5322 {
5323     VAImage * const image = &obj_image->image;
5324 
5325     memset((void *)obj_surface, 0, sizeof(*obj_surface));
5326     obj_surface->fourcc = image->format.fourcc;
5327     obj_surface->orig_width = image->width;
5328     obj_surface->orig_height = image->height;
5329     obj_surface->width = image->pitches[0];
5330     obj_surface->height = image->height;
5331     obj_surface->y_cb_offset = image->offsets[1] / obj_surface->width;
5332     obj_surface->y_cr_offset = obj_surface->y_cb_offset;
5333     obj_surface->bo = obj_image->bo;
5334     obj_surface->subsampling = SUBSAMPLE_YUV420;
5335 
5336     return obj_surface;
5337 }
5338 
5339 static VAStatus
vebox_processing_simple(VADriverContextP ctx,struct i965_post_processing_context * pp_context,struct object_surface * src_obj_surface,struct object_surface * dst_obj_surface,const VARectangle * rect)5340 vebox_processing_simple(VADriverContextP ctx,
5341                         struct i965_post_processing_context *pp_context,
5342                         struct object_surface *src_obj_surface,
5343                         struct object_surface *dst_obj_surface,
5344                         const VARectangle *rect)
5345 {
5346     struct i965_driver_data *i965 = i965_driver_data(ctx);
5347     VAProcPipelineParameterBuffer pipeline_param;
5348     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
5349 
5350     if (pp_context->vebox_proc_ctx == NULL) {
5351         pp_context->vebox_proc_ctx = gen75_vebox_context_init(ctx);
5352     }
5353 
5354     memset((void *)&pipeline_param, 0, sizeof(pipeline_param));
5355     pipeline_param.surface_region = rect;
5356     pipeline_param.output_region = rect;
5357     pipeline_param.filter_flags = 0;
5358     pipeline_param.num_filters  = 0;
5359 
5360     pp_context->vebox_proc_ctx->pipeline_param = &pipeline_param;
5361     pp_context->vebox_proc_ctx->surface_input_object = src_obj_surface;
5362     pp_context->vebox_proc_ctx->surface_output_object = dst_obj_surface;
5363 
5364     if (IS_GEN9(i965->intel.device_info))
5365         status = gen9_vebox_process_picture(ctx, pp_context->vebox_proc_ctx);
5366     else if (IS_GEN10(i965->intel.device_info))
5367         status = gen10_vebox_process_picture(ctx, pp_context->vebox_proc_ctx);
5368 
5369     return status;
5370 }
5371 
5372 static VAStatus
i965_image_p010_processing(VADriverContextP ctx,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect)5373 i965_image_p010_processing(VADriverContextP ctx,
5374                            const struct i965_surface *src_surface,
5375                            const VARectangle *src_rect,
5376                            struct i965_surface *dst_surface,
5377                            const VARectangle *dst_rect)
5378 {
5379 #define HAS_VPP_P010(ctx)        ((ctx)->codec_info->has_vpp_p010 && \
5380                                      (ctx)->intel.has_bsd)
5381 
5382     struct i965_driver_data *i965 = i965_driver_data(ctx);
5383     struct i965_post_processing_context *pp_context = i965->pp_context;
5384     struct object_surface *src_obj_surface = NULL, *dst_obj_surface = NULL;
5385     struct object_surface tmp_src_obj_surface, tmp_dst_obj_surface;
5386     struct object_surface *tmp_surface = NULL;
5387     VASurfaceID tmp_surface_id[3], out_surface_id = VA_INVALID_ID;
5388     int num_tmp_surfaces = 0;
5389     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5390     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5391     int vpp_post = 0;
5392 
5393     vaStatus = intel_common_scaling_post_processing(ctx,
5394                                                     pp_context,
5395                                                     src_surface,
5396                                                     src_rect,
5397                                                     dst_surface,
5398                                                     dst_rect);
5399 
5400     if (vaStatus != VA_STATUS_ERROR_UNIMPLEMENTED)
5401         return vaStatus;
5402 
5403     if (HAS_VPP_P010(i965)) {
5404         vpp_post = 0;
5405         switch (fourcc) {
5406         case VA_FOURCC_NV12:
5407             if (src_rect->x != dst_rect->x ||
5408                 src_rect->y != dst_rect->y ||
5409                 src_rect->width != dst_rect->width ||
5410                 src_rect->height != dst_rect->height) {
5411                 vpp_post = 1;
5412             }
5413             break;
5414         case VA_FOURCC_P010:
5415             // don't support scaling while the fourcc of dst_surface is P010
5416             if (src_rect->x != dst_rect->x ||
5417                 src_rect->y != dst_rect->y ||
5418                 src_rect->width != dst_rect->width ||
5419                 src_rect->height != dst_rect->height) {
5420                 vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5421                 goto EXIT;
5422             }
5423             break;
5424         default:
5425             vpp_post = 1;
5426             break;
5427         }
5428 
5429         if (src_surface->type == I965_SURFACE_TYPE_IMAGE) {
5430             src_obj_surface = derive_surface(ctx, (struct object_image *)src_surface->base,
5431                                              &tmp_src_obj_surface);
5432         } else
5433             src_obj_surface = (struct object_surface *)src_surface->base;
5434 
5435         if (src_obj_surface == NULL) {
5436             vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
5437             goto EXIT;
5438         }
5439 
5440         if (vpp_post == 1) {
5441             vaStatus = i965_CreateSurfaces(ctx,
5442                                            src_obj_surface->orig_width,
5443                                            src_obj_surface->orig_height,
5444                                            VA_RT_FORMAT_YUV420,
5445                                            1,
5446                                            &out_surface_id);
5447             assert(vaStatus == VA_STATUS_SUCCESS);
5448             tmp_surface_id[num_tmp_surfaces++] = out_surface_id;
5449             tmp_surface = SURFACE(out_surface_id);
5450             assert(tmp_surface);
5451             i965_check_alloc_surface_bo(ctx, tmp_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5452         }
5453 
5454         if (tmp_surface != NULL)
5455             dst_obj_surface = tmp_surface;
5456         else {
5457             if (dst_surface->type == I965_SURFACE_TYPE_IMAGE) {
5458                 dst_obj_surface = derive_surface(ctx, (struct object_image *)dst_surface->base,
5459                                                  &tmp_dst_obj_surface);
5460             } else
5461                 dst_obj_surface = (struct object_surface *)dst_surface->base;
5462         }
5463 
5464         if (dst_obj_surface == NULL) {
5465             vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
5466             goto EXIT;
5467         }
5468 
5469         vaStatus = vebox_processing_simple(ctx,
5470                                            pp_context,
5471                                            src_obj_surface,
5472                                            dst_obj_surface,
5473                                            src_rect);
5474         if (vaStatus != VA_STATUS_SUCCESS)
5475             goto EXIT;
5476 
5477         if (vpp_post == 1) {
5478             struct i965_surface src_surface_new;
5479 
5480             if (tmp_surface != NULL) {
5481                 src_surface_new.base = (struct object_base *)tmp_surface;
5482                 src_surface_new.type = I965_SURFACE_TYPE_SURFACE;
5483                 src_surface_new.flags = I965_SURFACE_FLAG_FRAME;
5484             } else
5485                 memcpy((void *)&src_surface_new, (void *)src_surface, sizeof(src_surface_new));
5486 
5487             vaStatus = i965_image_pl2_processing(ctx,
5488                                                  &src_surface_new,
5489                                                  src_rect,
5490                                                  dst_surface,
5491                                                  dst_rect);
5492         }
5493     }
5494 
5495 EXIT:
5496     if (num_tmp_surfaces)
5497         i965_DestroySurfaces(ctx,
5498                              tmp_surface_id,
5499                              num_tmp_surfaces);
5500 
5501     return vaStatus;
5502 }
5503 
5504 VAStatus
i965_image_processing(VADriverContextP ctx,const struct i965_surface * src_surface,const VARectangle * src_rect,struct i965_surface * dst_surface,const VARectangle * dst_rect)5505 i965_image_processing(VADriverContextP ctx,
5506                       const struct i965_surface *src_surface,
5507                       const VARectangle *src_rect,
5508                       struct i965_surface *dst_surface,
5509                       const VARectangle *dst_rect)
5510 {
5511     struct i965_driver_data *i965 = i965_driver_data(ctx);
5512     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
5513 
5514     if (HAS_VPP(i965)) {
5515         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
5516 
5517         _i965LockMutex(&i965->pp_mutex);
5518 
5519         switch (fourcc) {
5520         case VA_FOURCC_YV12:
5521         case VA_FOURCC_I420:
5522         case VA_FOURCC_IMC1:
5523         case VA_FOURCC_IMC3:
5524         case VA_FOURCC_422H:
5525         case VA_FOURCC_422V:
5526         case VA_FOURCC_411P:
5527         case VA_FOURCC_444P:
5528         case VA_FOURCC_YV16:
5529             status = i965_image_pl3_processing(ctx,
5530                                                src_surface,
5531                                                src_rect,
5532                                                dst_surface,
5533                                                dst_rect);
5534             break;
5535 
5536         case  VA_FOURCC_NV12:
5537             status = i965_image_pl2_processing(ctx,
5538                                                src_surface,
5539                                                src_rect,
5540                                                dst_surface,
5541                                                dst_rect);
5542             break;
5543         case VA_FOURCC_YUY2:
5544         case VA_FOURCC_UYVY:
5545             status = i965_image_pl1_processing(ctx,
5546                                                src_surface,
5547                                                src_rect,
5548                                                dst_surface,
5549                                                dst_rect);
5550             break;
5551         case VA_FOURCC_BGRA:
5552         case VA_FOURCC_BGRX:
5553         case VA_FOURCC_RGBA:
5554         case VA_FOURCC_RGBX:
5555             status = i965_image_pl1_rgbx_processing(ctx,
5556                                                     src_surface,
5557                                                     src_rect,
5558                                                     dst_surface,
5559                                                     dst_rect);
5560             break;
5561         case VA_FOURCC_P010:
5562             status = i965_image_p010_processing(ctx,
5563                                                 src_surface,
5564                                                 src_rect,
5565                                                 dst_surface,
5566                                                 dst_rect);
5567             break;
5568         default:
5569             status = VA_STATUS_ERROR_UNIMPLEMENTED;
5570             break;
5571         }
5572 
5573         _i965UnlockMutex(&i965->pp_mutex);
5574     }
5575 
5576     return status;
5577 }
5578 
5579 static void
i965_post_processing_context_finalize(VADriverContextP ctx,struct i965_post_processing_context * pp_context)5580 i965_post_processing_context_finalize(VADriverContextP ctx,
5581                                       struct i965_post_processing_context *pp_context)
5582 {
5583     int i;
5584 
5585     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
5586     pp_context->surface_state_binding_table.bo = NULL;
5587 
5588     dri_bo_unreference(pp_context->curbe.bo);
5589     pp_context->curbe.bo = NULL;
5590 
5591     dri_bo_unreference(pp_context->sampler_state_table.bo);
5592     pp_context->sampler_state_table.bo = NULL;
5593 
5594     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
5595     pp_context->sampler_state_table.bo_8x8 = NULL;
5596 
5597     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
5598     pp_context->sampler_state_table.bo_8x8_uv = NULL;
5599 
5600     dri_bo_unreference(pp_context->idrt.bo);
5601     pp_context->idrt.bo = NULL;
5602     pp_context->idrt.num_interface_descriptors = 0;
5603 
5604     dri_bo_unreference(pp_context->vfe_state.bo);
5605     pp_context->vfe_state.bo = NULL;
5606 
5607     for (i = 0; i < ARRAY_ELEMS(pp_context->pp_dndi_context.frame_store); i++)
5608         pp_dndi_frame_store_clear(&pp_context->pp_dndi_context.frame_store[i],
5609                                   ctx);
5610 
5611     dri_bo_unreference(pp_context->pp_dn_context.stmm_bo);
5612     pp_context->pp_dn_context.stmm_bo = NULL;
5613 
5614     for (i = 0; i < NUM_PP_MODULES; i++) {
5615         struct pp_module *pp_module = &pp_context->pp_modules[i];
5616 
5617         dri_bo_unreference(pp_module->kernel.bo);
5618         pp_module->kernel.bo = NULL;
5619     }
5620 
5621     free(pp_context->pp_static_parameter);
5622     free(pp_context->pp_inline_parameter);
5623     pp_context->pp_static_parameter = NULL;
5624     pp_context->pp_inline_parameter = NULL;
5625 }
5626 
5627 void
i965_post_processing_terminate(VADriverContextP ctx)5628 i965_post_processing_terminate(VADriverContextP ctx)
5629 {
5630     struct i965_driver_data *i965 = i965_driver_data(ctx);
5631     struct i965_post_processing_context *pp_context = i965->pp_context;
5632 
5633     if (pp_context) {
5634         pp_context->finalize(ctx, pp_context);
5635         free(pp_context);
5636     }
5637 
5638     i965->pp_context = NULL;
5639 }
5640 
5641 #define VPP_CURBE_ALLOCATION_SIZE   32
5642 
5643 void
i965_post_processing_context_init(VADriverContextP ctx,void * data,struct intel_batchbuffer * batch)5644 i965_post_processing_context_init(VADriverContextP ctx,
5645                                   void *data,
5646                                   struct intel_batchbuffer *batch)
5647 {
5648     struct i965_driver_data *i965 = i965_driver_data(ctx);
5649     int i;
5650     struct i965_post_processing_context *pp_context = data;
5651     const AVSConfig *avs_config;
5652 
5653     if (IS_IRONLAKE(i965->intel.device_info)) {
5654         pp_context->urb.size = i965->intel.device_info->urb_size;
5655         pp_context->urb.num_vfe_entries = 32;
5656         pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
5657         pp_context->urb.num_cs_entries = 1;
5658         pp_context->urb.size_cs_entry = 2;
5659         pp_context->urb.vfe_start = 0;
5660         pp_context->urb.cs_start = pp_context->urb.vfe_start +
5661                                    pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
5662         assert(pp_context->urb.cs_start +
5663                pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
5664         pp_context->intel_post_processing = ironlake_post_processing;
5665     } else {
5666         pp_context->vfe_gpu_state.max_num_threads = 60;
5667         pp_context->vfe_gpu_state.num_urb_entries = 59;
5668         pp_context->vfe_gpu_state.gpgpu_mode = 0;
5669         pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
5670         pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE;
5671         pp_context->intel_post_processing = gen6_post_processing;
5672     }
5673 
5674     pp_context->finalize = i965_post_processing_context_finalize;
5675 
5676     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
5677     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
5678     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
5679     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
5680 
5681     if (IS_HASWELL(i965->intel.device_info))
5682         memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
5683     else if (IS_GEN7(i965->intel.device_info))
5684         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
5685     else if (IS_GEN6(i965->intel.device_info))
5686         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
5687     else if (IS_IRONLAKE(i965->intel.device_info))
5688         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
5689 
5690     for (i = 0; i < NUM_PP_MODULES; i++) {
5691         struct pp_module *pp_module = &pp_context->pp_modules[i];
5692         dri_bo_unreference(pp_module->kernel.bo);
5693         if (pp_module->kernel.bin && pp_module->kernel.size) {
5694             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
5695                                                 pp_module->kernel.name,
5696                                                 pp_module->kernel.size,
5697                                                 4096);
5698             assert(pp_module->kernel.bo);
5699             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
5700         } else {
5701             pp_module->kernel.bo = NULL;
5702         }
5703     }
5704 
5705     /* static & inline parameters */
5706     if (IS_GEN7(i965->intel.device_info)) {
5707         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
5708         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
5709     } else {
5710         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
5711         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
5712     }
5713 
5714     pp_context->batch = batch;
5715     pp_dndi_context_init(&pp_context->pp_dndi_context);
5716 
5717     avs_config = IS_IRONLAKE(i965->intel.device_info) ? &gen5_avs_config :
5718                  &gen6_avs_config;
5719     avs_init_state(&pp_context->pp_avs_context.state, avs_config);
5720 }
5721 
5722 bool
i965_post_processing_init(VADriverContextP ctx)5723 i965_post_processing_init(VADriverContextP ctx)
5724 {
5725     struct i965_driver_data *i965 = i965_driver_data(ctx);
5726     struct i965_post_processing_context *pp_context = i965->pp_context;
5727 
5728     if (HAS_VPP(i965)) {
5729         if (pp_context == NULL) {
5730             pp_context = calloc(1, sizeof(*pp_context));
5731             assert(pp_context);
5732             i965->codec_info->post_processing_context_init(ctx, pp_context, i965->pp_batch);
5733             i965->pp_context = pp_context;
5734         }
5735     }
5736 
5737     return true;
5738 }
5739 
5740 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
5741     PP_NULL,    /* VAProcFilterNone */
5742     PP_NV12_DN, /* VAProcFilterNoiseReduction */
5743     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
5744     PP_NULL,    /* VAProcFilterSharpening */
5745     PP_NULL,    /* VAProcFilterColorBalance */
5746 };
5747 
5748 static const int proc_frame_to_pp_frame[3] = {
5749     I965_SURFACE_FLAG_FRAME,
5750     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
5751     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
5752 };
5753 
/*
 * Bit flags describing what a processing request requires; they are OR-ed
 * together into a single pp_ops mask.
 */
enum {
    PP_OP_CHANGE_FORMAT = 0x1,  /* source and target fourcc differ */
    PP_OP_CHANGE_SIZE   = 0x2,  /* source and target rectangles differ in size */
    PP_OP_DEINTERLACE   = 0x4,  /* a field has to be extracted / deinterlaced */
    PP_OP_COMPLEX       = 0x8,  /* needs more than one kernel pass */
};
5760 
5761 static int
pp_get_kernel_index(uint32_t src_fourcc,uint32_t dst_fourcc,uint32_t pp_ops,uint32_t filter_flags)5762 pp_get_kernel_index(uint32_t src_fourcc, uint32_t dst_fourcc, uint32_t pp_ops,
5763                     uint32_t filter_flags)
5764 {
5765     int pp_index = -1;
5766 
5767     if (!dst_fourcc)
5768         dst_fourcc = src_fourcc;
5769 
5770     switch (src_fourcc) {
5771     case VA_FOURCC_RGBX:
5772     case VA_FOURCC_RGBA:
5773     case VA_FOURCC_BGRX:
5774     case VA_FOURCC_BGRA:
5775         switch (dst_fourcc) {
5776         case VA_FOURCC_NV12:
5777             pp_index = PP_RGBX_LOAD_SAVE_NV12;
5778             break;
5779         }
5780         break;
5781     case VA_FOURCC_YUY2:
5782     case VA_FOURCC_UYVY:
5783         switch (dst_fourcc) {
5784         case VA_FOURCC_NV12:
5785             pp_index = PP_PA_LOAD_SAVE_NV12;
5786             break;
5787         case VA_FOURCC_I420:
5788         case VA_FOURCC_YV12:
5789             pp_index = PP_PA_LOAD_SAVE_PL3;
5790             break;
5791         case VA_FOURCC_YUY2:
5792         case VA_FOURCC_UYVY:
5793             pp_index = PP_PA_LOAD_SAVE_PA;
5794             break;
5795         }
5796         break;
5797     case VA_FOURCC_NV12:
5798         switch (dst_fourcc) {
5799         case VA_FOURCC_NV12:
5800             if (pp_ops & PP_OP_CHANGE_SIZE)
5801                 pp_index = avs_is_needed(filter_flags) ?
5802                            PP_NV12_AVS : PP_NV12_SCALING;
5803             else
5804                 pp_index = PP_NV12_LOAD_SAVE_N12;
5805             break;
5806         case VA_FOURCC_I420:
5807         case VA_FOURCC_YV12:
5808         case VA_FOURCC_IMC1:
5809         case VA_FOURCC_IMC3:
5810             pp_index = PP_NV12_LOAD_SAVE_PL3;
5811             break;
5812         case VA_FOURCC_YUY2:
5813         case VA_FOURCC_UYVY:
5814             pp_index = PP_NV12_LOAD_SAVE_PA;
5815             break;
5816         case VA_FOURCC_RGBX:
5817         case VA_FOURCC_RGBA:
5818         case VA_FOURCC_BGRX:
5819         case VA_FOURCC_BGRA:
5820             pp_index = PP_NV12_LOAD_SAVE_RGBX;
5821             break;
5822         }
5823         break;
5824     case VA_FOURCC_I420:
5825     case VA_FOURCC_YV12:
5826     case VA_FOURCC_IMC1:
5827     case VA_FOURCC_IMC3:
5828     case VA_FOURCC_YV16:
5829     case VA_FOURCC_411P:
5830     case VA_FOURCC_422H:
5831     case VA_FOURCC_422V:
5832     case VA_FOURCC_444P:
5833         switch (dst_fourcc) {
5834         case VA_FOURCC_NV12:
5835             pp_index = PP_PL3_LOAD_SAVE_N12;
5836             break;
5837         case VA_FOURCC_I420:
5838         case VA_FOURCC_YV12:
5839         case VA_FOURCC_IMC1:
5840         case VA_FOURCC_IMC3:
5841             pp_index = PP_PL3_LOAD_SAVE_PL3;
5842             break;
5843         case VA_FOURCC_YUY2:
5844         case VA_FOURCC_UYVY:
5845             pp_index = PP_PL3_LOAD_SAVE_PA;
5846             break;
5847         }
5848         break;
5849     }
5850     return pp_index;
5851 }
5852 
5853 static VAStatus
i965_proc_picture_fast(VADriverContextP ctx,struct i965_proc_context * proc_context,struct proc_state * proc_state)5854 i965_proc_picture_fast(VADriverContextP ctx,
5855                        struct i965_proc_context *proc_context, struct proc_state *proc_state)
5856 {
5857     struct i965_driver_data * const i965 = i965_driver_data(ctx);
5858     const VAProcPipelineParameterBuffer * const pipeline_param =
5859         (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
5860     struct object_surface *src_obj_surface, *dst_obj_surface;
5861     struct i965_surface src_surface, dst_surface;
5862     const VAProcFilterParameterBufferDeinterlacing *deint_params = NULL;
5863     VARectangle src_rect, dst_rect;
5864     VAStatus status;
5865     uint32_t i, filter_flags = 0, pp_ops = 0;
5866     int pp_index;
5867 
5868     /* Validate pipeline parameters */
5869     if (pipeline_param->num_filters > 0 && !pipeline_param->filters)
5870         return VA_STATUS_ERROR_INVALID_PARAMETER;
5871 
5872     for (i = 0; i < pipeline_param->num_filters; i++) {
5873         const VAProcFilterParameterBuffer *filter;
5874         struct object_buffer * const obj_buffer =
5875             BUFFER(pipeline_param->filters[i]);
5876 
5877         assert(obj_buffer && obj_buffer->buffer_store);
5878         if (!obj_buffer || !obj_buffer->buffer_store)
5879             return VA_STATUS_ERROR_INVALID_PARAMETER;
5880 
5881         filter = (VAProcFilterParameterBuffer *)
5882                  obj_buffer->buffer_store->buffer;
5883         switch (filter->type) {
5884         case VAProcFilterDeinterlacing:
5885             pp_ops |= PP_OP_DEINTERLACE;
5886             deint_params = (VAProcFilterParameterBufferDeinterlacing *)filter;
5887             break;
5888         default:
5889             pp_ops |= PP_OP_COMPLEX;
5890             break;
5891         }
5892     }
5893     filter_flags |= pipeline_param->filter_flags & VA_FILTER_SCALING_MASK;
5894 
5895     /* Validate source surface */
5896     src_obj_surface = SURFACE(pipeline_param->surface);
5897     if (!src_obj_surface)
5898         return VA_STATUS_ERROR_INVALID_SURFACE;
5899 
5900     if (!src_obj_surface->fourcc)
5901         return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
5902 
5903     if (!src_obj_surface->bo)
5904         return VA_STATUS_ERROR_INVALID_SURFACE;
5905 
5906     if (pipeline_param->surface_region) {
5907         src_rect.x = pipeline_param->surface_region->x;
5908         src_rect.y = pipeline_param->surface_region->y;
5909         src_rect.width = pipeline_param->surface_region->width;
5910         src_rect.height = pipeline_param->surface_region->height;
5911     } else {
5912         src_rect.x = 0;
5913         src_rect.y = 0;
5914         src_rect.width = src_obj_surface->orig_width;
5915         src_rect.height = src_obj_surface->orig_height;
5916     }
5917 
5918     src_surface.base  = &src_obj_surface->base;
5919     src_surface.type  = I965_SURFACE_TYPE_SURFACE;
5920     src_surface.flags = I965_SURFACE_FLAG_FRAME;
5921 
5922     if (pp_ops & PP_OP_DEINTERLACE) {
5923         filter_flags |= !(deint_params->flags & VA_DEINTERLACING_BOTTOM_FIELD) ?
5924                         VA_TOP_FIELD : VA_BOTTOM_FIELD;
5925         if (deint_params->algorithm != VAProcDeinterlacingBob)
5926             pp_ops |= PP_OP_COMPLEX;
5927     } else if (pipeline_param->filter_flags & (VA_TOP_FIELD | VA_BOTTOM_FIELD)) {
5928         filter_flags |= (pipeline_param->filter_flags & VA_TOP_FIELD) ?
5929                         VA_TOP_FIELD : VA_BOTTOM_FIELD;
5930         pp_ops |= PP_OP_DEINTERLACE;
5931     }
5932     if (pp_ops & PP_OP_DEINTERLACE) // XXX: no bob-deinterlacing optimization yet
5933         pp_ops |= PP_OP_COMPLEX;
5934 
5935     /* Validate target surface */
5936     dst_obj_surface = SURFACE(proc_state->current_render_target);
5937     if (!dst_obj_surface)
5938         return VA_STATUS_ERROR_INVALID_SURFACE;
5939 
5940     if (!dst_obj_surface->bo)
5941         return VA_STATUS_ERROR_INVALID_SURFACE;
5942 
5943     if (dst_obj_surface->fourcc &&
5944         dst_obj_surface->fourcc != src_obj_surface->fourcc)
5945         pp_ops |= PP_OP_CHANGE_FORMAT;
5946 
5947     if (pipeline_param->output_region) {
5948         dst_rect.x = pipeline_param->output_region->x;
5949         dst_rect.y = pipeline_param->output_region->y;
5950         dst_rect.width = pipeline_param->output_region->width;
5951         dst_rect.height = pipeline_param->output_region->height;
5952     } else {
5953         dst_rect.x = 0;
5954         dst_rect.y = 0;
5955         dst_rect.width = dst_obj_surface->orig_width;
5956         dst_rect.height = dst_obj_surface->orig_height;
5957     }
5958 
5959     if (dst_rect.width != src_rect.width || dst_rect.height != src_rect.height)
5960         pp_ops |= PP_OP_CHANGE_SIZE;
5961 
5962     dst_surface.base  = &dst_obj_surface->base;
5963     dst_surface.type  = I965_SURFACE_TYPE_SURFACE;
5964     dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5965 
5966     /* Validate "fast-path" processing capabilities */
5967     if (!IS_GEN7(i965->intel.device_info)) {
5968         if ((pp_ops & PP_OP_CHANGE_FORMAT) && (pp_ops & PP_OP_CHANGE_SIZE))
5969             return VA_STATUS_ERROR_UNIMPLEMENTED; // temporary surface is needed
5970     }
5971     if (pipeline_param->pipeline_flags & VA_PROC_PIPELINE_FAST) {
5972         filter_flags &= ~VA_FILTER_SCALING_MASK;
5973         filter_flags |= VA_FILTER_SCALING_FAST;
5974     } else {
5975         if (pp_ops & PP_OP_COMPLEX)
5976             return VA_STATUS_ERROR_UNIMPLEMENTED; // full pipeline is needed
5977         if ((filter_flags & VA_FILTER_SCALING_MASK) > VA_FILTER_SCALING_HQ)
5978             return VA_STATUS_ERROR_UNIMPLEMENTED;
5979     }
5980 
5981     pp_index = pp_get_kernel_index(src_obj_surface->fourcc,
5982                                    dst_obj_surface->fourcc, pp_ops, filter_flags);
5983     if (pp_index < 0)
5984         return VA_STATUS_ERROR_UNIMPLEMENTED;
5985 
5986     proc_context->pp_context.filter_flags = filter_flags;
5987     status = i965_post_processing_internal(ctx, &proc_context->pp_context,
5988                                            &src_surface, &src_rect, &dst_surface, &dst_rect, pp_index, NULL);
5989     intel_batchbuffer_flush(proc_context->pp_context.batch);
5990     return status;
5991 }
5992 
5993 VAStatus
i965_proc_picture(VADriverContextP ctx,VAProfile profile,union codec_state * codec_state,struct hw_context * hw_context)5994 i965_proc_picture(VADriverContextP ctx,
5995                   VAProfile profile,
5996                   union codec_state *codec_state,
5997                   struct hw_context *hw_context)
5998 {
5999     struct i965_driver_data *i965 = i965_driver_data(ctx);
6000     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
6001     struct proc_state *proc_state = &codec_state->proc;
6002     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
6003     struct object_surface *obj_surface;
6004     struct i965_surface src_surface, dst_surface;
6005     VARectangle src_rect, dst_rect;
6006     VAStatus status;
6007     int i;
6008     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
6009     int num_tmp_surfaces = 0;
6010     unsigned int tiling = 0, swizzle = 0;
6011     int in_width, in_height;
6012 
6013     if (pipeline_param->surface == VA_INVALID_ID ||
6014         proc_state->current_render_target == VA_INVALID_ID) {
6015         status = VA_STATUS_ERROR_INVALID_SURFACE;
6016         goto error;
6017     }
6018 
6019     obj_surface = SURFACE(proc_state->current_render_target);
6020     if (!obj_surface)
6021         return VA_STATUS_ERROR_INVALID_SURFACE;
6022 
6023     if (!obj_surface->bo) {
6024         unsigned int expected_format = obj_surface->expected_format;
6025         int fourcc = 0;
6026         int subsample = 0;
6027         int tiling = HAS_TILED_SURFACE(i965);
6028         switch (expected_format) {
6029         case VA_RT_FORMAT_YUV420:
6030             fourcc = VA_FOURCC_NV12;
6031             subsample = SUBSAMPLE_YUV420;
6032             break;
6033         case VA_RT_FORMAT_YUV420_10BPP:
6034             fourcc = VA_FOURCC_P010;
6035             subsample = SUBSAMPLE_YUV420;
6036             break;
6037         case VA_RT_FORMAT_RGB32:
6038             fourcc = VA_FOURCC_RGBA;
6039             subsample = SUBSAMPLE_RGBX;
6040             break;
6041         default:
6042             return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
6043         }
6044         i965_check_alloc_surface_bo(ctx, obj_surface, tiling, fourcc, subsample);
6045     }
6046 
6047     obj_surface = SURFACE(pipeline_param->surface);
6048 
6049     if (!obj_surface) {
6050         status = VA_STATUS_ERROR_INVALID_SURFACE;
6051         goto error;
6052     }
6053 
6054     if (!obj_surface->bo) {
6055         status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */
6056         goto error;
6057     }
6058 
6059     if (pipeline_param->num_filters && !pipeline_param->filters) {
6060         status = VA_STATUS_ERROR_INVALID_PARAMETER;
6061         goto error;
6062     }
6063 
6064     status = i965_proc_picture_fast(ctx, proc_context, proc_state);
6065     if (status != VA_STATUS_ERROR_UNIMPLEMENTED)
6066         return status;
6067 
6068     in_width = obj_surface->orig_width;
6069     in_height = obj_surface->orig_height;
6070     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
6071 
6072     src_surface.base = (struct object_base *)obj_surface;
6073     src_surface.type = I965_SURFACE_TYPE_SURFACE;
6074     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
6075 
6076     VASurfaceID out_surface_id = VA_INVALID_ID;
6077     if (obj_surface->fourcc != VA_FOURCC_NV12) {
6078         src_surface.base = (struct object_base *)obj_surface;
6079         src_surface.type = I965_SURFACE_TYPE_SURFACE;
6080         src_surface.flags = I965_SURFACE_FLAG_FRAME;
6081         src_rect.x = 0;
6082         src_rect.y = 0;
6083         src_rect.width = in_width;
6084         src_rect.height = in_height;
6085 
6086         status = i965_CreateSurfaces(ctx,
6087                                      in_width,
6088                                      in_height,
6089                                      VA_RT_FORMAT_YUV420,
6090                                      1,
6091                                      &out_surface_id);
6092         if (status != VA_STATUS_SUCCESS)
6093             goto error;
6094         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6095         obj_surface = SURFACE(out_surface_id);
6096         assert(obj_surface);
6097         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6098 
6099         dst_surface.base = (struct object_base *)obj_surface;
6100         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6101         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
6102         dst_rect.x = 0;
6103         dst_rect.y = 0;
6104         dst_rect.width = in_width;
6105         dst_rect.height = in_height;
6106 
6107         status = i965_image_processing(ctx,
6108                                        &src_surface,
6109                                        &src_rect,
6110                                        &dst_surface,
6111                                        &dst_rect);
6112         if (status != VA_STATUS_SUCCESS)
6113             goto error;
6114 
6115         src_surface.base = (struct object_base *)obj_surface;
6116         src_surface.type = I965_SURFACE_TYPE_SURFACE;
6117         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
6118     }
6119 
6120     if (pipeline_param->surface_region) {
6121         src_rect.x = pipeline_param->surface_region->x;
6122         src_rect.y = pipeline_param->surface_region->y;
6123         src_rect.width = pipeline_param->surface_region->width;
6124         src_rect.height = pipeline_param->surface_region->height;
6125     } else {
6126         src_rect.x = 0;
6127         src_rect.y = 0;
6128         src_rect.width = in_width;
6129         src_rect.height = in_height;
6130     }
6131 
6132     proc_context->pp_context.pipeline_param = pipeline_param;
6133 
6134     for (i = 0; i < pipeline_param->num_filters; i++) {
6135         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
6136         VAProcFilterParameterBufferBase *filter_param = NULL;
6137         VAProcFilterType filter_type;
6138         int kernel_index;
6139 
6140         if (!obj_buffer ||
6141             !obj_buffer->buffer_store ||
6142             !obj_buffer->buffer_store->buffer) {
6143             status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN;
6144             goto error;
6145         }
6146 
6147         out_surface_id = VA_INVALID_ID;
6148         filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
6149         filter_type = filter_param->type;
6150         kernel_index = procfilter_to_pp_flag[filter_type];
6151 
6152         if (kernel_index != PP_NULL &&
6153             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
6154             status = i965_CreateSurfaces(ctx,
6155                                          in_width,
6156                                          in_height,
6157                                          VA_RT_FORMAT_YUV420,
6158                                          1,
6159                                          &out_surface_id);
6160             assert(status == VA_STATUS_SUCCESS);
6161             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6162             obj_surface = SURFACE(out_surface_id);
6163             assert(obj_surface);
6164             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6165             dst_surface.base = (struct object_base *)obj_surface;
6166             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6167             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
6168                                                    &src_surface,
6169                                                    &src_rect,
6170                                                    &dst_surface,
6171                                                    &src_rect,
6172                                                    kernel_index,
6173                                                    filter_param);
6174 
6175             if (status == VA_STATUS_SUCCESS) {
6176                 src_surface.base = dst_surface.base;
6177                 src_surface.type = dst_surface.type;
6178                 src_surface.flags = dst_surface.flags;
6179             }
6180         }
6181     }
6182 
6183     proc_context->pp_context.pipeline_param = NULL;
6184     obj_surface = SURFACE(proc_state->current_render_target);
6185 
6186     if (!obj_surface) {
6187         status = VA_STATUS_ERROR_INVALID_SURFACE;
6188         goto error;
6189     }
6190 
6191     if (pipeline_param->output_region) {
6192         dst_rect.x = pipeline_param->output_region->x;
6193         dst_rect.y = pipeline_param->output_region->y;
6194         dst_rect.width = pipeline_param->output_region->width;
6195         dst_rect.height = pipeline_param->output_region->height;
6196     } else {
6197         dst_rect.x = 0;
6198         dst_rect.y = 0;
6199         dst_rect.width = obj_surface->orig_width;
6200         dst_rect.height = obj_surface->orig_height;
6201     }
6202 
6203     if (IS_GEN7(i965->intel.device_info) ||
6204         IS_GEN8(i965->intel.device_info) ||
6205         IS_GEN9(i965->intel.device_info) ||
6206         IS_GEN10(i965->intel.device_info)) {
6207         unsigned int saved_filter_flag;
6208         struct i965_post_processing_context *i965pp_context = i965->pp_context;
6209 
6210         if (obj_surface->fourcc == 0) {
6211             i965_check_alloc_surface_bo(ctx, obj_surface, 1,
6212                                         VA_FOURCC_NV12,
6213                                         SUBSAMPLE_YUV420);
6214         }
6215 
6216         i965_vpp_clear_surface(ctx, &proc_context->pp_context,
6217                                obj_surface,
6218                                pipeline_param->output_background_color);
6219 
6220         intel_batchbuffer_flush(hw_context->batch);
6221 
6222         saved_filter_flag = i965pp_context->filter_flags;
6223         i965pp_context->filter_flags = (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK);
6224 
6225         dst_surface.base = (struct object_base *)obj_surface;
6226         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6227         i965_image_processing(ctx, &src_surface, &src_rect, &dst_surface, &dst_rect);
6228 
6229         i965pp_context->filter_flags = saved_filter_flag;
6230 
6231         if (num_tmp_surfaces)
6232             i965_DestroySurfaces(ctx,
6233                                  tmp_surfaces,
6234                                  num_tmp_surfaces);
6235 
6236         return VA_STATUS_SUCCESS;
6237     }
6238 
6239     int csc_needed = 0;
6240     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC_NV12) {
6241         csc_needed = 1;
6242         out_surface_id = VA_INVALID_ID;
6243         status = i965_CreateSurfaces(ctx,
6244                                      obj_surface->orig_width,
6245                                      obj_surface->orig_height,
6246                                      VA_RT_FORMAT_YUV420,
6247                                      1,
6248                                      &out_surface_id);
6249         assert(status == VA_STATUS_SUCCESS);
6250         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6251         struct object_surface *csc_surface = SURFACE(out_surface_id);
6252         assert(csc_surface);
6253         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6254         dst_surface.base = (struct object_base *)csc_surface;
6255     } else {
6256         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
6257         dst_surface.base = (struct object_base *)obj_surface;
6258     }
6259 
6260     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6261     i965_vpp_clear_surface(ctx, &proc_context->pp_context, obj_surface, pipeline_param->output_background_color);
6262 
6263     // load/save doesn't support different origin offset for src and dst surface
6264     if (src_rect.width == dst_rect.width &&
6265         src_rect.height == dst_rect.height &&
6266         src_rect.x == dst_rect.x &&
6267         src_rect.y == dst_rect.y) {
6268         i965_post_processing_internal(ctx, &proc_context->pp_context,
6269                                       &src_surface,
6270                                       &src_rect,
6271                                       &dst_surface,
6272                                       &dst_rect,
6273                                       PP_NV12_LOAD_SAVE_N12,
6274                                       NULL);
6275     } else {
6276 
6277         proc_context->pp_context.filter_flags = pipeline_param->filter_flags;
6278         i965_post_processing_internal(ctx, &proc_context->pp_context,
6279                                       &src_surface,
6280                                       &src_rect,
6281                                       &dst_surface,
6282                                       &dst_rect,
6283                                       avs_is_needed(pipeline_param->filter_flags) ? PP_NV12_AVS : PP_NV12_SCALING,
6284                                       NULL);
6285     }
6286 
6287     if (csc_needed) {
6288         src_surface.base = dst_surface.base;
6289         src_surface.type = dst_surface.type;
6290         src_surface.flags = dst_surface.flags;
6291         dst_surface.base = (struct object_base *)obj_surface;
6292         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6293         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
6294     }
6295 
6296     if (num_tmp_surfaces)
6297         i965_DestroySurfaces(ctx,
6298                              tmp_surfaces,
6299                              num_tmp_surfaces);
6300 
6301     intel_batchbuffer_flush(hw_context->batch);
6302 
6303     return VA_STATUS_SUCCESS;
6304 
6305 error:
6306     if (num_tmp_surfaces)
6307         i965_DestroySurfaces(ctx,
6308                              tmp_surfaces,
6309                              num_tmp_surfaces);
6310 
6311     return status;
6312 }
6313 
6314 static void
i965_proc_context_destroy(void * hw_context)6315 i965_proc_context_destroy(void *hw_context)
6316 {
6317     struct i965_proc_context * const proc_context = hw_context;
6318     VADriverContextP const ctx = proc_context->driver_context;
6319 
6320     proc_context->pp_context.finalize(ctx, &proc_context->pp_context);
6321     intel_batchbuffer_free(proc_context->base.batch);
6322     free(proc_context);
6323 }
6324 
6325 struct hw_context *
i965_proc_context_init(VADriverContextP ctx,struct object_config * obj_config)6326 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
6327 {
6328     struct i965_driver_data *i965 = i965_driver_data(ctx);
6329     struct intel_driver_data *intel = intel_driver_data(ctx);
6330     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
6331 
6332     if (!proc_context)
6333         return NULL;
6334 
6335     proc_context->base.destroy = i965_proc_context_destroy;
6336     proc_context->base.run = i965_proc_picture;
6337     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
6338     proc_context->driver_context = ctx;
6339     i965->codec_info->post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
6340 
6341     return (struct hw_context *)proc_context;
6342 }
6343 
6344 
6345