1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2015 Broadcom 4 */ 5 6 /** 7 * DOC: VC4 plane module 8 * 9 * Each DRM plane is a layer of pixels being scanned out by the HVS. 10 * 11 * At atomic modeset check time, we compute the HVS display element 12 * state that would be necessary for displaying the plane (giving us a 13 * chance to figure out if a plane configuration is invalid), then at 14 * atomic flush time the CRTC will ask us to write our element state 15 * into the region of the HVS that it has allocated for us. 16 */ 17 18 #include <drm/drm_atomic.h> 19 #include <drm/drm_atomic_helper.h> 20 #include <drm/drm_atomic_uapi.h> 21 #include <drm/drm_blend.h> 22 #include <drm/drm_drv.h> 23 #include <drm/drm_fb_dma_helper.h> 24 #include <drm/drm_fourcc.h> 25 #include <drm/drm_framebuffer.h> 26 #include <drm/drm_gem_atomic_helper.h> 27 28 #include "uapi/drm/vc4_drm.h" 29 30 #include "vc4_drv.h" 31 #include "vc4_regs.h" 32 33 static const struct hvs_format { 34 u32 drm; /* DRM_FORMAT_* */ 35 u32 hvs; /* HVS_FORMAT_* */ 36 u32 pixel_order; 37 u32 pixel_order_hvs5; 38 bool hvs5_only; 39 } hvs_formats[] = { 40 { 41 .drm = DRM_FORMAT_XRGB8888, 42 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 43 .pixel_order = HVS_PIXEL_ORDER_ABGR, 44 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 45 }, 46 { 47 .drm = DRM_FORMAT_ARGB8888, 48 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 49 .pixel_order = HVS_PIXEL_ORDER_ABGR, 50 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 51 }, 52 { 53 .drm = DRM_FORMAT_ABGR8888, 54 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 55 .pixel_order = HVS_PIXEL_ORDER_ARGB, 56 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 57 }, 58 { 59 .drm = DRM_FORMAT_XBGR8888, 60 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 61 .pixel_order = HVS_PIXEL_ORDER_ARGB, 62 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 63 }, 64 { 65 .drm = DRM_FORMAT_RGB565, 66 .hvs = HVS_PIXEL_FORMAT_RGB565, 67 .pixel_order = HVS_PIXEL_ORDER_XRGB, 68 }, 69 { 70 .drm = DRM_FORMAT_BGR565, 71 .hvs = HVS_PIXEL_FORMAT_RGB565, 72 .pixel_order = 
HVS_PIXEL_ORDER_XBGR, 73 }, 74 { 75 .drm = DRM_FORMAT_ARGB1555, 76 .hvs = HVS_PIXEL_FORMAT_RGBA5551, 77 .pixel_order = HVS_PIXEL_ORDER_ABGR, 78 }, 79 { 80 .drm = DRM_FORMAT_XRGB1555, 81 .hvs = HVS_PIXEL_FORMAT_RGBA5551, 82 .pixel_order = HVS_PIXEL_ORDER_ABGR, 83 }, 84 { 85 .drm = DRM_FORMAT_RGB888, 86 .hvs = HVS_PIXEL_FORMAT_RGB888, 87 .pixel_order = HVS_PIXEL_ORDER_XRGB, 88 }, 89 { 90 .drm = DRM_FORMAT_BGR888, 91 .hvs = HVS_PIXEL_FORMAT_RGB888, 92 .pixel_order = HVS_PIXEL_ORDER_XBGR, 93 }, 94 { 95 .drm = DRM_FORMAT_YUV422, 96 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 97 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 98 }, 99 { 100 .drm = DRM_FORMAT_YVU422, 101 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 102 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 103 }, 104 { 105 .drm = DRM_FORMAT_YUV420, 106 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, 107 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 108 }, 109 { 110 .drm = DRM_FORMAT_YVU420, 111 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, 112 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 113 }, 114 { 115 .drm = DRM_FORMAT_NV12, 116 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, 117 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 118 }, 119 { 120 .drm = DRM_FORMAT_NV21, 121 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, 122 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 123 }, 124 { 125 .drm = DRM_FORMAT_NV16, 126 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, 127 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 128 }, 129 { 130 .drm = DRM_FORMAT_NV61, 131 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, 132 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 133 }, 134 { 135 .drm = DRM_FORMAT_P030, 136 .hvs = HVS_PIXEL_FORMAT_YCBCR_10BIT, 137 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 138 .hvs5_only = true, 139 }, 140 }; 141 142 static const struct hvs_format *vc4_get_hvs_format(u32 drm_format) 143 { 144 unsigned i; 145 146 for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { 147 if (hvs_formats[i].drm == drm_format) 148 return &hvs_formats[i]; 149 } 150 151 return NULL; 152 } 153 154 static enum 
vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst) 155 { 156 if (dst == src) 157 return VC4_SCALING_NONE; 158 if (3 * dst >= 2 * src) 159 return VC4_SCALING_PPF; 160 else 161 return VC4_SCALING_TPZ; 162 } 163 164 static bool plane_enabled(struct drm_plane_state *state) 165 { 166 return state->fb && !WARN_ON(!state->crtc); 167 } 168 169 static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane) 170 { 171 struct vc4_plane_state *vc4_state; 172 173 if (WARN_ON(!plane->state)) 174 return NULL; 175 176 vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL); 177 if (!vc4_state) 178 return NULL; 179 180 memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm)); 181 vc4_state->dlist_initialized = 0; 182 183 __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base); 184 185 if (vc4_state->dlist) { 186 vc4_state->dlist = kmemdup(vc4_state->dlist, 187 vc4_state->dlist_count * 4, 188 GFP_KERNEL); 189 if (!vc4_state->dlist) { 190 kfree(vc4_state); 191 return NULL; 192 } 193 vc4_state->dlist_size = vc4_state->dlist_count; 194 } 195 196 return &vc4_state->base; 197 } 198 199 static void vc4_plane_destroy_state(struct drm_plane *plane, 200 struct drm_plane_state *state) 201 { 202 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 203 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 204 205 if (drm_mm_node_allocated(&vc4_state->lbm)) { 206 unsigned long irqflags; 207 208 spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); 209 drm_mm_remove_node(&vc4_state->lbm); 210 spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); 211 } 212 213 kfree(vc4_state->dlist); 214 __drm_atomic_helper_plane_destroy_state(&vc4_state->base); 215 kfree(state); 216 } 217 218 /* Called during init to allocate the plane's atomic state. 
*/ 219 static void vc4_plane_reset(struct drm_plane *plane) 220 { 221 struct vc4_plane_state *vc4_state; 222 223 WARN_ON(plane->state); 224 225 vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL); 226 if (!vc4_state) 227 return; 228 229 __drm_atomic_helper_plane_reset(plane, &vc4_state->base); 230 } 231 232 static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state) 233 { 234 if (vc4_state->dlist_count == vc4_state->dlist_size) { 235 u32 new_size = max(4u, vc4_state->dlist_count * 2); 236 u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL); 237 238 if (!new_dlist) 239 return; 240 memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4); 241 242 kfree(vc4_state->dlist); 243 vc4_state->dlist = new_dlist; 244 vc4_state->dlist_size = new_size; 245 } 246 247 vc4_state->dlist_count++; 248 } 249 250 static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) 251 { 252 unsigned int idx = vc4_state->dlist_count; 253 254 vc4_dlist_counter_increment(vc4_state); 255 vc4_state->dlist[idx] = val; 256 } 257 258 /* Returns the scl0/scl1 field based on whether the dimensions need to 259 * be up/down/non-scaled. 260 * 261 * This is a replication of a table from the spec. 
 */
static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	/* Pack (x_scaling, y_scaling) into one value so the H/V mode
	 * pairs can be dispatched in a single switch.
	 */
	switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
	case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_PPF_V_PPF;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_TPZ_V_PPF;
	case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_PPF_V_TPZ;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
	case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
		return SCALER_CTL0_SCL_H_PPF_V_NONE;
	case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_NONE_V_PPF;
	case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_NONE_V_TPZ;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
		return SCALER_CTL0_SCL_H_TPZ_V_NONE;
	default:
	case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
		/* The unity case is independently handled by
		 * SCALER_CTL0_UNITY.
		 */
		return 0;
	}
}

/* Rescale the plane's CRTC coordinates into the active area left once
 * the overscan margins configured on the CRTC are subtracted.
 * Returns -EINVAL when the margins leave no usable display area or the
 * rescaled plane collapses to zero width/height.
 */
static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
{
	struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
	unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
	struct drm_crtc_state *crtc_state;

	crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
						   pstate->crtc);

	vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
	/* No margins configured: nothing to adjust. */
	if (!left && !right && !top && !bottom)
		return 0;

	if (left + right >= crtc_state->mode.hdisplay ||
	    top + bottom >= crtc_state->mode.vdisplay)
		return -EINVAL;

	/* Scale the X position proportionally into the reduced width,
	 * shift it past the left margin, then clamp to the right edge.
	 */
	adjhdisplay = crtc_state->mode.hdisplay - (left + right);
	vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
					       adjhdisplay,
					       crtc_state->mode.hdisplay);
	vc4_pstate->crtc_x += left;
	if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right)
		vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right;

	/* Same for Y against the vertical margins. */
	adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
	vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
					       adjvdisplay,
					       crtc_state->mode.vdisplay);
	vc4_pstate->crtc_y += top;
	if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom)
		vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom;

	/* Shrink the on-screen size by the same ratios. */
	vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
					       adjhdisplay,
					       crtc_state->mode.hdisplay);
	vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
					       adjvdisplay,
					       crtc_state->mode.vdisplay);

	if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
		return -EINVAL;

	return 0;
}

/* Validate the plane's placement against the CRTC and precompute the
 * source offsets and per-channel scaling modes used later when the
 * dlist is generated.
 */
static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	struct drm_gem_dma_object *bo;
	int num_planes = fb->format->num_planes;
	struct drm_crtc_state *crtc_state;
	u32 h_subsample = fb->format->hsub;
	u32
	    v_subsample = fb->format->vsub;
	int i, ret;

	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
							state->crtc);
	if (!crtc_state) {
		DRM_DEBUG_KMS("Invalid crtc state\n");
		return -EINVAL;
	}

	/* Clip the plane against the CRTC; scaling in any ratio from
	 * 1x down/up to INT_MAX is allowed.
	 */
	ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
						  INT_MAX, true, true);
	if (ret)
		return ret;

	/* Record the DMA address of each FB plane's first byte. */
	for (i = 0; i < num_planes; i++) {
		bo = drm_fb_dma_get_gem_obj(fb, i);
		vc4_state->offsets[i] = bo->dma_addr + fb->offsets[i];
	}

	/*
	 * We don't support subpixel source positioning for scaling,
	 * but fractional coordinates can be generated by clipping
	 * so just round for now
	 */
	vc4_state->src_x = DIV_ROUND_CLOSEST(state->src.x1, 1 << 16);
	vc4_state->src_y = DIV_ROUND_CLOSEST(state->src.y1, 1 << 16);
	vc4_state->src_w[0] = DIV_ROUND_CLOSEST(state->src.x2, 1 << 16) - vc4_state->src_x;
	vc4_state->src_h[0] = DIV_ROUND_CLOSEST(state->src.y2, 1 << 16) - vc4_state->src_y;

	vc4_state->crtc_x = state->dst.x1;
	vc4_state->crtc_y = state->dst.y1;
	vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
	vc4_state->crtc_h = state->dst.y2 - state->dst.y1;

	/* Fold any CRTC overscan margins into the crtc_* coordinates. */
	ret = vc4_plane_margins_adj(state);
	if (ret)
		return ret;

	vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
						       vc4_state->crtc_w);
	vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
						       vc4_state->crtc_h);

	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
			       vc4_state->y_scaling[0] == VC4_SCALING_NONE);

	if (num_planes > 1) {
		vc4_state->is_yuv = true;

		/* Chroma planes are subsampled relative to luma. */
		vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
		vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;

		vc4_state->x_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_w[1],
					     vc4_state->crtc_w);
		vc4_state->y_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_h[1],
					     vc4_state->crtc_h);

		/* YUV conversion requires that horizontal scaling be enabled
		 * on the UV plane even if vc4_get_scaling_mode() returned
		 * VC4_SCALING_NONE (which can happen when the down-scaling
		 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
		 * case.
		 */
		if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
			vc4_state->x_scaling[1] = VC4_SCALING_PPF;
	} else {
		vc4_state->is_yuv = false;
		vc4_state->x_scaling[1] = VC4_SCALING_NONE;
		vc4_state->y_scaling[1] = VC4_SCALING_NONE;
	}

	return 0;
}

/* Emit the two TPZ (trapezoidal scaler) dlist words for a src -> dst
 * span: a 16.16 fixed-point scale factor plus its reciprocal.
 */
static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
{
	u32 scale, recip;

	scale = (1 << 16) * src / dst;

	/* The specs note that while the reciprocal would be defined
	 * as (1<<32)/scale, ~0 is close enough.
	 */
	recip = ~0 / scale;

	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
}

/* Emit the single PPF (polyphase filter) dlist word for a src -> dst
 * span, using a 16.16 fixed-point scale factor and zero initial phase.
 */
static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
{
	u32 scale = (1 << 16) * src / dst;

	vc4_dlist_write(vc4_state,
			SCALER_PPF_AGC |
			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
			VC4_SET_FIELD(0, SCALER_PPF_IPHASE));
}

/* Compute how much line-buffer memory (in LBM words) this plane's
 * vertical scaling requires; 0 when no LBM is needed.
 */
static u32 vc4_lbm_size(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	u32 pix_per_line;
	u32 lbm;

	/* LBM is not needed when there's no vertical scaling. */
	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
		return 0;

	/*
	 * This can be further optimized in the RGB/YUV444 case if the PPF
	 * decimation factor is between 0.5 and 1.0 by using crtc_w.
	 *
	 * It's not an issue though, since in that case since src_w[0] is going
	 * to be greater than or equal to crtc_w.
	 */
	if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
		pix_per_line = vc4_state->crtc_w;
	else
		pix_per_line = vc4_state->src_w[0];

	/* Bytes of line buffer per line, by format and scaler type. */
	if (!vc4_state->is_yuv) {
		if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
			lbm = pix_per_line * 8;
		else {
			/* In special cases, this multiplier might be 12. */
			lbm = pix_per_line * 16;
		}
	} else {
		/* There are cases for this going down to a multiplier
		 * of 2, but according to the firmware source, the
		 * table in the docs is somewhat wrong.
		 */
		lbm = pix_per_line * 16;
	}

	/* Align it to 64 or 128 (hvs5) bytes */
	lbm = roundup(lbm, vc4->is_vc5 ? 128 : 64);

	/* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
	lbm /= vc4->is_vc5 ? 4 : 2;

	return lbm;
}

/* Write the H/V scaler words for one scaler channel into the dlist.
 * Only the words matching the channel's configured modes are emitted;
 * vertical PPF/TPZ setups are followed by a context word that the HVS
 * fills in itself.
 */
static void vc4_write_scaling_parameters(struct drm_plane_state *state,
					 int channel)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	/* Ch0 H-PPF Word 0: Scaling Parameters */
	if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
		vc4_write_ppf(vc4_state,
			      vc4_state->src_w[channel], vc4_state->crtc_w);
	}

	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
	if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
		vc4_write_ppf(vc4_state,
			      vc4_state->src_h[channel], vc4_state->crtc_h);
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}

	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
	if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
		vc4_write_tpz(vc4_state,
			      vc4_state->src_w[channel], vc4_state->crtc_w);
	}

	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
	if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
		vc4_write_tpz(vc4_state,
			      vc4_state->src_h[channel], vc4_state->crtc_h);
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}
}

/* Estimate the HVS clock load and memory-bus bandwidth this plane will
 * generate, storing the results in the plane state for the core to
 * check against the available budget.
 */
static void vc4_plane_calc_load(struct drm_plane_state *state)
{
	unsigned int
	    hvs_load_shift, vrefresh, i;
	struct drm_framebuffer *fb = state->fb;
	struct vc4_plane_state *vc4_state;
	struct drm_crtc_state *crtc_state;
	unsigned int vscale_factor;

	vc4_state = to_vc4_plane_state(state);
	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
							state->crtc);
	vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);

	/* The HVS is able to process 2 pixels/cycle when scaling the source,
	 * 4 pixels/cycle otherwise.
	 * Alpha blending step seems to be pipelined and it's always operating
	 * at 4 pixels/cycle, so the limiting aspect here seems to be the
	 * scaler block.
	 * HVS load is expressed in clk-cycles/sec (AKA Hz).
	 */
	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
		hvs_load_shift = 1;
	else
		hvs_load_shift = 2;

	vc4_state->membus_load = 0;
	vc4_state->hvs_load = 0;
	for (i = 0; i < fb->format->num_planes; i++) {
		/* Even if the bandwidth/plane required for a single frame is
		 *
		 * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh
		 *
		 * when downscaling, we have to read more pixels per line in
		 * the time frame reserved for a single line, so the bandwidth
		 * demand can be punctually higher. To account for that, we
		 * calculate the down-scaling factor and multiply the plane
		 * load by this number. We're likely over-estimating the read
		 * demand, but that's better than under-estimating it.
		 */
		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i],
					     vc4_state->crtc_h);
		vc4_state->membus_load += vc4_state->src_w[i] *
					  vc4_state->src_h[i] * vscale_factor *
					  fb->format->cpp[i];
		vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
	}

	vc4_state->hvs_load *= vrefresh;
	vc4_state->hvs_load >>= hvs_load_shift;
	vc4_state->membus_load *= vrefresh;
}

/* Allocate (or keep reusing) the LBM region this plane's scaling needs,
 * and patch its start address into the dlist slot reserved for it.
 * Returns 0 on success or a negative error code on allocation failure.
 */
static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned long irqflags;
	u32 lbm_size;

	lbm_size = vc4_lbm_size(state);
	if (!lbm_size)
		return 0;

	/* A dlist slot for the LBM base address must have been reserved
	 * beforehand (lbm_offset set while building the dlist).
	 */
	if (WARN_ON(!vc4_state->lbm_offset))
		return -EINVAL;

	/* Allocate the LBM memory that the HVS will use for temporary
	 * storage due to our scaling/format conversion.
	 */
	if (!drm_mm_node_allocated(&vc4_state->lbm)) {
		int ret;

		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
		ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
						 &vc4_state->lbm,
						 lbm_size,
						 vc4->is_vc5 ? 64 : 32,
						 0, 0);
		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);

		if (ret)
			return ret;
	} else {
		/* Reusing an existing node: the size must not have changed. */
		WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
	}

	vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;

	return 0;
}

/*
 * The colorspace conversion matrices are held in 3 entries in the dlist.
 * Create an array of them, with entries for each full and limited mode, and
 * each supported colorspace.
 */
static const u32 colorspace_coeffs[2][DRM_COLOR_ENCODING_MAX][3] = {
	{
		/* Limited range */
		{
			/* BT601 */
			SCALER_CSC0_ITR_R_601_5,
			SCALER_CSC1_ITR_R_601_5,
			SCALER_CSC2_ITR_R_601_5,
		}, {
			/* BT709 */
			SCALER_CSC0_ITR_R_709_3,
			SCALER_CSC1_ITR_R_709_3,
			SCALER_CSC2_ITR_R_709_3,
		}, {
			/* BT2020 */
			SCALER_CSC0_ITR_R_2020,
			SCALER_CSC1_ITR_R_2020,
			SCALER_CSC2_ITR_R_2020,
		}
	}, {
		/* Full range */
		{
			/* JFIF */
			SCALER_CSC0_JPEG_JFIF,
			SCALER_CSC1_JPEG_JFIF,
			SCALER_CSC2_JPEG_JFIF,
		}, {
			/* BT709 */
			SCALER_CSC0_ITR_R_709_3_FR,
			SCALER_CSC1_ITR_R_709_3_FR,
			SCALER_CSC2_ITR_R_709_3_FR,
		}, {
			/* BT2020 */
			SCALER_CSC0_ITR_R_2020_FR,
			SCALER_CSC1_ITR_R_2020_FR,
			SCALER_CSC2_ITR_R_2020_FR,
		}
	}
};

/* Pick the SCALER_POS2 alpha-mode bits (HVS4) for this plane's format
 * and pixel blend mode.
 */
static u32 vc4_hvs4_get_alpha_blend_mode(struct drm_plane_state *state)
{
	/* Without per-pixel alpha, only the fixed plane alpha is used. */
	if (!state->fb->format->has_alpha)
		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
				     SCALER_POS2_ALPHA_MODE);

	switch (state->pixel_blend_mode) {
	case DRM_MODE_BLEND_PIXEL_NONE:
		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED,
				     SCALER_POS2_ALPHA_MODE);
	default:
	case DRM_MODE_BLEND_PREMULTI:
		/* Premultiplied is the default blend mode. */
		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
				     SCALER_POS2_ALPHA_MODE) |
			SCALER_POS2_ALPHA_PREMULT;
	case DRM_MODE_BLEND_COVERAGE:
		return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_PIPELINE,
				     SCALER_POS2_ALPHA_MODE);
	}
}

/* Pick the SCALER5_CTL2 alpha-mode bits (HVS5) for this plane's format
 * and pixel blend mode.
 */
static u32 vc4_hvs5_get_alpha_blend_mode(struct drm_plane_state *state)
{
	if (!state->fb->format->has_alpha)
		return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
				     SCALER5_CTL2_ALPHA_MODE);

	switch (state->pixel_blend_mode) {
	case DRM_MODE_BLEND_PIXEL_NONE:
		return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED,
				     SCALER5_CTL2_ALPHA_MODE);
	default:
	case DRM_MODE_BLEND_PREMULTI:
		return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE,
SCALER5_CTL2_ALPHA_MODE) | 706 SCALER5_CTL2_ALPHA_PREMULT; 707 case DRM_MODE_BLEND_COVERAGE: 708 return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE, 709 SCALER5_CTL2_ALPHA_MODE); 710 } 711 } 712 713 /* Writes out a full display list for an active plane to the plane's 714 * private dlist state. 715 */ 716 static int vc4_plane_mode_set(struct drm_plane *plane, 717 struct drm_plane_state *state) 718 { 719 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 720 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 721 struct drm_framebuffer *fb = state->fb; 722 u32 ctl0_offset = vc4_state->dlist_count; 723 const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); 724 u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier); 725 int num_planes = fb->format->num_planes; 726 u32 h_subsample = fb->format->hsub; 727 u32 v_subsample = fb->format->vsub; 728 bool mix_plane_alpha; 729 bool covers_screen; 730 u32 scl0, scl1, pitch0; 731 u32 tiling, src_y; 732 u32 hvs_format = format->hvs; 733 unsigned int rotation; 734 int ret, i; 735 736 if (vc4_state->dlist_initialized) 737 return 0; 738 739 ret = vc4_plane_setup_clipping_and_scaling(state); 740 if (ret) 741 return ret; 742 743 /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB 744 * and 4:4:4, scl1 should be set to scl0 so both channels of 745 * the scaler do the same thing. For YUV, the Y plane needs 746 * to be put in channel 1 and Cb/Cr in channel 0, so we swap 747 * the scl fields here. 748 */ 749 if (num_planes == 1) { 750 scl0 = vc4_get_scl_field(state, 0); 751 scl1 = scl0; 752 } else { 753 scl0 = vc4_get_scl_field(state, 1); 754 scl1 = vc4_get_scl_field(state, 0); 755 } 756 757 rotation = drm_rotation_simplify(state->rotation, 758 DRM_MODE_ROTATE_0 | 759 DRM_MODE_REFLECT_X | 760 DRM_MODE_REFLECT_Y); 761 762 /* We must point to the last line when Y reflection is enabled. 
*/ 763 src_y = vc4_state->src_y; 764 if (rotation & DRM_MODE_REFLECT_Y) 765 src_y += vc4_state->src_h[0] - 1; 766 767 switch (base_format_mod) { 768 case DRM_FORMAT_MOD_LINEAR: 769 tiling = SCALER_CTL0_TILING_LINEAR; 770 pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH); 771 772 /* Adjust the base pointer to the first pixel to be scanned 773 * out. 774 */ 775 for (i = 0; i < num_planes; i++) { 776 vc4_state->offsets[i] += src_y / 777 (i ? v_subsample : 1) * 778 fb->pitches[i]; 779 780 vc4_state->offsets[i] += vc4_state->src_x / 781 (i ? h_subsample : 1) * 782 fb->format->cpp[i]; 783 } 784 785 break; 786 787 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: { 788 u32 tile_size_shift = 12; /* T tiles are 4kb */ 789 /* Whole-tile offsets, mostly for setting the pitch. */ 790 u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5; 791 u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */ 792 u32 tile_w_mask = (1 << tile_w_shift) - 1; 793 /* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice 794 * the height (in pixels) of a 4k tile. 795 */ 796 u32 tile_h_mask = (2 << tile_h_shift) - 1; 797 /* For T-tiled, the FB pitch is "how many bytes from one row to 798 * the next, such that 799 * 800 * pitch * tile_h == tile_size * tiles_per_row 801 */ 802 u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift); 803 u32 tiles_l = vc4_state->src_x >> tile_w_shift; 804 u32 tiles_r = tiles_w - tiles_l; 805 u32 tiles_t = src_y >> tile_h_shift; 806 /* Intra-tile offsets, which modify the base address (the 807 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that 808 * base address). 
809 */ 810 u32 tile_y = (src_y >> 4) & 1; 811 u32 subtile_y = (src_y >> 2) & 3; 812 u32 utile_y = src_y & 3; 813 u32 x_off = vc4_state->src_x & tile_w_mask; 814 u32 y_off = src_y & tile_h_mask; 815 816 /* When Y reflection is requested we must set the 817 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines 818 * after the initial one should be fetched in descending order, 819 * which makes sense since we start from the last line and go 820 * backward. 821 * Don't know why we need y_off = max_y_off - y_off, but it's 822 * definitely required (I guess it's also related to the "going 823 * backward" situation). 824 */ 825 if (rotation & DRM_MODE_REFLECT_Y) { 826 y_off = tile_h_mask - y_off; 827 pitch0 = SCALER_PITCH0_TILE_LINE_DIR; 828 } else { 829 pitch0 = 0; 830 } 831 832 tiling = SCALER_CTL0_TILING_256B_OR_T; 833 pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) | 834 VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) | 835 VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) | 836 VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R)); 837 vc4_state->offsets[0] += tiles_t * (tiles_w << tile_size_shift); 838 vc4_state->offsets[0] += subtile_y << 8; 839 vc4_state->offsets[0] += utile_y << 4; 840 841 /* Rows of tiles alternate left-to-right and right-to-left. 
*/ 842 if (tiles_t & 1) { 843 pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR; 844 vc4_state->offsets[0] += (tiles_w - tiles_l) << 845 tile_size_shift; 846 vc4_state->offsets[0] -= (1 + !tile_y) << 10; 847 } else { 848 vc4_state->offsets[0] += tiles_l << tile_size_shift; 849 vc4_state->offsets[0] += tile_y << 10; 850 } 851 852 break; 853 } 854 855 case DRM_FORMAT_MOD_BROADCOM_SAND64: 856 case DRM_FORMAT_MOD_BROADCOM_SAND128: 857 case DRM_FORMAT_MOD_BROADCOM_SAND256: { 858 uint32_t param = fourcc_mod_broadcom_param(fb->modifier); 859 860 if (param > SCALER_TILE_HEIGHT_MASK) { 861 DRM_DEBUG_KMS("SAND height too large (%d)\n", 862 param); 863 return -EINVAL; 864 } 865 866 if (fb->format->format == DRM_FORMAT_P030) { 867 hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT; 868 tiling = SCALER_CTL0_TILING_128B; 869 } else { 870 hvs_format = HVS_PIXEL_FORMAT_H264; 871 872 switch (base_format_mod) { 873 case DRM_FORMAT_MOD_BROADCOM_SAND64: 874 tiling = SCALER_CTL0_TILING_64B; 875 break; 876 case DRM_FORMAT_MOD_BROADCOM_SAND128: 877 tiling = SCALER_CTL0_TILING_128B; 878 break; 879 case DRM_FORMAT_MOD_BROADCOM_SAND256: 880 tiling = SCALER_CTL0_TILING_256B_OR_T; 881 break; 882 default: 883 return -EINVAL; 884 } 885 } 886 887 /* Adjust the base pointer to the first pixel to be scanned 888 * out. 889 * 890 * For P030, y_ptr [31:4] is the 128bit word for the start pixel 891 * y_ptr [3:0] is the pixel (0-11) contained within that 128bit 892 * word that should be taken as the first pixel. 893 * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the 894 * element within the 128bit word, eg for pixel 3 the value 895 * should be 6. 
896 */ 897 for (i = 0; i < num_planes; i++) { 898 u32 tile_w, tile, x_off, pix_per_tile; 899 900 if (fb->format->format == DRM_FORMAT_P030) { 901 /* 902 * Spec says: bits [31:4] of the given address 903 * should point to the 128-bit word containing 904 * the desired starting pixel, and bits[3:0] 905 * should be between 0 and 11, indicating which 906 * of the 12-pixels in that 128-bit word is the 907 * first pixel to be used 908 */ 909 u32 remaining_pixels = vc4_state->src_x % 96; 910 u32 aligned = remaining_pixels / 12; 911 u32 last_bits = remaining_pixels % 12; 912 913 x_off = aligned * 16 + last_bits; 914 tile_w = 128; 915 pix_per_tile = 96; 916 } else { 917 switch (base_format_mod) { 918 case DRM_FORMAT_MOD_BROADCOM_SAND64: 919 tile_w = 64; 920 break; 921 case DRM_FORMAT_MOD_BROADCOM_SAND128: 922 tile_w = 128; 923 break; 924 case DRM_FORMAT_MOD_BROADCOM_SAND256: 925 tile_w = 256; 926 break; 927 default: 928 return -EINVAL; 929 } 930 pix_per_tile = tile_w / fb->format->cpp[0]; 931 x_off = (vc4_state->src_x % pix_per_tile) / 932 (i ? h_subsample : 1) * 933 fb->format->cpp[i]; 934 } 935 936 tile = vc4_state->src_x / pix_per_tile; 937 938 vc4_state->offsets[i] += param * tile_w * tile; 939 vc4_state->offsets[i] += src_y / 940 (i ? v_subsample : 1) * 941 tile_w; 942 vc4_state->offsets[i] += x_off & ~(i ? 1 : 0); 943 } 944 945 pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT); 946 break; 947 } 948 949 default: 950 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx", 951 (long long)fb->modifier); 952 return -EINVAL; 953 } 954 955 /* Don't waste cycles mixing with plane alpha if the set alpha 956 * is opaque or there is no per-pixel alpha information. 957 * In any case we use the alpha property value as the fixed alpha. 958 */ 959 mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE && 960 fb->format->has_alpha; 961 962 if (!vc4->is_vc5) { 963 /* Control word */ 964 vc4_dlist_write(vc4_state, 965 SCALER_CTL0_VALID | 966 (rotation & DRM_MODE_REFLECT_X ? 
					SCALER_CTL0_HFLIP : 0) |
				(rotation & DRM_MODE_REFLECT_Y ?
					SCALER_CTL0_VFLIP : 0) |
				VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND,
					      SCALER_CTL0_RGBA_EXPAND) |
				(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 8,
					      SCALER_POS0_FIXED_ALPHA) |
				VC4_SET_FIELD(vc4_state->crtc_x,
					      SCALER_POS0_START_X) |
				VC4_SET_FIELD(vc4_state->crtc_y,
					      SCALER_POS0_START_Y));

		/* Position Word 1: Scaled Image Dimensions.
		 * Skipped in unity mode: the HVS omits this word when
		 * SCALER_CTL0_UNITY is set in the control word above.
		 */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size, Alpha */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
				vc4_hvs4_get_alpha_blend_mode(state) |
				VC4_SET_FIELD(vc4_state->src_w[0],
					      SCALER_POS2_WIDTH) |
				VC4_SET_FIELD(vc4_state->src_h[0],
					      SCALER_POS2_HEIGHT));

		/* Position Word 3: Context.  Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	} else {
		/* HVS5 (VC5/BCM2711) path: the element layout differs from
		 * the HVS4 one above (separate Control Word 2 for alpha,
		 * flips carried in Position Word 0 rather than Control
		 * Word 0).
		 */
		u32 hvs_pixel_order = format->pixel_order;

		/* Some formats need a different byte order on HVS5; fall
		 * back to the HVS4 ordering when no override is set.
		 */
		if (format->pixel_order_hvs5)
			hvs_pixel_order = format->pixel_order_hvs5;

		/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(hvs_pixel_order << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ?
						SCALER5_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
				SCALER5_CTL0_ALPHA_EXPAND |
				SCALER5_CTL0_RGB_EXPAND);

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(rotation & DRM_MODE_REFLECT_Y ?
						SCALER5_POS0_VFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_x,
					      SCALER_POS0_START_X) |
				(rotation & DRM_MODE_REFLECT_X ?
						SCALER5_POS0_HFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_y,
					      SCALER5_POS0_START_Y)
			       );

		/* Control Word 2: plane alpha. Note the >> 4 (12-bit alpha
		 * field) vs. the >> 8 (8-bit field) used on HVS4 above.
		 */
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 4,
					      SCALER5_CTL2_ALPHA) |
				vc4_hvs5_get_alpha_blend_mode(state) |
				(mix_plane_alpha ?
					SCALER5_CTL2_ALPHA_MIX : 0)
			       );

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER5_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER5_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(vc4_state->src_w[0],
					      SCALER5_POS2_WIDTH) |
				VC4_SET_FIELD(vc4_state->src_h[0],
					      SCALER5_POS2_HEIGHT));

		/* Position Word 3: Context.  Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}


	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
	 *
	 * The pointers may be any byte address.
	 */
	vc4_state->ptr0_offset = vc4_state->dlist_count;
	for (i = 0; i < num_planes; i++)
		vc4_dlist_write(vc4_state, vc4_state->offsets[i]);

	/* Pointer Context Word 0/1/2: Written by the HVS */
	for (i = 0; i < num_planes; i++)
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	/* Pitch word 0 */
	vc4_dlist_write(vc4_state, pitch0);

	/* Pitch word 1/2: for H264/10-bit YCbCr (SAND layouts) every plane
	 * reuses pitch0; otherwise each plane gets its own fb pitch.
	 */
	for (i = 1; i < num_planes; i++) {
		if (hvs_format != HVS_PIXEL_FORMAT_H264 &&
		    hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(fb->pitches[i],
						      SCALER_SRC_PITCH));
		} else {
			vc4_dlist_write(vc4_state, pitch0);
		}
	}

	/* Colorspace conversion words: three CSC coefficient words, looked
	 * up by (range, encoding). Out-of-range property values are clamped
	 * to BT.601 / limited range defaults.
	 */
	if (vc4_state->is_yuv) {
		enum drm_color_encoding color_encoding = state->color_encoding;
		enum drm_color_range color_range = state->color_range;
		const u32 *ccm;

		if (color_encoding >= DRM_COLOR_ENCODING_MAX)
			color_encoding = DRM_COLOR_YCBCR_BT601;
		if (color_range >= DRM_COLOR_RANGE_MAX)
			color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;

		ccm = colorspace_coeffs[color_range][color_encoding];

		vc4_dlist_write(vc4_state, ccm[0]);
		vc4_dlist_write(vc4_state, ccm[1]);
		vc4_dlist_write(vc4_state, ccm[2]);
	}

	/* lbm_offset == 0 means "no LBM slot reserved". */
	vc4_state->lbm_offset = 0;

	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
		/* Reserve a slot for the LBM Base Address. The real value will
		 * be set when calling vc4_plane_allocate_lbm().
		 */
		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
			vc4_state->lbm_offset = vc4_state->dlist_count;
			vc4_dlist_counter_increment(vc4_state);
		}

		if (num_planes > 1) {
			/* Emit Cb/Cr as channel 0 and Y as channel
			 * 1. This matches how we set up scl0/scl1
			 * above.
			 */
			vc4_write_scaling_parameters(state, 1);
		}
		vc4_write_scaling_parameters(state, 0);

		/* If any PPF setup was done, then all the kernel
		 * pointers get uploaded.
		 */
		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
						   SCALER_PPF_KERNEL_OFFSET);

			/* HPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* HPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
		}
	}

	/* Now that the dlist is complete, patch the element size back into
	 * the control word written first.
	 */
	vc4_state->dlist[ctl0_offset] |=
		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);

	/* crtc_* are already clipped coordinates. */
	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
			vc4_state->crtc_h == state->crtc->mode.vdisplay;
	/* Background fill might be necessary when the plane has per-pixel
	 * alpha content or a non-opaque plane alpha and could blend from the
	 * background or does not cover the entire screen.
	 */
	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;

	/* Flag the dlist as initialized to avoid checking it twice in case
	 * the async update check already called vc4_plane_mode_set() and
	 * decided to fallback to sync update because async update was not
	 * possible.
	 */
	vc4_state->dlist_initialized = 1;

	vc4_plane_calc_load(state);

	return 0;
}

/* If a modeset involves changing the setup of a plane, the atomic
 * infrastructure will call this to validate a proposed plane setup.
 * However, if a plane isn't getting updated, this (and the
 * corresponding vc4_plane_atomic_update) won't get called.  Thus, we
 * compute the dlist here and have all active plane dlists get updated
 * in the CRTC's flush.
 */
static int vc4_plane_atomic_check(struct drm_plane *plane,
				  struct drm_atomic_state *state)
{
	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
										 plane);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state);
	int ret;

	vc4_state->dlist_count = 0;

	/* A disabled plane contributes no dlist; nothing to compute. */
	if (!plane_enabled(new_plane_state))
		return 0;

	ret = vc4_plane_mode_set(plane, new_plane_state);
	if (ret)
		return ret;

	/* Reserve the line-buffer memory needed for any vertical scaling
	 * computed by vc4_plane_mode_set() above.
	 */
	return vc4_plane_allocate_lbm(new_plane_state);
}

static void vc4_plane_atomic_update(struct drm_plane *plane,
				    struct drm_atomic_state *state)
{
	/* No contents here.  Since we don't know where in the CRTC's
	 * dlist we should be stored, our dlist is uploaded to the
	 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush
	 * time.
	 */
}

/* Copies this plane's precomputed dlist into the HVS dlist region the CRTC
 * allocated for it, and remembers the hardware location for later async
 * updates.  Returns the number of dwords written (the dlist size) even when
 * the device has gone away, so the CRTC's bookkeeping stays consistent.
 */
u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
	int i;
	int idx;

	if (!drm_dev_enter(plane->dev, &idx))
		goto out;

	vc4_state->hw_dlist = dlist;

	/* Can't memcpy_toio() because it needs to be 32-bit writes. */
	for (i = 0; i < vc4_state->dlist_count; i++)
		writel(vc4_state->dlist[i], &dlist[i]);

	drm_dev_exit(idx);

out:
	return vc4_state->dlist_count;
}

/* Returns the number of dwords the plane's dlist occupies (0 if the plane
 * is disabled, since atomic_check resets the count before bailing out).
 */
u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
{
	const struct vc4_plane_state *vc4_state =
		container_of(state, typeof(*vc4_state), base);

	return vc4_state->dlist_count;
}

/* Updates the plane to immediately (well, once the FIFO needs
 * refilling) scan out from at a new framebuffer.
 */
void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
	struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0);
	uint32_t addr;
	int idx;

	if (!drm_dev_enter(plane->dev, &idx))
		return;

	/* We're skipping the address adjustment for negative origin,
	 * because this is only called on the primary plane.
	 */
	WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
	addr = bo->dma_addr + fb->offsets[0];

	/* Write the new address into the hardware immediately.  The
	 * scanout will start from this address as soon as the FIFO
	 * needs to refill with pixels.
	 */
	writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]);

	/* Also update the CPU-side dlist copy, so that any later
	 * atomic updates that don't do a new modeset on our plane
	 * also use our updated address.
	 */
	vc4_state->dlist[vc4_state->ptr0_offset] = addr;

	drm_dev_exit(idx);
}

static void vc4_plane_atomic_async_update(struct drm_plane *plane,
					  struct drm_atomic_state *state)
{
	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
										 plane);
	struct vc4_plane_state *vc4_state, *new_vc4_state;
	int idx;

	if (!drm_dev_enter(plane->dev, &idx))
		return;

	/* Async updates bypass the usual atomic state swap, so copy the
	 * new state's fields into the currently-committed plane->state by
	 * hand.  The fb is swapped (not copied) so refcounting stays
	 * balanced when the new state is destroyed.
	 */
	swap(plane->state->fb, new_plane_state->fb);
	plane->state->crtc_x = new_plane_state->crtc_x;
	plane->state->crtc_y = new_plane_state->crtc_y;
	plane->state->crtc_w = new_plane_state->crtc_w;
	plane->state->crtc_h = new_plane_state->crtc_h;
	plane->state->src_x = new_plane_state->src_x;
	plane->state->src_y = new_plane_state->src_y;
	plane->state->src_w = new_plane_state->src_w;
	plane->state->src_h = new_plane_state->src_h;
	plane->state->alpha = new_plane_state->alpha;
	plane->state->pixel_blend_mode = new_plane_state->pixel_blend_mode;
	plane->state->rotation = new_plane_state->rotation;
	plane->state->zpos = new_plane_state->zpos;
	plane->state->normalized_zpos = new_plane_state->normalized_zpos;
	plane->state->color_encoding = new_plane_state->color_encoding;
	plane->state->color_range = new_plane_state->color_range;
	plane->state->src = new_plane_state->src;
	plane->state->dst = new_plane_state->dst;
	plane->state->visible = new_plane_state->visible;

	new_vc4_state = to_vc4_plane_state(new_plane_state);
	vc4_state = to_vc4_plane_state(plane->state);

	/* Mirror the derived vc4-specific state as well. */
	vc4_state->crtc_x = new_vc4_state->crtc_x;
	vc4_state->crtc_y = new_vc4_state->crtc_y;
	vc4_state->crtc_h = new_vc4_state->crtc_h;
	vc4_state->crtc_w = new_vc4_state->crtc_w;
	vc4_state->src_x = new_vc4_state->src_x;
	vc4_state->src_y = new_vc4_state->src_y;
	memcpy(vc4_state->src_w, new_vc4_state->src_w,
	       sizeof(vc4_state->src_w));
	memcpy(vc4_state->src_h, new_vc4_state->src_h,
	       sizeof(vc4_state->src_h));
	memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
	       sizeof(vc4_state->x_scaling));
	memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
	       sizeof(vc4_state->y_scaling));
	vc4_state->is_unity = new_vc4_state->is_unity;
	vc4_state->is_yuv = new_vc4_state->is_yuv;
	memcpy(vc4_state->offsets, new_vc4_state->offsets,
	       sizeof(vc4_state->offsets));
	vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;

	/* Update the current vc4_state pos0, pos2 and ptr0 dlist entries.
	 * vc4_plane_atomic_async_check() has already verified that the
	 * offsets in both states are identical, so indexing the new dlist
	 * with the old offsets is safe.
	 */
	vc4_state->dlist[vc4_state->pos0_offset] =
		new_vc4_state->dlist[vc4_state->pos0_offset];
	vc4_state->dlist[vc4_state->pos2_offset] =
		new_vc4_state->dlist[vc4_state->pos2_offset];
	vc4_state->dlist[vc4_state->ptr0_offset] =
		new_vc4_state->dlist[vc4_state->ptr0_offset];

	/* Note that we can't just call vc4_plane_write_dlist()
	 * because that would smash the context data that the HVS is
	 * currently using.
	 */
	writel(vc4_state->dlist[vc4_state->pos0_offset],
	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
	writel(vc4_state->dlist[vc4_state->pos2_offset],
	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
	writel(vc4_state->dlist[vc4_state->ptr0_offset],
	       &vc4_state->hw_dlist[vc4_state->ptr0_offset]);

	drm_dev_exit(idx);
}

/* Decides whether an update can be applied asynchronously (i.e. by patching
 * a few dwords of the live dlist) or must fall back to a full sync commit.
 * Returns 0 if async is possible, -EINVAL otherwise.
 */
static int vc4_plane_atomic_async_check(struct drm_plane *plane,
					struct drm_atomic_state *state)
{
	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
										 plane);
	struct vc4_plane_state *old_vc4_state, *new_vc4_state;
	int ret;
	u32 i;

	/* Build the candidate dlist for the new state so it can be
	 * compared dword-by-dword against the committed one.
	 */
	ret = vc4_plane_mode_set(plane, new_plane_state);
	if (ret)
		return ret;

	old_vc4_state = to_vc4_plane_state(plane->state);
	new_vc4_state = to_vc4_plane_state(new_plane_state);

	if (!new_vc4_state->hw_dlist)
		return -EINVAL;

	/* The dlist layout (size, word offsets, LBM requirements) must be
	 * unchanged, otherwise in-place patching is impossible.
	 */
	if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
	    old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
	    old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
	    old_vc4_state->ptr0_offset != new_vc4_state->ptr0_offset ||
	    vc4_lbm_size(plane->state) != vc4_lbm_size(new_plane_state))
		return -EINVAL;

	/* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update
	 * if anything else has changed, fallback to a sync update.
	 * (lbm_offset == 0 means no LBM slot was reserved, hence the extra
	 * non-zero check before skipping that index.)
	 */
	for (i = 0; i < new_vc4_state->dlist_count; i++) {
		if (i == new_vc4_state->pos0_offset ||
		    i == new_vc4_state->pos2_offset ||
		    i == new_vc4_state->ptr0_offset ||
		    (new_vc4_state->lbm_offset &&
		     i == new_vc4_state->lbm_offset))
			continue;

		if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
			return -EINVAL;
	}

	return 0;
}

/* Takes a usecnt reference on the fb's BO (paired with vc4_cleanup_fb())
 * in addition to the generic DMA fence setup done by
 * drm_gem_plane_helper_prepare_fb().  Skipped when the fb is unchanged.
 */
static int vc4_prepare_fb(struct drm_plane *plane,
			  struct drm_plane_state *state)
{
	struct vc4_bo *bo;

	if (!state->fb)
		return 0;

	bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);

	drm_gem_plane_helper_prepare_fb(plane, state);

	/* Same fb as already committed: no new reference needed. */
	if (plane->state->fb == state->fb)
		return 0;

	return vc4_bo_inc_usecnt(bo);
}

/* Drops the usecnt reference taken in vc4_prepare_fb(), mirroring its
 * "same fb / no fb" early-out conditions.
 */
static void vc4_cleanup_fb(struct drm_plane *plane,
			   struct drm_plane_state *state)
{
	struct vc4_bo *bo;

	if (plane->state->fb == state->fb || !state->fb)
		return;

	bo = to_vc4_bo(&drm_fb_dma_get_gem_obj(state->fb, 0)->base);
	vc4_bo_dec_usecnt(bo);
}

static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
	.atomic_check = vc4_plane_atomic_check,
	.atomic_update = vc4_plane_atomic_update,
	.prepare_fb = vc4_prepare_fb,
	.cleanup_fb = vc4_cleanup_fb,
	.atomic_async_check = vc4_plane_atomic_async_check,
	.atomic_async_update = vc4_plane_atomic_async_update,
};

/* VC5 has no BO usecnt machinery, so no prepare_fb/cleanup_fb hooks. */
static const struct drm_plane_helper_funcs vc5_plane_helper_funcs = {
	.atomic_check = vc4_plane_atomic_check,
	.atomic_update = vc4_plane_atomic_update,
	.atomic_async_check = vc4_plane_atomic_async_check,
	.atomic_async_update = vc4_plane_atomic_async_update,
};

/* Reports which (format, modifier) pairs the plane can scan out. */
static bool vc4_format_mod_supported(struct drm_plane *plane,
				     uint32_t format,
				     uint64_t modifier)
{
	/* Support T_TILING for RGB formats only. */
	switch (format) {
	case DRM_FORMAT_XRGB8888:
	case DRM_FORMAT_ARGB8888:
	case DRM_FORMAT_ABGR8888:
	case DRM_FORMAT_XBGR8888:
	case DRM_FORMAT_RGB565:
	case DRM_FORMAT_BGR565:
	case DRM_FORMAT_ARGB1555:
	case DRM_FORMAT_XRGB1555:
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_LINEAR:
		case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_NV12:
	case DRM_FORMAT_NV21:
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_LINEAR:
		case DRM_FORMAT_MOD_BROADCOM_SAND64:
		case DRM_FORMAT_MOD_BROADCOM_SAND128:
		case DRM_FORMAT_MOD_BROADCOM_SAND256:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_P030:
		/* P030 only exists in the SAND128 layout. */
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_BROADCOM_SAND128:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_RGBX1010102:
	case DRM_FORMAT_BGRX1010102:
	case DRM_FORMAT_RGBA1010102:
	case DRM_FORMAT_BGRA1010102:
	case DRM_FORMAT_YUV422:
	case DRM_FORMAT_YVU422:
	case DRM_FORMAT_YUV420:
	case DRM_FORMAT_YVU420:
	case DRM_FORMAT_NV16:
	case DRM_FORMAT_NV61:
	default:
		return (modifier == DRM_FORMAT_MOD_LINEAR);
	}
}

static const struct drm_plane_funcs vc4_plane_funcs = {
	.update_plane = drm_atomic_helper_update_plane,
	.disable_plane = drm_atomic_helper_disable_plane,
	.reset = vc4_plane_reset,
	.atomic_duplicate_state = vc4_plane_duplicate_state,
	.atomic_destroy_state = vc4_plane_destroy_state,
	.format_mod_supported = vc4_format_mod_supported,
};

/* Allocates and registers one plane of the given type, with the format
 * list filtered for the detected HVS generation and the standard alpha,
 * blend-mode, rotation and color properties attached.  Returns the plane
 * or an ERR_PTR.
 */
struct drm_plane *vc4_plane_init(struct drm_device *dev,
				 enum drm_plane_type type,
				 uint32_t possible_crtcs)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_plane *plane;
	struct vc4_plane *vc4_plane;
	u32 formats[ARRAY_SIZE(hvs_formats)];
	int num_formats = 0;
	unsigned i;
	static const uint64_t modifiers[] = {
		DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
		DRM_FORMAT_MOD_BROADCOM_SAND128,
		DRM_FORMAT_MOD_BROADCOM_SAND64,
		DRM_FORMAT_MOD_BROADCOM_SAND256,
		DRM_FORMAT_MOD_LINEAR,
		DRM_FORMAT_MOD_INVALID
	};

	/* Skip formats marked hvs5_only unless this is a VC5 (HVS5) part. */
	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
		if (!hvs_formats[i].hvs5_only || vc4->is_vc5) {
			formats[num_formats] = hvs_formats[i].drm;
			num_formats++;
		}
	}

	vc4_plane = drmm_universal_plane_alloc(dev, struct vc4_plane, base,
					       possible_crtcs,
					       &vc4_plane_funcs,
					       formats, num_formats,
					       modifiers, type, NULL);
	if (IS_ERR(vc4_plane))
		return ERR_CAST(vc4_plane);
	plane = &vc4_plane->base;

	if (vc4->is_vc5)
		drm_plane_helper_add(plane, &vc5_plane_helper_funcs);
	else
		drm_plane_helper_add(plane, &vc4_plane_helper_funcs);

	drm_plane_create_alpha_property(plane);
	drm_plane_create_blend_mode_property(plane,
					     BIT(DRM_MODE_BLEND_PIXEL_NONE) |
					     BIT(DRM_MODE_BLEND_PREMULTI) |
					     BIT(DRM_MODE_BLEND_COVERAGE));
	drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
					   DRM_MODE_ROTATE_0 |
					   DRM_MODE_ROTATE_180 |
					   DRM_MODE_REFLECT_X |
					   DRM_MODE_REFLECT_Y);

	drm_plane_create_color_properties(plane,
					  BIT(DRM_COLOR_YCBCR_BT601) |
					  BIT(DRM_COLOR_YCBCR_BT709) |
					  BIT(DRM_COLOR_YCBCR_BT2020),
					  BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
					  BIT(DRM_COLOR_YCBCR_FULL_RANGE),
					  DRM_COLOR_YCBCR_BT709,
					  DRM_COLOR_YCBCR_LIMITED_RANGE);

	return plane;
}

int vc4_plane_create_additional_planes(struct drm_device *drm)
{
	struct drm_plane *cursor_plane;
	struct drm_crtc *crtc;
	unsigned int i;

	/* Set up some arbitrary number of planes.  We're not limited
	 * by a set number of physical registers, just the space in
	 * the HVS (16k) and how small an plane can be (28 bytes).
	 * However, each plane we set up takes up some memory, and
	 * increases the cost of looping over planes, which atomic
	 * modesetting does quite a bit.  As a result, we pick a
	 * modest number of planes to expose, that should hopefully
	 * still cover any sane usecase.
	 */
	for (i = 0; i < 16; i++) {
		struct drm_plane *plane =
			vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY,
				       GENMASK(drm->mode_config.num_crtc - 1, 0));

		/* A failed overlay is non-fatal; just expose fewer planes. */
		if (IS_ERR(plane))
			continue;
	}

	drm_for_each_crtc(crtc, drm) {
		/* Set up the legacy cursor after overlay initialization,
		 * since we overlay planes on the CRTC in the order they were
		 * initialized.
		 */
		cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR,
					      drm_crtc_mask(crtc));
		if (!IS_ERR(cursor_plane)) {
			crtc->cursor = cursor_plane;
		}
	}

	return 0;
}