1 /* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 #include "dc.h" 26 #include "reg_helper.h" 27 #include "dcn10_dpp.h" 28 29 #include "dcn10_cm_common.h" 30 #include "custom_float.h" 31 32 #define REG(reg) reg 33 34 #define CTX \ 35 ctx 36 37 #undef FN 38 #define FN(reg_name, field_name) \ 39 reg->shifts.field_name, reg->masks.field_name 40 41 void cm_helper_program_color_matrices( 42 struct dc_context *ctx, 43 const uint16_t *regval, 44 const struct color_matrices_reg *reg) 45 { 46 uint32_t cur_csc_reg; 47 unsigned int i = 0; 48 49 for (cur_csc_reg = reg->csc_c11_c12; 50 cur_csc_reg <= reg->csc_c33_c34; 51 cur_csc_reg++) { 52 53 const uint16_t *regval0 = &(regval[2 * i]); 54 const uint16_t *regval1 = &(regval[(2 * i) + 1]); 55 56 REG_SET_2(cur_csc_reg, 0, 57 csc_c11, *regval0, 58 csc_c12, *regval1); 59 60 i++; 61 } 62 63 } 64 65 void cm_helper_program_xfer_func( 66 struct dc_context *ctx, 67 const struct pwl_params *params, 68 const struct xfer_func_reg *reg) 69 { 70 uint32_t reg_region_cur; 71 unsigned int i = 0; 72 73 REG_SET_2(reg->start_cntl_b, 0, 74 exp_region_start, params->arr_points[0].custom_float_x, 75 exp_resion_start_segment, 0); 76 REG_SET_2(reg->start_cntl_g, 0, 77 exp_region_start, params->arr_points[0].custom_float_x, 78 exp_resion_start_segment, 0); 79 REG_SET_2(reg->start_cntl_r, 0, 80 exp_region_start, params->arr_points[0].custom_float_x, 81 exp_resion_start_segment, 0); 82 83 REG_SET(reg->start_slope_cntl_b, 0, 84 field_region_linear_slope, params->arr_points[0].custom_float_slope); 85 REG_SET(reg->start_slope_cntl_g, 0, 86 field_region_linear_slope, params->arr_points[0].custom_float_slope); 87 REG_SET(reg->start_slope_cntl_r, 0, 88 field_region_linear_slope, params->arr_points[0].custom_float_slope); 89 90 REG_SET(reg->start_end_cntl1_b, 0, 91 field_region_end, params->arr_points[1].custom_float_x); 92 REG_SET_2(reg->start_end_cntl2_b, 0, 93 field_region_end_slope, params->arr_points[1].custom_float_slope, 94 field_region_end_base, params->arr_points[1].custom_float_y); 95 96 REG_SET(reg->start_end_cntl1_g, 0, 97 field_region_end, params->arr_points[1].custom_float_x); 98 REG_SET_2(reg->start_end_cntl2_g, 0, 99 field_region_end_slope, params->arr_points[1].custom_float_slope, 100 field_region_end_base, params->arr_points[1].custom_float_y); 101 102 REG_SET(reg->start_end_cntl1_r, 0, 103 field_region_end, params->arr_points[1].custom_float_x); 104 REG_SET_2(reg->start_end_cntl2_r, 0, 105 field_region_end_slope, params->arr_points[1].custom_float_slope, 106 field_region_end_base, params->arr_points[1].custom_float_y); 107 108 for (reg_region_cur = reg->region_start; 109 reg_region_cur <= reg->region_end; 110 reg_region_cur++) { 111 112 const struct gamma_curve *curve0 = &(params->arr_curve_points[2 * i]); 113 const struct gamma_curve *curve1 = &(params->arr_curve_points[(2 * i) + 1]); 114 115 REG_SET_4(reg_region_cur, 0, 116 exp_region0_lut_offset, curve0->offset, 117 exp_region0_num_segments, curve0->segments_num, 118 exp_region1_lut_offset, curve1->offset, 119 exp_region1_num_segments, curve1->segments_num); 120 121 i++; 122 } 123 124 } 125 126 127 128 bool cm_helper_convert_to_custom_float( 129 struct pwl_result_data *rgb_resulted, 130 struct curve_points *arr_points, 131 uint32_t hw_points_num, 132 bool fixpoint) 133 { 134 struct custom_float_format fmt; 135 136 struct pwl_result_data *rgb = rgb_resulted; 137 138 uint32_t i = 0; 139 140 fmt.exponenta_bits = 6; 141 fmt.mantissa_bits = 12; 142 fmt.sign = false; 143 144 if (!convert_to_custom_float_format(arr_points[0].x, &fmt, 145 &arr_points[0].custom_float_x)) { 146 BREAK_TO_DEBUGGER(); 147 return false; 148 } 149 150 if (!convert_to_custom_float_format(arr_points[0].offset, &fmt, 151 &arr_points[0].custom_float_offset)) { 152 BREAK_TO_DEBUGGER(); 153 return false; 154 } 155 156 if (!convert_to_custom_float_format(arr_points[0].slope, &fmt, 157 &arr_points[0].custom_float_slope)) { 158 BREAK_TO_DEBUGGER(); 159 return false; 160 } 161 162 fmt.mantissa_bits = 10; 163 fmt.sign = false; 164 165 if (!convert_to_custom_float_format(arr_points[1].x, &fmt, 166 &arr_points[1].custom_float_x)) { 167 BREAK_TO_DEBUGGER(); 168 return false; 169 } 170 171 if (fixpoint == true) 172 arr_points[1].custom_float_y = dc_fixpt_clamp_u0d14(arr_points[1].y); 173 else if (!convert_to_custom_float_format(arr_points[1].y, &fmt, 174 &arr_points[1].custom_float_y)) { 175 BREAK_TO_DEBUGGER(); 176 return false; 177 } 178 179 if (!convert_to_custom_float_format(arr_points[1].slope, &fmt, 180 &arr_points[1].custom_float_slope)) { 181 BREAK_TO_DEBUGGER(); 182 return false; 183 } 184 185 if (hw_points_num == 0 || rgb_resulted == NULL || fixpoint == true) 186 return true; 187 188 fmt.mantissa_bits = 12; 189 fmt.sign = true; 190 191 while (i != hw_points_num) { 192 if (!convert_to_custom_float_format(rgb->red, &fmt, 193 &rgb->red_reg)) { 194 BREAK_TO_DEBUGGER(); 195 return false; 196 } 197 198 if (!convert_to_custom_float_format(rgb->green, &fmt, 199 &rgb->green_reg)) { 200 BREAK_TO_DEBUGGER(); 201 return false; 202 } 203 204 if (!convert_to_custom_float_format(rgb->blue, &fmt, 205 &rgb->blue_reg)) { 206 BREAK_TO_DEBUGGER(); 207 return false; 208 } 209 210 if (!convert_to_custom_float_format(rgb->delta_red, &fmt, 211 &rgb->delta_red_reg)) { 212 BREAK_TO_DEBUGGER(); 213 return false; 214 } 215 216 if (!convert_to_custom_float_format(rgb->delta_green, &fmt, 217 &rgb->delta_green_reg)) { 218 BREAK_TO_DEBUGGER(); 219 return false; 220 } 221 222 if (!convert_to_custom_float_format(rgb->delta_blue, &fmt, 223 &rgb->delta_blue_reg)) { 224 BREAK_TO_DEBUGGER(); 225 return false; 226 } 227 228 ++rgb; 229 ++i; 230 } 231 232 return true; 233 } 234 235 /* driver uses 32 regions or less, but DCN HW has 34, extra 2 are set to 0 */ 236 #define MAX_REGIONS_NUMBER 34 237 #define MAX_LOW_POINT 25 238 #define NUMBER_REGIONS 32 239 #define NUMBER_SW_SEGMENTS 16 240 241 bool cm_helper_translate_curve_to_hw_format( 242 const struct dc_transfer_func *output_tf, 243 struct pwl_params *lut_params, bool fixpoint) 244 { 245 struct curve_points *arr_points; 246 struct pwl_result_data *rgb_resulted; 247 struct pwl_result_data *rgb; 248 struct pwl_result_data *rgb_plus_1; 249 struct fixed31_32 y_r; 250 struct fixed31_32 y_g; 251 struct fixed31_32 y_b; 252 struct fixed31_32 y1_min; 253 struct fixed31_32 y3_max; 254 255 int32_t region_start, region_end; 256 int32_t i; 257 uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points; 258 259 if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS) 260 return false; 261 262 PERF_TRACE(); 263 264 arr_points = lut_params->arr_points; 265 rgb_resulted = lut_params->rgb_resulted; 266 hw_points = 0; 267 268 memset(lut_params, 0, sizeof(struct pwl_params)); 269 memset(seg_distr, 0, sizeof(seg_distr)); 270 271 if (output_tf->tf == TRANSFER_FUNCTION_PQ) { 272 /* 32 segments 273 * segments are from 2^-25 to 2^7 274 */ 275 for (i = 0; i < NUMBER_REGIONS ; i++) 276 seg_distr[i] = 3; 277 278 region_start = -MAX_LOW_POINT; 279 region_end = NUMBER_REGIONS - MAX_LOW_POINT; 280 } else { 281 /* 10 segments 282 * segment is from 2^-10 to 2^0 283 * There are less than 256 points, for optimization 284 */ 285 seg_distr[0] = 3; 286 seg_distr[1] = 4; 287 seg_distr[2] = 4; 288 seg_distr[3] = 4; 289 seg_distr[4] = 4; 290 seg_distr[5] = 4; 291 seg_distr[6] = 4; 292 seg_distr[7] = 4; 293 seg_distr[8] = 4; 294 seg_distr[9] = 4; 295 seg_distr[10] = 1; 296 297 region_start = -10; 298 region_end = 1; 299 } 300 301 for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++) 302 seg_distr[i] = -1; 303 304 for (k = 0; k < MAX_REGIONS_NUMBER; k++) { 305 if (seg_distr[k] != -1) 306 hw_points += (1 << seg_distr[k]); 307 } 308 309 j = 0; 310 for (k = 0; k < (region_end - region_start); k++) { 311 increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]); 312 start_index = (region_start + k + MAX_LOW_POINT) * 313 NUMBER_SW_SEGMENTS; 314 for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS; 315 i += increment) { 316 if (j == hw_points - 1) 317 break; 318 rgb_resulted[j].red = output_tf->tf_pts.red[i]; 319 rgb_resulted[j].green = output_tf->tf_pts.green[i]; 320 rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; 321 j++; 322 } 323 } 324 325 /* last point */ 326 start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS; 327 rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index]; 328 rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; 329 rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; 330 331 arr_points[0].x = dc_fixpt_pow(dc_fixpt_from_int(2), 332 dc_fixpt_from_int(region_start)); 333 arr_points[1].x = dc_fixpt_pow(dc_fixpt_from_int(2), 334 dc_fixpt_from_int(region_end)); 335 336 y_r = rgb_resulted[0].red; 337 y_g = rgb_resulted[0].green; 338 y_b = rgb_resulted[0].blue; 339 340 y1_min = dc_fixpt_min(y_r, dc_fixpt_min(y_g, y_b)); 341 342 arr_points[0].y = y1_min; 343 arr_points[0].slope = dc_fixpt_div(arr_points[0].y, arr_points[0].x); 344 y_r = rgb_resulted[hw_points - 1].red; 345 y_g = rgb_resulted[hw_points - 1].green; 346 y_b = rgb_resulted[hw_points - 1].blue; 347 348 /* see comment above, m_arrPoints[1].y should be the Y value for the 349 * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1) 350 */ 351 y3_max = dc_fixpt_max(y_r, dc_fixpt_max(y_g, y_b)); 352 353 arr_points[1].y = y3_max; 354 355 arr_points[1].slope = dc_fixpt_zero; 356 357 if (output_tf->tf == TRANSFER_FUNCTION_PQ) { 358 /* for PQ, we want to have a straight line from last HW X point, 359 * and the slope to be such that we hit 1.0 at 10000 nits. 360 */ 361 const struct fixed31_32 end_value = 362 dc_fixpt_from_int(125); 363 364 arr_points[1].slope = dc_fixpt_div( 365 dc_fixpt_sub(dc_fixpt_one, arr_points[1].y), 366 dc_fixpt_sub(end_value, arr_points[1].x)); 367 } 368 369 lut_params->hw_points_num = hw_points; 370 371 k = 0; 372 for (i = 1; i < MAX_REGIONS_NUMBER; i++) { 373 if (seg_distr[k] != -1) { 374 lut_params->arr_curve_points[k].segments_num = 375 seg_distr[k]; 376 lut_params->arr_curve_points[i].offset = 377 lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]); 378 } 379 k++; 380 } 381 382 if (seg_distr[k] != -1) 383 lut_params->arr_curve_points[k].segments_num = seg_distr[k]; 384 385 rgb = rgb_resulted; 386 rgb_plus_1 = rgb_resulted + 1; 387 388 i = 1; 389 while (i != hw_points + 1) { 390 if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) 391 rgb_plus_1->red = rgb->red; 392 if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) 393 rgb_plus_1->green = rgb->green; 394 if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) 395 rgb_plus_1->blue = rgb->blue; 396 397 rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red); 398 rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); 399 rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue); 400 401 if (fixpoint == true) { 402 rgb->delta_red_reg = dc_fixpt_clamp_u0d10(rgb->delta_red); 403 rgb->delta_green_reg = dc_fixpt_clamp_u0d10(rgb->delta_green); 404 rgb->delta_blue_reg = dc_fixpt_clamp_u0d10(rgb->delta_blue); 405 rgb->red_reg = dc_fixpt_clamp_u0d14(rgb->red); 406 rgb->green_reg = dc_fixpt_clamp_u0d14(rgb->green); 407 rgb->blue_reg = dc_fixpt_clamp_u0d14(rgb->blue); 408 } 409 410 ++rgb_plus_1; 411 ++rgb; 412 ++i; 413 } 414 cm_helper_convert_to_custom_float(rgb_resulted, 415 lut_params->arr_points, 416 hw_points, fixpoint); 417 418 return true; 419 } 420 421 #define NUM_DEGAMMA_REGIONS 12 422 423 424 bool cm_helper_translate_curve_to_degamma_hw_format( 425 const struct dc_transfer_func *output_tf, 426 struct pwl_params *lut_params) 427 { 428 struct curve_points *arr_points; 429 struct pwl_result_data *rgb_resulted; 430 struct pwl_result_data *rgb; 431 struct pwl_result_data *rgb_plus_1; 432 struct fixed31_32 y_r; 433 struct fixed31_32 y_g; 434 struct fixed31_32 y_b; 435 struct fixed31_32 y1_min; 436 struct fixed31_32 y3_max; 437 438 int32_t region_start, region_end; 439 int32_t i; 440 uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points; 441 442 if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS) 443 return false; 444 445 PERF_TRACE(); 446 447 arr_points = lut_params->arr_points; 448 rgb_resulted = lut_params->rgb_resulted; 449 hw_points = 0; 450 451 memset(lut_params, 0, sizeof(struct pwl_params)); 452 memset(seg_distr, 0, sizeof(seg_distr)); 453 454 region_start = -NUM_DEGAMMA_REGIONS; 455 region_end = 0; 456 457 458 for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++) 459 seg_distr[i] = -1; 460 /* 12 segments 461 * segments are from 2^-12 to 0 462 */ 463 for (i = 0; i < NUM_DEGAMMA_REGIONS ; i++) 464 seg_distr[i] = 4; 465 466 for (k = 0; k < MAX_REGIONS_NUMBER; k++) { 467 if (seg_distr[k] != -1) 468 hw_points += (1 << seg_distr[k]); 469 } 470 471 j = 0; 472 for (k = 0; k < (region_end - region_start); k++) { 473 increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]); 474 start_index = (region_start + k + MAX_LOW_POINT) * 475 NUMBER_SW_SEGMENTS; 476 for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS; 477 i += increment) { 478 if (j == hw_points - 1) 479 break; 480 rgb_resulted[j].red = output_tf->tf_pts.red[i]; 481 rgb_resulted[j].green = output_tf->tf_pts.green[i]; 482 rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; 483 j++; 484 } 485 } 486 487 /* last point */ 488 start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS; 489 rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index]; 490 rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; 491 rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; 492 493 arr_points[0].x = dc_fixpt_pow(dc_fixpt_from_int(2), 494 dc_fixpt_from_int(region_start)); 495 arr_points[1].x = dc_fixpt_pow(dc_fixpt_from_int(2), 496 dc_fixpt_from_int(region_end)); 497 498 y_r = rgb_resulted[0].red; 499 y_g = rgb_resulted[0].green; 500 y_b = rgb_resulted[0].blue; 501 502 y1_min = dc_fixpt_min(y_r, dc_fixpt_min(y_g, y_b)); 503 504 arr_points[0].y = y1_min; 505 arr_points[0].slope = dc_fixpt_div(arr_points[0].y, arr_points[0].x); 506 y_r = rgb_resulted[hw_points - 1].red; 507 y_g = rgb_resulted[hw_points - 1].green; 508 y_b = rgb_resulted[hw_points - 1].blue; 509 510 /* see comment above, m_arrPoints[1].y should be the Y value for the 511 * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1) 512 */ 513 y3_max = dc_fixpt_max(y_r, dc_fixpt_max(y_g, y_b)); 514 515 arr_points[1].y = y3_max; 516 517 arr_points[1].slope = dc_fixpt_zero; 518 519 if (output_tf->tf == TRANSFER_FUNCTION_PQ) { 520 /* for PQ, we want to have a straight line from last HW X point, 521 * and the slope to be such that we hit 1.0 at 10000 nits. 522 */ 523 const struct fixed31_32 end_value = 524 dc_fixpt_from_int(125); 525 526 arr_points[1].slope = dc_fixpt_div( 527 dc_fixpt_sub(dc_fixpt_one, arr_points[1].y), 528 dc_fixpt_sub(end_value, arr_points[1].x)); 529 } 530 531 lut_params->hw_points_num = hw_points; 532 533 k = 0; 534 for (i = 1; i < MAX_REGIONS_NUMBER; i++) { 535 if (seg_distr[k] != -1) { 536 lut_params->arr_curve_points[k].segments_num = 537 seg_distr[k]; 538 lut_params->arr_curve_points[i].offset = 539 lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]); 540 } 541 k++; 542 } 543 544 if (seg_distr[k] != -1) 545 lut_params->arr_curve_points[k].segments_num = seg_distr[k]; 546 547 rgb = rgb_resulted; 548 rgb_plus_1 = rgb_resulted + 1; 549 550 i = 1; 551 while (i != hw_points + 1) { 552 if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) 553 rgb_plus_1->red = rgb->red; 554 if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) 555 rgb_plus_1->green = rgb->green; 556 if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) 557 rgb_plus_1->blue = rgb->blue; 558 559 rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red); 560 rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); 561 rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue); 562 563 ++rgb_plus_1; 564 ++rgb; 565 ++i; 566 } 567 cm_helper_convert_to_custom_float(rgb_resulted, 568 lut_params->arr_points, 569 hw_points, false); 570 571 return true; 572 } 573