diff options
author | Dave Airlie <[email protected]> | 2023-06-15 14:11:22 +1000 |
---|---|---|
committer | Dave Airlie <[email protected]> | 2023-06-15 14:11:22 +1000 |
commit | 901bdf5ea1a836400ee69aa32b04e9c209271ec7 (patch) | |
tree | ccb1851c8a71e776dbccf1ccae132dc9b5f093c6 /drivers/gpu/drm/amd/display/dc/dml/dcn314 | |
parent | ba57b9b11f78530146f02b776854b2b6b6d344a4 (diff) | |
parent | 3b718dcaf163d17fe907ea098c8449e0cd6bc271 (diff) |
Merge tag 'amd-drm-next-6.5-2023-06-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.5-2023-06-02:
amdgpu:
- SR-IOV fixes
- Warning fixes
- Misc code cleanups and spelling fixes
- DCN 3.2 updates
- Improved DC FAMS support for better power management
- Improved DC SubVP support for better power management
- DCN 3.1.x fixes
- Max IB size query
- DC GPU reset fixes
- RAS updates
- DCN 3.0.x fixes
- S/G display fixes
- CP shadow buffer support
- Implement connector force callback
- Z8 power improvements
- PSP 13.0.10 vbflash support
- Mode2 reset fixes
- Store MQDs in VRAM to improve queue switch latency
- VCN 3.x fixes
- JPEG 3.x fixes
- Enable DC_FP on LoongArch
- GFXOFF fixes
- GC 9.4.3 partition support
- SDMA 4.4.2 partition support
- VCN/JPEG 4.0.3 partition support
- VCN 4.0.3 updates
- NBIO 7.9 updates
- GC 9.4.3 updates
- Take NUMA into account when allocating memory
- Handle NUMA for partitions
- SMU 13.0.6 updates
- GC 9.4.3 RAS updates
- Stop including unused swiotlb.h
- SMU 13.0.7 fixes
- Fix clock output ordering on some APUs
- Clean up DC FPGA code
- GFX9 preemption fixes
- Misc irq fixes
- S0ix fixes
- Add new DRM_AMDGPU_WERROR config parameter to help with CI
- PCIe fix for RDNA2
- kdoc fixes
- Documentation updates
amdkfd:
- Query TTM mem limit rather than hardcoding it
- GC 9.4.3 partition support
- Handle NUMA for partitions
radeon:
- Fix possible double free
- Stop including unused swiotlb.h
- Fix possible division by zero
ttm:
- Add query for TTM mem limit
- Add NUMA awareness to pools
- Export ttm_pool_fini()
UAPI:
- Add new ctx query flag to better handle GPU resets
Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22290
- Add new interface to query and set shadow buffer for RDNA3
Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21986
- Add new INFO query for max IB size
Proposed userspace: https://gitlab.freedesktop.org/bnieuwenhuizen/mesa/-/commits/ib-rejection-v3
amd-drm-next-6.5-2023-06-09:
amdgpu:
- S0ix fixes
- Initial SMU13 Overdrive support
- kdoc fixes
- Misc clode cleanups
- Flexible array fixes
- Display OTG fixes
- SMU 13.0.6 updates
- Revert some broken clock counter updates
- Misc display fixes
- GFX9 preemption fixes
- Add support for newer EEPROM bad page table format
- Add missing radeon secondary id
- Add support for new colorspace KMS API
- CSA fix
- Stable pstate fixes for APUs
- make vbl interface admin only
- Handle PCI accelerator class
amdkfd:
- Add debugger support for gdb
radeon:
- Fix possible UAF
drm:
- Add Colorspace functionality
UAPI:
- Add debugger interface for enabling gdb
Proposed userspace: https://github.com/ROCm-Developer-Tools/ROCdbgapi/tree/wip-dbgapi
- Add KMS colorspace API
Discussion: https://lists.freedesktop.org/archives/dri-devel/2023-June/408128.html
From: Alex Deucher <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
Diffstat (limited to 'drivers/gpu/drm/amd/display/dc/dml/dcn314')
3 files changed, 16 insertions, 46 deletions
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index 9e54e3d0eb78..c9afddd11589 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -33,7 +33,7 @@ #include "dml/display_mode_vba.h" struct _vcs_dpi_ip_params_st dcn3_14_ip = { - .VBlankNomDefaultUS = 668, + .VBlankNomDefaultUS = 800, .gpuvm_enable = 1, .gpuvm_max_page_table_levels = 1, .hostvm_enable = 1, @@ -190,8 +190,7 @@ void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p dc_assert_fp_enabled(); // Default clock levels are used for diags, which may lead to overclocking. - if (!IS_DIAG_DC(dc->ctx->dce_environment) && dc->config.use_default_clock_table == false) { - + if (dc->config.use_default_clock_table == false) { dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count; @@ -266,11 +265,7 @@ void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p } dcn20_patch_bounding_box(dc, &dcn3_14_soc); - - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) - dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN314); - else - dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA); + dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN314); } static bool is_dual_plane(enum surface_pixel_format format) @@ -286,6 +281,7 @@ int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *c struct resource_context *res_ctx = &context->res_ctx; struct pipe_ctx *pipe; bool upscaled = false; + const unsigned int max_allowed_vblank_nom = 1023; dc_assert_fp_enabled(); @@ -299,9 +295,11 @@ int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *c pipe = &res_ctx->pipe_ctx[i]; timing = &pipe->stream->timing; - if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min - && pipe->stream->adjust.v_total_min > timing->v_total) - pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min; + pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min; + pipes[pipe_cnt].pipe.dest.vblank_nom = timing->v_total - pipes[pipe_cnt].pipe.dest.vactive; + pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, dcn3_14_ip.VBlankNomDefaultUS); + pipes[pipe_cnt].pipe.dest.vblank_nom = max(pipes[pipe_cnt].pipe.dest.vblank_nom, timing->v_sync_width); + pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, max_allowed_vblank_nom); if (pipe->plane_state && (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height || @@ -323,8 +321,6 @@ int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *c pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; - pipes[pipe_cnt].pipe.dest.vblank_nom = - dcn3_14_ip.VBlankNomDefaultUS / (timing->h_total / (timing->pix_clk_100hz / 10000.0)); pipes[pipe_cnt].pipe.src.dcc_rate = 3; pipes[pipe_cnt].dout.dsc_input_bpc = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c index 7eb2173b7691..1532a7e0ed6c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c @@ -5371,8 +5371,8 @@ void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_ v->TotImmediateFlipBytes = 0.0; for (k = 0; k < v->NumberOfActivePlanes; k++) { v->TotImmediateFlipBytes = v->TotImmediateFlipBytes - + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k] - + v->DPTEBytesPerRow[i][j][k]; + + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k] + + v->DPTEBytesPerRow[i][j][k]); } for (k = 0; k < v->NumberOfActivePlanes; k++) { @@ -7061,7 +7061,7 @@ static double CalculateUrgentLatency( return ret; } -static void UseMinimumDCFCLK( +static noinline_for_stack void UseMinimumDCFCLK( struct display_mode_lib *mode_lib, int MaxPrefetchMode, int ReorderingBytes) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c index ea4eb66066c4..b3e8dc08030c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_rq_dlg_calc_314.c @@ -951,7 +951,6 @@ static void dml_rq_dlg_get_dlg_params( { const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest; - const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout; const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg; const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth; const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps; @@ -1000,8 +999,6 @@ static void dml_rq_dlg_get_dlg_params( unsigned int vupdate_width; unsigned int vready_offset; - unsigned int dispclk_delay_subtotal; - unsigned int vstartup_start; unsigned int dst_x_after_scaler; unsigned int dst_y_after_scaler; @@ -1051,7 +1048,6 @@ static void dml_rq_dlg_get_dlg_params( float vba__refcyc_per_req_delivery_pre_l = get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA float vba__refcyc_per_req_delivery_l = get_refcyc_per_req_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA - int blank_lines = 0; memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); @@ -1075,17 +1071,10 @@ static void dml_rq_dlg_get_dlg_params( min_ttu_vblank = get_min_ttu_vblank_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // From VBA dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start; - disp_dlg_regs->optimized_min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start; - disp_dlg_regs->optimized_min_dst_y_next_start_us = 0; - disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2)); - blank_lines = (dst->vblank_end + dst->vtotal_min - dst->vblank_start - dst->vstartup_start - 1); - if (blank_lines < 0) - blank_lines = 0; - if (blank_lines != 0) { - disp_dlg_regs->optimized_min_dst_y_next_start = vba__min_dst_y_next_start; - disp_dlg_regs->optimized_min_dst_y_next_start_us = (disp_dlg_regs->optimized_min_dst_y_next_start * dst->hactive) / (unsigned int) dst->pixel_rate_mhz; - disp_dlg_regs->min_dst_y_next_start = disp_dlg_regs->optimized_min_dst_y_next_start; - } + disp_dlg_regs->min_dst_y_next_start_us = + (vba__min_dst_y_next_start * dst->hactive) / (unsigned int) dst->pixel_rate_mhz; + disp_dlg_regs->min_dst_y_next_start = vba__min_dst_y_next_start * dml_pow(2, 2); + ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18)); dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank); @@ -1127,13 +1116,6 @@ static void dml_rq_dlg_get_dlg_params( vupdate_offset = dst->vupdate_offset; vupdate_width = dst->vupdate_width; vready_offset = dst->vready_offset; - dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal; - - if (dout->dsc_enable) { - double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // FROM VBA - - dispclk_delay_subtotal += dsc_delay; - } vstartup_start = dst->vstartup_start; if (interlaced) { @@ -1538,14 +1520,6 @@ static void dml_rq_dlg_get_dlg_params( dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); - // hack for FPGA - if (mode_lib->project == DML_PROJECT_DCN31_FPGA) { - if (disp_dlg_regs->vratio_prefetch >= (unsigned int) dml_pow(2, 22)) { - disp_dlg_regs->vratio_prefetch = (unsigned int) dml_pow(2, 22) - 1; - dml_print("vratio_prefetch exceed the max value, the register field is [21:0]\n"); - } - } - disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int) (dst_y_per_row_vblank * (double) htotal * ref_freq_to_pix_freq / (double) dpte_groups_per_row_ub_l); ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)dml_pow(2, 13)); |